/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* #pragma ident	"@(#)dtrace.c	1.49	06/08/11 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Predicate functions
 *   - Enabling functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#define _DTRACE_WANT_PROC_GLUE_ 1

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <kern/zalloc.h>
#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
#if defined(__APPLE__)
#include "../../../osfmk/chud/chud_dtrace.h"
extern kern_return_t chudxnu_dtrace_callback
	(uint64_t selector, uint64_t *args, uint32_t count);
#endif /* __APPLE__ */
/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
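/*
 * Editorial illustration (not from the original source): the per-consumer
 * analogue of a tunable below is typically a dtrace(1M) "-x" option, e.g.
 *
 *   # dtrace -x strsize=512 -n 'syscall::open:entry { trace(copyinstr(arg0)); }'
 *
 * which widens string storage (cf. dtrace_strsize_default) for that one
 * consumer, leaving the system-wide default untouched.
 */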
int		dtrace_destructive_disallow = 0;
#if defined(__APPLE__)
#define proc_t struct proc
#endif /* __APPLE__ */
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t	*dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
#if defined(__APPLE__)
static int		dtrace_dof_mode;	/* dof mode */
#endif /* __APPLE__ */
#if defined(__APPLE__)
/*
 * To save memory, some common memory allocations are given a
 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket. With
 * 20k elements allocated, the space saved is substantial.
 */
struct zone *dtrace_probe_t_zone;
#endif /* __APPLE__ */
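/*
 * Editorial sketch of the zone idiom referenced above, assuming the usual
 * xnu zone interfaces (zinit/zalloc/zfree); this is not code from the
 * original file:
 *
 *	dtrace_probe_t_zone = zinit(sizeof (dtrace_probe_t),
 *	    1024 * sizeof (dtrace_probe_t), sizeof (dtrace_probe_t),
 *	    "dtrace.dtrace_probe_t");
 *	probe = zalloc(dtrace_probe_t_zone);
 *	...
 *	zfree(dtrace_probe_t_zone, probe);
 */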
/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
/*
 * APPLE NOTE:
 *
 * All kmutex_t vars have been changed to lck_mtx_t.
 * Note that lck_mtx_t's require explicit initialization.
 *
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 * ASSERT(MUTEX_HELD(&cpu_lock));
 *	becomes:
 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 *
 * Due to the number of these changes, they are not called out explicitly.
 */
static lck_mtx_t	dtrace_lock;		/* probe state lock */
static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
#if defined(__APPLE__)
static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
#endif /* __APPLE__ */
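/*
 * Editorial illustration of the documented lock ordering (not code from the
 * original file): a path needing all three framework locks would acquire
 * and release them as follows.
 *
 *	lck_mtx_lock(&dtrace_meta_lock);
 *	lck_mtx_lock(&dtrace_provider_lock);
 *	lck_mtx_lock(&dtrace_lock);
 *	...
 *	lck_mtx_unlock(&dtrace_lock);
 *	lck_mtx_unlock(&dtrace_provider_lock);
 *	lck_mtx_unlock(&dtrace_meta_lock);
 */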
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static void
dtrace_nullop(void)
{}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static lck_mtx_t dtrace_errlock;
#endif
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
#define	DTRACE_AGGHASHSIZE_SLEW		17
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if !defined(__APPLE__)
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* XXX just one measly bit */ \
	uint_t thr = (uint_t)current_thread(); \
	uint_t pid = (uint_t)proc_selfpid(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#endif /* __APPLE__ */
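/*
 * Editorial note on the resulting key layout (an illustration, not original
 * code): in the Apple variant, with intr in {0, 1},
 *
 *	key = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) &
 *	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61);
 *
 * so bits 61-63 carry the interrupt state and bits 0-60 carry the biased
 * thread/pid identity; the DIF_VARIABLE_MAX bias keeps any tls-key from
 * ever equaling a DIF variable identifier.
 */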
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
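/*
 * Editorial illustration of DTRACE_STORE (hypothetical locals, not original
 * code): an ECB storing a 64-bit value at a computed offset in its per-CPU
 * buffer would do
 *
 *	DTRACE_STORE(uint64_t, tomax, valoffs, val);
 *
 * where "tomax" is the buffer base, "valoffs" the record offset and "val"
 * the value being recorded.
 */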
#if !defined(__APPLE__)
#if !(defined(__i386__) || defined (__x86_64__))
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

#define	DTRACE_LOADFUNC(bits)						\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	int i;								\
	uint##bits##_t rval;						\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (rval);							\
}
#else /* __APPLE__ */
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (MIN(size,4) - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}

#define	RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );

#define	DTRACE_LOADFUNC(bits)						\
extern vm_offset_t dtraceLoadRecover##bits;				\
uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	int i;								\
	uint##bits##_t rval = 0;					\
	ppnum_t pp;							\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	pp = pmap_find_phys(kernel_pmap, addr);				\
									\
	if (0 == pp || /* pmap_find_phys failed ? */			\
	    !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) {		\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	{								\
	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits;	\
	*flags |= CPU_DTRACE_NOFAULT;					\
	recover = dtrace_set_thread_recover(current_thread(), recover);	\
	rval = *((volatile uint##bits##_t *)addr);			\
	RECOVER_LABEL(bits);						\
	(void)dtrace_set_thread_recover(current_thread(), recover);	\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
	}								\
									\
	return (rval);							\
}
#endif /* __APPLE__ */
#ifdef __LP64__
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif
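/*
 * Editorial illustration (hypothetical, not original code): probe-context
 * code chases pointers through unsafe memory only via the safe loads, e.g.
 *
 *	uintptr_t next = dtrace_loadptr(addr);
 *
 * where a raw dereference of "addr" could fault without the NOFAULT (and,
 * on __APPLE__, recover-label) protection established by dtrace_load*().
 */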
#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
static void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
}
int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}
static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (addr - svar->dtsv_data < svar->dtsv_size &&
		    addr + sz <= svar->dtsv_data + svar->dtsv_size)
			return (1);
	}

	return (0);
}
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	uintptr_t a;
	size_t s;

	/*
	 * First, check to see if the address is in scratch space...
	 */
	a = mstate->dtms_scratch_base;
	s = mstate->dtms_scratch_size;

	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	a = (uintptr_t)vstate->dtvs_dynvars.dtds_base;
	s = vstate->dtvs_dynvars.dtds_size;
	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}
/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++)
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;

	return (len);
}
/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}
/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		dtrace_strcpy(src, dst, type->dtdt_size);
	else
		dtrace_bcopy(src, dst, type->dtdt_size);
}
/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}
/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
	if ((cr = CRED()) != NULL &&
#else
	if ((cr = dtrace_CRED()) != NULL &&
#endif /* __APPLE__ */
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
	if ((cr = CRED()) != NULL &&
	    s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
#else
	return 1; /* Darwin doesn't do zones. */
#endif /* __APPLE__ */
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
#if !defined(__APPLE__)
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}
#else
static int
dtrace_priv_proc_common_nocd(void)
{
	return 1; /* Darwin omits "No Core Dump" flag. */
}
#endif /* __APPLE__ */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	int i, work = 0;

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		ASSERT(dcpu->dtdsc_rinsing == NULL);

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		/*
		 * If the clean list is non-NULL, then we're not going to do
		 * any work for this CPU -- it means that there has not been
		 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
		 * since the last time we cleaned house.
		 */
		if (dcpu->dtdsc_clean != NULL)
			continue;

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			dcpu->dtdsc_rinsing = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
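/*
 * Editorial summary of the list lifecycle implemented above and in
 * dtrace_dynvar() below (described authoritatively in <sys/dtrace_impl.h>):
 *
 *	free -> in use		(allocation in dtrace_dynvar())
 *	in use -> dirty		(deallocation in dtrace_dynvar())
 *	dirty -> rinsing	(moved aside above, before dtrace_sync())
 *	rinsing -> clean	(above, once no CPU can hold a stale pointer)
 *	clean -> free		(swung by the allocator in dtrace_dynvar())
 */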
/*
 * Depending on the value of the op parameter, this function looks-up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);
	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);
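	/*
	 * Editorial note: the canonical 8-bit Jenkins "One-at-a-time" round,
	 * of which the above is the 16-bit-chunked variant, is
	 *
	 *	hash += data[j];
	 *	hash += (hash << 10);
	 *	hash ^= (hash >> 6);
	 *
	 * with the same << 3 / >> 11 / << 15 finalization as applied above.
	 */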
	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;
	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}
top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));
	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}
	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}
	/*
	 * We need to allocate a new dynamic variable.  The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize).  We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple.  Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof.  (That is, it must not be able to be fooled by
	 * malicious DIF.)  Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}
	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a) The clean list was switched to the
				 *      free list by another CPU.
				 *
				 *  (b) The clean list was added to by the
				 *      cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to the free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}
1602         ASSERT(dvar
->dtdv_hashval 
== DTRACE_DYNHASH_FREE
); 
1603         dvar
->dtdv_hashval 
= hashval
; 
1604         dvar
->dtdv_next 
= start
; 
1606         if (dtrace_casptr(&hash
[bucket
].dtdh_chain
, start
, dvar
) == start
) 
1610          * The cas has failed.  Either another CPU is adding an element to 
1611          * this hash chain, or another CPU is deleting an element from this 
1612          * hash chain.  The simplest way to deal with both of these cases 
1613          * (though not necessarily the most efficient) is to free our 
1614          * allocated block and tail-call ourselves.  Note that the free is 
1615          * to the dirty list and _not_ to the free list.  This is to prevent 
1616          * races with allocators, above. 
1618         dvar
->dtdv_hashval 
= DTRACE_DYNHASH_FREE
; 
1620         dtrace_membar_producer(); 
1623                 free 
= dcpu
->dtdsc_dirty
; 
1624                 dvar
->dtdv_next 
= free
; 
1625         } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, free
, dvar
) != free
); 
1627         return (dtrace_dynvar(dstate
, nkeys
, key
, dsize
, op
)); 
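
/*
 * Layout of a dynamic variable chunk as initialized above (an illustrative
 * sketch of the arithmetic, not an authoritative structure diagram):
 *
 *      dvar ------> dtrace_dynvar_t header (dtdv_hashval, dtdv_next, ...)
 *                   dtdv_tuple.dtt_key[0 .. nkeys - 1]
 *      kdata -----> referenced key data, each key's bytes rounded up to an
 *                   8-byte boundary by P2ROUNDUP(kesize, sizeof (uint64_t))
 *      dtdv_data -> dsize bytes of variable storage (kdata + ksize)
 *
 * For example, a 13-byte string key consumes P2ROUNDUP(13, 8) == 16 bytes
 * of key data space.
 */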

/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
        if (nval < *oval)
                *oval = nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
        if (nval > *oval)
                *oval = nval;
}

static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
        int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
        int64_t val = (int64_t)nval;

        if (val < 0) {
                for (i = 0; i < zero; i++) {
                        if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
                                quanta[i] += incr;
                                return;
                        }
                }
        } else {
                for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
                        if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
                                quanta[i - 1] += incr;
                                return;
                        }
                }

                quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
                return;
        }

        ASSERT(0);
}

static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
        uint64_t arg = *lquanta++;
        int32_t base = DTRACE_LQUANTIZE_BASE(arg);
        uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
        uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
        int32_t val = (int32_t)nval, level;

        ASSERT(step != 0);
        ASSERT(levels != 0);

        if (val < base) {
                /*
                 * This is an underflow.
                 */
                lquanta[0] += incr;
                return;
        }

        level = (val - base) / step;

        if (level < levels) {
                lquanta[level + 1] += incr;
                return;
        }

        /*
         * This is an overflow.
         */
        lquanta[levels + 1] += incr;
}

/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
        data[0]++;
        data[1] += nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
        *oval = *oval + 1;
}

/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
        *oval += nval;
}
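
/*
 * Worked example for dtrace_aggregate_lquantize() above (illustrative only):
 * with base = 0, step = 10 and levels = 10, lquanta[0] counts underflows
 * (val < 0), lquanta[1] through lquanta[10] count the ranges [0, 10),
 * [10, 20), ..., [90, 100), and lquanta[11] counts overflows (val >= 100).
 * A value of 42 yields level = (42 - 0) / 10 = 4, so lquanta[5] += incr.
 */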

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
        dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
        uint32_t i, ndx, size, fsize;
        uint32_t align = sizeof (uint64_t) - 1;
        dtrace_aggbuffer_t *agb;
        dtrace_aggkey_t *key;
        uint32_t hashval = 0, limit, isstr;
        caddr_t tomax, data, kdata;
        dtrace_actkind_t action;
        dtrace_action_t *act;
        uintptr_t offs;

        if (buf == NULL)
                return;

        if (!agg->dtag_hasarg) {
                /*
                 * Currently, only quantize() and lquantize() take additional
                 * arguments, and they have the same semantics:  an increment
                 * value that defaults to 1 when not present.  If additional
                 * aggregating actions take arguments, the setting of the
                 * default argument value will presumably have to become more
                 * sophisticated...
                 */
                arg = 1;
        }

        action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
        size = rec->dtrd_offset - agg->dtag_base;
        fsize = size + rec->dtrd_size;

        ASSERT(dbuf->dtb_tomax != NULL);
        data = dbuf->dtb_tomax + offset + agg->dtag_base;

        if ((tomax = buf->dtb_tomax) == NULL) {
                dtrace_buffer_drop(buf);
                return;
        }

        /*
         * The metastructure is always at the bottom of the buffer.
         */
        agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
            sizeof (dtrace_aggbuffer_t));

        if (buf->dtb_offset == 0) {
                /*
                 * We just kludge up approximately 1/8th of the size to be
                 * buckets.  If this guess ends up being routinely
                 * off-the-mark, we may need to dynamically readjust this
                 * based on past performance.
                 */
                uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

                if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
                    (uintptr_t)tomax || hashsize == 0) {
                        /*
                         * We've been given a ludicrously small buffer;
                         * increment our drop count and leave.
                         */
                        dtrace_buffer_drop(buf);
                        return;
                }

                /*
                 * And now, a pathetic attempt to try to get an odd (or
                 * perchance, a prime) hash size for better hash distribution.
                 */
                if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
                        hashsize -= DTRACE_AGGHASHSIZE_SLEW;

                agb->dtagb_hashsize = hashsize;
                agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
                    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
                agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

                for (i = 0; i < agb->dtagb_hashsize; i++)
                        agb->dtagb_hash[i] = NULL;
        }

        ASSERT(agg->dtag_first != NULL);
        ASSERT(agg->dtag_first->dta_intuple);

        /*
         * Calculate the hash value based on the key.  Note that we _don't_
         * include the aggid in the hashing (but we will store it as part of
         * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
         * algorithm: a simple, quick algorithm that has no known funnels, and
         * gets good distribution in practice.  The efficacy of the hashing
         * algorithm (and a comparison with other algorithms) may be found by
         * running the ::dtrace_aggstat MDB dcmd.
         */
        for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
                i = act->dta_rec.dtrd_offset - agg->dtag_base;
                limit = i + act->dta_rec.dtrd_size;
                ASSERT(limit <= size);
                isstr = DTRACEACT_ISSTRING(act);

                for (; i < limit; i++) {
                        hashval += data[i];
                        hashval += (hashval << 10);
                        hashval ^= (hashval >> 6);

                        if (isstr && data[i] == '\0')
                                break;
                }
        }

        hashval += (hashval << 3);
        hashval ^= (hashval >> 11);
        hashval += (hashval << 15);

        /*
         * Yes, the divide here is expensive -- but it's generally the least
         * of the performance issues given the amount of data that we iterate
         * over to compute hash values, compare data, etc.
         */
        ndx = hashval % agb->dtagb_hashsize;

        for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
                ASSERT((caddr_t)key >= tomax);
                ASSERT((caddr_t)key < tomax + buf->dtb_size);

                if (hashval != key->dtak_hashval || key->dtak_size != size)
                        continue;

                kdata = key->dtak_data;
                ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

                for (act = agg->dtag_first; act->dta_intuple;
                    act = act->dta_next) {
                        i = act->dta_rec.dtrd_offset - agg->dtag_base;
                        limit = i + act->dta_rec.dtrd_size;
                        ASSERT(limit <= size);
                        isstr = DTRACEACT_ISSTRING(act);

                        for (; i < limit; i++) {
                                if (kdata[i] != data[i])
                                        goto next;

                                if (isstr && data[i] == '\0')
                                        break;
                        }
                }

                if (action != key->dtak_action) {
                        /*
                         * We are aggregating on the same value in the same
                         * aggregation with two different aggregating actions.
                         * (This should have been picked up in the compiler,
                         * so we may be dealing with errant or devious DIF.)
                         * This is an error condition; we indicate as much,
                         * and return.
                         */
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                        return;
                }

                /*
                 * This is a hit:  we need to apply the aggregator to
                 * the value at this key.
                 */
                agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
                return;
next:
                continue;
        }

        /*
         * We didn't find it.  We need to allocate some zero-filled space,
         * link it into the hash table appropriately, and apply the aggregator
         * to the (zero-filled) value.
         */
        offs = buf->dtb_offset;
        while (offs & (align - 1))
                offs += sizeof (uint32_t);

        /*
         * If we don't have enough room to both allocate a new key _and_
         * its associated data, increment the drop count and return.
         */
        if ((uintptr_t)tomax + offs + fsize >
            agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
                dtrace_buffer_drop(buf);
                return;
        }

        /*CONSTCOND*/
        ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
        key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
        agb->dtagb_free -= sizeof (dtrace_aggkey_t);

        key->dtak_data = kdata = tomax + offs;
        buf->dtb_offset = offs + fsize;

        /*
         * Now copy the data across.
         */
        *((dtrace_aggid_t *)kdata) = agg->dtag_id;

        for (i = sizeof (dtrace_aggid_t); i < size; i++)
                kdata[i] = data[i];

        /*
         * Because strings are not zeroed out by default, we need to iterate
         * looking for actions that store strings, and we need to explicitly
         * pad these strings out with zeroes.
         */
        for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
                int nul;

                if (!DTRACEACT_ISSTRING(act))
                        continue;

                i = act->dta_rec.dtrd_offset - agg->dtag_base;
                limit = i + act->dta_rec.dtrd_size;
                ASSERT(limit <= size);

                for (nul = 0; i < limit; i++) {
                        if (nul) {
                                kdata[i] = '\0';
                                continue;
                        }

                        if (data[i] != '\0')
                                continue;

                        nul = 1;
                }
        }

        for (i = size; i < fsize; i++)
                kdata[i] = 0;

        key->dtak_hashval = hashval;
        key->dtak_size = size;
        key->dtak_action = action;
        key->dtak_next = agb->dtagb_hash[ndx];
        agb->dtagb_hash[ndx] = key;

        /*
         * Finally, apply the aggregator.
         */
        *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
        agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
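
/*
 * The hash computed in dtrace_aggregate() above is Bob Jenkins'
 * "one-at-a-time" hash.  A self-contained sketch of the same algorithm over
 * a plain byte buffer follows; it is compiled out, the function name is
 * hypothetical, and the in-kernel code additionally interleaves the
 * string-termination check that this sketch omits:
 */
#if 0
static uint32_t
jenkins_one_at_a_time(const uint8_t *data, size_t len)
{
        uint32_t hash = 0;
        size_t i;

        for (i = 0; i < len; i++) {
                hash += data[i];        /* mix in one byte at a time */
                hash += (hash << 10);
                hash ^= (hash >> 6);
        }

        hash += (hash << 3);            /* final avalanche */
        hash ^= (hash >> 11);
        hash += (hash << 15);

        return (hash);
}
#endif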

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
        int i = 0;
        dtrace_speculation_state_t current;
        uint32_t *stat = &state->dts_speculations_unavail, count;

        while (i < state->dts_nspeculations) {
                dtrace_speculation_t *spec = &state->dts_speculations[i];

                current = spec->dtsp_state;

                if (current != DTRACESPEC_INACTIVE) {
                        if (current == DTRACESPEC_COMMITTINGMANY ||
                            current == DTRACESPEC_COMMITTING ||
                            current == DTRACESPEC_DISCARDING)
                                stat = &state->dts_speculations_busy;

                        i++;
                        continue;
                }

                if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
                    current, DTRACESPEC_ACTIVE) == current)
                        return (i + 1);
        }

        /*
         * We couldn't find a speculation.  If we found as much as a single
         * busy speculation buffer, we'll attribute this failure as "busy"
         * instead of "unavail".
         */
        do {
                count = *stat;
        } while (dtrace_cas32(stat, count, count + 1) != count);

        return (0);
}
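
/*
 * The counter bump that ends dtrace_speculation() is the usual lock-free
 * increment: re-read the counter until a compare-and-swap installs count + 1
 * without interference.  In isolation (compiled-out sketch; the name is
 * hypothetical, and GCC/clang's __sync builtin stands in for dtrace_cas32()):
 */
#if 0
static void
atomic_inc32(volatile uint32_t *stat)
{
        uint32_t count;

        do {
                count = *stat;  /* snapshot; retry if it moves under us */
        } while (__sync_val_compare_and_swap(stat, count, count + 1) != count);
}
#endif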

/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
        dtrace_speculation_t *spec;
        dtrace_buffer_t *src, *dest;
        uintptr_t daddr, saddr, dlimit;
        dtrace_speculation_state_t current, new;
        intptr_t offs;

        if (which == 0)
                return;

        if (which > state->dts_nspeculations) {
                cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
                return;
        }

        spec = &state->dts_speculations[which - 1];
        src = &spec->dtsp_buffer[cpu];
        dest = &state->dts_buffer[cpu];

        do {
                current = spec->dtsp_state;

                if (current == DTRACESPEC_COMMITTINGMANY)
                        break;

                switch (current) {
                case DTRACESPEC_INACTIVE:
                case DTRACESPEC_DISCARDING:
                        return;

                case DTRACESPEC_COMMITTING:
                        /*
                         * This is only possible if we are (a) commit()'ing
                         * without having done a prior speculate() on this CPU
                         * and (b) racing with another commit() on a different
                         * CPU.  There's nothing to do -- we just assert that
                         * our offset is 0.
                         */
                        ASSERT(src->dtb_offset == 0);
                        return;

                case DTRACESPEC_ACTIVE:
                        new = DTRACESPEC_COMMITTING;
                        break;

                case DTRACESPEC_ACTIVEONE:
                        /*
                         * This speculation is active on one CPU.  If our
                         * buffer offset is non-zero, we know that the one CPU
                         * must be us.  Otherwise, we are committing on a
                         * different CPU from the speculate(), and we must
                         * rely on being asynchronously cleaned.
                         */
                        if (src->dtb_offset != 0) {
                                new = DTRACESPEC_COMMITTING;
                                break;
                        }
                        /*FALLTHROUGH*/

                case DTRACESPEC_ACTIVEMANY:
                        new = DTRACESPEC_COMMITTINGMANY;
                        break;

                default:
                        ASSERT(0);
                }
        } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
            current, new) != current);

        /*
         * We have set the state to indicate that we are committing this
         * speculation.  Now reserve the necessary space in the destination
         * buffer.
         */
        if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
            sizeof (uint64_t), state, NULL)) < 0) {
                dtrace_buffer_drop(dest);
                goto out;
        }

        /*
         * We have the space; copy the buffer across.  (Note that this is a
         * highly suboptimal bcopy(); in the unlikely event that this becomes
         * a serious performance issue, a high-performance DTrace-specific
         * bcopy() should obviously be invented.)
         */
        daddr = (uintptr_t)dest->dtb_tomax + offs;
        dlimit = daddr + src->dtb_offset;
        saddr = (uintptr_t)src->dtb_tomax;

        /*
         * First, the aligned portion.
         */
        while (dlimit - daddr >= sizeof (uint64_t)) {
                *((uint64_t *)daddr) = *((uint64_t *)saddr);

                daddr += sizeof (uint64_t);
                saddr += sizeof (uint64_t);
        }

        /*
         * Now any left-over bit...
         */
        while (dlimit - daddr)
                *((uint8_t *)daddr++) = *((uint8_t *)saddr++);

        /*
         * Finally, commit the reserved space in the destination buffer.
         */
        dest->dtb_offset = offs + src->dtb_offset;

out:
        /*
         * If we're lucky enough to be the only active CPU on this speculation
         * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
         */
        if (current == DTRACESPEC_ACTIVE ||
            (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
                uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
                    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

                ASSERT(rval == DTRACESPEC_COMMITTING);
        }

        src->dtb_offset = 0;
        src->dtb_xamot_drops += src->dtb_drops;
        src->dtb_drops = 0;
}
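
/*
 * The two copy loops in dtrace_speculation_commit() move the bulk of the
 * buffer in 64-bit words and mop up any remainder a byte at a time.  A
 * compiled-out sketch of the same idea (hypothetical name; like the caller,
 * it assumes the destination reservation is 64-bit aligned):
 */
#if 0
static void
copy64_then_bytes(void *dst, const void *src, size_t len)
{
        uintptr_t daddr = (uintptr_t)dst;
        uintptr_t saddr = (uintptr_t)src;
        uintptr_t dlimit = daddr + len;

        /* Aligned portion, one 64-bit word at a time. */
        while (dlimit - daddr >= sizeof (uint64_t)) {
                *(uint64_t *)daddr = *(uint64_t *)saddr;
                daddr += sizeof (uint64_t);
                saddr += sizeof (uint64_t);
        }

        /* Any left-over bytes. */
        while (dlimit - daddr)
                *(uint8_t *)daddr++ = *(uint8_t *)saddr++;
}
#endif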

/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
        dtrace_speculation_t *spec;
        dtrace_speculation_state_t current, new;
        dtrace_buffer_t *buf;

        if (which == 0)
                return;

        if (which > state->dts_nspeculations) {
                cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
                return;
        }

        spec = &state->dts_speculations[which - 1];
        buf = &spec->dtsp_buffer[cpu];

        do {
                current = spec->dtsp_state;

                switch (current) {
                case DTRACESPEC_INACTIVE:
                case DTRACESPEC_COMMITTINGMANY:
                case DTRACESPEC_COMMITTING:
                case DTRACESPEC_DISCARDING:
                        return;

                case DTRACESPEC_ACTIVE:
                case DTRACESPEC_ACTIVEMANY:
                        new = DTRACESPEC_DISCARDING;
                        break;

                case DTRACESPEC_ACTIVEONE:
                        if (buf->dtb_offset != 0) {
                                new = DTRACESPEC_INACTIVE;
                        } else {
                                new = DTRACESPEC_DISCARDING;
                        }
                        break;

                default:
                        ASSERT(0);
                }
        } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
            current, new) != current);

        buf->dtb_offset = 0;
        buf->dtb_drops = 0;
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
        dtrace_icookie_t cookie;
        processorid_t cpu = CPU->cpu_id;
        dtrace_buffer_t *dest = &state->dts_buffer[cpu];
        dtrace_specid_t i;

        cookie = dtrace_interrupt_disable();

        if (dest->dtb_tomax == NULL) {
                dtrace_interrupt_enable(cookie);
                return;
        }

        for (i = 0; i < state->dts_nspeculations; i++) {
                dtrace_speculation_t *spec = &state->dts_speculations[i];
                dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

                if (src->dtb_tomax == NULL)
                        continue;

                if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
                        src->dtb_offset = 0;
                        continue;
                }

                if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
                        continue;

                if (src->dtb_offset == 0)
                        continue;

                dtrace_speculation_commit(state, cpu, i + 1);
        }

        dtrace_interrupt_enable(cookie);
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
        int work = 0, rv;
        dtrace_specid_t i;

        for (i = 0; i < state->dts_nspeculations; i++) {
                dtrace_speculation_t *spec = &state->dts_speculations[i];

                ASSERT(!spec->dtsp_cleaning);

                if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
                    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
                        continue;

                work++;
                spec->dtsp_cleaning = 1;
        }

        if (!work)
                return;

        dtrace_xcall(DTRACE_CPUALL,
            (dtrace_xcall_t)dtrace_speculation_clean_here, state);

        /*
         * We now know that all CPUs have committed or discarded their
         * speculation buffers, as appropriate.  We can now set the state
         * to inactive.
         */
        for (i = 0; i < state->dts_nspeculations; i++) {
                dtrace_speculation_t *spec = &state->dts_speculations[i];
                dtrace_speculation_state_t current, new;

                if (!spec->dtsp_cleaning)
                        continue;

                current = spec->dtsp_state;
                ASSERT(current == DTRACESPEC_DISCARDING ||
                    current == DTRACESPEC_COMMITTINGMANY);

                new = DTRACESPEC_INACTIVE;

                rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
                ASSERT(rv == current);
                spec->dtsp_cleaning = 0;
        }
}
2330  * Called as part of a speculate() to get the speculative buffer associated 
2331  * with a given speculation.  Returns NULL if the specified speculation is not 
2332  * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and 
2333  * the active CPU is not the specified CPU -- the speculation will be 
2334  * atomically transitioned into the ACTIVEMANY state. 
2336 static dtrace_buffer_t 
* 
2337 dtrace_speculation_buffer(dtrace_state_t 
*state
, processorid_t cpuid
, 
2338     dtrace_specid_t which
) 
2340         dtrace_speculation_t 
*spec
; 
2341         dtrace_speculation_state_t current
, new; 
2342         dtrace_buffer_t 
*buf
; 
2347         if (which 
> state
->dts_nspeculations
) { 
2348                 cpu_core
[cpuid
].cpuc_dtrace_flags 
|= CPU_DTRACE_ILLOP
; 
2352         spec 
= &state
->dts_speculations
[which 
- 1]; 
2353         buf 
= &spec
->dtsp_buffer
[cpuid
]; 
2356                 current 
= spec
->dtsp_state
; 
2359                 case DTRACESPEC_INACTIVE
: 
2360                 case DTRACESPEC_COMMITTINGMANY
: 
2361                 case DTRACESPEC_DISCARDING
: 
2364                 case DTRACESPEC_COMMITTING
: 
2365                         ASSERT(buf
->dtb_offset 
== 0); 
2368                 case DTRACESPEC_ACTIVEONE
: 
2370                          * This speculation is currently active on one CPU. 
2371                          * Check the offset in the buffer; if it's non-zero, 
2372                          * that CPU must be us (and we leave the state alone). 
2373                          * If it's zero, assume that we're starting on a new 
2374                          * CPU -- and change the state to indicate that the 
2375                          * speculation is active on more than one CPU. 
2377                         if (buf
->dtb_offset 
!= 0) 
2380                         new = DTRACESPEC_ACTIVEMANY
; 
2383                 case DTRACESPEC_ACTIVEMANY
: 
2386                 case DTRACESPEC_ACTIVE
: 
2387                         new = DTRACESPEC_ACTIVEONE
; 
2393         } while (dtrace_cas32((uint32_t *)&spec
->dtsp_state
, 
2394             current
, new) != current
); 
2396         ASSERT(new == DTRACESPEC_ACTIVEONE 
|| new == DTRACESPEC_ACTIVEMANY
); 
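
/*
 * Condensed summary of the speculation state machine implemented by
 * dtrace_speculation(), dtrace_speculation_buffer(),
 * dtrace_speculation_commit() and dtrace_speculation_discard() above
 * (an editorial summary; the authoritative diagram is in
 * <sys/dtrace_impl.h>):
 *
 *      INACTIVE   --speculation()-->                ACTIVE
 *      ACTIVE     --first speculate()-->            ACTIVEONE
 *      ACTIVEONE  --speculate(), other CPU-->       ACTIVEMANY
 *      ACTIVE / ACTIVEONE (local) --commit()-->     COMMITTING --> INACTIVE
 *      ACTIVEONE (remote) / ACTIVEMANY --commit()-> COMMITTINGMANY
 *      ACTIVEONE (local)  --discard()-->            INACTIVE
 *      ACTIVE / ACTIVEONE (remote) / ACTIVEMANY --discard()--> DISCARDING
 *      COMMITTINGMANY / DISCARDING --cleaner-->     INACTIVE
 */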

/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
        /*
         * If we're accessing one of the uncached arguments, we'll turn this
         * into a reference in the args array.
         */
        if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
                ndx = v - DIF_VAR_ARG0;
                v = DIF_VAR_ARGS;
        }

        switch (v) {
        case DIF_VAR_ARGS:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
                if (ndx >= sizeof (mstate->dtms_arg) /
                    sizeof (mstate->dtms_arg[0])) {
#if !defined(__APPLE__)
                        int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
                        /* Account for introduction of __dtrace_probe() on xnu. */
                        int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */
                        dtrace_provider_t *pv;
                        uint64_t val;

                        pv = mstate->dtms_probe->dtpr_provider;
                        if (pv->dtpv_pops.dtps_getargval != NULL)
                                val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
                                    mstate->dtms_probe->dtpr_id,
                                    mstate->dtms_probe->dtpr_arg, ndx, aframes);
#if defined(__APPLE__)
                        /* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */
                        else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
                                return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval;
                        }
#endif /* __APPLE__ */
                        else
                                val = dtrace_getarg(ndx, aframes);

                        /*
                         * This is regrettably required to keep the compiler
                         * from tail-optimizing the call to dtrace_getarg().
                         * The condition always evaluates to true, but the
                         * compiler has no way of figuring that out a priori.
                         * (None of this would be necessary if the compiler
                         * could be relied upon to _always_ tail-optimize
                         * the call to dtrace_getarg() -- but it can't.)
                         */
                        if (mstate->dtms_probe != NULL)
                                return (val);

                        ASSERT(0);
                }

                return (mstate->dtms_arg[ndx]);

#if !defined(__APPLE__)
        case DIF_VAR_UREGS: {
                klwp_t *lwp;

                if (!dtrace_priv_proc(state))
                        return (0);

                if ((lwp = curthread->t_lwp) == NULL) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
                        return (0);
                }

                return (dtrace_getreg(lwp->lwp_regs, ndx));
        }
#else
        case DIF_VAR_UREGS: {
                thread_t thread;

                if (!dtrace_priv_proc(state))
                        return (0);

                if ((thread = current_thread()) == NULL) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0;
                        return (0);
                }

                return (dtrace_getreg(find_user_regs(thread), ndx));
        }
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_CURTHREAD:
                if (!dtrace_priv_kernel(state))
                        return (0);
                return ((uint64_t)(uintptr_t)curthread);
#else
        case DIF_VAR_CURTHREAD:
                if (!dtrace_priv_kernel(state))
                        return (0);
                return ((uint64_t)(uintptr_t)current_thread());
#endif /* __APPLE__ */

        case DIF_VAR_TIMESTAMP:
                if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
                        mstate->dtms_timestamp = dtrace_gethrtime();
                        mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
                }
                return (mstate->dtms_timestamp);

#if !defined(__APPLE__)
        case DIF_VAR_VTIMESTAMP:
                ASSERT(dtrace_vtime_references != 0);
                return (curthread->t_dtrace_vtime);
#else
        case DIF_VAR_VTIMESTAMP:
                ASSERT(dtrace_vtime_references != 0);
                return (dtrace_get_thread_vtime(current_thread()));
#endif /* __APPLE__ */

        case DIF_VAR_WALLTIMESTAMP:
                if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
                        mstate->dtms_walltimestamp = dtrace_gethrestime();
                        mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
                }
                return (mstate->dtms_walltimestamp);

        case DIF_VAR_IPL:
                if (!dtrace_priv_kernel(state))
                        return (0);
                if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
                        mstate->dtms_ipl = dtrace_getipl();
                        mstate->dtms_present |= DTRACE_MSTATE_IPL;
                }
                return (mstate->dtms_ipl);

        case DIF_VAR_EPID:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
                return (mstate->dtms_epid);

        case DIF_VAR_ID:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
                return (mstate->dtms_probe->dtpr_id);

        case DIF_VAR_STACKDEPTH:
                if (!dtrace_priv_kernel(state))
                        return (0);
                if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
#if !defined(__APPLE__)
                        int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
                        /* Account for introduction of __dtrace_probe() on xnu. */
                        int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */

                        mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
                        mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
                }
                return (mstate->dtms_stackdepth);

        case DIF_VAR_USTACKDEPTH:
                if (!dtrace_priv_proc(state))
                        return (0);
                if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
                        /*
                         * See comment in DIF_VAR_PID.
                         */
                        if (DTRACE_ANCHORED(mstate->dtms_probe) &&
                            CPU_ON_INTR(CPU)) {
                                mstate->dtms_ustackdepth = 0;
                        } else {
                                DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
                                mstate->dtms_ustackdepth =
                                    dtrace_getustackdepth();
                                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
                        }
                        mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
                }
                return (mstate->dtms_ustackdepth);

        case DIF_VAR_CALLER:
                if (!dtrace_priv_kernel(state))
                        return (0);
                if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
#if !defined(__APPLE__)
                        int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
                        /* Account for introduction of __dtrace_probe() on xnu. */
                        int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */

                        if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
                                /*
                                 * If this is an unanchored probe, we are
                                 * required to go through the slow path:
                                 * dtrace_caller() only guarantees correct
                                 * results for anchored probes.
                                 */
                                pc_t caller[2];

                                dtrace_getpcstack(caller, 2, aframes,
                                    (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
                                mstate->dtms_caller = caller[1];
                        } else if ((mstate->dtms_caller =
                            dtrace_caller(aframes)) == -1) {
                                /*
                                 * We have failed to do this the quick way;
                                 * we must resort to the slower approach of
                                 * calling dtrace_getpcstack().
                                 */
                                pc_t caller;

                                dtrace_getpcstack(&caller, 1, aframes, NULL);
                                mstate->dtms_caller = caller;
                        }

                        mstate->dtms_present |= DTRACE_MSTATE_CALLER;
                }
                return (mstate->dtms_caller);

        case DIF_VAR_UCALLER:
                if (!dtrace_priv_proc(state))
                        return (0);

                if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
                        uint64_t ustack[3];

                        /*
                         * dtrace_getupcstack() fills in the first uint64_t
                         * with the current PID.  The second uint64_t will
                         * be the program counter at user-level.  The third
                         * uint64_t will contain the caller, which is what
                         * we're after.
                         */
                        ustack[2] = NULL;
                        dtrace_getupcstack(ustack, 3);
                        mstate->dtms_ucaller = ustack[2];
                        mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
                }

                return (mstate->dtms_ucaller);

        case DIF_VAR_PROBEPROV:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
                return ((uint64_t)(uintptr_t)
                    mstate->dtms_probe->dtpr_provider->dtpv_name);

        case DIF_VAR_PROBEMOD:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
                return ((uint64_t)(uintptr_t)
                    mstate->dtms_probe->dtpr_mod);

        case DIF_VAR_PROBEFUNC:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
                return ((uint64_t)(uintptr_t)
                    mstate->dtms_probe->dtpr_func);

        case DIF_VAR_PROBENAME:
                ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
                return ((uint64_t)(uintptr_t)
                    mstate->dtms_probe->dtpr_name);

#if !defined(__APPLE__)
        case DIF_VAR_PID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * Note that we are assuming that an unanchored probe is
                 * always due to a high-level interrupt.  (And we're assuming
                 * that there is only a single high level interrupt.)
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (pid0.pid_id);

                /*
                 * It is always safe to dereference one's own t_procp pointer:
                 * it always points to a valid, allocated proc structure.
                 * Further, it is always safe to dereference the p_pidp member
                 * of one's own proc structure.  (These are truisms because
                 * threads and processes don't clean up their own state --
                 * they leave that task to whomever reaps them.)
                 */
                return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
#else
        case DIF_VAR_PID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * Note that we are assuming that an unanchored probe is
                 * always due to a high-level interrupt.  (And we're assuming
                 * that there is only a single high level interrupt.)
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        /* Anchored probe that fires while on an interrupt accrues to process 0 */
                        return 0;

                return ((uint64_t)proc_selfpid());
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_PPID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (pid0.pid_id);

                return ((uint64_t)curthread->t_procp->p_ppid);
#else
        case DIF_VAR_PPID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                return ((uint64_t)(uintptr_t)(current_proc()->p_ppid));
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_TID:
                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                return ((uint64_t)curthread->t_tid);
#else
        case DIF_VAR_TID:
                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_EXECNAME:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return ((uint64_t)(uintptr_t)p0.p_user.u_comm);

                /*
                 * It is always safe to dereference one's own t_procp pointer:
                 * it always points to a valid, allocated proc structure.
                 * (This is true because threads don't clean up their own
                 * state -- they leave that task to whomever reaps them.)
                 */
                return ((uint64_t)(uintptr_t)
                    curthread->t_procp->p_user.u_comm);
#else
        case DIF_VAR_EXECNAME:
        {
                char *xname = (char *)mstate->dtms_scratch_ptr;
                size_t scratch_size = MAXCOMLEN+1;

                /* The scratch allocation's lifetime is that of the clause. */
                if (mstate->dtms_scratch_ptr + scratch_size >
                    mstate->dtms_scratch_base + mstate->dtms_scratch_size)
                        return 0;

                if (!dtrace_priv_proc(state))
                        return (0);

                mstate->dtms_scratch_ptr += scratch_size;
                proc_selfname( xname, MAXCOMLEN );

                return ((uint64_t)(uintptr_t)xname);
        }
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_ZONENAME:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);

                /*
                 * It is always safe to dereference one's own t_procp pointer:
                 * it always points to a valid, allocated proc structure.
                 * (This is true because threads don't clean up their own
                 * state -- they leave that task to whomever reaps them.)
                 */
                return ((uint64_t)(uintptr_t)
                    curthread->t_procp->p_zone->zone_name);
#else
        case DIF_VAR_ZONENAME:
                if (!dtrace_priv_proc(state))
                        return (0);

                return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_UID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return ((uint64_t)p0.p_cred->cr_uid);

                return ((uint64_t)curthread->t_cred->cr_uid);
#else
        case DIF_VAR_UID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                if (dtrace_CRED() != NULL)
                        return ((uint64_t)kauth_getuid());
                else
                        return -1LL;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_GID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return ((uint64_t)p0.p_cred->cr_gid);

                return ((uint64_t)curthread->t_cred->cr_gid);
#else
        case DIF_VAR_GID:
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                if (dtrace_CRED() != NULL)
                        return ((uint64_t)kauth_getgid());
                else
                        return -1LL;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        case DIF_VAR_ERRNO: {
                klwp_t *lwp;

                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                if ((lwp = curthread->t_lwp) == NULL)
                        return (0);

                return ((uint64_t)lwp->lwp_errno);
        }
#else
        case DIF_VAR_ERRNO: {
                uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
                if (!dtrace_priv_proc(state))
                        return (0);

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
                        return (0);

                return (uthread ? uthread->t_dtrace_errno : -1);
        }
#endif /* __APPLE__ */

        default:
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                return (0);
        }
}
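
/*
 * Several of the dtrace_dif_variable() cases above (timestamp,
 * walltimestamp, ipl, stackdepth, caller, ucaller) share one caching
 * pattern: compute the value at most once per probe firing, stash it in the
 * machine state, and mark it present with a DTRACE_MSTATE_* bit.  The shape
 * of the pattern in isolation (illustrative only; "foo" is hypothetical):
 *
 *      if (!(mstate->dtms_present & DTRACE_MSTATE_FOO)) {
 *              mstate->dtms_foo = <compute foo once>;
 *              mstate->dtms_present |= DTRACE_MSTATE_FOO;
 *      }
 *      return (mstate->dtms_foo);
 */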

/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
        volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
        volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
        volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
        union {
                mutex_impl_t mi;
                uint64_t mx;
        } m;

        union {
                krwlock_t ri;
                uintptr_t rw;
        } r;
#else
/* XXX awaits lock/mutex work */
#endif /* __APPLE__ */

        switch (subr) {
        case DIF_SUBR_RAND:
                regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
                break;

#if !defined(__APPLE__)
        case DIF_SUBR_MUTEX_OWNED:
                m.mx = dtrace_load64(tupregs[0].dttk_value);
                if (MUTEX_TYPE_ADAPTIVE(&m.mi))
                        regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
                else
                        regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
                break;

        case DIF_SUBR_MUTEX_OWNER:
                m.mx = dtrace_load64(tupregs[0].dttk_value);
                if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
                    MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
                        regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
                else
                        regs[rd] = 0;
                break;

        case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
                m.mx = dtrace_load64(tupregs[0].dttk_value);
                regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
                break;

        case DIF_SUBR_MUTEX_TYPE_SPIN:
                m.mx = dtrace_load64(tupregs[0].dttk_value);
                regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
                break;

        case DIF_SUBR_RW_READ_HELD: {
                uintptr_t tmp;

                r.rw = dtrace_loadptr(tupregs[0].dttk_value);
                regs[rd] = _RW_READ_HELD(&r.ri, tmp);
                break;
        }

        case DIF_SUBR_RW_WRITE_HELD:
                r.rw = dtrace_loadptr(tupregs[0].dttk_value);
                regs[rd] = _RW_WRITE_HELD(&r.ri);
                break;

        case DIF_SUBR_RW_ISWRITER:
                r.rw = dtrace_loadptr(tupregs[0].dttk_value);
                regs[rd] = _RW_ISWRITER(&r.ri);
                break;
#else
/* XXX awaits lock/mutex work */
#endif /* __APPLE__ */

        case DIF_SUBR_BCOPY: {
                /*
                 * We need to be sure that the destination is in the scratch
                 * region -- no other region is allowed.
                 */
                uintptr_t src = tupregs[0].dttk_value;
                uintptr_t dest = tupregs[1].dttk_value;
                size_t size = tupregs[2].dttk_value;

                if (!dtrace_inscratch(dest, size, mstate)) {
                        *flags |= CPU_DTRACE_BADADDR;
                        *illval = regs[rd];
                        break;
                }

                dtrace_bcopy((void *)src, (void *)dest, size);
                break;
        }

        case DIF_SUBR_ALLOCA:
        case DIF_SUBR_COPYIN: {
                uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
                uint64_t size =
                    tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
                size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;

                /*
                 * This action doesn't require any credential checks since
                 * probes will not activate in user contexts to which the
                 * enabling user does not have permissions.
                 */
                if (mstate->dtms_scratch_ptr + scratch_size >
                    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
                        regs[rd] = NULL;
                        break;
                }

                if (subr == DIF_SUBR_COPYIN) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
                        dtrace_copyin(tupregs[0].dttk_value, dest, size);
                        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
                }

                mstate->dtms_scratch_ptr += scratch_size;
                regs[rd] = dest;
                break;
        }

        case DIF_SUBR_COPYINTO: {
                uint64_t size = tupregs[1].dttk_value;
                uintptr_t dest = tupregs[2].dttk_value;

                /*
                 * This action doesn't require any credential checks since
                 * probes will not activate in user contexts to which the
                 * enabling user does not have permissions.
                 */
                if (!dtrace_inscratch(dest, size, mstate)) {
                        *flags |= CPU_DTRACE_BADADDR;
                        *illval = regs[rd];
                        break;
                }

                DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
                dtrace_copyin(tupregs[0].dttk_value, dest, size);
                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
                break;
        }

        case DIF_SUBR_COPYINSTR: {
                uintptr_t dest = mstate->dtms_scratch_ptr;
                uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];

                if (nargs > 1 && tupregs[1].dttk_value < size)
                        size = tupregs[1].dttk_value + 1;

                /*
                 * This action doesn't require any credential checks since
                 * probes will not activate in user contexts to which the
                 * enabling user does not have permissions.
                 */
                if (mstate->dtms_scratch_ptr + size >
                    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
                        regs[rd] = NULL;
                        break;
                }

                DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
                dtrace_copyinstr(tupregs[0].dttk_value, dest, size);
                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

                ((char *)dest)[size - 1] = '\0';
                mstate->dtms_scratch_ptr += size;
                regs[rd] = dest;
                break;
        }
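
        /*
         * Note that alloca(), copyin() and copyinstr() above all carve their
         * storage out of per-CPU scratch space with the same bump-pointer
         * discipline: round dtms_scratch_ptr up for alignment, bounds-check
         * the request against dtms_scratch_base + dtms_scratch_size (setting
         * CPU_DTRACE_NOSCRATCH and failing the operation if it doesn't fit),
         * and only then advance the pointer.  Scratch is never freed
         * piecemeal; its lifetime is that of the clause, as noted in the
         * DIF_VAR_EXECNAME case above.
         */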
3102 #if !defined(__APPLE__) 
3103         case DIF_SUBR_MSGSIZE
: 
3104         case DIF_SUBR_MSGDSIZE
: { 
3105                 uintptr_t baddr 
= tupregs
[0].dttk_value
, daddr
; 
3106                 uintptr_t wptr
, rptr
; 
3110                 while (baddr 
!= NULL 
&& !(*flags 
& CPU_DTRACE_FAULT
)) { 
3111                         wptr 
= dtrace_loadptr(baddr 
+ 
3112                             offsetof(mblk_t
, b_wptr
)); 
3114                         rptr 
= dtrace_loadptr(baddr 
+ 
3115                             offsetof(mblk_t
, b_rptr
)); 
3118                                 *flags 
|= CPU_DTRACE_BADADDR
; 
3119                                 *illval 
= tupregs
[0].dttk_value
; 
3123                         daddr 
= dtrace_loadptr(baddr 
+ 
3124                             offsetof(mblk_t
, b_datap
)); 
3126                         baddr 
= dtrace_loadptr(baddr 
+ 
3127                             offsetof(mblk_t
, b_cont
)); 
3130                          * We want to prevent against denial-of-service here, 
3131                          * so we're only going to search the list for 
3132                          * dtrace_msgdsize_max mblks. 
3134                         if (cont
++ > dtrace_msgdsize_max
) { 
3135                                 *flags 
|= CPU_DTRACE_ILLOP
; 
3139                         if (subr 
== DIF_SUBR_MSGDSIZE
) { 
3140                                 if (dtrace_load8(daddr 
+ 
3141                                     offsetof(dblk_t
, db_type
)) != M_DATA
) 
3145                         count 
+= wptr 
- rptr
; 
3148                 if (!(*flags 
& CPU_DTRACE_FAULT
)) 
3154         case DIF_SUBR_MSGSIZE
: 
3155         case DIF_SUBR_MSGDSIZE
: { 
3156                 /* Darwin does not implement SysV streams messages */ 
3160 #endif /* __APPLE__ */ 
3162 #if !defined(__APPLE__) 
3163         case DIF_SUBR_PROGENYOF
: { 
3164                 pid_t pid 
= tupregs
[0].dttk_value
; 
3168                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
); 
3170                 for (p 
= curthread
->t_procp
; p 
!= NULL
; p 
= p
->p_parent
) { 
3171                         if (p
->p_pidp
->pid_id 
== pid
) { 
3177                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
); 
3183         case DIF_SUBR_PROGENYOF
: { 
3184                 pid_t pid 
= tupregs
[0].dttk_value
; 
3185                 struct proc 
*p 
= current_proc(); 
3186                 int rval 
= 0, lim 
= nprocs
; 
3188                 while(p 
&& (lim
-- > 0)) { 
3191                         ppid 
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
)); 
3192                         if (*flags 
& CPU_DTRACE_FAULT
) 
3201                                 break; /* Can't climb process tree any further. */ 
3203                         p 
= (struct proc 
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
)); 
3204                         if (*flags 
& CPU_DTRACE_FAULT
) 
3211 #endif /* __APPLE__ */ 
3213         case DIF_SUBR_SPECULATION
: 
3214                 regs
[rd
] = dtrace_speculation(state
); 
3217 #if !defined(__APPLE__) 
3218         case DIF_SUBR_COPYOUT
: { 
3219                 uintptr_t kaddr 
= tupregs
[0].dttk_value
; 
3220                 uintptr_t uaddr 
= tupregs
[1].dttk_value
; 
3221                 uint64_t size 
= tupregs
[2].dttk_value
; 
3223                 if (!dtrace_destructive_disallow 
&& 
3224                     dtrace_priv_proc_control(state
) && 
3225                     !dtrace_istoxic(kaddr
, size
)) { 
3226                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
); 
3227                         dtrace_copyout(kaddr
, uaddr
, size
); 
3228                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
); 
3233         case DIF_SUBR_COPYOUTSTR
: { 
3234                 uintptr_t kaddr 
= tupregs
[0].dttk_value
; 
3235                 uintptr_t uaddr 
= tupregs
[1].dttk_value
; 
3236                 uint64_t size 
= tupregs
[2].dttk_value
; 
3238                 if (!dtrace_destructive_disallow 
&& 
3239                     dtrace_priv_proc_control(state
) && 
3240                     !dtrace_istoxic(kaddr
, size
)) { 
3241                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
); 
3242                         dtrace_copyoutstr(kaddr
, uaddr
, size
); 
3243                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
); 
3248         case DIF_SUBR_COPYOUT
: { 
3249                 uintptr_t kaddr 
= tupregs
[0].dttk_value
; 
3250                 user_addr_t uaddr 
= tupregs
[1].dttk_value
; 
3251                 uint64_t size 
= tupregs
[2].dttk_value
; 
3253                 if (!dtrace_destructive_disallow 
&& 
3254                     dtrace_priv_proc_control(state
) && 
3255                     !dtrace_istoxic(kaddr
, size
)) { 
3256                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
); 
3257                         dtrace_copyout(kaddr
, uaddr
, size
); 
3258                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
); 
3263         case DIF_SUBR_COPYOUTSTR
: { 
3264                 uintptr_t kaddr 
= tupregs
[0].dttk_value
; 
3265                 user_addr_t uaddr 
= tupregs
[1].dttk_value
; 
3266                 uint64_t size 
= tupregs
[2].dttk_value
; 
3268                 if (!dtrace_destructive_disallow 
&& 
3269                     dtrace_priv_proc_control(state
) && 
3270                     !dtrace_istoxic(kaddr
, size
)) { 
3271                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
); 
3272                         dtrace_copyoutstr(kaddr
, uaddr
, size
); 
3273                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
); 
3277 #endif /* __APPLE__ */ 
3279         case DIF_SUBR_STRLEN
: 
3280                 regs
[rd
] = dtrace_strlen((char *)(uintptr_t) 
3281                     tupregs
[0].dttk_value
, 
3282                     state
->dts_options
[DTRACEOPT_STRSIZE
]); 
3285         case DIF_SUBR_STRCHR:
3286         case DIF_SUBR_STRRCHR: {
3288                  * We're going to iterate over the string looking for the
3289                  * specified character.  We will iterate until we have reached
3290                  * the string length or we have found the character.  If this
3291                  * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3292                  * of the specified character instead of the first.
3294                 uintptr_t addr = tupregs[0].dttk_value;
3295                 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
3296                 char c, target = (char)tupregs[1].dttk_value;
3298                 for (regs[rd] = NULL; addr < limit; addr++) {
3299                         if ((c = dtrace_load8(addr)) == target) {
3302                                 if (subr == DIF_SUBR_STRCHR)
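
/*
 * An illustrative standalone sketch of the scan described above: one
 * bounded pass that can yield either the first or the last occurrence.
 * The name and the plain-pointer loads here are hypothetical -- the
 * kernel code must go through dtrace_load8() and bound the scan by
 * DTRACEOPT_STRSIZE.
 */
static const char *
find_char(const char *s, const char *limit, char target, int want_last)
{
        const char *found = NULL;

        for (; s < limit; s++) {
                if (*s == target) {
                        found = s;
                        if (!want_last)
                                break;          /* strchr(): first hit wins */
                }
                if (*s == '\0')
                        break;                  /* end of string */
        }
        return (found);
}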
3313         case DIF_SUBR_STRSTR:
3314         case DIF_SUBR_INDEX:
3315         case DIF_SUBR_RINDEX: {
3317                  * We're going to iterate over the string looking for the
3318                  * specified string.  We will iterate until we have reached
3319                  * the string length or we have found the string.  (Yes, this
3320                  * is done in the most naive way possible -- but considering
3321                  * that the string we're searching for is likely to be
3322                  * relatively short, the complexity of Rabin-Karp or similar
3323                  * hardly seems merited.)
3325                 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
3326                 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
3327                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3328                 size_t len = dtrace_strlen(addr, size);
3329                 size_t sublen = dtrace_strlen(substr, size);
3330                 char *limit = addr + len, *orig = addr;
3331                 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
3334                 regs[rd] = notfound;
3337                  * strstr() and index()/rindex() have similar semantics if
3338                  * both strings are the empty string: strstr() returns a
3339                  * pointer to the (empty) string, and index() and rindex()
3340                  * both return index 0 (regardless of any position argument).
3342                 if (sublen == 0 && len == 0) {
3343                         if (subr == DIF_SUBR_STRSTR)
3344                                 regs[rd] = (uintptr_t)addr;
3350                 if (subr != DIF_SUBR_STRSTR) {
3351                         if (subr == DIF_SUBR_RINDEX) {
3358                          * Both index() and rindex() take an optional position
3359                          * argument that denotes the starting position.
3362                                 int64_t pos = (int64_t)tupregs[2].dttk_value;
3365                                  * If the position argument to index() is
3366                                  * negative, Perl implicitly clamps it at
3367                                  * zero.  This semantic is a little surprising
3368                                  * given the special meaning of negative
3369                                  * positions to similar Perl functions like
3370                                  * substr(), but it appears to reflect a
3371                                  * notion that index() can start from a
3372                                  * negative index and increment its way up to
3373                                  * the string.  Given this notion, Perl's
3374                                  * rindex() is at least self-consistent in
3375                                  * that it implicitly clamps positions greater
3376                                  * than the string length to be the string
3377                                  * length.  Where Perl completely loses
3378                                  * coherence, however, is when the specified
3379                                  * substring is the empty string ("").  In
3380                                  * this case, even if the position is
3381                                  * negative, rindex() returns 0 -- and even if
3382                                  * the position is greater than the length,
3383                                  * index() returns the string length.  These
3384                                  * semantics violate the notion that index()
3385                                  * should never return a value less than the
3386                                  * specified position and that rindex() should
3387                                  * never return a value greater than the
3388                                  * specified position.  (One assumes that
3389                                  * these semantics are artifacts of Perl's
3390                                  * implementation and not the results of
3391                                  * deliberate design -- it beggars belief that
3392                                  * even Larry Wall could desire such oddness.)
3393                                  * While in the abstract one would wish for
3394                                  * consistent position semantics across
3395                                  * substr(), index() and rindex() -- or at the
3396                                  * very least self-consistent position
3397                                  * semantics for index() and rindex() -- we
3398                                  * instead opt to keep with the extant Perl
3399                                  * semantics, in all their broken glory.  (Do
3400                                  * we have more desire to maintain Perl's
3401                                  * semantics than Perl does?  Probably.)
3403                                 if (subr == DIF_SUBR_RINDEX) {
3427                 for (regs[rd] = notfound; addr != limit; addr += inc) {
3428                         if (dtrace_strncmp(addr, substr, sublen) == 0) {
3429                                 if (subr != DIF_SUBR_STRSTR) {
3431                                          * As D index() and rindex() are
3432                                          * modeled on Perl (and not on awk),
3433                                          * we return a zero-based (and not a
3434                                          * one-based) index.  (For you Perl
3435                                          * weenies: no, we're not going to add
3436                                          * $[ -- and shouldn't you be at a con
3439                                         regs[rd] = (uintptr_t)(addr - orig);
3443                                 ASSERT(subr == DIF_SUBR_STRSTR);
3444                                 regs[rd] = (uintptr_t)addr;
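
/*
 * A minimal userland sketch of the naive search described above: it
 * returns the zero-based offset (index()-style) or -1, and a
 * strstr()-style caller can add a non-negative result back to s.
 * Purely illustrative -- the in-kernel version must bound every load
 * by DTRACEOPT_STRSIZE and cannot call libc.
 */
#include <string.h>

static long
naive_index(const char *s, const char *sub)
{
        size_t len = strlen(s), sublen = strlen(sub);
        size_t i;

        if (sublen == 0)
                return (0);             /* empty pattern matches at 0 */

        for (i = 0; i + sublen <= len; i++) {
                if (strncmp(s + i, sub, sublen) == 0)
                        return ((long)i);
        }
        return (-1);
}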
3452         case DIF_SUBR_STRTOK: {
3453                 uintptr_t addr = tupregs[0].dttk_value;
3454                 uintptr_t tokaddr = tupregs[1].dttk_value;
3455                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3456                 uintptr_t limit, toklimit = tokaddr + size;
3457                 uint8_t c, tokmap[32];   /* 256 / 8 */
3458                 char *dest = (char *)mstate->dtms_scratch_ptr;
3461                 if (mstate->dtms_scratch_ptr + size >
3462                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3463                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3470                          * If the address specified is NULL, we use our saved
3471                          * strtok pointer from the mstate.  Note that this
3472                          * means that the saved strtok pointer is _only_
3473                          * valid within multiple enablings of the same probe --
3474                          * it behaves like an implicit clause-local variable.
3476                         addr = mstate->dtms_strtok;
3480                  * First, zero the token map, and then process the token
3481                  * string -- setting a bit in the map for every character
3482                  * found in the token string.
3484                 for (i = 0; i < sizeof (tokmap); i++)
3487                 for (; tokaddr < toklimit; tokaddr++) {
3488                         if ((c = dtrace_load8(tokaddr)) == '\0')
3491                         ASSERT((c >> 3) < sizeof (tokmap));
3492                         tokmap[c >> 3] |= (1 << (c & 0x7));
3495                 for (limit = addr + size; addr < limit; addr++) {
3497                          * We're looking for a character that is _not_ contained
3498                          * in the token string.
3500                         if ((c = dtrace_load8(addr)) == '\0')
3503                         if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
3509                          * We reached the end of the string without finding
3510                          * any character that was not in the token string.
3511                          * We return NULL in this case, and we set the saved
3512                          * address to NULL as well.
3515                         mstate->dtms_strtok = NULL;
3520                  * From here on, we're copying into the destination string.
3522                 for (i = 0; addr < limit && i < size - 1; addr++) {
3523                         if ((c = dtrace_load8(addr)) == '\0')
3526                         if (tokmap[c >> 3] & (1 << (c & 0x7)))
3535                 regs[rd] = (uintptr_t)dest;
3536                 mstate->dtms_scratch_ptr += size;
3537                 mstate->dtms_strtok = addr;
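
/*
 * The token map above is a 256-bit membership set: bit (c & 7) of byte
 * (c >> 3) records whether byte value c appears in the delimiter
 * string.  A hypothetical standalone rendering of the same idea:
 */
#include <stdint.h>
#include <string.h>

typedef struct charset {
        uint8_t map[32];                        /* 256 bits / 8 */
} charset_t;

static void
charset_init(charset_t *cs, const char *tokens)
{
        uint8_t c;

        memset(cs->map, 0, sizeof (cs->map));
        while ((c = (uint8_t)*tokens++) != '\0')
                cs->map[c >> 3] |= (uint8_t)(1 << (c & 0x7));
}

static int
charset_member(const charset_t *cs, uint8_t c)
{
        return ((cs->map[c >> 3] >> (c & 0x7)) & 1);
}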
3541         case DIF_SUBR_SUBSTR: {
3542                 uintptr_t s = tupregs[0].dttk_value;
3543                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3544                 char *d = (char *)mstate->dtms_scratch_ptr;
3545                 int64_t index = (int64_t)tupregs[1].dttk_value;
3546                 int64_t remaining = (int64_t)tupregs[2].dttk_value;
3547                 size_t len = dtrace_strlen((char *)s, size);
3551                         remaining = (int64_t)size;
3553                 if (mstate->dtms_scratch_ptr + size >
3554                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3555                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3563                         if (index < 0 && index + remaining > 0) {
3569                 if (index >= len || index < 0)
3572                 for (d[0] = '\0'; remaining > 0; remaining--) {
3573                         if ((d[i++] = dtrace_load8(s++ + index)) == '\0')
3582                 mstate->dtms_scratch_ptr += size;
3583                 regs[rd] = (uintptr_t)d;
3587 #if !defined(__APPLE__) 
3588         case DIF_SUBR_GETMAJOR:
3590                 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
3592                 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
3596 #else  /* __APPLE__ */ 
3597         case DIF_SUBR_GETMAJOR:
3598                 regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value );
3600 #endif /* __APPLE__ */ 
3602 #if !defined(__APPLE__) 
3603         case DIF_SUBR_GETMINOR:
3605                 regs[rd] = tupregs[0].dttk_value & MAXMIN64;
3607                 regs[rd] = tupregs[0].dttk_value & MAXMIN;
3611 #else  /* __APPLE__ */ 
3612         case DIF_SUBR_GETMINOR:
3613                 regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value );
3615 #endif /* __APPLE__ */ 
3617 #if !defined(__APPLE__) 
3618         case DIF_SUBR_DDI_PATHNAME: {
3620                  * This one is a galactic mess.  We are going to roughly
3621                  * emulate ddi_pathname(), but it's made more complicated
3622                  * by the fact that we (a) want to include the minor name and
3623                  * (b) must proceed iteratively instead of recursively.
3625                 uintptr_t dest = mstate->dtms_scratch_ptr;
3626                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3627                 char *start = (char *)dest, *end = start + size - 1;
3628                 uintptr_t daddr = tupregs[0].dttk_value;
3629                 int64_t minor = (int64_t)tupregs[1].dttk_value;
3631                 int i, len, depth = 0;
3633                 if (size == 0 || mstate->dtms_scratch_ptr + size >
3634                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3635                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3643                  * We want to have a name for the minor.  In order to do this,
3644                  * we need to walk the minor list from the devinfo.  We want
3645                  * to be sure that we don't infinitely walk a circular list,
3646                  * so we check for circularity by sending a scout pointer
3647                  * ahead two elements for every element that we iterate over;
3648                  * if the list is circular, these will ultimately point to the
3649                  * same element.  You may recognize this little trick as the
3650                  * answer to a stupid interview question -- one that always
3651                  * seems to be asked by those who had to have it laboriously
3652                  * explained to them, and who can't even concisely describe
3653                  * the conditions under which one would be forced to resort to
3654                  * this technique.  Needless to say, those conditions are
3655                  * found here -- and probably only here.  Is this the only
3656                  * use of this infamous trick in shipping, production code?
3657                  * If it isn't, it probably should be...
3660                         uintptr_t maddr = dtrace_loadptr(daddr +
3661                             offsetof(struct dev_info, devi_minor));
3663                         uintptr_t next = offsetof(struct ddi_minor_data, next);
3664                         uintptr_t name = offsetof(struct ddi_minor_data,
3665                             d_minor) + offsetof(struct ddi_minor, name);
3666                         uintptr_t dev = offsetof(struct ddi_minor_data,
3667                             d_minor) + offsetof(struct ddi_minor, dev);
3671                                 scout = dtrace_loadptr(maddr + next);
3673                         while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
3676                                 m = dtrace_load64(maddr + dev) & MAXMIN64;
3678                                 m = dtrace_load32(maddr + dev) & MAXMIN;
3681                                         maddr = dtrace_loadptr(maddr + next);
3686                                         scout = dtrace_loadptr(scout + next);
3691                                         scout = dtrace_loadptr(scout + next);
3696                                         if (scout == maddr) {
3697                                                 *flags |= CPU_DTRACE_ILLOP;
3705                                  * We have the minor data.  Now we need to
3706                                  * copy the minor's name into the end of the
3709                                 s = (char *)dtrace_loadptr(maddr + name);
3710                                 len = dtrace_strlen(s, size);
3712                                 if (*flags & CPU_DTRACE_FAULT)
3716                                         if ((end -= (len + 1)) < start)
3722                                 for (i = 1; i <= len; i++)
3723                                         end[i] = dtrace_load8((uintptr_t)s++);
3728                 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
3729                         ddi_node_state_t devi_state;
3731                         devi_state = dtrace_load32(daddr +
3732                             offsetof(struct dev_info, devi_node_state));
3734                         if (*flags & CPU_DTRACE_FAULT)
3737                         if (devi_state >= DS_INITIALIZED) {
3738                                 s = (char *)dtrace_loadptr(daddr +
3739                                     offsetof(struct dev_info, devi_addr));
3740                                 len = dtrace_strlen(s, size);
3742                                 if (*flags & CPU_DTRACE_FAULT)
3746                                         if ((end -= (len + 1)) < start)
3752                                 for (i = 1; i <= len; i++)
3753                                         end[i] = dtrace_load8((uintptr_t)s++);
3757                          * Now for the node name...
3759                         s = (char *)dtrace_loadptr(daddr +
3760                             offsetof(struct dev_info, devi_node_name));
3762                         daddr = dtrace_loadptr(daddr +
3763                             offsetof(struct dev_info, devi_parent));
3766                          * If our parent is NULL (that is, if we're the root
3767                          * node), we're going to use the special path
3773                         len = dtrace_strlen(s, size);
3774                         if (*flags & CPU_DTRACE_FAULT)
3777                         if ((end -= (len + 1)) < start)
3780                         for (i = 1; i <= len; i++)
3781                                 end[i] = dtrace_load8((uintptr_t)s++);
3784                         if (depth++ > dtrace_devdepth_max) {
3785                                 *flags |= CPU_DTRACE_ILLOP;
3791                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3793                 if (daddr == NULL) {
3794                         regs[rd] = (uintptr_t)end;
3795                         mstate->dtms_scratch_ptr += size;
3801         case DIF_SUBR_DDI_PATHNAME: {
3802                 /* XXX awaits galactic disentanglement ;-} */ 
3806 #endif /* __APPLE__ */ 
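
/*
 * The scout pointer described above is the classic tortoise-and-hare
 * cycle check.  A self-contained sketch with a hypothetical node type:
 * the fast pointer advances two links per iteration, so if the list is
 * circular the two pointers must eventually coincide.
 */
struct node {
        struct node *next;
};

static int
has_cycle(const struct node *head)
{
        const struct node *slow = head, *fast = head;

        while (fast != NULL && fast->next != NULL) {
                slow = slow->next;              /* one step */
                fast = fast->next->next;        /* two steps */
                if (slow == fast)
                        return (1);             /* met: list is circular */
        }
        return (0);                             /* fast ran off the end */
}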
3808         case DIF_SUBR_STRJOIN: {
3809                 char *d = (char *)mstate->dtms_scratch_ptr;
3810                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3811                 uintptr_t s1 = tupregs[0].dttk_value;
3812                 uintptr_t s2 = tupregs[1].dttk_value;
3815                 if (mstate->dtms_scratch_ptr + size >
3816                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3817                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3824                                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3829                         if ((d[i++] = dtrace_load8(s1++)) == '\0') {
3837                                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3842                         if ((d[i++] = dtrace_load8(s2++)) == '\0')
3847                         mstate->dtms_scratch_ptr += i;
3848                         regs[rd] = (uintptr_t)d;
3854         case DIF_SUBR_LLTOSTR: {
3855                 int64_t i = (int64_t)tupregs[0].dttk_value;
3856                 int64_t val = i < 0 ? i * -1 : i;
3857                 uint64_t size = 22;     /* enough room for 2^64 in decimal */
3858                 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
3860                 if (mstate->dtms_scratch_ptr + size >
3861                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3862                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3867                 for (*end-- = '\0'; val; val /= 10)
3868                         *end-- = '0' + (val % 10);
3876                 regs[rd] = (uintptr_t)end + 1;
3877                 mstate->dtms_scratch_ptr += size;
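
/*
 * lltostr() emits decimal digits from the end of the scratch buffer
 * backward, which is why regs[rd] ends up pointing at end + 1.  An
 * illustrative userland equivalent (the zero and sign handling shown
 * here corresponds to lines elided from this listing):
 */
#include <stddef.h>

static char *
lltostr_sketch(long long val, char *buf, size_t size)
{
        char *end = buf + size - 1;
        unsigned long long u = (val < 0) ?
            -(unsigned long long)val : (unsigned long long)val;

        *end = '\0';
        if (u == 0)
                *--end = '0';
        for (; u != 0; u /= 10)
                *--end = (char)('0' + (u % 10));
        if (val < 0)
                *--end = '-';
        return (end);                           /* first character */
}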
3881         case DIF_SUBR_DIRNAME:
3882         case DIF_SUBR_BASENAME: {
3883                 char *dest = (char *)mstate->dtms_scratch_ptr;
3884                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3885                 uintptr_t src = tupregs[0].dttk_value;
3886                 int i, j, len = dtrace_strlen((char *)src, size);
3887                 int lastbase = -1, firstbase = -1, lastdir = -1;
3890                 if (mstate->dtms_scratch_ptr + size >
3891                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3892                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3898                  * The basename and dirname for a zero-length string is
3903                         src = (uintptr_t)".";
3907                  * Start from the back of the string, moving back toward the
3908                  * front until we see a character that isn't a slash.  That
3909                  * character is the last character in the basename.
3911                 for (i = len - 1; i >= 0; i--) {
3912                         if (dtrace_load8(src + i) != '/')
3920                  * Starting from the last character in the basename, move
3921                  * towards the front until we find a slash.  The character
3922                  * that we processed immediately before that is the first
3923                  * character in the basename.
3925                 for (; i >= 0; i--) {
3926                         if (dtrace_load8(src + i) == '/')
3934                  * Now keep going until we find a non-slash character.  That
3935                  * character is the last character in the dirname.
3937                 for (; i >= 0; i--) {
3938                         if (dtrace_load8(src + i) != '/')
3945                 ASSERT(!(lastbase == -1 && firstbase != -1));
3946                 ASSERT(!(firstbase == -1 && lastdir != -1));
3948                 if (lastbase == -1) {
3950                          * We didn't find a non-slash character.  We know that
3951                          * the length is non-zero, so the whole string must be
3952                          * slashes.  In either the dirname or the basename
3953                          * case, we return '/'.
3955                         ASSERT(firstbase == -1);
3956                         firstbase = lastbase = lastdir = 0;
3959                 if (firstbase == -1) {
3961                          * The entire string consists only of a basename
3962                          * component.  If we're looking for dirname, we need
3963                          * to change our string to be just "."; if we're
3964                          * looking for a basename, we'll just set the first
3965                          * character of the basename to be 0.
3967                         if (subr == DIF_SUBR_DIRNAME) {
3968                                 ASSERT(lastdir == -1);
3969                                 src = (uintptr_t)".";
3976                 if (subr == DIF_SUBR_DIRNAME) {
3977                         if (lastdir == -1) {
3979                                  * We know that we have a slash in the name --
3980                                  * or lastdir would be set to 0, above.  And
3981                                  * because lastdir is -1, we know that this
3982                                  * slash must be the first character.  (That
3983                                  * is, the full string must be of the form
3984                                  * "/basename".)  In this case, the last
3985                                  * character of the directory name is 0.
3993                         ASSERT(subr == DIF_SUBR_BASENAME);
3994                         ASSERT(firstbase != -1 && lastbase != -1);
3999                 for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
4000                         dest[j] = dtrace_load8(src + i);
4003                 regs[rd] = (uintptr_t)dest;
4004                 mstate->dtms_scratch_ptr += size;
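
/*
 * A compact sketch of the three backward scans above, with
 * hypothetical out-parameters: lastbase is the last character of the
 * basename, firstbase its first character, and lastdir the last
 * character of the dirname; -1 means that scan found nothing (for
 * example, firstbase stays -1 when the whole string is a basename).
 */
static void
locate_components(const char *path, int len,
    int *firstbase, int *lastbase, int *lastdir)
{
        int i;

        *firstbase = *lastbase = *lastdir = -1;

        for (i = len - 1; i >= 0; i--) {        /* skip trailing slashes */
                if (path[i] != '/') {
                        *lastbase = i;
                        break;
                }
        }
        for (; i >= 0; i--) {                   /* slash before basename */
                if (path[i] == '/') {
                        *firstbase = i + 1;
                        break;
                }
        }
        for (; i >= 0; i--) {                   /* skip that run of slashes */
                if (path[i] != '/') {
                        *lastdir = i;
                        break;
                }
        }
}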
4008         case DIF_SUBR_CLEANPATH: {
4009                 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4010                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4011                 uintptr_t src = tupregs[0].dttk_value;
4014                 if (mstate->dtms_scratch_ptr + size >
4015                     mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
4016                         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4022                  * Move forward, loading each character.
4025                         c = dtrace_load8(src + i++);
4027                         if (j + 5 >= size)      /* 5 = strlen("/..c\0") */
4035                         c = dtrace_load8(src + i++);
4039                                  * We have two slashes -- we can just advance
4040                                  * to the next character.
4047                                  * This is not "." and it's not ".." -- we can
4048                                  * just store the "/" and this character and
4056                         c = dtrace_load8(src + i++);
4060                                  * This is a "/./" component.  We're not going
4061                                  * to store anything in the destination buffer;
4062                                  * we're just going to go to the next component.
4069                                  * This is not ".." -- we can just store the
4070                                  * "/." and this character and continue
4079                         c = dtrace_load8(src + i++);
4081                         if (c != '/' && c != '\0') {
4083                                  * This is not ".." -- it's "..[mumble]".
4084                                  * We'll store the "/.." and this character
4085                                  * and continue processing.
4095                          * This is "/../" or "/..\0".  We need to back up
4096                          * our destination pointer until we find a "/".
4099                         while (j != 0 && dest[--j] != '/')
4104                 } while (c != '\0');
4107                 regs[rd] = (uintptr_t)dest;
4108                 mstate->dtms_scratch_ptr += size;
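
/*
 * A hypothetical userland rendering of cleanpath()'s effect on an
 * absolute path: split on '/', drop empty and "." components, and let
 * ".." pop the destination back to the previous slash -- the same
 * backing-up loop shown above.  This is a sketch of the semantics,
 * not the kernel's character-by-character state machine.
 */
#include <string.h>

static void
clean_path(char *dst, const char *src, size_t dstsize)
{
        size_t j = 0, len;

        while (*src != '\0') {
                if (*src == '/') {
                        src++;                  /* collapse "//" runs */
                        continue;
                }
                for (len = 0; src[len] != '\0' && src[len] != '/'; len++)
                        continue;

                if (len == 1 && src[0] == '.') {
                        /* "/./": contributes nothing */
                } else if (len == 2 && src[0] == '.' && src[1] == '.') {
                        while (j != 0 && dst[--j] != '/')
                                continue;       /* "..": pop a component */
                } else if (j + len + 2 < dstsize) {
                        dst[j++] = '/';
                        memcpy(dst + j, src, len);
                        j += len;
                }
                src += len;
        }
        if (j == 0)
                dst[j++] = '/';                 /* everything cancelled */
        dst[j] = '\0';
}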
4113         /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */ 
4114         case DIF_SUBR_CHUD: {
4115                 uint64_t selector = tupregs[0].dttk_value;
4116                 uint64_t args[DIF_DTR_NREGS-1] = {0ULL};
4119                 /* copy in any variadic argument list */
4120                 for(ii = 0; ii < DIF_DTR_NREGS-1; ii++) {
4121                         args[ii] = tupregs[ii+1].dttk_value;
4125                         chudxnu_dtrace_callback(selector, args, DIF_DTR_NREGS-1);
4126                 if(KERN_SUCCESS != ret) {
4132 #endif /* __APPLE__ */ 
4138  * Emulate the execution of DTrace IR instructions specified by the given 
4139  * DIF object.  This function is deliberately void of assertions as all of 
4140  * the necessary checks are handled by a call to dtrace_difo_validate(). 
4143 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4144     dtrace_vstate_t *vstate, dtrace_state_t *state)
4146         const dif_instr_t *text = difo->dtdo_buf;
4147         const uint_t textlen = difo->dtdo_len;
4148         const char *strtab = difo->dtdo_strtab;
4149         const uint64_t *inttab = difo->dtdo_inttab;
4152         dtrace_statvar_t *svar;
4153         dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4155         volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
4156 #if !defined(__APPLE__)
4157         volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4159         volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
4160 #endif /* __APPLE__ */
4162         dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4163         uint64_t regs[DIF_DIR_NREGS];
4166         uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4168         uint_t pc = 0, id, opc;
4173         regs[DIF_REG_R0] = 0;           /* %r0 is fixed at zero */
4175         while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
4179                 r1 = DIF_INSTR_R1(instr);
4180                 r2 = DIF_INSTR_R2(instr);
4181                 rd = DIF_INSTR_RD(instr);
4183                 switch (DIF_INSTR_OP(instr)) {
4185                         regs[rd] = regs[r1] | regs[r2];
4188                         regs[rd] = regs[r1] ^ regs[r2];
4191                         regs[rd] = regs[r1] & regs[r2];
4194                         regs[rd] = regs[r1] << regs[r2];
4197                         regs[rd] = regs[r1] >> regs[r2];
4200                         regs[rd] = regs[r1] - regs[r2];
4203                         regs[rd] = regs[r1] + regs[r2];
4206                         regs[rd] = regs[r1] * regs[r2];
4209                         if (regs[r2] == 0) {
4211                                 *flags |= CPU_DTRACE_DIVZERO;
4213                                 regs[rd] = (int64_t)regs[r1] /
4219                         if (regs[r2] == 0) {
4221                                 *flags |= CPU_DTRACE_DIVZERO;
4223                                 regs[rd] = regs[r1] / regs[r2];
4228                         if (regs[r2] == 0) {
4230                                 *flags |= CPU_DTRACE_DIVZERO;
4232                                 regs[rd] = (int64_t)regs[r1] %
4238                         if (regs[r2] == 0) {
4240                                 *flags |= CPU_DTRACE_DIVZERO;
4242                                 regs[rd] = regs[r1] % regs[r2];
4247                         regs[rd] = ~regs[r1];
4250                         regs[rd] = regs[r1];
4253                         cc_r = regs[r1] - regs[r2];
4257                         cc_c = regs[r1] < regs[r2];
4260                         cc_n = cc_v = cc_c = 0;
4261                         cc_z = regs[r1] == 0;
4264                         pc = DIF_INSTR_LABEL(instr);
4268                                 pc = DIF_INSTR_LABEL(instr);
4272                                 pc = DIF_INSTR_LABEL(instr);
4275                         if ((cc_z | (cc_n ^ cc_v)) == 0)
4276                                 pc = DIF_INSTR_LABEL(instr);
4279                         if ((cc_c | cc_z) == 0)
4280                                 pc = DIF_INSTR_LABEL(instr);
4283                         if ((cc_n ^ cc_v) == 0)
4284                                 pc = DIF_INSTR_LABEL(instr);
4288                                 pc = DIF_INSTR_LABEL(instr);
4292                                 pc = DIF_INSTR_LABEL(instr);
4296                                 pc = DIF_INSTR_LABEL(instr);
4299                         if (cc_z | (cc_n ^ cc_v))
4300                                 pc = DIF_INSTR_LABEL(instr);
4304                                 pc = DIF_INSTR_LABEL(instr);
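
/*
 * The branch predicates above follow SPARC-style condition codes set
 * by the subtract in the compare: signed r1 > r2 is decided by
 * (cc_z | (cc_n ^ cc_v)) == 0 and unsigned r1 > r2 by
 * (cc_c | cc_z) == 0.  A hypothetical sketch of deriving N/Z/V/C from
 * a 64-bit compare:
 */
#include <stdint.h>

static void
cmp_cc(uint64_t a, uint64_t b,
    uint8_t *cc_n, uint8_t *cc_z, uint8_t *cc_v, uint8_t *cc_c)
{
        int64_t r = (int64_t)(a - b);

        *cc_n = (r < 0);
        *cc_z = (r == 0);
        *cc_v = (((int64_t)a < 0) != ((int64_t)b < 0)) &&
            (((int64_t)a < 0) != (r < 0));      /* signed overflow */
        *cc_c = (a < b);                        /* borrow out */
}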
4307                         if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4308                                 *flags |= CPU_DTRACE_KPRIV;
4314                         regs[rd] = (int8_t)dtrace_load8(regs[r1]);
4317                         if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4318                                 *flags |= CPU_DTRACE_KPRIV;
4324                         regs[rd] = (int16_t)dtrace_load16(regs[r1]);
4327                         if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4328                                 *flags |= CPU_DTRACE_KPRIV;
4334                         regs[rd] = (int32_t)dtrace_load32(regs[r1]);
4337                         if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
4338                                 *flags |= CPU_DTRACE_KPRIV;
4344                         regs[rd] = dtrace_load8(regs[r1]);
4347                         if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
4348                                 *flags |= CPU_DTRACE_KPRIV;
4354                         regs[rd] = dtrace_load16(regs[r1]);
4357                         if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
4358                                 *flags |= CPU_DTRACE_KPRIV;
4364                         regs[rd] = dtrace_load32(regs[r1]);
4367                         if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
4368                                 *flags |= CPU_DTRACE_KPRIV;
4374                         regs[rd] = dtrace_load64(regs[r1]);
4378                             dtrace_fuword8(regs[r1]);
4381                         regs[rd] = (int16_t)
4382                             dtrace_fuword16(regs[r1]);
4385                         regs[rd] = (int32_t)
4386                             dtrace_fuword32(regs[r1]);
4390                             dtrace_fuword8(regs[r1]);
4394                             dtrace_fuword16(regs[r1]);
4398                             dtrace_fuword32(regs[r1]);
4402                             dtrace_fuword64(regs[r1]);
4410                         regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
4413                         regs[rd] = (uint64_t)(uintptr_t)
4414                             (strtab + DIF_INSTR_STRING(instr));
4417                         cc_r = dtrace_strncmp((char *)(uintptr_t)regs[r1],
4418                             (char *)(uintptr_t)regs[r2],
4419                             state->dts_options[DTRACEOPT_STRSIZE]);
4426                         regs[rd] = dtrace_dif_variable(mstate, state,
4430                         id = DIF_INSTR_VAR(instr);
4432                         if (id >= DIF_VAR_OTHER_UBASE) {
4435                                 id -= DIF_VAR_OTHER_UBASE;
4436                                 svar = vstate->dtvs_globals[id];
4437                                 ASSERT(svar != NULL);
4438                                 v = &svar->dtsv_var;
4440                                 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
4441                                         regs[rd] = svar->dtsv_data;
4445                                 a = (uintptr_t)svar->dtsv_data;
4447                                 if (*(uint8_t *)a == UINT8_MAX) {
4449                                          * If the 0th byte is set to UINT8_MAX
4450                                          * then this is to be treated as a
4451                                          * reference to a NULL variable.
4455                                         regs[rd] = a + sizeof (uint64_t);
4461                         regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
4465                         id = DIF_INSTR_VAR(instr);
4467                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4468                         id -= DIF_VAR_OTHER_UBASE;
4470                         svar = vstate->dtvs_globals[id];
4471                         ASSERT(svar != NULL);
4472                         v = &svar->dtsv_var;
4474                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4475                                 uintptr_t a = (uintptr_t)svar->dtsv_data;
4478                                 ASSERT(svar->dtsv_size != 0);
4480                                 if (regs[rd] == NULL) {
4481                                         *(uint8_t *)a = UINT8_MAX;
4485                                         a += sizeof (uint64_t);
4488                                 dtrace_vcopy((void *)(uintptr_t)regs[rd],
4489                                     (void *)a, &v->dtdv_type);
4493                         svar->dtsv_data = regs[rd];
4498                          * There are no DTrace built-in thread-local arrays at 
4499                          * present.  This opcode is saved for future work. 
4501                         *flags |= CPU_DTRACE_ILLOP;
4506                         id = DIF_INSTR_VAR(instr);
4508                         if (id < DIF_VAR_OTHER_UBASE) {
4510                                  * For now, this has no meaning.
4516                         id -= DIF_VAR_OTHER_UBASE;
4518                         ASSERT(id < vstate->dtvs_nlocals);
4519                         ASSERT(vstate->dtvs_locals != NULL);
4521                         svar = vstate->dtvs_locals[id];
4522                         ASSERT(svar != NULL);
4523                         v = &svar->dtsv_var;
4525                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4526                                 uintptr_t a = (uintptr_t)svar->dtsv_data;
4527                                 size_t sz = v->dtdv_type.dtdt_size;
4529                                 sz += sizeof (uint64_t);
4530                                 ASSERT(svar->dtsv_size == NCPU * sz);
4531                                 a += CPU->cpu_id * sz;
4533                                 if (*(uint8_t *)a == UINT8_MAX) {
4535                                          * If the 0th byte is set to UINT8_MAX
4536                                          * then this is to be treated as a
4537                                          * reference to a NULL variable.
4541                                         regs[rd] = a + sizeof (uint64_t);
4547                         ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
4548                         tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
4549                         regs[rd] = tmp[CPU->cpu_id];
4553                         id = DIF_INSTR_VAR(instr);
4555                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4556                         id -= DIF_VAR_OTHER_UBASE;
4557                         ASSERT(id < vstate->dtvs_nlocals);
4559                         ASSERT(vstate->dtvs_locals != NULL);
4560                         svar = vstate->dtvs_locals[id];
4561                         ASSERT(svar != NULL);
4562                         v = &svar->dtsv_var;
4564                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4565                                 uintptr_t a = (uintptr_t)svar->dtsv_data;
4566                                 size_t sz = v->dtdv_type.dtdt_size;
4568                                 sz += sizeof (uint64_t);
4569                                 ASSERT(svar->dtsv_size == NCPU * sz);
4570                                 a += CPU->cpu_id * sz;
4572                                 if (regs[rd] == NULL) {
4573                                         *(uint8_t *)a = UINT8_MAX;
4577                                         a += sizeof (uint64_t);
4580                                 dtrace_vcopy((void *)(uintptr_t)regs[rd],
4581                                     (void *)a, &v->dtdv_type);
4585                         ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
4586                         tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
4587                         tmp[CPU->cpu_id] = regs[rd];
4591                         dtrace_dynvar_t *dvar;
4594                         id = DIF_INSTR_VAR(instr);
4595                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4596                         id -= DIF_VAR_OTHER_UBASE;
4597                         v = &vstate->dtvs_tlocals[id];
4599                         key = &tupregs[DIF_DTR_NREGS];
4600                         key[0].dttk_value = (uint64_t)id;
4601                         key[0].dttk_size = 0;
4602                         DTRACE_TLS_THRKEY(key[1].dttk_value);
4603                         key[1].dttk_size = 0;
4605                         dvar = dtrace_dynvar(dstate, 2, key,
4606                             sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC);
4613                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4614                                 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
4616                                 regs[rd] = *((uint64_t *)dvar->dtdv_data);
4623                         dtrace_dynvar_t *dvar;
4626                         id = DIF_INSTR_VAR(instr);
4627                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4628                         id -= DIF_VAR_OTHER_UBASE;
4630                         key = &tupregs[DIF_DTR_NREGS];
4631                         key[0].dttk_value = (uint64_t)id;
4632                         key[0].dttk_size = 0;
4633                         DTRACE_TLS_THRKEY(key[1].dttk_value);
4634                         key[1].dttk_size = 0;
4635                         v = &vstate->dtvs_tlocals[id];
4637                         dvar = dtrace_dynvar(dstate, 2, key,
4638                             v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
4639                             v->dtdv_type.dtdt_size : sizeof (uint64_t),
4640                             regs[rd] ? DTRACE_DYNVAR_ALLOC :
4641                             DTRACE_DYNVAR_DEALLOC);
4644                          * Given that we're storing to thread-local data,
4645                          * we need to flush our predicate cache.
4647 #if !defined(__APPLE__)
4648                         curthread->t_predcache = NULL;
4650                         dtrace_set_thread_predcache(current_thread(), 0);
4651 #endif /* __APPLE__ */
4657                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4658                                 dtrace_vcopy((void *)(uintptr_t)regs[rd],
4659                                     dvar->dtdv_data, &v->dtdv_type);
4661                                 *((uint64_t *)dvar->dtdv_data) = regs[rd];
4668                         regs[rd] = (int64_t)regs[r1] >> regs[r2];
4672                         dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
4673                             regs, tupregs, ttop, mstate, state);
4677                         if (ttop == DIF_DTR_NREGS) {
4678                                 *flags |= CPU_DTRACE_TUPOFLOW;
4682                         if (r1 == DIF_TYPE_STRING) {
4684                                  * If this is a string type and the size is 0,
4685                                  * we'll use the system-wide default string
4686                                  * size.  Note that we are _not_ looking at
4687                                  * the value of the DTRACEOPT_STRSIZE option;
4688                                  * had this been set, we would expect to have
4689                                  * a non-zero size value in the "pushtr".
4691                                 tupregs[ttop].dttk_size =
4692                                     dtrace_strlen((char *)(uintptr_t)regs[rd],
4693                                     regs[r2] ? regs[r2] :
4694                                     dtrace_strsize_default) + 1;
4696                                 tupregs[ttop].dttk_size = regs[r2];
4699                         tupregs[ttop++].dttk_value = regs[rd];
4703                         if (ttop == DIF_DTR_NREGS) {
4704                                 *flags |= CPU_DTRACE_TUPOFLOW;
4708                         tupregs[ttop].dttk_value = regs[rd];
4709                         tupregs[ttop++].dttk_size = 0;
4717                 case DIF_OP_FLUSHTS:
4722                 case DIF_OP_LDTAA: {
4723                         dtrace_dynvar_t *dvar;
4724                         dtrace_key_t *key = tupregs;
4725                         uint_t nkeys = ttop;
4727                         id = DIF_INSTR_VAR(instr);
4728                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4729                         id -= DIF_VAR_OTHER_UBASE;
4731                         key[nkeys].dttk_value = (uint64_t)id;
4732                         key[nkeys++].dttk_size = 0;
4734                         if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
4735                                 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
4736                                 key[nkeys++].dttk_size = 0;
4737                                 v = &vstate->dtvs_tlocals[id];
4739                                 v = &vstate->dtvs_globals[id]->dtsv_var;
4742                         dvar = dtrace_dynvar(dstate, nkeys, key,
4743                             v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
4744                             v->dtdv_type.dtdt_size : sizeof (uint64_t),
4745                             DTRACE_DYNVAR_NOALLOC);
4752                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4753                                 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
4755                                 regs[rd] = *((uint64_t *)dvar->dtdv_data);
4762                 case DIF_OP_STTAA: {
4763                         dtrace_dynvar_t *dvar;
4764                         dtrace_key_t *key = tupregs;
4765                         uint_t nkeys = ttop;
4767                         id = DIF_INSTR_VAR(instr);
4768                         ASSERT(id >= DIF_VAR_OTHER_UBASE);
4769                         id -= DIF_VAR_OTHER_UBASE;
4771                         key[nkeys].dttk_value = (uint64_t)id;
4772                         key[nkeys++].dttk_size = 0;
4774                         if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
4775                                 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
4776                                 key[nkeys++].dttk_size = 0;
4777                                 v = &vstate->dtvs_tlocals[id];
4779                                 v = &vstate->dtvs_globals[id]->dtsv_var;
4782                         dvar = dtrace_dynvar(dstate, nkeys, key,
4783                             v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
4784                             v->dtdv_type.dtdt_size : sizeof (uint64_t),
4785                             regs[rd] ? DTRACE_DYNVAR_ALLOC :
4786                             DTRACE_DYNVAR_DEALLOC);
4791                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
4792                                 dtrace_vcopy((void *)(uintptr_t)regs[rd],
4793                                     dvar->dtdv_data, &v->dtdv_type);
4795                                 *((uint64_t *)dvar->dtdv_data) = regs[rd];
4801                 case DIF_OP_ALLOCS: {
4802                         uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
4803                         size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
4805                         if (mstate->dtms_scratch_ptr + size >
4806                             mstate->dtms_scratch_base +
4807                             mstate->dtms_scratch_size) {
4808                                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4811                                 dtrace_bzero((void *)
4812                                     mstate->dtms_scratch_ptr, size);
4813                                 mstate->dtms_scratch_ptr += size;
4820                         if (!dtrace_canstore(regs[rd], regs[r2],
4822                                 *flags |= CPU_DTRACE_BADADDR;
4827                         dtrace_bcopy((void *)(uintptr_t)regs[r1],
4828                             (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
4832                         if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
4833                                 *flags |= CPU_DTRACE_BADADDR;
4837                         *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
4841                         if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
4842                                 *flags |= CPU_DTRACE_BADADDR;
4847                                 *flags |= CPU_DTRACE_BADALIGN;
4851                         *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
4855                         if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
4856                                 *flags |= CPU_DTRACE_BADADDR;
4861                                 *flags |= CPU_DTRACE_BADALIGN;
4865                         *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
4869                         if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
4870                                 *flags |= CPU_DTRACE_BADADDR;
4874 #if !defined(__APPLE__)
4877                         if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
4878 #endif /* __APPLE__ */
4879                                 *flags |= CPU_DTRACE_BADALIGN;
4883                         *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
4888         if (!(*flags & CPU_DTRACE_FAULT))
4891         mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
4892         mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
4898 dtrace_action_breakpoint(dtrace_ecb_t *ecb)
4900         dtrace_probe_t *probe = ecb->dte_probe;
4901         dtrace_provider_t *prov = probe->dtpr_provider;
4902         char c[DTRACE_FULLNAMELEN + 80], *str;
4903         char *msg = "dtrace: breakpoint action at probe ";
4904         char *ecbmsg = " (ecb ";
4905         uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
4906         uintptr_t val = (uintptr_t)ecb;
4907         int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
4909         if (dtrace_destructive_disallow)
4913          * It's impossible to be taking action on the NULL probe.
4915         ASSERT(probe != NULL);
4918          * This is a poor man's (destitute man's?) sprintf():  we want to 
4919          * print the provider name, module name, function name and name of 
4920          * the probe, along with the hex address of the ECB with the breakpoint 
4921          * action -- all of which we must place in the character buffer by 
4924         while (*msg != '\0')
4927         for (str = prov->dtpv_name; *str != '\0'; str++)
4931         for (str = probe->dtpr_mod; *str != '\0'; str++)
4935         for (str = probe->dtpr_func; *str != '\0'; str++)
4939         for (str = probe->dtpr_name; *str != '\0'; str++)
4942         while (*ecbmsg != '\0')
4945         while (shift >= 0) {
4946                 mask = (uintptr_t)0xf << shift;
4948                 if (val >= ((uintptr_t)1 << shift))
4949                         c[i++] = "0123456789abcdef"[(val & mask) >> shift];
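
/*
 * A standalone sketch of the nibble loop above: walk the value from
 * the most significant nibble down, relying on the fact that once
 * val >= (1 << shift) holds it holds for every smaller shift, so
 * leading zeroes are skipped while embedded ones are kept.  The name
 * and the zero fallback here are hypothetical additions.
 */
#include <stdint.h>

static int
fmt_hex(uintptr_t val, char *buf)
{
        int shift = (int)(sizeof (uintptr_t) * 8) - 4;
        int i = 0;

        for (; shift >= 0; shift -= 4) {
                uintptr_t mask = (uintptr_t)0xf << shift;

                if (val >= ((uintptr_t)1 << shift))
                        buf[i++] = "0123456789abcdef"[(val & mask) >> shift];
        }
        if (i == 0)
                buf[i++] = '0';                 /* val was zero */
        buf[i] = '\0';
        return (i);
}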
4960 dtrace_action_panic(dtrace_ecb_t *ecb)
4962         dtrace_probe_t *probe = ecb->dte_probe;
4965          * It's impossible to be taking action on the NULL probe.
4967         ASSERT(probe != NULL);
4969         if (dtrace_destructive_disallow)
4972         if (dtrace_panicked != NULL)
4975 #if !defined(__APPLE__)
4976         if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
4979         if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
4981 #endif /* __APPLE__ */
4984          * We won the right to panic.  (We want to be sure that only one
4985          * thread calls panic() from dtrace_probe(), and that panic() is
4986          * called exactly once.)
4988         dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
4989             probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
4990             probe->dtpr_func, probe->dtpr_name, (void *)ecb);
4992 #if defined(__APPLE__)
4993         /* Mac OS X debug feature -- can return from panic() */
4994         dtrace_panicked = NULL;
4995 #endif /* __APPLE__ */
4999 dtrace_action_raise(uint64_t sig)
5001         if (dtrace_destructive_disallow)
5005                 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5009 #if !defined(__APPLE__)
5011          * raise() has a queue depth of 1 -- we ignore all subsequent
5012          * invocations of the raise() action.
5014         if (curthread->t_dtrace_sig == 0)
5015                 curthread->t_dtrace_sig = (uint8_t)sig;
5017         curthread->t_sig_check = 1;
5020         uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
5022         if (uthread && uthread->t_dtrace_sig == 0) {
5023                 uthread->t_dtrace_sig = sig;
5024                 psignal(current_proc(), (int)sig);
5026 #endif /* __APPLE__ */
5030 dtrace_action_stop(void) 
5032         if (dtrace_destructive_disallow)
5035 #if !defined(__APPLE__)
5036         if (!curthread->t_dtrace_stop) {
5037                 curthread->t_dtrace_stop = 1;
5038                 curthread->t_sig_check = 1;
5042         psignal(current_proc(), SIGSTOP);
5043 #endif /* __APPLE__ */
5047 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
5050         volatile uint16_t *flags;
5053         if (dtrace_destructive_disallow)
5056         flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
5058         now = dtrace_gethrtime();
5060         if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
5062                  * We need to advance the mark to the current time.
5064                 cpu->cpu_dtrace_chillmark = now;
5065                 cpu->cpu_dtrace_chilled = 0;
5069          * Now check to see if the requested chill time would take us over
5070          * the maximum amount of time allowed in the chill interval.  (Or
5071          * worse, if the calculation itself induces overflow.)
5073         if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
5074             cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
5075                 *flags |= CPU_DTRACE_ILLOP;
5079         while (dtrace_gethrtime() - now < val)
5083          * Normally, we assure that the value of the variable "timestamp" does
5084          * not change within an ECB.  The presence of chill() represents an
5085          * exception to this rule, however.
5087         mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
5088         cpu->cpu_dtrace_chilled += val;
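
/*
 * The chill() guard above checks both the budget and the arithmetic
 * itself: if the accumulated time plus the request wraps, the sum
 * compares below the accumulated time.  The same pattern as a
 * hypothetical unsigned helper (the kernel uses signed hrtime_t):
 */
#include <stdint.h>

static int
charge_budget(uint64_t *consumed, uint64_t request, uint64_t max)
{
        if (*consumed + request > max ||
            *consumed + request < *consumed)    /* wrapped around */
                return (-1);                    /* refuse the request */

        *consumed += request;
        return (0);
}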
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);
	}

	while (offs < strsize)
		str[offs++] = '\0';

out:
	mstate->dtms_scratch_ptr = old;
}
/*
 * If you're looking for the epicenter of DTrace, you just found it.  This
 * is the function called by the provider to fire a probe -- from which all
 * subsequent probe-context DTrace activity emanates.
 */
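/*
 * Illustrative call (annotation, not original source): a provider fires a
 * probe from the instrumented code path simply by invoking
 *
 *	dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
 *
 * where 'id' is the dtrace_id_t previously returned to the provider by
 * dtrace_probe_create(), and the five arguments surface as arg0..arg4 in
 * the D program.
 */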
#if !defined(__APPLE__)
void
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
#else
static void
__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
#endif /* __APPLE__ */
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;
	dtrace_ecb_t *ecb;
	dtrace_action_t *act;
	intptr_t offs;
	size_t size;
	int vtime, onintr;
	volatile uint16_t *flags;
	hrtime_t now;

#if !defined(__APPLE__)
	/*
	 * Kick out immediately if this CPU is still being born (in which case
	 * curthread will be set to -1)
	 */
	if ((uintptr_t)curthread & 1)
		return;
#endif /* __APPLE__ */
	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

#if !defined(__APPLE__)
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache) {
#else
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
#endif /* __APPLE__ */
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	if (panic_quiesce) {
		/*
		 * We don't trace anything if we're panicking.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

#if !defined(__APPLE__)
	now = dtrace_gethrtime();
	vtime = dtrace_vtime_references != 0;

	if (vtime && curthread->t_dtrace_start)
		curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
#else
	vtime = dtrace_vtime_references != 0;

	if (vtime) {
		int64_t dtrace_accum_time, recent_vtime;
		thread_t thread = current_thread();

		dtrace_accum_time = dtrace_get_thread_tracing(thread); /* Time spent inside DTrace so far (nanoseconds) */

		if (dtrace_accum_time >= 0) {
			recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread)); /* up to the moment thread vtime */

			recent_vtime = recent_vtime - dtrace_accum_time; /* Time without DTrace contribution */

			dtrace_set_thread_vtime(thread, recent_vtime);
		}
	}

	now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
#endif /* __APPLE__ */
	mstate.dtms_probe = probe;
	mstate.dtms_arg[0] = arg0;
	mstate.dtms_arg[1] = arg1;
	mstate.dtms_arg[2] = arg2;
	mstate.dtms_arg[3] = arg3;
	mstate.dtms_arg[4] = arg4;

	flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;
		int committed = 0;
		caddr_t tomax;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val':  by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */
		uint64_t val;

		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)
				continue;
		}

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)
				continue;

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
				continue;
			}
		}
#if defined(__APPLE__)
		/*
		 * If the thread on which this probe has fired belongs to a process marked P_LNOATTACH
		 * then this enabling is not permitted to observe it. Move along, nothing to see here.
		 */
		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
			continue;
		}
#endif /* __APPLE__ */

		if (ecb->dte_cond) {
			/*
			 * If the dte_cond bits indicate that this
			 * consumer is only allowed to see user-mode firings
			 * of this probe, call the provider's dtps_usermode()
			 * entry point to check that the probe was fired
			 * while in a user context. Skip this ECB if that's
			 * not the case.
			 */
			if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
			    prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg) == 0)
				continue;

			/*
			 * This is more subtle than it looks. We have to be
			 * absolutely certain that CRED() isn't going to
			 * change out from under us so it's only legit to
			 * examine that structure if we're in constrained
			 * situations. Currently, the only time we'll perform
			 * this check is if a non-super-user has enabled the
			 * profile or syscall providers -- providers that
			 * allow visibility of all processes. For the
			 * profile case, the check above will ensure that
			 * we're examining a user context.
			 */
			if (ecb->dte_cond & DTRACE_COND_OWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;
#if !defined(__APPLE__)
				proc_t *proc;
#endif /* __APPLE__ */

				ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
				if ((cr = CRED()) == NULL ||
#else
				if ((cr = dtrace_CRED()) == NULL ||
#endif /* __APPLE__ */
				    s_cr->cr_uid != cr->cr_uid ||
				    s_cr->cr_uid != cr->cr_ruid ||
				    s_cr->cr_uid != cr->cr_suid ||
				    s_cr->cr_gid != cr->cr_gid ||
				    s_cr->cr_gid != cr->cr_rgid ||
				    s_cr->cr_gid != cr->cr_sgid ||
#if !defined(__APPLE__)
				    (proc = ttoproc(curthread)) == NULL ||
				    (proc->p_flag & SNOCD))
#else
				    1) /* Darwin omits "No Core Dump" flag. */
#endif /* __APPLE__ */
					continue;
			}

			if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;

				ASSERT(s_cr != NULL);

#if !defined(__APPLE__) /* Darwin doesn't do zones. */
				if ((cr = CRED()) == NULL ||
				    s_cr->cr_zone->zone_id !=
				    cr->cr_zone->zone_id)
					continue;
#endif /* __APPLE__ */
			}
		}
		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t current;

				do {
					current = state->dts_activity;
				} while (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_KILLED) != current);

				continue;
			}
		}
		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)
			continue;

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		if (ecb->dte_size != 0)
			DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (pred != NULL) {
			dtrace_difo_t *dp = pred->dtp_difo;
			uint64_t rval;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);
#if !defined(__APPLE__)
					curthread->t_predcache = cid;
#else
					dtrace_set_thread_predcache(current_thread(), cid);
#endif /* __APPLE__ */
				}

				continue;
			}
		}
		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {
			size_t valoffs;
			dtrace_difo_t *dp;
			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				uint64_t v = 0xbad;
				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)
					continue;

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
				continue;
			}

			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_stop();
				continue;

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);
				continue;

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);
				continue;

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))
					continue;

				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)arg0);

				continue;

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state))
					continue;

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&
				    CPU_ON_INTR(CPU)) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg) + 1;

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

					continue;
				}

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),
					    rec->dtrd_arg);
					continue;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    (tomax + valoffs),
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				continue;

			default:
				break;
			}

			dp = act->dta_difo;
			ASSERT(dp != NULL);
			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)
				continue;

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE:
				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,
				    cpuid, val);

				if (buf == NULL) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,
				    state, NULL);

				if (offs < 0) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size != 0)
					DTRACE_STORE(uint32_t, tomax, offs,
					    ecb->dte_epid);
				continue;

			case DTRACEACT_CHILL:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_chill(&mstate, val);
				continue;

			case DTRACEACT_RAISE:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_raise(val);
				continue;

			case DTRACEACT_COMMIT:
				ASSERT(!committed);

				/*
				 * We need to commit our buffer state.
				 */
				if (ecb->dte_size)
					buf->dtb_offset = offs + ecb->dte_size;
				buf = &state->dts_buffer[cpuid];
				dtrace_speculation_commit(state, cpuid, val);
				committed = 1;
				continue;

			case DTRACEACT_DISCARD:
				dtrace_speculation_discard(state, cpuid, val);
				continue;

			case DTRACEACT_DIFEXPR:
			case DTRACEACT_LIBACT:
			case DTRACEACT_PRINTF:
			case DTRACEACT_PRINTA:
			case DTRACEACT_SYSTEM:
			case DTRACEACT_FREOPEN:
				break;

			case DTRACEACT_SYM:
			case DTRACEACT_MOD:
				if (!dtrace_priv_kernel(state))
					continue;
				break;

#if !defined(__APPLE__)
			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				struct pid *pid = curthread->t_procp->p_pidp;

				if (!dtrace_priv_proc(state))
					continue;

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)pid->pid_id);
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

				continue;
			}
#else
			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				if (!dtrace_priv_proc(state))
					continue;

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)proc_selfpid());
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

				continue;
			}
#endif /* __APPLE__ */
			case DTRACEACT_EXIT: {
				/*
				 * For the exit action, we are going to attempt
				 * to atomically set our activity to be
				 * draining.  If this fails (either because
				 * another CPU has beat us to the exit action,
				 * or because our current activity is something
				 * other than ACTIVE or WARMUP), we will
				 * continue.  This assures that the exit action
				 * can be successfully recorded at most once
				 * when we're in the ACTIVE state.  If we're
				 * encountering the exit() action while in
				 * COOLDOWN, however, we want to honor the new
				 * status code.  (We know that we're the only
				 * thread in COOLDOWN, so there is no race.)
				 */
				void *activity = &state->dts_activity;
				dtrace_activity_t current = state->dts_activity;

				if (current == DTRACE_ACTIVITY_COOLDOWN)
					break;

				if (current != DTRACE_ACTIVITY_WARMUP)
					current = DTRACE_ACTIVITY_ACTIVE;

				if (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_DRAINING) != current) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				break;
			}

			default:
				ASSERT(0);
			}
			if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t end = valoffs + size;

				/*
				 * If this is a string, we're going to only
				 * load until we find the zero byte -- after
				 * which we'll store zero bytes.
				 */
				if (dp->dtdo_rtype.dtdt_kind ==
				    DIF_TYPE_STRING) {
					char c = '\0' + 1;
					int intuple = act->dta_intuple;
					size_t s;

					for (s = 0; s < size; s++) {
						if (c != '\0')
							c = dtrace_load8(val++);

						DTRACE_STORE(uint8_t, tomax,
						    valoffs++, c);

						if (c == '\0' && intuple)
							break;
					}

					continue;
				}

				while (valoffs < end) {
					DTRACE_STORE(uint8_t, tomax, valoffs++,
					    dtrace_load8(val++));
				}

				continue;
			}

			switch (size) {
			case 0:
				break;
			case sizeof (uint8_t):
				DTRACE_STORE(uint8_t, tomax, valoffs, val);
				break;
			case sizeof (uint16_t):
				DTRACE_STORE(uint16_t, tomax, valoffs, val);
				break;
			case sizeof (uint32_t):
				DTRACE_STORE(uint32_t, tomax, valoffs, val);
				break;
			case sizeof (uint64_t):
				DTRACE_STORE(uint64_t, tomax, valoffs, val);
				break;
			default:
				/*
				 * Any other size should have been returned by
				 * reference, not by value.
				 */
				ASSERT(0);
				break;
			}
		}
		if (*flags & CPU_DTRACE_DROP)
			continue;

		if (*flags & CPU_DTRACE_FAULT) {
			int ndx;
			dtrace_action_t *err;

			buf->dtb_errors++;

			if (probe->dtpr_id == dtrace_probeid_error) {
				/*
				 * There's nothing we can do -- we had an
				 * error on the error probe.  We bump an
				 * error counter to at least indicate that
				 * this condition happened.
				 */
				dtrace_error(&state->dts_dblerrors);
				continue;
			}

			if (vtime) {
				/*
				 * Before recursing on dtrace_probe(), we
				 * need to explicitly clear out our start
				 * time to prevent it from being accumulated
				 * into t_dtrace_vtime.
				 */
#if !defined(__APPLE__)
				curthread->t_dtrace_start = 0;
#else
				/* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */
				dtrace_set_thread_tracing(current_thread(),
				    (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
#endif /* __APPLE__ */
			}

			/*
			 * Iterate over the actions to figure out which action
			 * we were processing when we experienced the error.
			 * Note that act points _past_ the faulting action; if
			 * act is ecb->dte_action, the fault was in the
			 * predicate, if it's ecb->dte_action->dta_next it's
			 * in action #1, and so on.
			 */
			for (err = ecb->dte_action, ndx = 0;
			    err != act; err = err->dta_next, ndx++)
				continue;

			dtrace_probe_error(state, ecb->dte_epid, ndx,
			    (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
			    mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
			    cpu_core[cpuid].cpuc_dtrace_illval);

			continue;
		}

		if (!committed)
			buf->dtb_offset = offs + ecb->dte_size;
	}
#if !defined(__APPLE__)
	if (vtime)
		curthread->t_dtrace_start = dtrace_gethrtime();
#else
	if (vtime) {
		thread_t thread = current_thread();
		int64_t t = dtrace_get_thread_tracing(thread);

		if (t >= 0) {
			/* Usual case, accumulate time spent here into t_dtrace_tracing */
			dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now));
		} else {
			/* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
			dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t);
		}
	}
#endif /* __APPLE__ */

	dtrace_interrupt_enable(cookie);
}
#if defined(__APPLE__)
/* Don't allow a thread to re-enter dtrace_probe() */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	thread_t thread = current_thread();

	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	}
}
#endif /* __APPLE__ */
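/*
 * Design note (annotation, not original source): dtrace_probeid_error
 * must bypass the re-entrancy guard because the error probe is fired from
 * within __dtrace_probe() itself (via dtrace_probe_error()) while the
 * thread's reentering flag is still set; any other recursive firing is
 * silently dropped by the guard above.
 */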
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}
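/*
 * Annotation (not original source): this is the classic PJW/ELF-style
 * string hash -- shift four bits in per character, then fold any bits
 * that reach the top nibble back into the low-order bits.  It keeps
 * short, identifier-like strings (probe tuple elements) well distributed
 * while remaining cheap enough to run on every probe add/lookup/remove.
 */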
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}
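/*
 * Usage sketch (annotation, not original source): the three offsets make
 * this single implementation generic over which tuple element a table
 * indexes.  The framework builds its per-element hashes along the lines
 * of:
 *
 *	dtrace_bymod = dtrace_hash_create(
 *	    offsetof(dtrace_probe_t, dtpr_mod),
 *	    offsetof(dtrace_probe_t, dtpr_nextmod),
 *	    offsetof(dtrace_probe_t, dtpr_prevmod));
 *
 * letting DTRACE_HASHSTR(), DTRACE_HASHNEXT() and DTRACE_HASHPREV()
 * locate the string and chain-link members of any probe by offset.
 */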
#if !defined(__APPLE__) /* Quiet compiler warning */
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}
#endif /* __APPLE__ */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}
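/*
 * Annotation (not original source): a "bucket" here chains together all
 * probes whose hashed tuple element compares equal (dthb_len counts that
 * chain), while dth_nbuckets counts distinct keys.  The resize test above
 * therefore grows the table based on the variety of names seen, not on
 * the raw number of probes.
 */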
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}
static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}
static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
			break;
	}

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);
			ASSERT(b != NULL);

			if (b == bucket) {
				hash->dth_tab[ndx] = bucket->dthb_next;
			} else {
				while (b->dthb_next != bucket)
					b = b->dthb_next;
				b->dthb_next = bucket->dthb_next;
			}

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));
			return;
		}

		bucket->dthb_chain = *nextp;
	} else {
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
	}

	if (*nextp != NULL)
		*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}
#define DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
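/*
 * Examples (annotation, not original source): "fbt", "mach_kernel" and
 * "vminfo" all pass; a name is rejected if it begins with a digit (e.g.
 * "9fs") or contains a character outside [A-Za-z0-9`_.-].  The backquote
 * is tolerated after the first character because scoped D identifiers
 * use it.
 */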
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}
#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
#if !defined(__APPLE__)
	dtrace_errthread = curthread;
#else
	dtrace_errthread = current_thread();
#endif /* __APPLE__ */

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	lck_mtx_unlock(&dtrace_errlock);
}
#endif	/* DTRACE_ERRDEBUG */
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}
/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}
/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = ""; /* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}
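/*
 * Examples (annotation, not original source): "ufs_*" matches any string
 * with that prefix, "read?" matches "read0" but not "read", and
 * "[!a-c]lose" inverts the character class via the notflag path.  The
 * depth argument bounds the '*' recursion at DTRACE_PROBEKEY_MAXDEPTH so
 * a pathological pattern cannot exhaust the kernel stack.
 */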
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			(void) (*matched)(probe, arg);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
				break;
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
			break;
	}

	return (nmatched);
}
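/*
 * Example (annotation, not original source): for a description like
 * "syscall::read:entry", the module element is empty (dtrace_match_nul)
 * while "read" and "entry" are literal strings, so both dtrace_byfunc and
 * dtrace_byname are candidates; whichever reports fewer collisions for
 * its key is walked instead of scanning all of dtrace_probes.
 */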
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}
/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_usermode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_usermode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;

	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}

	provider->dtpv_pops = *pops;

	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
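/*
 * Usage sketch (annotation, not original source; "example_attr",
 * "example_pops" and "example_id" are hypothetical names): a provider
 * typically registers from its attach path roughly as follows:
 *
 *	static dtrace_provider_id_t example_id;
 *
 *	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
 *	    NULL, &example_pops, NULL, &example_id) != 0)
 *		return (DDI_FAILURE);
 *
 * and later tears itself down with dtrace_unregister(example_id).
 */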
/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
		ASSERT(dtrace_devi != NULL);
		lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * We have at least one ECB; we can't remove this provider.
		 */
		if (!self) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
	}

	if ((prev = dtrace_provider) == old) {
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}
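/*
 * Illustrative sketch (not part of the original source): how a provider's
 * detach path might drive dtrace_unregister().  The "example_*" names are
 * hypothetical.
 */
#if 0
static dtrace_provider_id_t example_provider_id;   /* from dtrace_register() */

static int
example_provider_detach(void)
{
	/*
	 * dtrace_unregister() fails with EBUSY while /dev/dtrace is open or
	 * anonymous enablings exist; invalidating the provider first means
	 * a later retry can succeed despite open consumers (see the check
	 * on dtpv_defunct above).
	 */
	if (dtrace_unregister(example_provider_id) != 0) {
		dtrace_invalidate(example_provider_id);
		return (EBUSY);
	}

	return (0);
}
#endif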
/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	pvp->dtpv_defunct = 1;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
}
/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}
/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */
/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		lck_mtx_lock(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#if !defined(__APPLE__)
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
#else
	probe = zalloc(dtrace_probe_t_zone);
	bzero(probe, sizeof (dtrace_probe_t));
#endif

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		lck_mtx_unlock(&dtrace_lock);

	return (id);
}
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}
static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}
/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
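/*
 * Illustrative sketch (not part of the original source): a provider's
 * dtps_provide entry point typically pairs dtrace_probe_lookup() with
 * dtrace_probe_create() so that repeated provide requests don't create
 * duplicate probes.  All "example_*" names are hypothetical.
 */
#if 0
static dtrace_provider_id_t example_provider_id;

static void
example_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/* Only create the probe if it doesn't already exist. */
	if (dtrace_probe_lookup(example_provider_id, "mach_kernel",
	    "example_func", "entry") != 0)
		return;

	(void) dtrace_probe_create(example_provider_id, "mach_kernel",
	    "example_func", "entry", 0, NULL);
}
#endif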
/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);

	return (rval);
}
/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strlcpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);

	(void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
	(void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
}
/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
#if !defined(__APPLE__)
	struct modctl *ctl;
#endif
	int all = 0;

	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#if !defined(__APPLE__)
		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		lck_mtx_lock(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		lck_mtx_unlock(&mod_lock);
#else
#if 0 /* XXX Workaround for PR_4643546 XXX */
		simple_lock(&kmod_lock);

		kmod_info_t *ktl = kmod;
		while (ktl != NULL) {
			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
			ktl = ktl->next;
		}

		simple_unlock(&kmod_lock);
#else
		/*
		 * Don't bother to iterate over the kmod list. At present only fbt
		 * offers a provide_module in its dtpv_pops, and then it ignores the
		 * module anyway.
		 */
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
#endif
#endif /* __APPLE__ */
	} while (all && (prv = prv->dtpv_next) != NULL);
}
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * indicated by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
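/*
 * Usage note: the offset passed to dtrace_probe_foreach() selects a member
 * of the provider's dtrace_pops_t.  The framework's suspend and resume
 * paths, for example, reduce to calls of the form:
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
 */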
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}
/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}
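/*
 * Usage note: a dof_attr_t packs the name, data and class stability codes of
 * a single attribute into one 32-bit word; DOF_ATTR_NAME(), DOF_ATTR_DATA()
 * and DOF_ATTR_CLASS() unpack the individual fields.  For example
 * (illustrative only), an attribute constructed in user-land as
 *
 *	DOF_ATTR(DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING,
 *	    DTRACE_CLASS_COMMON)
 *
 * round-trips through dtrace_dofattr2attr() into the dtat_name, dtat_data
 * and dtat_class fields of a dtrace_attribute_t.
 */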
static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
#if defined(__APPLE__)
		dhpb.dthpb_base = dhp->dofhp_addr;
#else
		dhpb.dthpb_base = probe->dofpr_addr;
#endif
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}
static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}
static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}
static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-provider %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */
	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	lck_mtx_unlock(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	lck_mtx_unlock(&dtrace_meta_lock);

	return (0);
}
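/*
 * Illustrative sketch (not part of the original source): a user-land probe
 * meta-provider (fasttrap, for instance) registers by supplying a
 * dtrace_mops_t.  The "example_*" names are hypothetical, and the member
 * order shown assumes the dtrace_mops_t layout in <sys/dtrace.h>; the
 * signatures mirror how this file invokes the dtms_* operations.
 */
#if 0
static void example_create_probe(void *, void *, dtrace_helper_probedesc_t *);
static void *example_provide_pid(void *, dtrace_helper_provdesc_t *, pid_t);
static void example_remove_pid(void *, dtrace_helper_provdesc_t *, pid_t);

static dtrace_mops_t example_mops = {
	example_create_probe,		/* dtms_create_probe */
	example_provide_pid,		/* dtms_provide_pid */
	example_remove_pid		/* dtms_remove_pid */
};

static dtrace_meta_provider_id_t example_meta_id;

static int
example_meta_attach(void)
{
	return (dtrace_meta_register("example", &example_mops, NULL,
	    &example_meta_id));
}
#endif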
int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}
/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}
/*
 * Validate a DTrace DIF object by checking the IR instructions.  The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
static int
dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
    cred_t *cr)
{
	int err = 0, i;
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int kcheck;
	uint_t pc;

	kcheck = cr == NULL ||
	    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE) == 0;

	dp->dtdo_destructive = 0;

	for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t r2 = DIF_INSTR_R2(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t rs = DIF_INSTR_RS(instr);
		uint_t label = DIF_INSTR_LABEL(instr);
		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t type = DIF_INSTR_TYPE(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_ALLOCS:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDSB:
		case DIF_OP_LDSH:
		case DIF_OP_LDSW:
		case DIF_OP_LDUB:
		case DIF_OP_LDUH:
		case DIF_OP_LDUW:
		case DIF_OP_LDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			if (kcheck)
				dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
				    DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
			break;

		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to 0 address\n");
			break;

		case DIF_OP_CMP:
		case DIF_OP_SCMP:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_TST:
			if (r1 >= nregs)
				err += efunc(pc, "invalid register %u\n", r1);
			if (r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
			if (label >= dp->dtdo_len) {
				err += efunc(pc, "invalid branch target %u\n",
				    label);
			}
			if (label <= pc) {
				err += efunc(pc, "backward branch to %u\n",
				    label);
			}
			break;

		case DIF_OP_RET:
			if (r1 != 0 || r2 != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			break;

		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
			if (r1 != 0 || r2 != 0 || rd != 0)
				err += efunc(pc, "non-zero reserved bits\n");
			break;

		case DIF_OP_SETX:
			if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
				err += efunc(pc, "invalid integer ref %u\n",
				    DIF_INSTR_INTEGER(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_SETS:
			if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
				err += efunc(pc, "invalid string ref %u\n",
				    DIF_INSTR_STRING(instr));
			}
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDGA:
		case DIF_OP_LDTA:
			if (r1 > DIF_VAR_ARRAY_MAX)
				err += efunc(pc, "invalid array %u\n", r1);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_LDGS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
		case DIF_OP_LDTS:
		case DIF_OP_LDLS:
			if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");
			break;

		case DIF_OP_STGS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
		case DIF_OP_STTS:
		case DIF_OP_STLS:
			if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
				err += efunc(pc, "invalid variable %u\n", v);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			break;

		case DIF_OP_CALL:
			if (subr > DIF_SUBR_MAX)
				err += efunc(pc, "invalid subr %u\n", subr);
			if (rd >= nregs)
				err += efunc(pc, "invalid register %u\n", rd);
			if (rd == 0)
				err += efunc(pc, "cannot write to %r0\n");

			if (subr == DIF_SUBR_COPYOUT ||
			    subr == DIF_SUBR_COPYOUTSTR) {
				dp->dtdo_destructive = 1;
			}
			break;

		case DIF_OP_PUSHTR:
			if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid ref type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;

		case DIF_OP_PUSHTV:
			if (type != DIF_TYPE_CTF)
				err += efunc(pc, "invalid val type %u\n", type);
			if (r2 >= nregs)
				err += efunc(pc, "invalid register %u\n", r2);
			if (rs >= nregs)
				err += efunc(pc, "invalid register %u\n", rs);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	if (dp->dtdo_len != 0 &&
	    DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
		err += efunc(dp->dtdo_len - 1,
		    "expected 'ret' as last DIF instruction\n");
	}

	if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
		/*
		 * If we're not returning by reference, the size must be either
		 * 0 or the size of one of the base types.
		 */
		switch (dp->dtdo_rtype.dtdt_size) {
		case 0:
		case sizeof (uint8_t):
		case sizeof (uint16_t):
		case sizeof (uint32_t):
		case sizeof (uint64_t):
			break;

		default:
			err += efunc(dp->dtdo_len - 1, "bad return size");
		}
	}

	for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
		dtrace_diftype_t *vt, *et;
		uint_t id, ndx;

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
		    v->dtdv_scope != DIFV_SCOPE_THREAD &&
		    v->dtdv_scope != DIFV_SCOPE_LOCAL) {
			err += efunc(i, "unrecognized variable scope %d\n",
			    v->dtdv_scope);
			break;
		}

		if (v->dtdv_kind != DIFV_KIND_ARRAY &&
		    v->dtdv_kind != DIFV_KIND_SCALAR) {
			err += efunc(i, "unrecognized variable type %d\n",
			    v->dtdv_kind);
			break;
		}

		if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
			err += efunc(i, "%d exceeds variable id limit\n", id);
			break;
		}

		if (id < DIF_VAR_OTHER_UBASE)
			continue;

		/*
		 * For user-defined variables, we need to check that this
		 * definition is identical to any previous definition that we
		 * encountered.
		 */
		ndx = id - DIF_VAR_OTHER_UBASE;

		switch (v->dtdv_scope) {
		case DIFV_SCOPE_GLOBAL:
			if (ndx < vstate->dtvs_nglobals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_globals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;

		case DIFV_SCOPE_THREAD:
			if (ndx < vstate->dtvs_ntlocals)
				existing = &vstate->dtvs_tlocals[ndx];
			break;

		case DIFV_SCOPE_LOCAL:
			if (ndx < vstate->dtvs_nlocals) {
				dtrace_statvar_t *svar;

				if ((svar = vstate->dtvs_locals[ndx]) != NULL)
					existing = &svar->dtsv_var;
			}

			break;
		}

		vt = &v->dtdv_type;

		if (vt->dtdt_flags & DIF_TF_BYREF) {
			if (vt->dtdt_size == 0) {
				err += efunc(i, "zero-sized variable\n");
				break;
			}

			if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
			    vt->dtdt_size > dtrace_global_maxsize) {
				err += efunc(i, "oversized by-ref global\n");
				break;
			}
		}

		if (existing == NULL || existing->dtdv_id == 0)
			continue;

		ASSERT(existing->dtdv_id == v->dtdv_id);
		ASSERT(existing->dtdv_scope == v->dtdv_scope);

		if (existing->dtdv_kind != v->dtdv_kind)
			err += efunc(i, "%d changed variable kind\n", id);

		et = &existing->dtdv_type;

		if (vt->dtdt_flags != et->dtdt_flags) {
			err += efunc(i, "%d changed variable type flags\n", id);
			break;
		}

		if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
			err += efunc(i, "%d changed variable type size\n", id);
			break;
		}
	}

	return (err);
}
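/*
 * Note on rule 6: because every branch target must lie strictly after the
 * branch itself, validated DIF is loop-free, so execution time is bounded
 * by dtdo_len.  A minimal conforming object is two instructions, e.g.
 * (illustrative disassembly):
 *
 *	setx	DIF_INTEGER[0], %r1	! load an integer constant into %r1
 *	ret	%r1			! rule 5: last instruction is "ret"
 */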
/*
 * Validate a DTrace DIF object that is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
static int
dtrace_difo_validate_helper(dtrace_difo_t *dp)
{
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int err = 0;
	uint_t pc;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
		case DIF_OP_ALLOCS:
		case DIF_OP_CMP:
		case DIF_OP_SCMP:
		case DIF_OP_TST:
		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
		case DIF_OP_RET:
		case DIF_OP_NOP:
		case DIF_OP_SETX:
		case DIF_OP_SETS:
		case DIF_OP_LDGA:
		case DIF_OP_LDLS:
		case DIF_OP_STGS:
		case DIF_OP_STLS:
		case DIF_OP_PUSHTR:
		case DIF_OP_PUSHTV:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
			break;

		case DIF_OP_LDGS:
			if (v >= DIF_VAR_OTHER_UBASE)
				break;

			if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
				break;

			if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
			    v == DIF_VAR_PPID || v == DIF_VAR_TID ||
			    v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
			    v == DIF_VAR_UID || v == DIF_VAR_GID)
				break;

			err += efunc(pc, "illegal variable %u\n", v);
			break;

		case DIF_OP_LDTA:
		case DIF_OP_LDTS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
			err += efunc(pc, "illegal dynamic variable load\n");
			break;

		case DIF_OP_STTS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			err += efunc(pc, "illegal dynamic variable store\n");
			break;

		case DIF_OP_CALL:
			if (subr == DIF_SUBR_ALLOCA ||
			    subr == DIF_SUBR_BCOPY ||
			    subr == DIF_SUBR_COPYIN ||
			    subr == DIF_SUBR_COPYINTO ||
			    subr == DIF_SUBR_COPYINSTR ||
			    subr == DIF_SUBR_INDEX ||
			    subr == DIF_SUBR_LLTOSTR ||
			    subr == DIF_SUBR_RINDEX ||
			    subr == DIF_SUBR_STRCHR ||
			    subr == DIF_SUBR_STRJOIN ||
			    subr == DIF_SUBR_STRRCHR ||
			    subr == DIF_SUBR_STRSTR ||
			    subr == DIF_SUBR_CHUD)
				break;

			err += efunc(pc, "invalid subr %u\n", subr);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	return (err);
}
/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 if not.
 */
static int
dtrace_difo_cacheable(dtrace_difo_t *dp)
{
	int i;

	if (dp == NULL)
		return (0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
			continue;

		switch (v->dtdv_id) {
		case DIF_VAR_CURTHREAD:
		case DIF_VAR_PID:
		case DIF_VAR_TID:
		case DIF_VAR_EXECNAME:
		case DIF_VAR_ZONENAME:
			break;

		default:
			return (0);
		}
	}

	/*
	 * This DIF object may be cacheable.  Now we need to look for any
	 * array loading instructions, any memory loading instructions, or
	 * any stores to thread-local variables.
	 */
	for (i = 0; i < dp->dtdo_len; i++) {
		uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);

		if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
		    (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
		    (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
		    op == DIF_OP_LDGA || op == DIF_OP_STTS)
			return (0);
	}

	return (1);
}
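/*
 * Usage note: this check backs predicate caching (see
 * dtrace_predicate_create() below).  A predicate such as /pid == 1234/
 * depends only on values that are invariant for a given thread and can
 * therefore be cached per-thread; a predicate that loads memory, reads an
 * array variable, or stores to a thread-local cannot.
 */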
static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dp->dtdo_refcnt++;
	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}
/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
static void
dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	uint64_t sval = 0;
	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	const dif_instr_t *text = dp->dtdo_buf;
	uint_t pc, srd = 0;
	uint_t ttop = 0;
	size_t size, ksize;
	uint_t id, i;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = text[pc];
		uint_t op = DIF_INSTR_OP(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t nkeys = 0;
		uchar_t scope;

		dtrace_key_t *key = tupregs;

		switch (op) {
		case DIF_OP_SETX:
			sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
			srd = rd;
			continue;

		case DIF_OP_STTS:
			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_size = 0;
			key[1].dttk_size = 0;
			nkeys = 2;
			scope = DIFV_SCOPE_THREAD;
			break;

		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			nkeys = ttop;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
				key[nkeys++].dttk_size = 0;

			key[nkeys++].dttk_size = 0;

			if (op == DIF_OP_STTAA) {
				scope = DIFV_SCOPE_THREAD;
			} else {
				scope = DIFV_SCOPE_GLOBAL;
			}

			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS)
				return;

			if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
				/*
				 * If the register for the size of the "pushtr"
				 * is %r0 (or the value is 0) and the type is
				 * a string, we'll use the system-wide default
				 * string size.
				 */
				tupregs[ttop++].dttk_size =
				    dtrace_strsize_default;
			} else {
				if (srd == 0)
					return;

				tupregs[ttop++].dttk_size = sval;
			}

			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS)
				return;

			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;
		}

		sval = 0;
		srd = 0;

		if (nkeys == 0)
			continue;

		/*
		 * We have a dynamic variable allocation; calculate its size.
		 */
		for (ksize = 0, i = 0; i < nkeys; i++)
			ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

		size = sizeof (dtrace_dynvar_t);
		size += sizeof (dtrace_key_t) * (nkeys - 1);
		size += ksize;

		/*
		 * Now we need to determine the size of the stored data.
		 */
		id = DIF_INSTR_VAR(instr);

		for (i = 0; i < dp->dtdo_varlen; i++) {
			dtrace_difv_t *v = &dp->dtdo_vartab[i];

			if (v->dtdv_id == id && v->dtdv_scope == scope) {
				size += v->dtdv_type.dtdt_size;
				break;
			}
		}

		if (i == dp->dtdo_varlen)
			continue;

		/*
		 * We have the size.  If this is larger than the chunk size
		 * for our dynamic variable state, reset the chunk size.
		 */
		size = P2ROUNDUP(size, sizeof (uint64_t));

		if (size > vstate->dtvs_dynvars.dtds_chunksize)
			vstate->dtvs_dynvars.dtds_chunksize = size;
	}
}
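/*
 * Worked example (illustrative): for a store such as a[pid, "foo"] = 1,
 * the tuple carries two keys -- an integer pushed with "pushtv"
 * (dttk_size 0) and a string pushed with "pushtr".  If the size register
 * for the string is %r0 (or its value is 0), the string key is assumed to
 * occupy dtrace_strsize_default bytes.  The chunk must then hold
 *
 *	sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1)
 *	    + ksize + the variable's dtdt_size,
 *
 * rounded up to a multiple of sizeof (uint64_t); dtds_chunksize grows to
 * cover the largest such allocation seen in the DIFO.
 */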
static void
dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i, oldsvars, osz, nsz, otlocals, ntlocals;
	uint_t id;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, ***svarp;
		size_t dsize = 0;
		uint8_t scope = v->dtdv_scope;
		int *np;

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			while (id >= (otlocals = vstate->dtvs_ntlocals)) {
				dtrace_difv_t *tlocals;

				if ((ntlocals = (otlocals << 1)) == 0)
					ntlocals = 1;

				osz = otlocals * sizeof (dtrace_difv_t);
				nsz = ntlocals * sizeof (dtrace_difv_t);

				tlocals = kmem_zalloc(nsz, KM_SLEEP);

				if (osz != 0) {
					bcopy(vstate->dtvs_tlocals,
					    tlocals, osz);
					kmem_free(vstate->dtvs_tlocals, osz);
				}

				vstate->dtvs_tlocals = tlocals;
				vstate->dtvs_ntlocals = ntlocals;
			}

			vstate->dtvs_tlocals[id] = *v;
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = &vstate->dtvs_locals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = NCPU * (v->dtdv_type.dtdt_size +
				    sizeof (uint64_t));
			else
				dsize = NCPU * sizeof (uint64_t);

			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = &vstate->dtvs_globals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = v->dtdv_type.dtdt_size +
				    sizeof (uint64_t);

			break;

		default:
			ASSERT(0);
		}

		while (id >= (oldsvars = *np)) {
			dtrace_statvar_t **statics;
			int newsvars, oldsize, newsize;

			if ((newsvars = (oldsvars << 1)) == 0)
				newsvars = 1;

			oldsize = oldsvars * sizeof (dtrace_statvar_t *);
			newsize = newsvars * sizeof (dtrace_statvar_t *);

			statics = kmem_zalloc(newsize, KM_SLEEP);

			if (oldsize != 0) {
				bcopy(*svarp, statics, oldsize);
				kmem_free(*svarp, oldsize);
			}

			*svarp = statics;
			*np = newsvars;
		}

		if ((svar = (*svarp)[id]) == NULL) {
			svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
			svar->dtsv_var = *v;

			if ((svar->dtsv_size = dsize) != 0) {
				svar->dtsv_data = (uint64_t)(uintptr_t)
				    kmem_zalloc(dsize, KM_SLEEP);
			}

			(*svarp)[id] = svar;
		}

		svar->dtsv_refcnt++;
	}

	dtrace_difo_chunksize(dp, vstate);
	dtrace_difo_hold(dp);
}
static dtrace_difo_t *
dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *new;
	size_t sz;

	ASSERT(dp->dtdo_buf != NULL);
	ASSERT(dp->dtdo_refcnt != 0);

	new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);

	ASSERT(dp->dtdo_buf != NULL);
	sz = dp->dtdo_len * sizeof (dif_instr_t);
	new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
	bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
	new->dtdo_len = dp->dtdo_len;

	if (dp->dtdo_strtab != NULL) {
		ASSERT(dp->dtdo_strlen != 0);
		new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
		bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
		new->dtdo_strlen = dp->dtdo_strlen;
	}

	if (dp->dtdo_inttab != NULL) {
		ASSERT(dp->dtdo_intlen != 0);
		sz = dp->dtdo_intlen * sizeof (uint64_t);
		new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
		new->dtdo_intlen = dp->dtdo_intlen;
	}

	if (dp->dtdo_vartab != NULL) {
		ASSERT(dp->dtdo_varlen != 0);
		sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
		new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
		new->dtdo_varlen = dp->dtdo_varlen;
	}

	dtrace_difo_init(new, vstate);
	return (new);
}
static void
dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	ASSERT(dp->dtdo_refcnt == 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, **svarp;
		uint_t id;
		uint8_t scope = v->dtdv_scope;
		int *np;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = vstate->dtvs_locals;
			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = vstate->dtvs_globals;
			break;

		default:
			ASSERT(0);
		}

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;
		ASSERT(id < *np);

		svar = svarp[id];
		ASSERT(svar != NULL);
		ASSERT(svar->dtsv_refcnt > 0);

		if (--svar->dtsv_refcnt > 0)
			continue;

		if (svar->dtsv_size != 0) {
			ASSERT(svar->dtsv_data != NULL);
			kmem_free((void *)(uintptr_t)svar->dtsv_data,
			    svar->dtsv_size);
		}

		svarp[id] = NULL;
		kmem_free(svar, sizeof (dtrace_statvar_t));
	}

	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
}
static void
dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}
/*
 * DTrace Format Functions
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	char *fmt, **new;
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		ASSERT(ndx != 0);
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}
static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	char *fmt;

	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}
static void
dtrace_format_destroy(dtrace_state_t *state)
{
	int i;

	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}
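/*
 * Illustrative sketch (not part of the original source): format strings are
 * interned per-state and referred to by their one-based index, which is what
 * printf()-like actions record in lieu of the string itself.
 */
#if 0
static void
example_format_usage(dtrace_state_t *state)
{
	uint16_t fmtndx;

	/* Intern the format; zero indicates failure. */
	if ((fmtndx = dtrace_format_add(state, "value: %d\n")) == 0)
		return;

	/* ... record fmtndx in a printf()-like action ... */

	dtrace_format_remove(state, fmtndx);
}
#endif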
/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID. (N.B.: the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}
static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}
8822 dtrace_predicate_release(dtrace_predicate_t 
*pred
, dtrace_vstate_t 
*vstate
) 
8824         dtrace_difo_t 
*dp 
= pred
->dtp_difo
; 
8826         lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
); 
8827         ASSERT(dp 
!= NULL 
&& dp
->dtdo_refcnt 
!= 0); 
8828         ASSERT(pred
->dtp_refcnt 
> 0); 
8830         if (--pred
->dtp_refcnt 
== 0) { 
8831                 dtrace_difo_release(pred
->dtp_difo
, vstate
); 
8832                 kmem_free(pred
, sizeof (dtrace_predicate_t
)); 
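/*
 * A sketch of the cache-ID allocation policy above (hypothetical user-space
 * code, not part of the framework): IDs come from a monotonic 32-bit counter
 * whose zero value, standing in for DTRACE_CACHEIDNONE, is reserved to mean
 * "no cache ID"; once the counter wraps back to zero, no further IDs are
 * issued, because a thread may still be caching a stale ID.
 */
#if 0
#include <stdint.h>

#define CACHEIDNONE     0       /* stands in for DTRACE_CACHEIDNONE */

static uint32_t predcache_id = CACHEIDNONE + 1;

static uint32_t
cacheid_alloc(void)
{
        if (predcache_id == CACHEIDNONE)
                return (CACHEIDNONE);   /* counter wrapped: refuse */
        return (predcache_id++);
}
#endif
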
/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
        dtrace_actdesc_t *act;

/*      ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
        arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/

        act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
        act->dtad_kind = kind;
        act->dtad_ntuple = ntuple;
        act->dtad_uarg = uarg;
        act->dtad_arg = arg;
        act->dtad_refcnt = 1;

        return (act);
}

static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
        ASSERT(act->dtad_refcnt >= 1);
        act->dtad_refcnt++;
}

static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
        dtrace_actkind_t kind = act->dtad_kind;
        dtrace_difo_t *dp;

        ASSERT(act->dtad_refcnt >= 1);

        if (--act->dtad_refcnt != 0)
                return;

        if ((dp = act->dtad_difo) != NULL)
                dtrace_difo_release(dp, vstate);

        if (DTRACEACT_ISPRINTFLIKE(kind)) {
                char *str = (char *)(uintptr_t)act->dtad_arg;

/*              ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
                (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/

                if (str != NULL)
                        kmem_free(str, strlen(str) + 1);
        }

        kmem_free(act, sizeof (dtrace_actdesc_t));
}

/*
 * DTrace ECB Functions
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
        dtrace_ecb_t *ecb;
        dtrace_epid_t epid;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
        ecb->dte_predicate = NULL;
        ecb->dte_probe = probe;

        /*
         * The default size is the size of the default action: recording
         * the epid.
         */
        ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
        ecb->dte_alignment = sizeof (dtrace_epid_t);

        epid = state->dts_epid++;

        if (epid - 1 >= state->dts_necbs) {
                dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
                int necbs = state->dts_necbs << 1;

                ASSERT(epid == state->dts_necbs + 1);

                if (necbs == 0) {
                        ASSERT(oecbs == NULL);
                        necbs = 1;
                }

                ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

                if (oecbs != NULL)
                        bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

                dtrace_membar_producer();
                state->dts_ecbs = ecbs;

                if (oecbs != NULL) {
                        /*
                         * If this state is active, we must dtrace_sync()
                         * before we can free the old dts_ecbs array:  we're
                         * coming in hot, and there may be active ring
                         * buffer processing (which indexes into the dts_ecbs
                         * array) on another CPU.
                         */
                        if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
                                dtrace_sync();

                        kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
                }

                dtrace_membar_producer();
                state->dts_necbs = necbs;
        }

        ecb->dte_state = state;

        ASSERT(state->dts_ecbs[epid - 1] == NULL);
        dtrace_membar_producer();
        state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

        return (ecb);
}

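/*
 * The dts_ecbs growth above follows a standard doubling scheme.  A minimal
 * sketch of just the resizing (hypothetical user-space code; malloc stands
 * in for kmem_zalloc, and the dtrace_membar_producer()/dtrace_sync()
 * ordering the driver needs for concurrent probe-context readers is omitted):
 */
#if 0
#include <stdlib.h>
#include <string.h>

static void **table;
static int ntable;

static void
table_grow(void)
{
        int nsize = ntable << 1;
        void **new;

        if (nsize == 0)
                nsize = 1;      /* first allocation */

        new = calloc(nsize, sizeof (*new));
        if (table != NULL) {
                memcpy(new, table, ntable * sizeof (*new));
                free(table);
        }
        table = new;
        ntable = nsize;
}
#endif
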
static void
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
        dtrace_probe_t *probe = ecb->dte_probe;

        lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(ecb->dte_next == NULL);

        if (probe == NULL) {
                /*
                 * This is the NULL probe -- there's nothing to do.
                 */
                return;
        }

        if (probe->dtpr_ecb == NULL) {
                dtrace_provider_t *prov = probe->dtpr_provider;

                /*
                 * We're the first ECB on this probe.
                 */
                probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

                if (ecb->dte_predicate != NULL)
                        probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

                prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
                    probe->dtpr_id, probe->dtpr_arg);
        } else {
                /*
                 * This probe is already active.  Swing the last pointer to
                 * point to the new ECB, and issue a dtrace_sync() to assure
                 * that all CPUs have seen the change.
                 */
                ASSERT(probe->dtpr_ecb_last != NULL);
                probe->dtpr_ecb_last->dte_next = ecb;
                probe->dtpr_ecb_last = ecb;
                probe->dtpr_predcache = 0;

                dtrace_sync();
        }
}

static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
        uint32_t maxalign = sizeof (dtrace_epid_t);
        uint32_t align = sizeof (uint8_t), offs, diff;
        dtrace_action_t *act;
        int wastuple = 0;
        uint32_t aggbase = UINT32_MAX;
        dtrace_state_t *state = ecb->dte_state;

        /*
         * If we record anything, we always record the epid.  (And we always
         * record it first.)
         */
        offs = sizeof (dtrace_epid_t);
        ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);

        for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
                dtrace_recdesc_t *rec = &act->dta_rec;

                if ((align = rec->dtrd_alignment) > maxalign)
                        maxalign = align;

                if (!wastuple && act->dta_intuple) {
                        /*
                         * This is the first record in a tuple.  Align the
                         * offset to be at offset 4 in an 8-byte aligned
                         * block.
                         */
                        diff = offs + sizeof (dtrace_aggid_t);

                        if (diff = (diff & (sizeof (uint64_t) - 1)))
                                offs += sizeof (uint64_t) - diff;

                        aggbase = offs - sizeof (dtrace_aggid_t);
                        ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
                }

                if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
                        /*
                         * The current offset is not properly aligned; align it.
                         */
                        offs += align - diff;
                }

                rec->dtrd_offset = offs;

                if (offs + rec->dtrd_size > ecb->dte_needed) {
                        ecb->dte_needed = offs + rec->dtrd_size;

                        if (ecb->dte_needed > state->dts_needed)
                                state->dts_needed = ecb->dte_needed;
                }

                if (DTRACEACT_ISAGG(act->dta_kind)) {
                        dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
                        dtrace_action_t *first = agg->dtag_first, *prev;

                        ASSERT(rec->dtrd_size != 0 && first != NULL);
                        ASSERT(wastuple);
                        ASSERT(aggbase != UINT32_MAX);

                        agg->dtag_base = aggbase;

                        while ((prev = first->dta_prev) != NULL &&
                            DTRACEACT_ISAGG(prev->dta_kind)) {
                                agg = (dtrace_aggregation_t *)prev;
                                first = agg->dtag_first;
                        }

                        if (prev != NULL) {
                                offs = prev->dta_rec.dtrd_offset +
                                    prev->dta_rec.dtrd_size;
                        } else {
                                offs = sizeof (dtrace_epid_t);
                        }
                        wastuple = 0;
                } else {
                        if (!act->dta_intuple)
                                ecb->dte_size = offs + rec->dtrd_size;

                        offs += rec->dtrd_size;
                }

                wastuple = act->dta_intuple;
        }

        if ((act = ecb->dte_action) != NULL &&
            !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
            ecb->dte_size == sizeof (dtrace_epid_t)) {
                /*
                 * If the size is still sizeof (dtrace_epid_t), then all
                 * actions store no data; set the size to 0.
                 */
                ecb->dte_alignment = maxalign;
                ecb->dte_size = 0;

                /*
                 * If the needed space is still sizeof (dtrace_epid_t), then
                 * all actions need no additional space; set the needed
                 * size to 0.
                 */
                if (ecb->dte_needed == sizeof (dtrace_epid_t))
                        ecb->dte_needed = 0;

                return;
        }

        /*
         * Set our alignment, and make sure that the dte_size and dte_needed
         * are aligned to the size of an EPID.
         */
        ecb->dte_alignment = maxalign;
        ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
            ~(sizeof (dtrace_epid_t) - 1);
        ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
            ~(sizeof (dtrace_epid_t) - 1);
        ASSERT(ecb->dte_size <= ecb->dte_needed);
}

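/*
 * Both roundings above use the usual power-of-two align-up identity,
 * (x + (a - 1)) & ~(a - 1).  A standalone sketch (hypothetical code,
 * not part of the framework; a must be a power of two):
 */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
align_up(uint32_t x, uint32_t a)
{
        return ((x + (a - 1)) & ~(a - 1));
}

static void
align_up_example(void)
{
        assert(align_up(5, 4) == 8);    /* rounded up to the next multiple */
        assert(align_up(8, 4) == 8);    /* already-aligned values are kept */
}
#endif
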
static dtrace_action_t *
dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
        dtrace_aggregation_t *agg;
        size_t size = sizeof (uint64_t);
        int ntuple = desc->dtad_ntuple;
        dtrace_action_t *act;
        dtrace_recdesc_t *frec;
        dtrace_aggid_t aggid;
        dtrace_state_t *state = ecb->dte_state;

        agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
        agg->dtag_ecb = ecb;

        ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));

        switch (desc->dtad_kind) {
        case DTRACEAGG_MIN:
                agg->dtag_initial = UINT64_MAX;
                agg->dtag_aggregate = dtrace_aggregate_min;
                break;

        case DTRACEAGG_MAX:
                agg->dtag_aggregate = dtrace_aggregate_max;
                break;

        case DTRACEAGG_COUNT:
                agg->dtag_aggregate = dtrace_aggregate_count;
                break;

        case DTRACEAGG_QUANTIZE:
                agg->dtag_aggregate = dtrace_aggregate_quantize;
                size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
                    sizeof (uint64_t);
                break;

        case DTRACEAGG_LQUANTIZE: {
                uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
                uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);

                agg->dtag_initial = desc->dtad_arg;
                agg->dtag_aggregate = dtrace_aggregate_lquantize;

                if (step == 0 || levels == 0)
                        goto err;

                size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
                break;
        }

        case DTRACEAGG_AVG:
                agg->dtag_aggregate = dtrace_aggregate_avg;
                size = sizeof (uint64_t) * 2;
                break;

        case DTRACEAGG_SUM:
                agg->dtag_aggregate = dtrace_aggregate_sum;
                break;

        default:
                goto err;
        }

        agg->dtag_action.dta_rec.dtrd_size = size;

        if (ntuple == 0)
                goto err;

        /*
         * We must make sure that we have enough actions for the n-tuple.
         */
        for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
                if (DTRACEACT_ISAGG(act->dta_kind))
                        break;

                if (--ntuple == 0) {
                        /*
                         * This is the action with which our n-tuple begins.
                         */
                        agg->dtag_first = act;
                        goto success;
                }
        }

        /*
         * This n-tuple is short by ntuple elements.  Return failure.
         */
        ASSERT(ntuple != 0);
err:
        kmem_free(agg, sizeof (dtrace_aggregation_t));
        return (NULL);

success:
        /*
         * If the last action in the tuple has a size of zero, it's actually
         * an expression argument for the aggregating action.
         */
        ASSERT(ecb->dte_action_last != NULL);
        act = ecb->dte_action_last;

        if (act->dta_kind == DTRACEACT_DIFEXPR) {
                ASSERT(act->dta_difo != NULL);

                if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
                        agg->dtag_hasarg = 1;
        }

        /*
         * We need to allocate an id for this aggregation.
         */
        aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
            VM_BESTFIT | VM_SLEEP);

        if (aggid - 1 >= state->dts_naggregations) {
                dtrace_aggregation_t **oaggs = state->dts_aggregations;
                dtrace_aggregation_t **aggs;
                int naggs = state->dts_naggregations << 1;
                int onaggs = state->dts_naggregations;

                ASSERT(aggid == state->dts_naggregations + 1);

                if (naggs == 0) {
                        ASSERT(oaggs == NULL);
                        naggs = 1;
                }

                aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

                if (oaggs != NULL) {
                        bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
                        kmem_free(oaggs, onaggs * sizeof (*aggs));
                }

                state->dts_aggregations = aggs;
                state->dts_naggregations = naggs;
        }

        ASSERT(state->dts_aggregations[aggid - 1] == NULL);
        state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

        frec = &agg->dtag_first->dta_rec;
        if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
                frec->dtrd_alignment = sizeof (dtrace_aggid_t);

        for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
                ASSERT(!act->dta_intuple);
                act->dta_intuple = 1;
        }

        return (&agg->dtag_action);
}

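/*
 * A sketch of the two buffer-size computations above (hypothetical code,
 * not part of the framework).  quantize() keeps one 64-bit bucket per
 * power-of-two bin over a 64-bit value: 63 negative bins, 63 positive bins,
 * and a zero bin.  For lquantize(), the three extra words are assumed here
 * to cover the encoded argument plus the underflow and overflow buckets.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

#define NBBY    8       /* bits per byte */

static size_t
quantize_size(void)
{
        /* (((64 - 1) * 2) + 1) == 127 buckets of a uint64_t each */
        return ((((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
            sizeof (uint64_t));
}

static size_t
lquantize_size(uint16_t levels)
{
        /* "levels" buckets, plus argument word and under/overflow buckets */
        return (levels * sizeof (uint64_t) + 3 * sizeof (uint64_t));
}
#endif
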
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
        dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
        dtrace_state_t *state = ecb->dte_state;
        dtrace_aggid_t aggid = agg->dtag_id;

        ASSERT(DTRACEACT_ISAGG(act->dta_kind));
        vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);

        ASSERT(state->dts_aggregations[aggid - 1] == agg);
        state->dts_aggregations[aggid - 1] = NULL;

        kmem_free(agg, sizeof (dtrace_aggregation_t));
}

static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
        dtrace_action_t *action, *last;
        dtrace_difo_t *dp = desc->dtad_difo;
        uint32_t size = 0, align = sizeof (uint8_t), mask;
        uint16_t format = 0;
        dtrace_recdesc_t *rec;
        dtrace_state_t *state = ecb->dte_state;
        dtrace_optval_t *opt = state->dts_options, nframes, strsize;
        uint64_t arg = desc->dtad_arg;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

        if (DTRACEACT_ISAGG(desc->dtad_kind)) {
                /*
                 * If this is an aggregating action, there must be neither
                 * a speculate nor a commit on the action chain.
                 */
                dtrace_action_t *act;

                for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
                        if (act->dta_kind == DTRACEACT_COMMIT)
                                return (EINVAL);

                        if (act->dta_kind == DTRACEACT_SPECULATE)
                                return (EINVAL);
                }

                action = dtrace_ecb_aggregation_create(ecb, desc);

                if (action == NULL)
                        return (EINVAL);
        } else {
                if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
                    (desc->dtad_kind == DTRACEACT_DIFEXPR &&
                    dp != NULL && dp->dtdo_destructive)) {
                        state->dts_destructive = 1;
                }

                switch (desc->dtad_kind) {
                case DTRACEACT_PRINTF:
                case DTRACEACT_PRINTA:
                case DTRACEACT_SYSTEM:
                case DTRACEACT_FREOPEN:
                        /*
                         * We know that our arg is a string -- turn it into a
                         * format.
                         */
                        if (arg == NULL) {
                                ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
                                format = 0;
                        } else {
                                ASSERT(arg != NULL);
                                /* ASSERT(arg > KERNELBASE); */
                                format = dtrace_format_add(state,
                                    (char *)(uintptr_t)arg);
                        }

                        /*FALLTHROUGH*/
                case DTRACEACT_LIBACT:
                case DTRACEACT_DIFEXPR:
                        if (dp == NULL)
                                return (EINVAL);

                        if ((size = dp->dtdo_rtype.dtdt_size) != 0)
                                break;

                        if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
                                if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                                        return (EINVAL);

                                size = opt[DTRACEOPT_STRSIZE];
                        }

                        break;

                case DTRACEACT_STACK:
                        if ((nframes = arg) == 0) {
                                nframes = opt[DTRACEOPT_STACKFRAMES];
                                ASSERT(nframes > 0);
                                arg = nframes;
                        }

                        size = nframes * sizeof (pc_t);
                        break;

                case DTRACEACT_JSTACK:
                        if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
                                strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

                        if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
                                nframes = opt[DTRACEOPT_JSTACKFRAMES];

                        arg = DTRACE_USTACK_ARG(nframes, strsize);

                        /*FALLTHROUGH*/
                case DTRACEACT_USTACK:
                        if (desc->dtad_kind != DTRACEACT_JSTACK &&
                            (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
                                strsize = DTRACE_USTACK_STRSIZE(arg);
                                nframes = opt[DTRACEOPT_USTACKFRAMES];
                                ASSERT(nframes > 0);
                                arg = DTRACE_USTACK_ARG(nframes, strsize);
                        }

                        /*
                         * Save a slot for the pid.
                         */
                        size = (nframes + 1) * sizeof (uint64_t);
                        size += DTRACE_USTACK_STRSIZE(arg);
                        size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));

                        break;

                case DTRACEACT_SYM:
                case DTRACEACT_MOD:
                        if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
                            sizeof (uint64_t)) ||
                            (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                                return (EINVAL);
                        break;

                case DTRACEACT_USYM:
                case DTRACEACT_UMOD:
                case DTRACEACT_UADDR:
                        if (dp == NULL ||
                            (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
                            (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                                return (EINVAL);

                        /*
                         * We have a slot for the pid, plus a slot for the
                         * argument.  To keep things simple (aligned with
                         * bitness-neutral sizing), we store each as a 64-bit
                         * quantity.
                         */
                        size = 2 * sizeof (uint64_t);
                        break;

                case DTRACEACT_STOP:
                case DTRACEACT_BREAKPOINT:
                case DTRACEACT_PANIC:
                        break;

                case DTRACEACT_CHILL:
                case DTRACEACT_DISCARD:
                case DTRACEACT_RAISE:
                        if (dp == NULL)
                                return (EINVAL);
                        break;

                case DTRACEACT_EXIT:
                        if (dp == NULL ||
                            (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
                            (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
                                return (EINVAL);
                        break;

                case DTRACEACT_SPECULATE:
                        if (ecb->dte_size > sizeof (dtrace_epid_t))
                                return (EINVAL);

                        if (dp == NULL)
                                return (EINVAL);

                        state->dts_speculates = 1;
                        break;

                case DTRACEACT_COMMIT: {
                        dtrace_action_t *act = ecb->dte_action;

                        for (; act != NULL; act = act->dta_next) {
                                if (act->dta_kind == DTRACEACT_COMMIT)
                                        return (EINVAL);
                        }

                        if (dp == NULL)
                                return (EINVAL);
                        break;
                }

                default:
                        return (EINVAL);
                }

                if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
                        /*
                         * If this is a data-storing action or a speculate,
                         * we must be sure that there isn't a commit on the
                         * action chain.
                         */
                        dtrace_action_t *act = ecb->dte_action;

                        for (; act != NULL; act = act->dta_next) {
                                if (act->dta_kind == DTRACEACT_COMMIT)
                                        return (EINVAL);
                        }
                }

                action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
                action->dta_rec.dtrd_size = size;
        }

        action->dta_refcnt = 1;
        rec = &action->dta_rec;
        size = rec->dtrd_size;

        for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
                if (!(size & mask)) {
                        align = mask + 1;
                        break;
                }
        }

        action->dta_kind = desc->dtad_kind;

        if ((action->dta_difo = dp) != NULL)
                dtrace_difo_hold(dp);

        rec->dtrd_action = action->dta_kind;
        rec->dtrd_arg = arg;
        rec->dtrd_uarg = desc->dtad_uarg;
        rec->dtrd_alignment = (uint16_t)align;
        rec->dtrd_format = format;

        if ((last = ecb->dte_action_last) != NULL) {
                ASSERT(ecb->dte_action != NULL);
                action->dta_prev = last;
                last->dta_next = action;
        } else {
                ASSERT(ecb->dte_action == NULL);
                ecb->dte_action = action;
        }

        ecb->dte_action_last = action;

        return (0);
}

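/*
 * The mask loop above derives a record's alignment from its size: it finds
 * the largest power of two (capped at 8) that divides the size.  An
 * equivalent standalone sketch (hypothetical code, not part of the
 * framework):
 */
#if 0
#include <stdint.h>

static uint32_t
align_for_size(uint32_t size)
{
        uint32_t align = 1, mask;

        for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
                if (!(size & mask)) {
                        align = mask + 1;
                        break;
                }
        }
        return (align); /* e.g. size 12 -> 4, size 16 -> 8, size 5 -> 1 */
}
#endif
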
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
        dtrace_action_t *act = ecb->dte_action, *next;
        dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
        dtrace_difo_t *dp;
        uint16_t format;

        if (act != NULL && act->dta_refcnt > 1) {
                ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
                act->dta_refcnt--;
        } else {
                for (; act != NULL; act = next) {
                        next = act->dta_next;
                        ASSERT(next != NULL || act == ecb->dte_action_last);
                        ASSERT(act->dta_refcnt == 1);

                        if ((format = act->dta_rec.dtrd_format) != 0)
                                dtrace_format_remove(ecb->dte_state, format);

                        if ((dp = act->dta_difo) != NULL)
                                dtrace_difo_release(dp, vstate);

                        if (DTRACEACT_ISAGG(act->dta_kind)) {
                                dtrace_ecb_aggregation_destroy(ecb, act);
                        } else {
                                kmem_free(act, sizeof (dtrace_action_t));
                        }
                }
        }

        ecb->dte_action = NULL;
        ecb->dte_action_last = NULL;
        ecb->dte_size = sizeof (dtrace_epid_t);
}

static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
        /*
         * We disable the ECB by removing it from its probe.
         */
        dtrace_ecb_t *pecb, *prev = NULL;
        dtrace_probe_t *probe = ecb->dte_probe;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        if (probe == NULL) {
                /*
                 * This is the NULL probe; there is nothing to disable.
                 */
                return;
        }

        for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
                if (pecb == ecb)
                        break;
                prev = pecb;
        }

        ASSERT(pecb != NULL);

        if (prev == NULL) {
                probe->dtpr_ecb = ecb->dte_next;
        } else {
                prev->dte_next = ecb->dte_next;
        }

        if (ecb == probe->dtpr_ecb_last) {
                ASSERT(ecb->dte_next == NULL);
                probe->dtpr_ecb_last = prev;
        }

        /*
         * The ECB has been disconnected from the probe; now sync to assure
         * that all CPUs have seen the change before returning.
         */
        dtrace_sync();

        if (probe->dtpr_ecb == NULL) {
                /*
                 * That was the last ECB on the probe; clear the predicate
                 * cache ID for the probe, disable it and sync one more time
                 * to assure that we'll never hit it again.
                 */
                dtrace_provider_t *prov = probe->dtpr_provider;

                ASSERT(ecb->dte_next == NULL);
                ASSERT(probe->dtpr_ecb_last == NULL);
                probe->dtpr_predcache = DTRACE_CACHEIDNONE;
                prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
                    probe->dtpr_id, probe->dtpr_arg);
                dtrace_sync();
        } else {
                /*
                 * There is at least one ECB remaining on the probe.  If there
                 * is _exactly_ one, set the probe's predicate cache ID to be
                 * the predicate cache ID of the remaining ECB.
                 */
                ASSERT(probe->dtpr_ecb_last != NULL);
                ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

                if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
                        dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

                        ASSERT(probe->dtpr_ecb->dte_next == NULL);

                        if (p != NULL)
                                probe->dtpr_predcache = p->dtp_cacheid;
                }

                ecb->dte_next = NULL;
        }
}

static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
        dtrace_state_t *state = ecb->dte_state;
        dtrace_vstate_t *vstate = &state->dts_vstate;
        dtrace_predicate_t *pred;
        dtrace_epid_t epid = ecb->dte_epid;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(ecb->dte_next == NULL);
        ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

        if ((pred = ecb->dte_predicate) != NULL)
                dtrace_predicate_release(pred, vstate);

        dtrace_ecb_action_remove(ecb);

        ASSERT(state->dts_ecbs[epid - 1] == ecb);
        state->dts_ecbs[epid - 1] = NULL;

        kmem_free(ecb, sizeof (dtrace_ecb_t));
}

static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
        dtrace_ecb_t *ecb;
        dtrace_predicate_t *pred;
        dtrace_actdesc_t *act;
        dtrace_provider_t *prov;
        dtrace_ecbdesc_t *desc = enab->dten_current;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(state != NULL);

        ecb = dtrace_ecb_add(state, probe);
        ecb->dte_uarg = desc->dted_uarg;

        if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
                dtrace_predicate_hold(pred);
                ecb->dte_predicate = pred;
        }

        if (probe != NULL) {
                /*
                 * If the provider shows more leg than the consumer is old
                 * enough to see, we need to enable the appropriate implicit
                 * predicate bits to prevent the ecb from activating at
                 * revealing times.
                 *
                 * Providers specifying DTRACE_PRIV_USER at register time
                 * are stating that they need the /proc-style privilege
                 * model to be enforced, and this is what DTRACE_COND_OWNER
                 * and DTRACE_COND_ZONEOWNER will then do at probe time.
                 */
                prov = probe->dtpr_provider;
                if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
                    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
                        ecb->dte_cond |= DTRACE_COND_OWNER;

                if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
                    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
                        ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

                /*
                 * If the provider shows us kernel innards and the user
                 * is lacking sufficient privilege, enable the
                 * DTRACE_COND_USERMODE implicit predicate.
                 */
                if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
                    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
                        ecb->dte_cond |= DTRACE_COND_USERMODE;
        }

        if (dtrace_ecb_create_cache != NULL) {
                /*
                 * If we have a cached ecb, we'll use its action list instead
                 * of creating our own (saving both time and space).
                 */
                dtrace_ecb_t *cached = dtrace_ecb_create_cache;
                dtrace_action_t *act = cached->dte_action;

                if (act != NULL) {
                        ASSERT(act->dta_refcnt > 0);
                        act->dta_refcnt++;
                        ecb->dte_action = act;
                        ecb->dte_action_last = cached->dte_action_last;
                        ecb->dte_needed = cached->dte_needed;
                        ecb->dte_size = cached->dte_size;
                        ecb->dte_alignment = cached->dte_alignment;
                }

                return (ecb);
        }

        for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
                if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
                        dtrace_ecb_destroy(ecb);
                        return (NULL);
                }
        }

        dtrace_ecb_resize(ecb);

        return (dtrace_ecb_create_cache = ecb);
}

static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
        dtrace_ecb_t *ecb;
        dtrace_enabling_t *enab = arg;
        dtrace_state_t *state = enab->dten_vstate->dtvs_state;

        ASSERT(state != NULL);

        if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
                /*
                 * This probe was created in a generation for which this
                 * enabling has previously created ECBs; we don't want to
                 * enable it again, so just kick out.
                 */
                return (DTRACE_MATCH_NEXT);
        }

        if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
                return (DTRACE_MATCH_DONE);

        dtrace_ecb_enable(ecb);
        return (DTRACE_MATCH_NEXT);
}

static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
        dtrace_ecb_t *ecb;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        if (id == 0 || id > state->dts_necbs)
                return (NULL);

        ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
        ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

        return (state->dts_ecbs[id - 1]);
}

static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
        dtrace_aggregation_t *agg;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        if (id == 0 || id > state->dts_naggregations)
                return (NULL);

        ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
        ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
            agg->dtag_id == id);

        return (state->dts_aggregations[id - 1]);
}

/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
        caddr_t tomax = buf->dtb_tomax;
        caddr_t xamot = buf->dtb_xamot;
        dtrace_icookie_t cookie;

        ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
        ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

        cookie = dtrace_interrupt_disable();
        buf->dtb_tomax = xamot;
        buf->dtb_xamot = tomax;
        buf->dtb_xamot_drops = buf->dtb_drops;
        buf->dtb_xamot_offset = buf->dtb_offset;
        buf->dtb_xamot_errors = buf->dtb_errors;
        buf->dtb_xamot_flags = buf->dtb_flags;
        buf->dtb_offset = 0;
        buf->dtb_drops = 0;
        buf->dtb_errors = 0;
        buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
        dtrace_interrupt_enable(cookie);
}

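/*
 * A sketch of the two-buffer switch protocol above (hypothetical user-space
 * code, not part of the framework): the active ("tomax") and inactive
 * ("xamot") buffers trade places, and the counters describing the
 * now-inactive buffer are snapshotted so a consumer can read them while
 * probes write to the other buffer.  The interrupt disabling that makes the
 * real switch atomic is omitted here.
 */
#if 0
#include <stddef.h>

struct twobuf {
        char    *active;        /* probes store here (dtb_tomax)  */
        char    *inactive;      /* consumer reads here (dtb_xamot) */
        size_t  offset;         /* write offset in the active buffer */
        size_t  snap_offset;    /* snapshotted offset for the consumer */
};

static void
twobuf_switch(struct twobuf *b)
{
        char *tmp = b->active;

        b->active = b->inactive;
        b->inactive = tmp;
        b->snap_offset = b->offset;
        b->offset = 0;
}
#endif
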
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
        dtrace_buffer_t *buf;
        dtrace_icookie_t cookie = dtrace_interrupt_disable();

        buf = &state->dts_buffer[CPU->cpu_id];

        if (buf->dtb_tomax != NULL) {
                /*
                 * We might like to assert that the buffer is marked inactive,
                 * but this isn't necessarily true:  the buffer for the CPU
                 * that processes the BEGIN probe has its buffer activated
                 * manually.  In this case, we take the (harmless) action of
                 * re-clearing the INACTIVE bit.
                 */
                buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
        }

        dtrace_interrupt_enable(cookie);
}

static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
        cpu_t *cp;
        dtrace_buffer_t *buf;

        lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        if (size > dtrace_nonroot_maxsize &&
            !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
                return (EFBIG);

#if defined(__APPLE__)
        if (size > (sane_size / 8) / NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
                return (ENOMEM);
#endif /* __APPLE__ */

        cp = cpu_list;

        do {
                if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
                        continue;

                buf = &bufs[cp->cpu_id];

                /*
                 * If there is already a buffer allocated for this CPU, it
                 * is only possible that this is a DR event.  In this case,
                 * the buffer size must match our specified size.
                 */
                if (buf->dtb_tomax != NULL) {
                        ASSERT(buf->dtb_size == size);
                        continue;
                }

                ASSERT(buf->dtb_xamot == NULL);

                if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
                        goto err;

                buf->dtb_size = size;
                buf->dtb_flags = flags;
                buf->dtb_offset = 0;
                buf->dtb_drops = 0;

                if (flags & DTRACEBUF_NOSWITCH)
                        continue;

                if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
                        goto err;
        } while ((cp = cp->cpu_next) != cpu_list);

        return (0);

err:
        cp = cpu_list;

        do {
                if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
                        continue;

                buf = &bufs[cp->cpu_id];

                if (buf->dtb_xamot != NULL) {
                        ASSERT(buf->dtb_tomax != NULL);
                        ASSERT(buf->dtb_size == size);
                        kmem_free(buf->dtb_xamot, size);
                }

                if (buf->dtb_tomax != NULL) {
                        ASSERT(buf->dtb_size == size);
                        kmem_free(buf->dtb_tomax, size);
                }

                buf->dtb_tomax = NULL;
                buf->dtb_xamot = NULL;
                buf->dtb_size = 0;
        } while ((cp = cp->cpu_next) != cpu_list);

        return (ENOMEM);
}

/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
        buf->dtb_drops++;
}

/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
        intptr_t offs = buf->dtb_offset, soffs;
        intptr_t woffs;
        caddr_t tomax;
        size_t total;

        if (buf->dtb_flags & DTRACEBUF_INACTIVE)
                return (-1);

        if ((tomax = buf->dtb_tomax) == NULL) {
                dtrace_buffer_drop(buf);
                return (-1);
        }

        if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
                while (offs & (align - 1)) {
                        /*
                         * Assert that our alignment is off by a number which
                         * is itself sizeof (uint32_t) aligned.
                         */
                        ASSERT(!((align - (offs & (align - 1))) &
                            (sizeof (uint32_t) - 1)));
                        DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
                        offs += sizeof (uint32_t);
                }

                if ((soffs = offs + needed) > buf->dtb_size) {
                        dtrace_buffer_drop(buf);
                        return (-1);
                }

                if (mstate == NULL)
                        return (offs);

                mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
                mstate->dtms_scratch_size = buf->dtb_size - soffs;
                mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

                return (offs);
        }

        if (buf->dtb_flags & DTRACEBUF_FILL) {
                if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
                    (buf->dtb_flags & DTRACEBUF_FULL))
                        return (-1);

                goto out;
        }

        total = needed + (offs & (align - 1));

        /*
         * For a ring buffer, life is quite a bit more complicated.  Before
         * we can store any padding, we need to adjust our wrapping offset.
         * (If we've never before wrapped or we're not about to, no adjustment
         * is required.)
         */
        if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
            offs + total > buf->dtb_size) {
                woffs = buf->dtb_xamot_offset;

                if (offs + total > buf->dtb_size) {
                        /*
                         * We can't fit in the end of the buffer.  First, a
                         * sanity check that we can fit in the buffer at all.
                         */
                        if (total > buf->dtb_size) {
                                dtrace_buffer_drop(buf);
                                return (-1);
                        }

                        /*
                         * We're going to be storing at the top of the buffer,
                         * so now we need to deal with the wrapped offset.  We
                         * only reset our wrapped offset to 0 if it is
                         * currently greater than the current offset.  If it
                         * is less than the current offset, it is because a
                         * previous allocation induced a wrap -- but the
                         * allocation didn't subsequently take the space due
                         * to an error or false predicate evaluation.  In this
                         * case, we'll just leave the wrapped offset alone: if
                         * the wrapped offset hasn't been advanced far enough
                         * for this allocation, it will be adjusted in the
                         * lower loop.
                         */
                        if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
                                if (woffs >= offs)
                                        woffs = 0;
                        } else {
                                woffs = 0;
                        }

                        /*
                         * Now we know that we're going to be storing to the
                         * top of the buffer and that there is room for us
                         * there.  We need to clear the buffer from the current
                         * offset to the end (there may be old gunk there).
                         */
                        while (offs < buf->dtb_size)
                                tomax[offs++] = 0;

                        /*
                         * We need to set our offset to zero.  And because we
                         * are wrapping, we need to set the bit indicating as
                         * much.  We can also adjust our needed space back
                         * down to the space required by the ECB -- we know
                         * that the top of the buffer is aligned.
                         */
                        offs = 0;
                        total = needed;
                        buf->dtb_flags |= DTRACEBUF_WRAPPED;
                } else {
                        /*
                         * There is room for us in the buffer, so we simply
                         * need to check the wrapped offset.
                         */
                        if (woffs < offs) {
                                /*
                                 * The wrapped offset is less than the offset.
                                 * This can happen if we allocated buffer space
                                 * that induced a wrap, but then we didn't
                                 * subsequently take the space due to an error
                                 * or false predicate evaluation.  This is
                                 * okay; we know that _this_ allocation isn't
                                 * going to induce a wrap.  We still can't
                                 * reset the wrapped offset to be zero,
                                 * however: the space may have been trashed in
                                 * the previous failed probe attempt.  But at
                                 * least the wrapped offset doesn't need to
                                 * be adjusted at all...
                                 */
                                goto out;
                        }
                }

                while (offs + total > woffs) {
                        dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
                        size_t size;

                        if (epid == DTRACE_EPIDNONE) {
                                size = sizeof (uint32_t);
                        } else {
                                ASSERT(epid <= state->dts_necbs);
                                ASSERT(state->dts_ecbs[epid - 1] != NULL);

                                size = state->dts_ecbs[epid - 1]->dte_size;
                        }

                        ASSERT(woffs + size <= buf->dtb_size);

                        if (woffs + size == buf->dtb_size) {
                                /*
                                 * We've reached the end of the buffer; we want
                                 * to set the wrapped offset to 0 and break
                                 * out.  However, if the offs is 0, then we're
                                 * in a strange edge-condition:  the amount of
                                 * space that we want to reserve plus the size
                                 * of the record that we're overwriting is
                                 * greater than the size of the buffer.  This
                                 * is problematic because if we reserve the
                                 * space but subsequently don't consume it (due
                                 * to a failed predicate or error) the wrapped
                                 * offset will be 0 -- yet the EPID at offset 0
                                 * will not be committed.  This situation is
                                 * relatively easy to deal with:  if we're in
                                 * this case, the buffer is indistinguishable
                                 * from one that hasn't wrapped; we need only
                                 * finish the job by clearing the wrapped bit,
                                 * explicitly setting the offset to be 0, and
                                 * zero'ing out the old data in the buffer.
                                 */
                                if (offs == 0) {
                                        buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
                                        buf->dtb_offset = 0;
                                        woffs = 0;

                                        while (woffs < buf->dtb_size)
                                                tomax[woffs++] = 0;
                                }

                                woffs = 0;
                                break;
                        }

                        woffs += size;
                }

                /*
                 * We have a wrapped offset.  It may be that the wrapped offset
                 * has become zero -- that's okay.
                 */
                buf->dtb_xamot_offset = woffs;
        }

out:
        /*
         * Now we can plow the buffer with any necessary padding.
         */
        while (offs & (align - 1)) {
                /*
                 * Assert that our alignment is off by a number which
                 * is itself sizeof (uint32_t) aligned.
                 */
                ASSERT(!((align - (offs & (align - 1))) &
                    (sizeof (uint32_t) - 1)));
                DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
                offs += sizeof (uint32_t);
        }

        if (buf->dtb_flags & DTRACEBUF_FILL) {
                if (offs + needed > buf->dtb_size - state->dts_reserve) {
                        buf->dtb_flags |= DTRACEBUF_FULL;
                        return (-1);
                }
        }

        if (mstate == NULL)
                return (offs);

        /*
         * For ring buffers and fill buffers, the scratch space is always
         * the inactive buffer.
         */
        mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
        mstate->dtms_scratch_size = buf->dtb_size;
        mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

        return (offs);
}

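/*
 * A sketch of the padding protocol used in both alignment loops above
 * (hypothetical user-space code, not part of the framework): each padding
 * word is a DTRACE_EPIDNONE tag, so a consumer walking the buffer can
 * recognize and skip the filler.  As the ASSERTs above note, the shortfall
 * must itself be a multiple of sizeof (uint32_t), and align a power of two.
 */
#if 0
#include <stdint.h>
#include <string.h>

#define EPIDNONE        0       /* stands in for DTRACE_EPIDNONE */

static size_t
pad_to_align(char *buf, size_t offs, size_t align)
{
        while (offs & (align - 1)) {
                uint32_t none = EPIDNONE;

                memcpy(buf + offs, &none, sizeof (none));
                offs += sizeof (uint32_t);
        }
        return (offs);
}
#endif
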
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
        ASSERT(buf->dtb_flags & DTRACEBUF_RING);
        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
                return;

        /*
         * We need to polish the ring buffer.  There are three cases:
         *
         * - The first (and presumably most common) is that there is no gap
         *   between the buffer offset and the wrapped offset.  In this case,
         *   there is nothing in the buffer that isn't valid data; we can
         *   mark the buffer as polished and return.
         *
         * - The second (less common than the first but still more common
         *   than the third) is that there is a gap between the buffer offset
         *   and the wrapped offset, and the wrapped offset is larger than the
         *   buffer offset.  This can happen because of an alignment issue, or
         *   can happen because of a call to dtrace_buffer_reserve() that
         *   didn't subsequently consume the buffer space.  In this case,
         *   we need to zero the data from the buffer offset to the wrapped
         *   offset.
         *
         * - The third (and least common) is that there is a gap between the
         *   buffer offset and the wrapped offset, but the wrapped offset is
         *   _less_ than the buffer offset.  This can only happen because a
         *   call to dtrace_buffer_reserve() induced a wrap, but the space
         *   was not subsequently consumed.  In this case, we need to zero the
         *   space from the offset to the end of the buffer _and_ from the
         *   top of the buffer to the wrapped offset.
         */
        if (buf->dtb_offset < buf->dtb_xamot_offset) {
                bzero(buf->dtb_tomax + buf->dtb_offset,
                    buf->dtb_xamot_offset - buf->dtb_offset);
        }

        if (buf->dtb_offset > buf->dtb_xamot_offset) {
                bzero(buf->dtb_tomax + buf->dtb_offset,
                    buf->dtb_size - buf->dtb_offset);
                bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
        }
}

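/*
 * A compressed sketch of the polish cases above (hypothetical user-space
 * code, not part of the framework): zero the gap between the offset and
 * the wrapped offset, whichever way around it lies.
 */
#if 0
#include <stddef.h>
#include <string.h>

static void
polish(char *buf, size_t size, size_t offs, size_t woffs)
{
        if (offs < woffs) {             /* case two: a single gap */
                memset(buf + offs, 0, woffs - offs);
        } else if (offs > woffs) {      /* case three: two gaps */
                memset(buf + offs, 0, size - offs);
                memset(buf, 0, woffs);
        }                               /* case one: no gap at all */
}
#endif
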
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
        int i;

        for (i = 0; i < NCPU; i++) {
                dtrace_buffer_t *buf = &bufs[i];

                if (buf->dtb_tomax == NULL) {
                        ASSERT(buf->dtb_xamot == NULL);
                        ASSERT(buf->dtb_size == 0);
                        continue;
                }

                if (buf->dtb_xamot != NULL) {
                        ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
                        kmem_free(buf->dtb_xamot, buf->dtb_size);
                }

                kmem_free(buf->dtb_tomax, buf->dtb_size);
                buf->dtb_size = 0;
                buf->dtb_tomax = NULL;
                buf->dtb_xamot = NULL;
        }
}

/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
        dtrace_enabling_t *enab;

        enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
        enab->dten_vstate = vstate;

        return (enab);
}

static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
        dtrace_ecbdesc_t **ndesc;
        size_t osize, nsize;

        /*
         * We can't add to enablings after we've enabled them, or after we've
         * retained them.
         */
        ASSERT(enab->dten_probegen == 0);
        ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

#if defined(__APPLE__)
        if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */
#endif /* __APPLE__ */

        if (enab->dten_ndesc < enab->dten_maxdesc) {
                enab->dten_desc[enab->dten_ndesc++] = ecb;
                return;
        }

        osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

        if (enab->dten_maxdesc == 0) {
                enab->dten_maxdesc = 1;
        } else {
                enab->dten_maxdesc <<= 1;
        }

        ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

        nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
        ndesc = kmem_zalloc(nsize, KM_SLEEP);
        bcopy(enab->dten_desc, ndesc, osize);
        kmem_free(enab->dten_desc, osize);

        enab->dten_desc = ndesc;
        enab->dten_desc[enab->dten_ndesc++] = ecb;
}

static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
        dtrace_ecbdesc_t *new;
        dtrace_predicate_t *pred;
        dtrace_actdesc_t *act;

        /*
         * We're going to create a new ECB description that matches the
         * specified ECB in every way, but has the specified probe description.
         */
        new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

        if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
                dtrace_predicate_hold(pred);

        for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
                dtrace_actdesc_hold(act);

        new->dted_action = ecb->dted_action;
        new->dted_pred = ecb->dted_pred;
        new->dted_probe = *pd;
        new->dted_uarg = ecb->dted_uarg;

        dtrace_enabling_add(enab, new);
}

static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
        int i;

        for (i = 0; i < enab->dten_ndesc; i++) {
                dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

                cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
                    desc->dtpd_provider, desc->dtpd_mod,
                    desc->dtpd_func, desc->dtpd_name);
        }
}

static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
        int i;
        dtrace_ecbdesc_t *ep;
        dtrace_vstate_t *vstate = enab->dten_vstate;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

        for (i = 0; i < enab->dten_ndesc; i++) {
                dtrace_actdesc_t *act, *next;
                dtrace_predicate_t *pred;

                ep = enab->dten_desc[i];

                if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
                        dtrace_predicate_release(pred, vstate);

                for (act = ep->dted_action; act != NULL; act = next) {
                        next = act->dtad_next;
                        dtrace_actdesc_release(act, vstate);
                }

                kmem_free(ep, sizeof (dtrace_ecbdesc_t));
        }

        kmem_free(enab->dten_desc,
            enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

        /*
         * If this was a retained enabling, decrement the dts_nretained count
         * and take it off of the dtrace_retained list.
         */
        if (enab->dten_prev != NULL || enab->dten_next != NULL ||
            dtrace_retained == enab) {
                ASSERT(enab->dten_vstate->dtvs_state != NULL);
                ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
                enab->dten_vstate->dtvs_state->dts_nretained--;
        }

        if (enab->dten_prev == NULL) {
                if (dtrace_retained == enab) {
                        dtrace_retained = enab->dten_next;

                        if (dtrace_retained != NULL)
                                dtrace_retained->dten_prev = NULL;
                }
        } else {
                ASSERT(enab != dtrace_retained);
                ASSERT(dtrace_retained != NULL);
                enab->dten_prev->dten_next = enab->dten_next;
        }

        if (enab->dten_next != NULL) {
                ASSERT(dtrace_retained != NULL);
                enab->dten_next->dten_prev = enab->dten_prev;
        }

        kmem_free(enab, sizeof (dtrace_enabling_t));
}

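/*
 * The unlink above is a conventional doubly-linked-list removal with an
 * external head pointer (dtrace_retained); a minimal sketch (hypothetical
 * code, not part of the framework):
 */
#if 0
struct node {
        struct node *next, *prev;
};

static struct node *head;

static void
unlink_node(struct node *n)
{
        if (n->prev == NULL) {
                if (head == n)
                        head = n->next;         /* n was the list head */
        } else {
                n->prev->next = n->next;
        }

        if (n->next != NULL)
                n->next->prev = n->prev;
}
#endif
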
static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
        dtrace_state_t *state;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
        ASSERT(enab->dten_vstate != NULL);

        state = enab->dten_vstate->dtvs_state;
        ASSERT(state != NULL);

        /*
         * We only allow each state to retain dtrace_retain_max enablings.
         */
        if (state->dts_nretained >= dtrace_retain_max)
                return (ENOSPC);

        state->dts_nretained++;

        if (dtrace_retained == NULL) {
                dtrace_retained = enab;
                return (0);
        }

        enab->dten_next = dtrace_retained;
        dtrace_retained->dten_prev = enab;
        dtrace_retained = enab;

        return (0);
}

static int
dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
    dtrace_probedesc_t *create)
{
        dtrace_enabling_t *new, *enab;
        int found = 0, err = ENOENT;

        lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
        ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
        ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
        ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
        ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);

        new = dtrace_enabling_create(&state->dts_vstate);

        /*
         * Iterate over all retained enablings, looking for enablings that
         * match the specified state.
         */
        for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
                int i;

                /*
                 * dtvs_state can only be NULL for helper enablings -- and
                 * helper enablings can't be retained.
                 */
                ASSERT(enab->dten_vstate->dtvs_state != NULL);

                if (enab->dten_vstate->dtvs_state != state)
                        continue;

                /*
                 * Now iterate over each probe description; we're looking for
                 * an exact match to the specified probe description.
                 */
                for (i = 0; i < enab->dten_ndesc; i++) {
                        dtrace_ecbdesc_t *ep = enab->dten_desc[i];
                        dtrace_probedesc_t *pd = &ep->dted_probe;

                        if (strcmp(pd->dtpd_provider, match->dtpd_provider))
                                continue;

                        if (strcmp(pd->dtpd_mod, match->dtpd_mod))
                                continue;

                        if (strcmp(pd->dtpd_func, match->dtpd_func))
                                continue;

                        if (strcmp(pd->dtpd_name, match->dtpd_name))
                                continue;

                        /*
                         * We have a winning probe!  Add it to our growing
                         * enabling.
                         */
                        found = 1;
                        dtrace_enabling_addlike(new, ep, create);
                }
        }

        if (!found || (err = dtrace_enabling_retain(new)) != 0) {
                dtrace_enabling_destroy(new);
                return (err);
        }

        return (0);
}

static void
dtrace_enabling_retract(dtrace_state_t *state)
{
	dtrace_enabling_t *enab, *next;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Iterate over all retained enablings, destroying the enablings
	 * retained for the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = next) {
		next = enab->dten_next;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state == state) {
			ASSERT(state->dts_nretained > 0);
			dtrace_enabling_destroy(enab);
		}
	}

	ASSERT(state->dts_nretained == 0);
}
static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
	int i = 0, matched = 0;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];

		enab->dten_current = ep;
		enab->dten_error = 0;

		matched += dtrace_probe_enable(&ep->dted_probe, enab);

		if (enab->dten_error != 0) {
			/*
			 * If we get an error half-way through enabling the
			 * probes, we kick out -- perhaps with some number of
			 * them enabled.  Leaving enabled probes enabled may
			 * be slightly confusing for user-level, but we expect
			 * that no one will attempt to actually drive on in
			 * the face of such errors.  If this is an anonymous
			 * enabling (indicated with a NULL nmatched pointer),
			 * we cmn_err() a message.  We aren't expecting to
			 * get such an error -- insofar as it can exist at
			 * all, it would be the result of corrupted DOF in
			 * the driver properties.
			 */
			if (nmatched == NULL) {
				cmn_err(CE_WARN, "dtrace_enabling_match() "
				    "error on %p: %d", (void *)ep,
				    enab->dten_error);
			}

			return (enab->dten_error);
		}
	}

	enab->dten_probegen = dtrace_probegen;
	if (nmatched != NULL)
		*nmatched = matched;

	return (0);
}
static void
dtrace_enabling_matchall(void)
{
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Because we can be called after dtrace_detach() has been called, we
	 * cannot assert that there are retained enablings.  We can safely
	 * load from dtrace_retained, however:  the taskq_destroy() at the
	 * end of dtrace_detach() will block pending our completion.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
		(void) dtrace_enabling_match(enab, NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
}
static int
dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched)
{
	dtrace_enabling_t *enab;
	int matched, total = 0, err;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		if ((err = dtrace_enabling_match(enab, &matched)) != 0)
			return (err);

		total += matched;
	}

	if (nmatched != NULL)
		*nmatched = total;

	return (0);
}
/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int i;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * We don't want to prime an enabling more than once, lest
		 * we allow a malicious user to induce resource exhaustion.
		 * (The ECBs that result from priming an enabling aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_primed)
			continue;

		for (i = 0; i < enab->dten_ndesc; i++) {
			enab->dten_current = enab->dten_desc[i];
			(void) dtrace_probe_enable(NULL, enab);
		}

		enab->dten_primed = 1;
	}
}
/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
 */
static void
dtrace_enabling_provide(dtrace_provider_t *prv)
{
	int i, all = 0;
	dtrace_probedesc_t desc;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		dtrace_enabling_t *enab = dtrace_retained;
		void *parg = prv->dtpv_arg;

		for (; enab != NULL; enab = enab->dten_next) {
			for (i = 0; i < enab->dten_ndesc; i++) {
				desc = enab->dten_desc[i]->dted_probe;
				lck_mtx_unlock(&dtrace_lock);
				prv->dtpv_pops.dtps_provide(parg, &desc);
				lck_mtx_lock(&dtrace_lock);
			}
		}
	} while (all && (prv = prv->dtpv_next) != NULL);

	lck_mtx_unlock(&dtrace_lock);
	dtrace_probe_provide(NULL, all ? NULL : prv);
	lck_mtx_lock(&dtrace_lock);
}
/*
 * DTrace DOF Functions
 */
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}
/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to
 * create complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
	dof_hdr_t *dof;
	dof_sec_t *sec;
	dof_optdesc_t *opt;
	int i, len = sizeof (dof_hdr_t) +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP);
	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

	dof->dofh_flags = 0;
	dof->dofh_hdrsize = sizeof (dof_hdr_t);
	dof->dofh_secsize = sizeof (dof_sec_t);
	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
	dof->dofh_secoff = sizeof (dof_hdr_t);
	dof->dofh_loadsz = len;
	dof->dofh_filesz = len;

	/*
	 * Fill in the option section header...
	 */
	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
	sec->dofs_type = DOF_SECT_OPTDESC;
	sec->dofs_align = sizeof (uint64_t);
	sec->dofs_flags = DOF_SECF_LOAD;
	sec->dofs_entsize = sizeof (dof_optdesc_t);

	opt = (dof_optdesc_t *)((uintptr_t)sec +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	for (i = 0; i < DTRACEOPT_MAX; i++) {
		opt[i].dofo_option = i;
		opt[i].dofo_strtab = DOF_SECIDX_NONE;
		opt[i].dofo_value = state->dts_options[i];
	}

	return (dof);
}
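/*
 * Illustrative sketch (not in the original source): a consumer handed the
 * DOF built by dtrace_dof_create() could locate the option descriptors with
 * the same arithmetic the builder used above.  It assumes the single
 * DOF_SECT_OPTDESC section that dtrace_dof_create() emits; the function
 * name is hypothetical.
 */
#if 0	/* example only; not compiled */
static void
dtrace_dof_dump_options(dof_hdr_t *dof)
{
	/* The lone section header sits at dofh_secoff... */
	dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + dof->dofh_secoff);
	/* ...and dofs_offset locates the option array within the DOF. */
	dof_optdesc_t *opt = (dof_optdesc_t *)((uintptr_t)dof +
	    (uintptr_t)sec->dofs_offset);
	uint64_t i, n = sec->dofs_size / sec->dofs_entsize;

	for (i = 0; i < n; i++)
		cmn_err(CE_NOTE, "option %u = %lld", opt[i].dofo_option,
		    (long long)opt[i].dofo_value);
}
#endif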
static dof_hdr_t *
#if defined(__APPLE__)
dtrace_dof_copyin(user_addr_t uarg, int *errp)
#else
dtrace_dof_copyin(uintptr_t uarg, int *errp)
#endif /* __APPLE__ */
{
	dof_hdr_t hdr, *dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
#if defined(__APPLE__)
	if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
#else
	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
#endif /* __APPLE__ */
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

#if defined(__APPLE__)
	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) {
#else
	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
#endif /* __APPLE__ */
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}
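/*
 * Illustrative sketch (not in the original source): the intended calling
 * pattern for dtrace_dof_copyin() from an ioctl path.  'uarg' stands in for
 * a hypothetical user-supplied address; on failure, *errp carries the
 * errno-style code chosen above.
 */
#if 0	/* example only; not compiled */
	int err;
	dof_hdr_t *dof = dtrace_dof_copyin(uarg, &err);

	if (dof == NULL)
		return (err);	/* EFAULT, E2BIG or EINVAL per the checks above */

	/* ... process the DOF, e.g. via dtrace_dof_slurp() ... */
	dtrace_dof_destroy(dof);
#endif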
#if defined(__APPLE__)

static dof_hdr_t *
dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (uread(p, &hdr, sizeof (hdr), uarg) != KERN_SUCCESS) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

	if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) {
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}

#endif /* __APPLE__ */
static dof_hdr_t *
dtrace_dof_property(const char *name)
{
	uchar_t *buf;
	uint64_t loadsz;
	unsigned int len, i;
	dof_hdr_t *dof;

	/*
	 * Unfortunately, arrays of values in .conf files are always (and
	 * only) interpreted to be integer arrays.  We must read our DOF
	 * as an integer array, and then squeeze it into a byte array.
	 */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
	    (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
		return (NULL);

	for (i = 0; i < len; i++)
		buf[i] = (uchar_t)(((int *)buf)[i]);

	if (len < sizeof (dof_hdr_t)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= dtrace_dof_maxsize) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP);
	bcopy(buf, dof, loadsz);
	ddi_prop_free(buf);

	return (dof);
}
static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	dt_kmem_free_aligned(dof, dof->dofh_loadsz);
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If
 * the index is not valid, dtrace_dof_error() is called and NULL is returned.
 * If a type other than DOF_SECT_NONE is specified, the header is checked
 * against this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
static dtrace_probedesc_t *
dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
{
	dof_probedesc_t *probe;
	dof_sec_t *strtab;
	uintptr_t daddr = (uintptr_t)dof;
	uintptr_t str;
	size_t size;

	if (sec->dofs_type != DOF_SECT_PROBEDESC) {
		dtrace_dof_error(dof, "invalid probe section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in probe description");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated probe description");
		return (NULL);
	}

	probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
	strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);

	if (strtab == NULL)
		return (NULL);

	str = daddr + strtab->dofs_offset;
	size = strtab->dofs_size;

	if (probe->dofp_provider >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe provider");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_provider,
	    (char *)(str + probe->dofp_provider),
	    MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));

	if (probe->dofp_mod >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe module");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
	    MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));

	if (probe->dofp_func >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe function");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
	    MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));

	if (probe->dofp_name >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe name");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
	    MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));

	return (desc);
}
static dtrace_difo_t *
dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;
	size_t ttl = 0;
	dof_difohdr_t *dofd;
	uintptr_t daddr = (uintptr_t)dof;
	size_t max = dtrace_difo_maxsize;
	int i, l, n;

	static const struct {
		int section;
		int bufoffs;
		int lenoffs;
		int entsize;
		int align;
		const char *msg;
	} difo[] = {
		{ DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
		offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
		sizeof (dif_instr_t), "multiple DIF sections" },

		{ DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
		offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
		sizeof (uint64_t), "multiple integer tables" },

		{ DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
		offsetof(dtrace_difo_t, dtdo_strlen), 0,
		sizeof (char), "multiple string tables" },

		{ DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
		offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
		sizeof (uint_t), "multiple variable tables" },

#if !defined(__APPLE__)
		{ DOF_SECT_NONE, 0, 0, 0, NULL }
#else
		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
#endif /* __APPLE__ */
	};

	if (sec->dofs_type != DOF_SECT_DIFOHDR) {
		dtrace_dof_error(dof, "invalid DIFO header section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in DIFO header");
		return (NULL);
	}

	if (sec->dofs_size < sizeof (dof_difohdr_t) ||
	    sec->dofs_size % sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad size in DIFO header");
		return (NULL);
	}

	dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;

	dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
	dp->dtdo_rtype = dofd->dofd_rtype;

	for (l = 0; l < n; l++) {
		dof_sec_t *subsec;
		void **bufp;
		uint32_t *lenp;

		if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
		    dofd->dofd_links[l])) == NULL)
			goto err; /* invalid section link */

		if (ttl + subsec->dofs_size > max) {
			dtrace_dof_error(dof, "exceeds maximum size");
			goto err;
		}

		ttl += subsec->dofs_size;

		for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
			if (subsec->dofs_type != difo[i].section)
				continue;

			if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
				dtrace_dof_error(dof, "section not loaded");
				goto err;
			}

			if (subsec->dofs_align != difo[i].align) {
				dtrace_dof_error(dof, "bad alignment");
				goto err;
			}

			bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
			lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);

			if (*bufp != NULL) {
				dtrace_dof_error(dof, difo[i].msg);
				goto err;
			}

			if (difo[i].entsize != subsec->dofs_entsize) {
				dtrace_dof_error(dof, "entry size mismatch");
				goto err;
			}

			if (subsec->dofs_entsize != 0 &&
			    (subsec->dofs_size % subsec->dofs_entsize) != 0) {
				dtrace_dof_error(dof, "corrupt entry size");
				goto err;
			}

			*lenp = subsec->dofs_size;
			*bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
			bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
			    *bufp, subsec->dofs_size);

			if (subsec->dofs_entsize != 0)
				*lenp /= subsec->dofs_entsize;

			break;
		}

		/*
		 * If we encounter a loadable DIFO sub-section that is not
		 * known to us, assume this is a broken program and fail.
		 */
		if (difo[i].section == DOF_SECT_NONE &&
		    (subsec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "unrecognized DIFO subsection");
			goto err;
		}
	}

	if (dp->dtdo_buf == NULL) {
		/*
		 * We can't have a DIF object without DIF text.
		 */
		dtrace_dof_error(dof, "missing DIF text");
		goto err;
	}

	/*
	 * Before we validate the DIF object, run through the variable table
	 * looking for the strings -- if any of their sizes are under, we'll
	 * set their size to be the system-wide default string size.  Note
	 * that this should _not_ happen if the "strsize" option has been
	 * set -- in this case, the compiler should have set the size to
	 * reflect the setting of the option.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_diftype_t *t = &v->dtdv_type;

		if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
			continue;

		if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
			t->dtdt_size = dtrace_strsize_default;
	}

	if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
		goto err;

	dtrace_difo_init(dp, vstate);
	return (dp);

err:
	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
	return (NULL);
}
static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;

	if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
		return (NULL);

	return (dtrace_predicate_create(dp));
}
static dtrace_actdesc_t *
dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
	dof_actdesc_t *desc;
	dof_sec_t *difosec;
	size_t offs;
	uintptr_t daddr = (uintptr_t)dof;
	uint64_t arg;
	dtrace_actkind_t kind;

	if (sec->dofs_type != DOF_SECT_ACTDESC) {
		dtrace_dof_error(dof, "invalid action section");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated action description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in action description");
		return (NULL);
	}

	if (sec->dofs_size < sec->dofs_entsize) {
		dtrace_dof_error(dof, "section entry size exceeds total size");
		return (NULL);
	}

	if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
		dtrace_dof_error(dof, "bad entry size in action description");
		return (NULL);
	}

	if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
		dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
		return (NULL);
	}

	for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
		desc = (dof_actdesc_t *)(daddr +
		    (uintptr_t)sec->dofs_offset + offs);
		kind = (dtrace_actkind_t)desc->dofa_kind;

		if (DTRACEACT_ISPRINTFLIKE(kind) &&
		    (kind != DTRACEACT_PRINTA ||
		    desc->dofa_strtab != DOF_SECIDX_NONE)) {
			dof_sec_t *strtab;
			char *fmt, *str;
			uint64_t i;

			/*
			 * printf()-like actions must have a format string.
			 */
			if ((strtab = dtrace_dof_sect(dof,
			    DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
				goto err;

			str = (char *)((uintptr_t)dof +
			    (uintptr_t)strtab->dofs_offset);

			for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
				if (str[i] == '\0')
					break;
			}

			if (i >= strtab->dofs_size) {
				dtrace_dof_error(dof, "bogus format string");
				goto err;
			}

			if (i == desc->dofa_arg) {
				dtrace_dof_error(dof, "empty format string");
				goto err;
			}

			i -= desc->dofa_arg;
			fmt = kmem_alloc(i + 1, KM_SLEEP);
			bcopy(&str[desc->dofa_arg], fmt, i + 1);
			arg = (uint64_t)(uintptr_t)fmt;
		} else {
			if (kind == DTRACEACT_PRINTA) {
				ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
				arg = 0;
			} else {
				arg = desc->dofa_arg;
			}
		}

		act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
		    desc->dofa_uarg, arg);

		if (last != NULL) {
			last->dtad_next = act;
		} else {
			first = act;
		}

		last = act;

		if (desc->dofa_difo == DOF_SECIDX_NONE)
			continue;

		if ((difosec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
			goto err;

		act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

		if (act->dtad_difo == NULL)
			goto err;
	}

	ASSERT(first != NULL);
	return (first);

err:
	for (act = first; act != NULL; act = next) {
		next = act->dtad_next;
		dtrace_actdesc_release(act, vstate);
	}

	return (NULL);
}
static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_ecbdesc_t *ep;
	dof_ecbdesc_t *ecb;
	dtrace_probedesc_t *desc;
	dtrace_predicate_t *pred = NULL;

	if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
		dtrace_dof_error(dof, "truncated ECB description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in ECB description");
		return (NULL);
	}

	ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
	sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

	if (sec == NULL)
		return (NULL);

	ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
	ep->dted_uarg = ecb->dofe_uarg;
	desc = &ep->dted_probe;

	if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
		goto err;

	if (ecb->dofe_pred != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
			goto err;

		if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
			goto err;

		ep->dted_pred.dtpdd_predicate = pred;
	}

	if (ecb->dofe_actions != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
			goto err;

		ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

		if (ep->dted_action == NULL)
			goto err;
	}

	return (ep);

err:
	if (pred != NULL)
		dtrace_predicate_release(pred, vstate);
	kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	return (NULL);
}
#if !defined(__APPLE__) /* APPLE dyld has already done this for us */
/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  At present, this amounts to simply adding 'ubase' to the
 * site of any user SETX relocations to account for load object base address.
 * In the future, if we need other relocations, this function can be extended.
 */
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1); /* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			*(uint64_t *)taddr += ubase;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}
#endif /* __APPLE__ */
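/*
 * Illustrative sketch (not in the original source): the net effect of one
 * DOF_RELO_SETX entry processed by the loop above.  The numeric values are
 * made up for the example.
 */
#if 0	/* example only; not compiled */
	uint64_t *site = (uint64_t *)(daddr + ts->dofs_offset + r->dofr_offset);

	/* Before: *site == 0x1000 (an offset within the load object). */
	*site += ubase;		/* e.g., ubase == 0x20000 */
	/* After:  *site == 0x21000 (absolute address once loaded). */
#endif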
/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
{
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

#if !defined(__APPLE__)
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}
#else
	/*
	 * We only support DOF_VERSION_3 for now.
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}
#endif /* __APPLE__ */

	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		if (sec->dofs_align & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

#if !defined(__APPLE__)
	/*
	 * APPLE NOTE: We have no relocation to perform. All dof values are
	 * relative offsets.
	 */

	/*
	 * Take a second pass through the sections and locate and perform any
	 * relocations that are present.  We do this after the first pass to
	 * be sure that all sections have had their headers validated.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* skip sections that are not loadable */

		switch (sec->dofs_type) {
		case DOF_SECT_URELHDR:
			if (dtrace_dof_relocate(dof, sec, ubase) != 0)
				return (-1);
			break;
		}
	}
#endif /* __APPLE__ */

	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

#if !defined(__APPLE__)
		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}
#else
		/*
		 * XXX Defend against gcc 4.0 botch on x86 (not all paths out
		 * of inlined dtrace_dof_ecbdesc are checked for the NULL
		 * return value.)
		 */
		ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr);
		if (ep == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}
#endif /* __APPLE__ */

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}
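/*
 * Illustrative sketch (not in the original source): how the DOF routines
 * above are meant to compose.  The surrounding context -- a hypothetical
 * ioctl handler with 'uarg', 'state' and 'cr' in hand -- is assumed, and
 * locks are taken to satisfy the assertions in each function.
 */
#if 0	/* example only; not compiled */
	int err = 0, nmatched = 0;
	dtrace_enabling_t *enab = NULL;
	dof_hdr_t *dof = dtrace_dof_copyin(uarg, &err);

	if (dof == NULL)
		return (err);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_dof_slurp(dof, &state->dts_vstate, cr, &enab, 0, 0) == 0)
		err = dtrace_enabling_match(enab, &nmatched);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
	dtrace_dof_destroy(dof);
#endif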
/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	int i, rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}
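/*
 * Illustrative sketch (not in the original source): a well-formed option
 * entry as dtrace_dof_options() expects it.  String-valued options are
 * rejected (dofo_strtab must be DOF_SECIDX_NONE) and the value must not be
 * DTRACEOPT_UNSET; the particular option and value chosen here are
 * arbitrary.
 */
#if 0	/* example only; not compiled */
	dof_optdesc_t desc;

	desc.dofo_option = DTRACEOPT_BUFSIZE;	/* arbitrary example option */
	desc.dofo_strtab = DOF_SECIDX_NONE;	/* required: no string value */
	desc.dofo_value = 4096;			/* must not be DTRACEOPT_UNSET */
#endif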
/*
 * DTrace Consumer State Functions
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min;

	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	maxper = (limit - (uintptr_t)start) / NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == NCPU - 1) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		ASSERT(limit <= (uintptr_t)base + size);

		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}
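/*
 * Illustrative sketch (not in the original source): the layout that
 * dtrace_dstate_init() imposes on its single allocation.  The hash table
 * occupies the front of the region; the remainder is cut into
 * chunksize-sized dynamic variables strung onto per-CPU free lists:
 *
 *	base                                                  base + size
 *	| hashsize * dtrace_dynhash_t | CPU 0 chunks | ... | last CPU chunks |
 */
#if 0	/* example only; not compiled */
	/* First dynamic variable chunk, immediately after the hash table: */
	dtrace_dynvar_t *first = (dtrace_dynvar_t *)
	    ((uintptr_t)dstate->dtds_base +
	    dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
#endif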
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}
static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
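/*
 * Illustrative sketch (not in the original source): the reader-side
 * reasoning behind the INT64_MAX store above.  A checker comparing
 * dts_alive against a deadman threshold (the threshold variable below is an
 * assumption for the example) can never spuriously fire: at any instant the
 * visible value is the old (true) value, INT64_MAX (which trivially passes
 * the check), or the new, larger value.
 */
#if 0	/* example only; not compiled */
	hrtime_t dead = dtrace_gethrtime() - dtrace_deadman_timeout;

	if (state->dts_alive <= dead) {
		/* never reached merely because of store reordering */
	}
#endif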
11997 #if defined(__APPLE__) 
11999 #endif /* __APPLE__ */ 
12001 dtrace_state_create(dev_t 
*devp
, cred_t 
*cr
) 
12006         dtrace_state_t 
*state
; 
12007         dtrace_optval_t 
*opt
; 
12008         int bufsize 
= NCPU 
* sizeof (dtrace_buffer_t
), i
; 
12010         lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
); 
12011         lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
); 
12013 #if !defined(__APPLE__) 
12014         minor 
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1, 
12015             VM_BESTFIT 
| VM_SLEEP
); 
12018          * Darwin's DEVFS layer acquired the minor number for this "device" when it called 
12019          * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number 
12020          * (next unused according to vmem_alloc()) and then immediately put the number back in play 
12021          * (by calling vmem_free()). Now that minor number is being used for an open, so committing it 
12022          * to use. The following vmem_alloc() must deliver that same minor number. 
12025         minor 
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1, 
12026             VM_BESTFIT 
| VM_SLEEP
); 
12028         if (NULL 
!= devp
) { 
12029         ASSERT(getminor(*devp
) == minor
); 
12030                 if (getminor(*devp
) != minor
) { 
12031                         printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",  
12032                                         getminor(*devp
), minor
); 
12033                         vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1); 
12037         /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), 
12038                  * so just vend the minor device number here de novo since no "open" has occurred. */ 
12041 #endif /* __APPLE__ */ 
12043         if (ddi_soft_state_zalloc(dtrace_softstate
, minor
) != DDI_SUCCESS
) { 
12044                 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1); 
12048         state 
= ddi_get_soft_state(dtrace_softstate
, minor
); 
12049         state
->dts_epid 
= DTRACE_EPIDNONE 
+ 1; 
12051         (void) snprintf(c
, sizeof (c
), "dtrace_aggid_%d", minor
); 
12052         state
->dts_aggid_arena 
= vmem_create(c
, (void *)1, UINT32_MAX
, 1, 
12053             NULL
, NULL
, NULL
, 0, VM_SLEEP 
| VMC_IDENTIFIER
); 
12055         if (devp 
!= NULL
) { 
12056                 major 
= getemajor(*devp
); 
12058                 major 
= ddi_driver_major(dtrace_devi
); 
12061         state
->dts_dev 
= makedevice(major
, minor
); 
12064                 *devp 
= state
->dts_dev
; 
12067          * We allocate NCPU buffers.  On the one hand, this can be quite 
12068          * a bit of memory per instance (nearly 36K on a Starcat).  On the 
12069          * other hand, it saves an additional memory reference in the probe 
12072         state
->dts_buffer 
= kmem_zalloc(bufsize
, KM_SLEEP
); 
12073         state
->dts_aggbuffer 
= kmem_zalloc(bufsize
, KM_SLEEP
); 
12074         state
->dts_cleaner 
= CYCLIC_NONE
; 
12075         state
->dts_deadman 
= CYCLIC_NONE
; 
12076         state
->dts_vstate
.dtvs_state 
= state
; 
12078         for (i 
= 0; i 
< DTRACEOPT_MAX
; i
++) 
12079                 state
->dts_options
[i
] = DTRACEOPT_UNSET
; 
12082          * Set the default options. 
12084         opt 
= state
->dts_options
; 
12085         opt
[DTRACEOPT_BUFPOLICY
] = DTRACEOPT_BUFPOLICY_SWITCH
; 
12086         opt
[DTRACEOPT_BUFRESIZE
] = DTRACEOPT_BUFRESIZE_AUTO
; 
12087         opt
[DTRACEOPT_NSPEC
] = dtrace_nspec_default
; 
12088         opt
[DTRACEOPT_SPECSIZE
] = dtrace_specsize_default
; 
12089         opt
[DTRACEOPT_CPU
] = (dtrace_optval_t
)DTRACE_CPUALL
; 
12090         opt
[DTRACEOPT_STRSIZE
] = dtrace_strsize_default
; 
12091         opt
[DTRACEOPT_STACKFRAMES
] = dtrace_stackframes_default
; 
12092         opt
[DTRACEOPT_USTACKFRAMES
] = dtrace_ustackframes_default
; 
12093         opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_default
; 
12094         opt
[DTRACEOPT_AGGRATE
] = dtrace_aggrate_default
; 
12095         opt
[DTRACEOPT_SWITCHRATE
] = dtrace_switchrate_default
; 
12096         opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_default
; 
12097         opt
[DTRACEOPT_JSTACKFRAMES
] = dtrace_jstackframes_default
; 
12098         opt
[DTRACEOPT_JSTACKSTRSIZE
] = dtrace_jstackstrsize_default
; 
12100         state
->dts_activity 
= DTRACE_ACTIVITY_INACTIVE
; 
12103          * Depending on the user credentials, we set flag bits which alter probe 
12104          * visibility or the amount of destructiveness allowed.  In the case of 
12105          * actual anonymous tracing, or the possession of all privileges, all of 
12106          * the normal checks are bypassed. 
12108         if (cr 
== NULL 
|| PRIV_POLICY_ONLY(cr
, PRIV_ALL
, B_FALSE
)) { 
12109                 state
->dts_cred
.dcr_visible 
= DTRACE_CRV_ALL
; 
12110                 state
->dts_cred
.dcr_action 
= DTRACE_CRA_ALL
; 
12113                  * Set up the credentials for this instantiation.  We take a 
12114                  * hold on the credential to prevent it from disappearing on 
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * in all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

	return (state);
}
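#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * The credential logic above is a pure fold from privilege checks into
 * two bit-vectors: what the consumer may *see* (dcr_visible) and what it
 * may *do* (dcr_action).  A toy model with hypothetical bit names (not
 * the real DTRACE_CRV_*/DTRACE_CRA_* values), showing how holding
 * proc_owner on top of dtrace_user widens both sets:
 */
#include <stdio.h>

#define	TOY_CRV_ALLPROC			0x01u
#define	TOY_CRA_PROC			0x01u
#define	TOY_CRA_PROC_DESTRUCTIVE	0x02u

int
main(void)
{
	int has_dtrace_user = 1, has_proc_owner = 1;
	unsigned int visible = 0, action = 0;

	if (has_dtrace_user)
		action |= TOY_CRA_PROC;

	if (has_dtrace_user && has_proc_owner) {
		visible |= TOY_CRV_ALLPROC;
		action |= TOY_CRA_PROC_DESTRUCTIVE;
	}

	printf("visible=%#x action=%#x\n", visible, action);
	return (0);
}
#endif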
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = 0;
	int flags = 0, rval;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);
	}

	return (ENOMEM);
}
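#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * A minimal model of the sizing loop above: starting from a requested
 * size, force 8-byte alignment and halve on each (simulated) allocation
 * failure until an allocation succeeds or the size underflows.  The
 * try_alloc() stand-in is hypothetical; the kernel uses
 * dtrace_buffer_alloc() and also honors the reserve and resize policy.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
try_alloc(size_t size)
{
	/* Pretend allocations above 24 KB fail. */
	return (size <= 24 * 1024);
}

int
main(void)
{
	size_t size;

	for (size = 100 * 1024; size >= sizeof (uint64_t); size >>= 1) {
		/* Drop to the next lower 8-byte boundary, as above. */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (try_alloc(size)) {
			printf("allocated %zu bytes\n", size);
			return (0);
		}
	}

	return (1);	/* analogous to returning ENOMEM */
}
#endif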
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
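#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * The reserve computed above is just the sum, over this state's ECBs on
 * the END probe, of each ECB's data requirement plus worst-case alignment
 * padding.  A list-walk model (the ecb_t type and field names here are
 * hypothetical stand-ins for the kernel's dtrace_ecb_t):
 */
#include <stdio.h>

typedef struct ecb {
	struct ecb *next;
	int owned;		/* belongs to the consumer being sized? */
	unsigned int needed;
	unsigned int alignment;
} ecb_t;

int
main(void)
{
	ecb_t c = { NULL, 1, 64, 8 };
	ecb_t b = { &c, 0, 128, 8 };	/* another consumer's ECB: skipped */
	ecb_t a = { &b, 1, 32, 4 };
	unsigned int reserve = 0;
	ecb_t *e;

	for (e = &a; e != NULL; e = e->next) {
		if (!e->owned)
			continue;
		reserve += e->needed + e->alignment;
	}

	printf("reserve = %u\n", reserve);	/* 32+4 + 64+8 = 108 */
	return (0);
}
#endif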
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	cyc_time_t when;
	int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * Now we want to try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
	state->dts_deadman = cyclic_add(&hdlr, &when);

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

	/*
	 * Regardless of whether we're now in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
	goto out;

err:
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

	return (rval);
}
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
	}

	state->dts_options[option] = val;

	return (0);
}
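#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * A quick check of the claim in the comment above.  The "down to 15m"
 * arithmetic assumes an ILP32 kernel, where LONG_MAX is INT32_MAX and the
 * clamped value is 15 * 128m; repeated halving then stays megabyte-aligned
 * until 15m.  Standalone sketch; the kernel never runs this.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int64_t val = (int64_t)INT32_MAX - (1L << 27) + 1;	/* 1920m */

	while (val % (1L << 20) == 0) {
		printf("%lldm\n", (long long)(val >> 20));
		val >>= 1;
	}
	/* Prints 1920m, 960m, 480m, 240m, 120m, 60m, 30m, 15m. */
	return (0);
}
#endif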
static void
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	minor_t minor = getminor(state->dts_dev);
	int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;
	uint32_t match;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().) In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;
		dtrace_sync();
	}

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs:  in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)
				continue;

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				if (!(prov->dtpv_priv.dtpp_flags & match))
					continue;
			}

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);
		}

		if (!match)
			break;
	}

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */
	dtrace_sync();

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
#ifdef DEBUG
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);
#endif
		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));
	}

	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	vmem_destroy(state->dts_aggid_arena);
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
}
/*
 * DTrace Anonymous Enabling Functions
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}
static void
dtrace_anon_property(void)
{
	int i, rv;
	dtrace_state_t *state;
	dof_hdr_t *dof;
	char c[32];		/* enough for "dof-data-" + digits */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;
			break;
		}

		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);
			break;
		}

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
			state = dtrace_state_create(NULL, NULL);
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);
				break;
			}
		}

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, B_TRUE);

		if (rv == 0)
			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

		if (rv != 0) {
			/*
			 * This is malformed DOF; chuck any anonymous state
			 * that we created.
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;
			break;
		}

		ASSERT(dtrace_anon.dta_enabling != NULL);
	}

	if (dtrace_anon.dta_enabling != NULL) {
		int rval;

		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
		ASSERT(rval == 0);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
	}
}
/*
 * DTrace Helper Functions
 */
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent;
	uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (!dtrace_helptrace_enabled)
		return;

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
	do {
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;
		} else {
			nnext = size;
		}
	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	if (nnext == size)
		next = 0;

	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)
			continue;

		ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
	}
}
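#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * The slot allocation above is a lock-free reservation on a wrapping
 * buffer: read the cursor, compute the proposed new cursor (wrapping to
 * the start when the record won't fit), and compare-and-swap.  If another
 * CPU moved the cursor first, retry.  This sketch uses C11 atomics in
 * place of dtrace_cas32().
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	BUFSIZE	4096

static _Atomic uint32_t cursor;

static uint32_t
reserve(uint32_t size)
{
	uint32_t next, nnext;

	do {
		next = atomic_load(&cursor);
		nnext = (next + size < BUFSIZE) ? next + size : size;
	} while (!atomic_compare_exchange_weak(&cursor, &next, nnext));

	/* As above: if we wrapped, the record actually starts at offset 0. */
	return (nnext == size ? 0 : next);
}

int
main(void)
{
	printf("first record at %u\n", reserve(100));	/* 0 */
	printf("next record at %u\n", reserve(100));	/* 100 */
	return (0);
}
#endif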
static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	uint64_t rval = 0;
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_enabled;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)
		return (0);

	if ((helper = helpers->dthps_actions[which]) == NULL)
		return (0);

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
			if (trace)
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))
				goto next;

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

		for (i = 0; i < helper->dtha_nactions; i++) {
			if (trace)
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

next:
		if (trace)
			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);
	}

	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (rval);

err:
	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (0);
}
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}
#if !defined(__APPLE__)
static int
dtrace_helper_destroygen(int gen)
{
	proc_t *p = curproc;
#else
static int
dtrace_helper_destroygen(proc_t* p, int gen)
{
#endif
	dtrace_helpers_t *help = p->p_dtrace_helpers;
	dtrace_vstate_t *vstate;
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (help == NULL || gen > help->dthps_generation)
		return (EINVAL);

	vstate = &help->dthps_vstate;

	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;
				} else {
					help->dthps_actions[i] = next;
				}

				dtrace_helper_action_destroy(h, vstate);
			} else {
				last = h;
			}
		}
	}

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
	for (;;) {
		dtrace_helper_provider_t *prov;

		/*
		 * Look for a helper provider with the right generation. We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock. It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		lck_mtx_unlock(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		lck_mtx_lock(&dtrace_lock);
	}

	return (0);
}
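#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * The provider removal above uses the classic unordered-array trick:
 * rather than shifting every later element down, move the last element
 * into the vacated slot and shrink the count.  O(1) removal, at the cost
 * of element order -- which the helper provider table doesn't rely on.
 * Hypothetical names; the kernel operates on help->dthps_provs.
 */
#include <stdio.h>

static void
remove_slot(int *table, unsigned int *count, unsigned int i)
{
	(*count)--;
	table[i] = table[*count];
	table[*count] = 0;
}

int
main(void)
{
	int table[4] = { 10, 20, 30, 40 };
	unsigned int count = 4, i;

	remove_slot(table, &count, 1);	/* remove 20: table is 10, 40, 30 */
	for (i = 0; i < count; i++)
		printf("%d ", table[i]);
	printf("\n");
	return (0);
}
#endif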
static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	int err = 0, i;
	dtrace_difo_t *dp;

	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

	return (err == 0);
}
#if !defined(__APPLE__)
static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
#else
static int
dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
#endif
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions = 0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
		return (EINVAL);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;
#else
	help = p->p_dtrace_helpers;
#endif
	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	for (count = 0; last != NULL; last = last->dtha_next) {
		count++;
		if (last->dtha_next == NULL)
			break;
	}

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)
		return (ENOSPC);

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;
	}

	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)
			goto err;

		if (act->dtad_difo == NULL)
			goto err;

		nactions++;
	}

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;
	}

	if (!dtrace_helper_validate(helper))
		goto err;

	if (last == NULL) {
		help->dthps_actions[which] = helper;
	} else {
		last->dtha_next = helper;
	}

	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;
	}

	return (0);
err:
	dtrace_helper_action_destroy(helper, vstate);
	return (EINVAL);
}
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */
		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;
		}

		lck_mtx_unlock(&dtrace_lock);
	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 * meta provider.
		 */
		lck_mtx_unlock(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);
	} else {
		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */
		uint_t i;
		lck_mtx_unlock(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    p->p_pid);
		}
	}

	lck_mtx_unlock(&dtrace_meta_lock);
}
#if !defined(__APPLE__)
static int
dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
#else
static int
dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen)
#endif
{
	dtrace_helpers_t *help;
	dtrace_helper_provider_t *hprov, **tmp_provs;
	uint_t tmp_maxprovs, i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;
#else
	help = p->p_dtrace_helpers;
#endif
	ASSERT(help != NULL);

	/*
	 * If we already have dtrace_helper_providers_max helper providers,
	 * we refuse to add a new one.
	 */
	if (help->dthps_nprovs >= dtrace_helper_providers_max)
		return (ENOSPC);

	/*
	 * Check to make sure this isn't a duplicate.
	 */
	for (i = 0; i < help->dthps_nprovs; i++) {
		if (dofhp->dofhp_addr ==
		    help->dthps_provs[i]->dthp_prov.dofhp_addr)
			return (EALREADY);
	}

	hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
	hprov->dthp_prov = *dofhp;
	hprov->dthp_ref = 1;
	hprov->dthp_generation = gen;

	/*
	 * Allocate a bigger table for helper providers if it's already full.
	 */
	if (help->dthps_maxprovs == help->dthps_nprovs) {
		tmp_maxprovs = help->dthps_maxprovs;
		tmp_provs = help->dthps_provs;

		if (help->dthps_maxprovs == 0)
			help->dthps_maxprovs = 2;
		else
			help->dthps_maxprovs *= 2;
		if (help->dthps_maxprovs > dtrace_helper_providers_max)
			help->dthps_maxprovs = dtrace_helper_providers_max;

		ASSERT(tmp_maxprovs < help->dthps_maxprovs);

		help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);

		if (tmp_provs != NULL) {
			bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
			kmem_free(tmp_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
		}
	}

	help->dthps_provs[help->dthps_nprovs] = hprov;
	help->dthps_nprovs++;

	return (0);
}
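#if 0	/* Illustrative userland sketch -- not part of the build. */
/*
 * The table growth above is amortized doubling with a hard ceiling:
 * start at 2, double on each exhaustion, clamp at the configured maximum,
 * and copy the old slots forward.  A standalone model of that policy
 * (max_slots stands in for dtrace_helper_providers_max):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	unsigned int nslots = 0, maxslots = 0, max_slots = 32;
	int *slots = NULL;
	unsigned int n;

	for (n = 0; n < 20; n++) {
		if (maxslots == nslots) {
			unsigned int oldmax = maxslots;
			int *grown;

			maxslots = (maxslots == 0) ? 2 : maxslots * 2;
			if (maxslots > max_slots)
				maxslots = max_slots;

			grown = calloc(maxslots, sizeof (int));
			if (slots != NULL) {
				memcpy(grown, slots, oldmax * sizeof (int));
				free(slots);
			}
			slots = grown;
			printf("grew table: %u -> %u\n", oldmax, maxslots);
		}
		slots[nslots++] = (int)n;
	}

	free(slots);
	return (0);
}
#endif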
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	lck_mtx_lock(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;
		lck_mtx_unlock(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		lck_mtx_unlock(&dtrace_lock);
	}
}
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint8_t *arg;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	size_t typesz;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	/*
	 * The section needs to be large enough to contain the DOF provider
	 * structure appropriate for the given version.
	 */
	if (sec->dofs_size <
	    ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
	    offsetof(dof_provider_t, dofpv_prenoffs) :
	    sizeof (dof_provider_t))) {
		dtrace_dof_error(dof, "provider section too small");
		return (-1);
	}

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
	prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
	arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
	off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

	if (str_sec == NULL || prb_sec == NULL ||
	    arg_sec == NULL || off_sec == NULL)
		return (-1);

	enoff_sec = NULL;

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE &&
	    (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
	    provider->dofpv_prenoffs)) == NULL)
		return (-1);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	if (provider->dofpv_name >= str_sec->dofs_size ||
	    strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
		dtrace_dof_error(dof, "invalid provider name");
		return (-1);
	}

	if (prb_sec->dofs_entsize == 0 ||
	    prb_sec->dofs_entsize > prb_sec->dofs_size) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
		dtrace_dof_error(dof, "misaligned entry size");
		return (-1);
	}

	if (off_sec->dofs_entsize != sizeof (uint32_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Take a pass through the probes to check for errors.
	 */
	for (j = 0; j < nprobes; j++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

		if (probe->dofpr_func >= str_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid function name");
			return (-1);
		}

		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
			dtrace_dof_error(dof, "function name too long");
			return (-1);
		}

		if (probe->dofpr_name >= str_sec->dofs_size ||
		    strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
			dtrace_dof_error(dof, "invalid probe name");
			return (-1);
		}

		/*
		 * The offset count must not wrap the index, and the offsets
		 * must also not overflow the section's data.
		 */
		if (probe->dofpr_offidx + probe->dofpr_noffs <
		    probe->dofpr_offidx ||
		    (probe->dofpr_offidx + probe->dofpr_noffs) *
		    off_sec->dofs_entsize > off_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid probe offset");
			return (-1);
		}

		if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
			/*
			 * If there's no is-enabled offset section, make sure
			 * there aren't any is-enabled offsets. Otherwise
			 * perform the same checks as for probe offsets
			 * (immediately above).
			 */
			if (enoff_sec == NULL) {
				if (probe->dofpr_enoffidx != 0 ||
				    probe->dofpr_nenoffs != 0) {
					dtrace_dof_error(dof, "is-enabled "
					    "offsets with null section");
					return (-1);
				}
			} else if (probe->dofpr_enoffidx +
			    probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
			    (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
			    enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
				dtrace_dof_error(dof, "invalid is-enabled "
				    "offset");
				return (-1);
			}

			if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
				dtrace_dof_error(dof, "zero probe and "
				    "is-enabled offsets");
				return (-1);
			}
		} else if (probe->dofpr_noffs == 0) {
			dtrace_dof_error(dof, "zero probe offsets");
			return (-1);
		}

		if (probe->dofpr_argidx + probe->dofpr_xargc <
		    probe->dofpr_argidx ||
		    (probe->dofpr_argidx + probe->dofpr_xargc) *
		    arg_sec->dofs_entsize > arg_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid args");
			return (-1);
		}

		typeidx = probe->dofpr_nargv;
		typestr = strtab + probe->dofpr_nargv;
		for (k = 0; k < probe->dofpr_nargc; k++) {
			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "native argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "native "
				    "argument type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}

		typeidx = probe->dofpr_xargv;
		typestr = strtab + probe->dofpr_xargv;
		for (k = 0; k < probe->dofpr_xargc; k++) {
			if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
				dtrace_dof_error(dof, "bad "
				    "native argument index");
				return (-1);
			}

			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "translated argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "translated argument "
				    "type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}
	}

	return (0);
}
#if !defined(__APPLE__)
static int
dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
#else
static int
dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
#endif
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	dtrace_enabling_t *enab = NULL;
	int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
	uintptr_t daddr = (uintptr_t)dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	if ((help = curproc->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(curproc);
#else
	if ((help = p->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(p);
#endif

	vstate = &help->dthps_vstate;

	if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
	    dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
		dtrace_dof_destroy(dof);
		return (rv);
	}

	/*
	 * Look for helper providers and validate their descriptions.
	 */
	if (dhp != NULL) {
		for (i = 0; i < dof->dofh_secnum; i++) {
			dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
			    dof->dofh_secoff + i * dof->dofh_secsize);

			if (sec->dofs_type != DOF_SECT_PROVIDER)
				continue;

			if (dtrace_helper_provider_validate(dof, sec) != 0) {
				dtrace_enabling_destroy(enab);
				dtrace_dof_destroy(dof);
				return (-1);
			}

			nprovs++;
		}
	}

	/*
	 * Now we need to walk through the ECB descriptions in the enabling.
	 */
	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
		dtrace_probedesc_t *desc = &ep->dted_probe;

		if (strcmp(desc->dtpd_provider, "dtrace") != 0)
			continue;

		if (strcmp(desc->dtpd_mod, "helper") != 0)
			continue;

		if (strcmp(desc->dtpd_func, "ustack") != 0)
			continue;

#if !defined(__APPLE__)
		if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
		    ep)) != 0) {
#else
		if ((rv = dtrace_helper_action_add(p,
		    DTRACE_HELPER_ACTION_USTACK, ep)) != 0) {
#endif
			/*
			 * Adding this helper action failed -- we are now going
			 * to rip out the entire generation and return failure.
			 */
#if !defined(__APPLE__)
			(void) dtrace_helper_destroygen(help->dthps_generation);
#else
			(void) dtrace_helper_destroygen(p,
			    help->dthps_generation);
#endif
			dtrace_enabling_destroy(enab);
			dtrace_dof_destroy(dof);
			return (-1);
		}

		nhelpers++;
	}

	if (nhelpers < enab->dten_ndesc)
		dtrace_dof_error(dof, "unmatched helpers");

	gen = help->dthps_generation++;
	dtrace_enabling_destroy(enab);

	if (dhp != NULL && nprovs > 0) {
		dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
#if !defined(__APPLE__)
		if (dtrace_helper_provider_add(dhp, gen) == 0) {
#else
		if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
#endif
			lck_mtx_unlock(&dtrace_lock);
#if !defined(__APPLE__)
			dtrace_helper_provider_register(curproc, help, dhp);
#else
			dtrace_helper_provider_register(p, help, dhp);
#endif
			lck_mtx_lock(&dtrace_lock);

			destroy = 0;
		}
	}

	if (destroy)
		dtrace_dof_destroy(dof);

	return (gen);
}
#if defined(__APPLE__)

/*
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
 * expensive to process dof for a process that will never use it. There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof". The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dof_slurp_helper() on them. When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads library(s) containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block. In non-lazy dof terms,
 * this is the generation that dof was loaded in. If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy. To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, and in ascending order. It is okay to skip
 * UID's, i.e., 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy. All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec).
 * That way if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted in dof.
 */
13880  * This method is a bit squicky. It must handle: 
13882  * dof should not be lazy. 
13883  * dof should have been handled lazily, but there was an error 
13884  * dof was handled lazily, and needs to be freed. 
13885  * dof was handled lazily, and must not be freed. 
13888  * Returns EACCESS if dof should be handled non-lazily. 
13890  * KERN_SUCCESS and all other return codes indicate lazy handling of dof. 
13892  * If the dofs data is claimed by this method, dofs_claimed will be set. 
13893  * Callers should not free claimed dofs. 
static int
dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed)
{
    int rval = 0;
    *dofs_claimed = 0;

    ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);

    lck_rw_lock_shared(&dtrace_dof_mode_lock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

    /*
     * Any existing helpers force non-lazy behavior.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
        lck_mtx_lock(&p->p_dtrace_sprlock);

        dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
        unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
        unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;

        /*
         * Range check the merged count.
         */
        if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
            dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
            lck_mtx_unlock(&p->p_dtrace_sprlock);
            lck_rw_unlock_shared(&dtrace_dof_mode_lock);
            return EINVAL;
        }

        /*
         * Each dof being added must be assigned a unique generation.
         */
        uint64_t generation = (existing_dofs) ? existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
        for (i=0; i<incoming_dofs->dofiod_count; i++) {
            /*
             * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
             */
            ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
            incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
        }

        if (existing_dofs) {
            /*
             * Merge the existing and incoming dofs
             */
            size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
            dof_ioctl_data_t* merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);

            bcopy(&existing_dofs->dofiod_helpers[0],
                  &merged_dofs->dofiod_helpers[0],
                  sizeof(dof_helper_t) * existing_dofs_count);
            bcopy(&incoming_dofs->dofiod_helpers[0],
                  &merged_dofs->dofiod_helpers[existing_dofs_count],
                  sizeof(dof_helper_t) * incoming_dofs->dofiod_count);

            merged_dofs->dofiod_count = merged_dofs_count;

            kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

            p->p_dtrace_lazy_dofs = merged_dofs;
        } else {
            /*
             * Claim the incoming dofs
             */
            *dofs_claimed = 1;
            p->p_dtrace_lazy_dofs = incoming_dofs;
        }

        /*
         * Sanity check: the generations must be sorted in ascending order.
         */
        dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
        for (i=0; i<all_dofs->dofiod_count-1; i++) {
            ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
        }

        lck_mtx_unlock(&p->p_dtrace_sprlock);
    } else {
        rval = EACCES;
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    return rval;
}
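/*
 * A worked example of the generation assignment above: if a process
 * already holds lazy dofs with generations 1, 5 and 6, an incoming block
 * of two dofs is assigned generations 7 and 8 (one past the last existing
 * generation), and the merged block holds 1, 5, 6, 7, 8. This preserves
 * the ascending-order invariant that the sanity check above and
 * dtrace_lazy_dofs_proc_iterate_doit() both depend on.
 */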
/*
 * Returns:
 *
 * EINVAL: lazy dof is enabled, but the requested generation was not found.
 * EACCES: This removal needs to be handled non-lazily.
 */
static int
dtrace_lazy_dofs_remove(proc_t *p, int generation)
{
    int rval = EINVAL;

    lck_rw_lock_shared(&dtrace_dof_mode_lock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

    /*
     * Any existing helpers force non-lazy behavior.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
        lck_mtx_lock(&p->p_dtrace_sprlock);

        dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;

        if (existing_dofs) {
            int index, existing_dofs_count = existing_dofs->dofiod_count;
            for (index=0; index<existing_dofs_count; index++) {
                if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
                    dof_ioctl_data_t* removed_dofs = NULL;

                    /*
                     * If there is only 1 dof, we'll delete it and swap in NULL.
                     */
                    if (existing_dofs_count > 1) {
                        int removed_dofs_count = existing_dofs_count - 1;
                        size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);

                        removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
                        removed_dofs->dofiod_count = removed_dofs_count;

                        /*
                         * Copy the remaining data.
                         */
                        bcopy(&existing_dofs->dofiod_helpers[0],
                              &removed_dofs->dofiod_helpers[0],
                              index * sizeof(dof_helper_t));

                        if (index < existing_dofs_count-1) {
                            bcopy(&existing_dofs->dofiod_helpers[index+1],
                                  &removed_dofs->dofiod_helpers[index],
                                  (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
                        }
                    }

                    kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

                    p->p_dtrace_lazy_dofs = removed_dofs;

                    rval = KERN_SUCCESS;

                    break;
                }
            }

            /*
             * Sanity check: any remaining generations must still be sorted.
             */
            dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
            if (all_dofs) {
                unsigned int i;
                for (i=0; i<all_dofs->dofiod_count-1; i++) {
                    ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
                }
            }
        }

        lck_mtx_unlock(&p->p_dtrace_sprlock);
    } else {
        rval = EACCES;
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    return rval;
}
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
    lck_rw_lock_shared(&dtrace_dof_mode_lock);
    lck_mtx_lock(&p->p_dtrace_sprlock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
     * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
     * kern_exit.c and kern_exec.c.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

    dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
    p->p_dtrace_lazy_dofs = NULL;

    lck_mtx_unlock(&p->p_dtrace_sprlock);
    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    if (lazy_dofs) {
        kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
    }
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

    lck_rw_lock_shared(&dtrace_dof_mode_lock);
    lck_mtx_lock(&parent->p_dtrace_sprlock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
     * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
     * kern_fork.c
     */
    ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);

    /*
     * In theory we should hold the child sprlock, but this is safe...
     */
    ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

    dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs;
    dof_ioctl_data_t* child_dofs = NULL;
    if (parent_dofs) {
        size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
        child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
        bcopy(parent_dofs, child_dofs, parent_dofs_size);
    }

    lck_mtx_unlock(&parent->p_dtrace_sprlock);

    if (child_dofs) {
        lck_mtx_lock(&child->p_dtrace_sprlock);
        child->p_dtrace_lazy_dofs = child_dofs;
        lck_mtx_unlock(&child->p_dtrace_sprlock);
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored)
{
#pragma unused(ignored)
    /*
     * Okay to NULL test without taking the sprlock.
     */
    return p->p_dtrace_lazy_dofs != NULL;
}
static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored)
{
#pragma unused(ignored)
    /*
     * It is possible this process may exit during our attempt to
     * fault in the dof. We could fix this by holding locks longer,
     * but the errors are benign.
     */
    lck_mtx_lock(&p->p_dtrace_sprlock);

    /*
     * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

    dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
    p->p_dtrace_lazy_dofs = NULL;

    lck_mtx_unlock(&p->p_dtrace_sprlock);

    /*
     * Process each dof_helper_t
     */
    if (lazy_dofs != NULL) {
        unsigned int i;
        int rval;

        for (i=0; i<lazy_dofs->dofiod_count; i++) {
            /*
             * When loading lazy dof, we depend on the generations being sorted in ascending order.
             */
            ASSERT(i >= (lazy_dofs->dofiod_count - 1) || lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);

            dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

            /*
             * We stored the generation in dofhp_dof. Save it, and restore the original value.
             */
            int generation = dhp->dofhp_dof;
            dhp->dofhp_dof = dhp->dofhp_addr;

            dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);
            if (dof != NULL) {
                dtrace_helpers_t *help;

                lck_mtx_lock(&dtrace_lock);

                /*
                 * This must be done with the dtrace_lock held
                 */
                if ((help = p->p_dtrace_helpers) == NULL)
                    help = dtrace_helpers_create(p);

                /*
                 * If the generation value has been bumped, someone snuck in
                 * when we released the dtrace lock. We have to dump this generation,
                 * there is no safe way to load it.
                 */
                if (help->dthps_generation <= generation) {
                    help->dthps_generation = generation;

                    /*
                     * dtrace_helper_slurp() takes responsibility for the dof --
                     * it may free it now or it may save it and free it later.
                     */
                    if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
                        dtrace_dof_error(NULL, "returned value did not match expected generation");
                    }
                }

                lck_mtx_unlock(&dtrace_lock);
            }
        }

        kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
    }

    return PROC_RETURNED;
}
#endif /* __APPLE__ */
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
    dtrace_helpers_t *help;

    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
    ASSERT(p->p_dtrace_helpers == NULL);

    help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
    help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
        DTRACE_NHELPER_ACTIONS, KM_SLEEP);

    p->p_dtrace_helpers = help;
    dtrace_helpers++;

    return (help);
}
#if !defined(__APPLE__)
static void
dtrace_helpers_destroy(void)
{
    proc_t *p = curproc;
#else
static void
dtrace_helpers_destroy(proc_t* p)
{
#endif
    dtrace_helpers_t *help;
    dtrace_vstate_t *vstate;
    int i;

    lck_mtx_lock(&dtrace_lock);

    ASSERT(p->p_dtrace_helpers != NULL);
    ASSERT(dtrace_helpers > 0);

    help = p->p_dtrace_helpers;
    vstate = &help->dthps_vstate;

    /*
     * We're now going to lose the help from this process.
     */
    p->p_dtrace_helpers = NULL;
    dtrace_sync();

    /*
     * Destroy the helper actions.
     */
    for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
        dtrace_helper_action_t *h, *next;

        for (h = help->dthps_actions[i]; h != NULL; h = next) {
            next = h->dtha_next;
            dtrace_helper_action_destroy(h, vstate);
            h = NULL;
        }
    }

    lck_mtx_unlock(&dtrace_lock);

    /*
     * Destroy the helper providers.
     */
    if (help->dthps_maxprovs > 0) {
        lck_mtx_lock(&dtrace_meta_lock);
        if (dtrace_meta_pid != NULL) {
            ASSERT(dtrace_deferred_pid == NULL);

            for (i = 0; i < help->dthps_nprovs; i++) {
                dtrace_helper_provider_remove(
                    &help->dthps_provs[i]->dthp_prov, p->p_pid);
            }
        } else {
            lck_mtx_lock(&dtrace_lock);
            ASSERT(help->dthps_deferred == 0 ||
                help->dthps_next != NULL ||
                help->dthps_prev != NULL ||
                help == dtrace_deferred_pid);

            /*
             * Remove the helper from the deferred list.
             */
            if (help->dthps_next != NULL)
                help->dthps_next->dthps_prev = help->dthps_prev;
            if (help->dthps_prev != NULL)
                help->dthps_prev->dthps_next = help->dthps_next;
            if (dtrace_deferred_pid == help) {
                dtrace_deferred_pid = help->dthps_next;
                ASSERT(help->dthps_prev == NULL);
            }

            lck_mtx_unlock(&dtrace_lock);
        }

        lck_mtx_unlock(&dtrace_meta_lock);

        for (i = 0; i < help->dthps_nprovs; i++) {
            dtrace_helper_provider_destroy(help->dthps_provs[i]);
        }

        kmem_free(help->dthps_provs, help->dthps_maxprovs *
            sizeof (dtrace_helper_provider_t *));
    }

    lck_mtx_lock(&dtrace_lock);

    dtrace_vstate_fini(&help->dthps_vstate);
    kmem_free(help->dthps_actions,
        sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
    kmem_free(help, sizeof (dtrace_helpers_t));

    --dtrace_helpers;
    lck_mtx_unlock(&dtrace_lock);
}
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
    dtrace_helpers_t *help, *newhelp;
    dtrace_helper_action_t *helper, *new, *last;
    dtrace_difo_t *dp;
    dtrace_vstate_t *vstate;
    int i, j, sz, hasprovs = 0;

    lck_mtx_lock(&dtrace_lock);
    ASSERT(from->p_dtrace_helpers != NULL);
    ASSERT(dtrace_helpers > 0);

    help = from->p_dtrace_helpers;
    newhelp = dtrace_helpers_create(to);
    ASSERT(to->p_dtrace_helpers != NULL);

    newhelp->dthps_generation = help->dthps_generation;
    vstate = &newhelp->dthps_vstate;

    /*
     * Duplicate the helper actions.
     */
    for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
        if ((helper = help->dthps_actions[i]) == NULL)
            continue;

        for (last = NULL; helper != NULL; helper = helper->dtha_next) {
            new = kmem_zalloc(sizeof (dtrace_helper_action_t),
                KM_SLEEP);
            new->dtha_generation = helper->dtha_generation;

            if ((dp = helper->dtha_predicate) != NULL) {
                dp = dtrace_difo_duplicate(dp, vstate);
                new->dtha_predicate = dp;
            }

            new->dtha_nactions = helper->dtha_nactions;
            sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
            new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

            for (j = 0; j < new->dtha_nactions; j++) {
                dtrace_difo_t *dp = helper->dtha_actions[j];

                ASSERT(dp != NULL);
                dp = dtrace_difo_duplicate(dp, vstate);
                new->dtha_actions[j] = dp;
            }

            if (last != NULL) {
                last->dtha_next = new;
            } else {
                newhelp->dthps_actions[i] = new;
            }

            last = new;
        }
    }

    /*
     * Duplicate the helper providers and register them with the
     * DTrace framework.
     */
    if (help->dthps_nprovs > 0) {
        newhelp->dthps_nprovs = help->dthps_nprovs;
        newhelp->dthps_maxprovs = help->dthps_nprovs;
        newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
            sizeof (dtrace_helper_provider_t *), KM_SLEEP);
        for (i = 0; i < newhelp->dthps_nprovs; i++) {
            newhelp->dthps_provs[i] = help->dthps_provs[i];
            newhelp->dthps_provs[i]->dthp_ref++;
        }

        hasprovs = 1;
    }

    lck_mtx_unlock(&dtrace_lock);

    if (hasprovs)
        dtrace_helper_provider_register(to, newhelp, NULL);
}
/*
 * DTrace Hook Functions
 */
static void
dtrace_module_loaded(struct modctl *ctl)
{
    dtrace_provider_t *prv;

    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&mod_lock);

    // ASSERT(ctl->mod_busy);

    /*
     * We're going to call each provider's per-module provide operation
     * specifying only this module.
     */
    for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
        prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

    lck_mtx_unlock(&mod_lock);
    lck_mtx_unlock(&dtrace_provider_lock);

    /*
     * If we have any retained enablings, we need to match against them.
     * Enabling probes requires that cpu_lock be held, and we cannot hold
     * cpu_lock here -- it is legal for cpu_lock to be held when loading a
     * module.  (In particular, this happens when loading scheduling
     * classes.)  So if we have any retained enablings, we need to dispatch
     * our task queue to do the match for us.
     */
    lck_mtx_lock(&dtrace_lock);

    if (dtrace_retained == NULL) {
        lck_mtx_unlock(&dtrace_lock);
        return;
    }

    (void) taskq_dispatch(dtrace_taskq,
        (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

    lck_mtx_unlock(&dtrace_lock);

    /*
     * And now, for a little heuristic sleaze:  in general, we want to
     * match modules as soon as they load.  However, we cannot guarantee
     * this, because it would lead us to the lock ordering violation
     * outlined above.  The common case, of course, is that cpu_lock is
     * _not_ held -- so we delay here for a clock tick, hoping that that's
     * long enough for the task queue to do its work.  If it's not, it's
     * not a serious problem -- it just means that the module that we
     * just loaded may not be immediately instrumentable.
     */
    delay(1);
}
static void
dtrace_module_unloaded(struct modctl *ctl)
{
    dtrace_probe_t template, *probe, *first, *next;
    dtrace_provider_t *prov;

    template.dtpr_mod = ctl->mod_modname;

    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&mod_lock);
    lck_mtx_lock(&dtrace_lock);

    if (dtrace_bymod == NULL) {
        /*
         * The DTrace module is loaded (obviously) but not attached;
         * we don't have any work to do.
         */
        lck_mtx_unlock(&dtrace_provider_lock);
        lck_mtx_unlock(&mod_lock);
        lck_mtx_unlock(&dtrace_lock);
        return;
    }

    for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
        probe != NULL; probe = probe->dtpr_nextmod) {
        if (probe->dtpr_ecb != NULL) {
            lck_mtx_unlock(&dtrace_provider_lock);
            lck_mtx_unlock(&mod_lock);
            lck_mtx_unlock(&dtrace_lock);

            /*
             * This shouldn't _actually_ be possible -- we're
             * unloading a module that has an enabled probe in it.
             * (It's normally up to the provider to make sure that
             * this can't happen.)  However, because dtps_enable()
             * doesn't have a failure mode, there can be an
             * enable/unload race.  Upshot:  we don't want to
             * assert, but we're not going to disable the
             * probe, either.
             */
            if (dtrace_err_verbose) {
                cmn_err(CE_WARN, "unloaded module '%s' had "
                    "enabled probes", ctl->mod_modname);
            }

            return;
        }
    }

    probe = first;

    for (first = NULL; probe != NULL; probe = next) {
        ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

        dtrace_probes[probe->dtpr_id - 1] = NULL;

        next = probe->dtpr_nextmod;
        dtrace_hash_remove(dtrace_bymod, probe);
        dtrace_hash_remove(dtrace_byfunc, probe);
        dtrace_hash_remove(dtrace_byname, probe);

        if (first == NULL) {
            first = probe;
            probe->dtpr_nextmod = NULL;
        } else {
            probe->dtpr_nextmod = first;
            first = probe;
        }
    }

    /*
     * We've removed all of the module's probes from the hash chains and
     * from the probe array.  Now issue a dtrace_sync() to be sure that
     * everyone has cleared out from any probe array processing.
     */
    dtrace_sync();

    for (probe = first; probe != NULL; probe = first) {
        first = probe->dtpr_nextmod;
        prov = probe->dtpr_provider;
        prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
            probe->dtpr_arg);
        kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
        kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
        kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
        vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
#if !defined(__APPLE__)
        kmem_free(probe, sizeof (dtrace_probe_t));
#else
        zfree(dtrace_probe_t_zone, probe);
#endif
    }

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&mod_lock);
    lck_mtx_unlock(&dtrace_provider_lock);
}
void
dtrace_suspend(void)
{
    dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
    dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
    lck_mtx_lock(&dtrace_lock);

    switch (what) {
    case CPU_CONFIG: {
        dtrace_state_t *state;
        dtrace_optval_t *opt, rs, c;

        /*
         * For now, we only allocate a new buffer for anonymous state.
         */
        if ((state = dtrace_anon.dta_state) == NULL)
            break;

        if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
            break;

        opt = state->dts_options;
        c = opt[DTRACEOPT_CPU];

        if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
            break;

        /*
         * Regardless of what the actual policy is, we're going to
         * temporarily set our resize policy to be manual.  We're
         * also going to temporarily set our CPU option to denote
         * the newly configured CPU.
         */
        rs = opt[DTRACEOPT_BUFRESIZE];
        opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
        opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

        (void) dtrace_state_buffers(state);

        opt[DTRACEOPT_BUFRESIZE] = rs;
        opt[DTRACEOPT_CPU] = c;

        break;
    }

    case CPU_UNCONFIG:
        /*
         * We don't free the buffer in the CPU_UNCONFIG case.  (The
         * buffer will be freed when the consumer exits.)
         */
        break;

    default:
        break;
    }

    lck_mtx_unlock(&dtrace_lock);
    return (0);
}

static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
    (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
    if (dtrace_toxranges >= dtrace_toxranges_max) {
        int osize, nsize;
        dtrace_toxrange_t *range;

        osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

        if (osize == 0) {
            ASSERT(dtrace_toxrange == NULL);
            ASSERT(dtrace_toxranges_max == 0);
            dtrace_toxranges_max = 1;
        } else {
            dtrace_toxranges_max <<= 1;
        }

        nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
        range = kmem_zalloc(nsize, KM_SLEEP);

        if (dtrace_toxrange != NULL) {
            ASSERT(osize != 0);
            bcopy(dtrace_toxrange, range, osize);
            kmem_free(dtrace_toxrange, osize);
        }

        dtrace_toxrange = range;
    }

    ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
    ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

    dtrace_toxrange[dtrace_toxranges].dtt_base = base;
    dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
    dtrace_toxranges++;
}
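/*
 * The toxic range array above grows by doubling: successive growth steps
 * give capacities 1, 2, 4, 8, ..., with the old contents bcopy'd into the
 * new zeroed allocation and the old array freed, so insertion is amortized
 * constant time.
 */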
/*
 * DTrace Driver Cookbook Functions
 */
/*ARGSUSED*/
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
    dtrace_provider_id_t id;
    dtrace_state_t *state = NULL;
    dtrace_enabling_t *enab;

    lck_mtx_lock(&cpu_lock);
    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&dtrace_lock);

    if (ddi_soft_state_init(&dtrace_softstate,
        sizeof (dtrace_state_t), 0) != 0) {
        cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
        lck_mtx_unlock(&cpu_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        lck_mtx_unlock(&dtrace_lock);
        return (DDI_FAILURE);
    }

#if !defined(__APPLE__)
    if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
        DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
        ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
        DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
        cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
        ddi_remove_minor_node(devi, NULL);
        ddi_soft_state_fini(&dtrace_softstate);
        lck_mtx_unlock(&cpu_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        lck_mtx_unlock(&dtrace_lock);
        return (DDI_FAILURE);
    }
#endif /* __APPLE__ */

    ddi_report_dev(devi);
    dtrace_devi = devi;

    dtrace_modload = dtrace_module_loaded;
    dtrace_modunload = dtrace_module_unloaded;
    dtrace_cpu_init = dtrace_cpu_setup_initial;
    dtrace_helpers_cleanup = dtrace_helpers_destroy;
    dtrace_helpers_fork = dtrace_helpers_duplicate;
    dtrace_cpustart_init = dtrace_suspend;
    dtrace_cpustart_fini = dtrace_resume;
    dtrace_debugger_init = dtrace_suspend;
    dtrace_debugger_fini = dtrace_resume;
    dtrace_kreloc_init = dtrace_suspend;
    dtrace_kreloc_fini = dtrace_resume;

    register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

    dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
        NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
    dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
        UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
        VM_SLEEP | VMC_IDENTIFIER);
    dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
        1, INT_MAX, 0);

    dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
        sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
        NULL, NULL, NULL, NULL, NULL, 0);

    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

    dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
        offsetof(dtrace_probe_t, dtpr_nextmod),
        offsetof(dtrace_probe_t, dtpr_prevmod));

    dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
        offsetof(dtrace_probe_t, dtpr_nextfunc),
        offsetof(dtrace_probe_t, dtpr_prevfunc));

    dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
        offsetof(dtrace_probe_t, dtpr_nextname),
        offsetof(dtrace_probe_t, dtpr_prevname));

    if (dtrace_retain_max < 1) {
        cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
            "setting to 1", dtrace_retain_max);
        dtrace_retain_max = 1;
    }

    /*
     * Now discover our toxic ranges.
     */
    dtrace_toxic_ranges(dtrace_toxrange_add);

    /*
     * Before we register ourselves as a provider to our own framework,
     * we would like to assert that dtrace_provider is NULL -- but that's
     * not true if we were loaded as a dependency of a DTrace provider.
     * Once we've registered, we can assert that dtrace_provider is our
     * pseudo provider.
     */
    (void) dtrace_register("dtrace", &dtrace_provider_attr,
        DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

    ASSERT(dtrace_provider != NULL);
    ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if !defined(__APPLE__)
    dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
    dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "END", 0, NULL);
    dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
#elif defined(__ppc__) || defined(__ppc64__)
    dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
    dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "END", 1, NULL);
    dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#elif (defined(__i386__) || defined (__x86_64__))
    dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
    dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "END", 0, NULL);
    dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#elif defined(__arm__)
    dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
    dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "END", 1, NULL);
    dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#else
#error Unknown Architecture
#endif /* __APPLE__ */

    dtrace_anon_property();
    lck_mtx_unlock(&cpu_lock);

    /*
     * If DTrace helper tracing is enabled, we need to allocate the
     * trace buffer and initialize the values.
     */
    if (dtrace_helptrace_enabled) {
        ASSERT(dtrace_helptrace_buffer == NULL);
        dtrace_helptrace_buffer =
            kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
        dtrace_helptrace_next = 0;
    }

    /*
     * If there are already providers, we must ask them to provide their
     * probes, and then match any anonymous enabling against them.  Note
     * that there should be no other retained enablings at this time:
     * the only retained enablings at this time should be the anonymous
     * enabling.
     */
    if (dtrace_anon.dta_enabling != NULL) {
        ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

        dtrace_enabling_provide(NULL);
        state = dtrace_anon.dta_state;

        /*
         * We couldn't hold cpu_lock across the above call to
         * dtrace_enabling_provide(), but we must hold it to actually
         * enable the probes.  We have to drop all of our locks, pick
         * up cpu_lock, and regain our locks before matching the
         * retained anonymous enabling.
         */
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&dtrace_provider_lock);

        lck_mtx_lock(&cpu_lock);
        lck_mtx_lock(&dtrace_provider_lock);
        lck_mtx_lock(&dtrace_lock);

        if ((enab = dtrace_anon.dta_enabling) != NULL)
            (void) dtrace_enabling_match(enab, NULL);

        lck_mtx_unlock(&cpu_lock);
    }

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&dtrace_provider_lock);

    if (state != NULL) {
        /*
         * If we created any anonymous state, set it going now.
         */
        (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
    }

    return (DDI_SUCCESS);
}
14914 dtrace_open(dev_t 
*devp
, int flag
, int otyp
, cred_t 
*cred_p
) 
14916 #pragma unused(flag, otyp) 
14917         dtrace_state_t 
*state
; 
14922 #if !defined(__APPLE__) 
14923         if (getminor(*devp
) == DTRACEMNRN_HELPER
) 
14927          * If this wasn't an open with the "helper" minor, then it must be 
14928          * the "dtrace" minor. 
14930         ASSERT(getminor(*devp
) == DTRACEMNRN_DTRACE
); 
14932         /* Darwin puts Helper on its own major device. */ 
14933 #endif /* __APPLE__ */ 
14936          * If no DTRACE_PRIV_* bits are set in the credential, then the 
14937          * caller lacks sufficient permission to do anything with DTrace. 
14939         dtrace_cred2priv(cred_p
, &priv
, &uid
, &zoneid
); 
14940         if (priv 
== DTRACE_PRIV_NONE
) 
14943 #if defined(__APPLE__) 
14945          * We delay the initialization of fasttrap as late as possible. 
14946          * It certainly can't be later than now! 
14949 #endif /* __APPLE__ */ 
14952          * Ask all providers to provide all their probes. 
14954         lck_mtx_lock(&dtrace_provider_lock
); 
14955         dtrace_probe_provide(NULL
, NULL
); 
14956         lck_mtx_unlock(&dtrace_provider_lock
); 
14958         lck_mtx_lock(&cpu_lock
); 
14959         lck_mtx_lock(&dtrace_lock
); 
14961         dtrace_membar_producer(); 
14964          * If the kernel debugger is active (that is, if the kernel debugger 
14965          * modified text in some way), we won't allow the open. 
14967         if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE
) != 0) { 
14969                 lck_mtx_unlock(&cpu_lock
); 
14970                 lck_mtx_unlock(&dtrace_lock
); 
14974         state 
= dtrace_state_create(devp
, cred_p
); 
14975         lck_mtx_unlock(&cpu_lock
); 
14977         if (state 
== NULL
) { 
14978                 if (--dtrace_opens 
== 0) 
14979                         (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE
); 
14980                 lck_mtx_unlock(&dtrace_lock
); 
14984         lck_mtx_unlock(&dtrace_lock
); 
14986 #if defined(__APPLE__) 
14987         lck_rw_lock_exclusive(&dtrace_dof_mode_lock
); 
14990          * If we are currently lazy, transition states. 
14992          * Unlike dtrace_close, we do not need to check the 
14993          * value of dtrace_opens, as any positive value (and 
14994          * we count as 1) means we transition states. 
14996         if (dtrace_dof_mode 
== DTRACE_DOF_MODE_LAZY_ON
) { 
14997                 dtrace_dof_mode 
= DTRACE_DOF_MODE_LAZY_OFF
; 
15000                  * Iterate all existing processes and load lazy dofs. 
15002                 proc_iterate(PROC_ALLPROCLIST 
| PROC_NOWAITTRANS
, 
15003                              dtrace_lazy_dofs_proc_iterate_doit
, 
15005                              dtrace_lazy_dofs_proc_iterate_filter
, 
15009         lck_rw_unlock_exclusive(&dtrace_dof_mode_lock
); 
/*ARGSUSED*/
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
    minor_t minor = getminor(dev);
    dtrace_state_t *state;

#if !defined(__APPLE__)
    if (minor == DTRACEMNRN_HELPER)
        return (0);
#else
    /* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

    state = ddi_get_soft_state(dtrace_softstate, minor);

    lck_mtx_lock(&cpu_lock);
    lck_mtx_lock(&dtrace_lock);

    if (state->dts_anon) {
        /*
         * There is anonymous state. Destroy that first.
         */
        ASSERT(dtrace_anon.dta_state == NULL);
        dtrace_state_destroy(state->dts_anon);
    }

    dtrace_state_destroy(state);
    ASSERT(dtrace_opens > 0);
    if (--dtrace_opens == 0)
        (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&cpu_lock);

#if defined(__APPLE__)
    /*
     * Lock ordering requires the dof mode lock be taken before
     * the dtrace_lock.
     */
    lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
    lck_mtx_lock(&dtrace_lock);

    /*
     * If we are currently lazy-off, and this is the last close, transition to
     * lazy mode.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
        dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
    }

    lck_mtx_unlock(&dtrace_lock);
    lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif /* __APPLE__ */

    return (0);
}
#if defined(__APPLE__)
/*
 * Introduce cast to quiet warnings.
 * XXX: This hides a lot of brokenness.
 */
#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) )
#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) )
#endif /* __APPLE__ */
#if defined(__APPLE__)
static int
dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
    /*
     * Safe to check this outside the dof mode lock
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
        return KERN_SUCCESS;

    switch (cmd) {
        case DTRACEHIOC_ADDDOF: {
            dof_helper_t *dhp = NULL;
            size_t dof_ioctl_data_size;
            dof_ioctl_data_t* multi_dof;
            unsigned int i;
            int rval = 0;
            user_addr_t user_address = *(user_addr_t*)arg;
            uint64_t dof_count;
            int multi_dof_claimed = 0;
            proc_t* p = current_proc();

            /*
             * Read the number of DOF sections being passed in.
             */
            if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
                       &dof_count,
                       sizeof(dof_count))) {
                dtrace_dof_error(NULL, "failed to copyin dofiod_count");
                return (EFAULT);
            }

            /*
             * Range check the count.
             */
            if (dof_count == 0 || dof_count > 1024) {
                dtrace_dof_error(NULL, "dofiod_count is not valid");
                return (EINVAL);
            }

            /*
             * Allocate a correctly sized structure and copyin the data.
             */
            dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
            if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
                return (ENOMEM);

            /* NOTE! We can no longer exit this method via return */
            if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
                dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
                rval = EFAULT;
                goto cleanup;
            }

            /*
             * Check that the count didn't change between the first copyin and the second.
             */
            if (multi_dof->dofiod_count != dof_count) {
                rval = EINVAL;
                goto cleanup;
            }

            /*
             * Try to process lazily first.
             */
            rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

            /*
             * If rval is EACCES, we must be non-lazy.
             */
            if (rval == EACCES) {
                rval = 0;

                /*
                 * Process each dof_helper_t
                 */
                i = 0;
                do {
                    dhp = &multi_dof->dofiod_helpers[i];

                    dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

                    if (dof != NULL) {
                        lck_mtx_lock(&dtrace_lock);

                        /*
                         * dtrace_helper_slurp() takes responsibility for the dof --
                         * it may free it now or it may save it and free it later.
                         */
                        if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
                            rval = EINVAL;
                        }

                        lck_mtx_unlock(&dtrace_lock);
                    }
                } while (++i < multi_dof->dofiod_count && rval == 0);
            }

            /*
             * We need to copyout the multi_dof struct, because it contains
             * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
             *
             * This could certainly be better optimized.
             */
            if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
                dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
                /* Don't overwrite pre-existing error code */
                if (rval == 0) rval = EFAULT;
            }

        cleanup:
            /*
             * If we had to allocate struct memory, free it.
             */
            if (multi_dof != NULL && !multi_dof_claimed) {
                kmem_free(multi_dof, dof_ioctl_data_size);
            }

            return rval;
        }

        case DTRACEHIOC_REMOVE: {
            int generation = *(int*)arg;
            proc_t* p = current_proc();

            /*
             * Try lazy first.
             */
            int rval = dtrace_lazy_dofs_remove(p, generation);

            /*
             * EACCES means non-lazy
             */
            if (rval == EACCES) {
                lck_mtx_lock(&dtrace_lock);
                rval = dtrace_helper_destroygen(p, generation);
                lck_mtx_unlock(&dtrace_lock);
            }

            return (rval);
        }

        default:
            break;
    }

    return ENOTTY;
}
#endif /* __APPLE__ */
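/*
 * The copyout in DTRACEHIOC_ADDDOF above completes the contract with dyld:
 * on return, each dofiod_helpers[i].dofhp_dof carries the generation
 * (unique id) assigned to that dof -- whether it was handled lazily or
 * not -- and dyld passes exactly that value back through DTRACEHIOC_REMOVE
 * when the corresponding library is unloaded.
 */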
15236 dtrace_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int md
, cred_t 
*cr
, int *rv
) 
15238         minor_t minor 
= getminor(dev
); 
15239         dtrace_state_t 
*state
; 
15242 #if !defined(__APPLE__) 
15243         if (minor 
== DTRACEMNRN_HELPER
) 
15244                 return (dtrace_ioctl_helper(cmd
, arg
, rv
)); 
15246         /* Darwin puts Helper on its own major device. */ 
15247 #endif /* __APPLE__ */ 
15249         state 
= ddi_get_soft_state(dtrace_softstate
, minor
); 
15251         if (state
->dts_anon
) { 
15252                 ASSERT(dtrace_anon
.dta_state 
== NULL
); 
15253                 state 
= state
->dts_anon
; 
15257         case DTRACEIOC_PROVIDER
: { 
15258                 dtrace_providerdesc_t pvd
; 
15259                 dtrace_provider_t 
*pvp
; 
15261                 if (copyin((void *)arg
, &pvd
, sizeof (pvd
)) != 0) 
15264                 pvd
.dtvd_name
[DTRACE_PROVNAMELEN 
- 1] = '\0'; 
15265                 lck_mtx_lock(&dtrace_provider_lock
); 
15267                 for (pvp 
= dtrace_provider
; pvp 
!= NULL
; pvp 
= pvp
->dtpv_next
) { 
15268                         if (strcmp(pvp
->dtpv_name
, pvd
.dtvd_name
) == 0) 
15272                 lck_mtx_unlock(&dtrace_provider_lock
); 
15277                 bcopy(&pvp
->dtpv_priv
, &pvd
.dtvd_priv
, sizeof (dtrace_ppriv_t
)); 
15278                 bcopy(&pvp
->dtpv_attr
, &pvd
.dtvd_attr
, sizeof (dtrace_pattr_t
)); 
15279                 if (copyout(&pvd
, (void *)arg
, sizeof (pvd
)) != 0) 
15285         case DTRACEIOC_EPROBE
: { 
15286                 dtrace_eprobedesc_t epdesc
; 
15288                 dtrace_action_t 
*act
; 
15294                 if (copyin((void *)arg
, &epdesc
, sizeof (epdesc
)) != 0) 
15297                 lck_mtx_lock(&dtrace_lock
); 
15299                 if ((ecb 
= dtrace_epid2ecb(state
, epdesc
.dtepd_epid
)) == NULL
) { 
15300                         lck_mtx_unlock(&dtrace_lock
); 
15304                 if (ecb
->dte_probe 
== NULL
) { 
15305                         lck_mtx_unlock(&dtrace_lock
); 
15309                 epdesc
.dtepd_probeid 
= ecb
->dte_probe
->dtpr_id
; 
15310                 epdesc
.dtepd_uarg 
= ecb
->dte_uarg
; 
15311                 epdesc
.dtepd_size 
= ecb
->dte_size
; 
15313                 nrecs 
= epdesc
.dtepd_nrecs
; 
15314                 epdesc
.dtepd_nrecs 
= 0; 
15315                 for (act 
= ecb
->dte_action
; act 
!= NULL
; act 
= act
->dta_next
) { 
15316                         if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
) 
15319                         epdesc
.dtepd_nrecs
++; 
15323                  * Now that we have the size, we need to allocate a temporary 
15324                  * buffer in which to store the complete description.  We need 
15325                  * the temporary buffer to be able to drop dtrace_lock() 
15326                  * across the copyout(), below. 
15328                 size 
= sizeof (dtrace_eprobedesc_t
) + 
15329                     (epdesc
.dtepd_nrecs 
* sizeof (dtrace_recdesc_t
)); 
15331                 buf 
= kmem_alloc(size
, KM_SLEEP
); 
15332                 dest 
= (uintptr_t)buf
; 
15334                 bcopy(&epdesc
, (void *)dest
, sizeof (epdesc
)); 
15335                 dest 
+= offsetof(dtrace_eprobedesc_t
, dtepd_rec
[0]); 
15337                 for (act 
= ecb
->dte_action
; act 
!= NULL
; act 
= act
->dta_next
) { 
15338                         if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
) 
15344                         bcopy(&act
->dta_rec
, (void *)dest
, 
15345                             sizeof (dtrace_recdesc_t
)); 
15346                         dest 
+= sizeof (dtrace_recdesc_t
); 
15349                 lck_mtx_unlock(&dtrace_lock
); 
15351                 if (copyout(buf
, (void *)arg
, dest 
- (uintptr_t)buf
) != 0) { 
15352                         kmem_free(buf
, size
); 
15356                 kmem_free(buf
, size
); 
15360         case DTRACEIOC_AGGDESC
: { 
15361                 dtrace_aggdesc_t aggdesc
; 
15362                 dtrace_action_t 
*act
; 
15363                 dtrace_aggregation_t 
*agg
; 
15366                 dtrace_recdesc_t 
*lrec
; 
15371                 if (copyin((void *)arg
, &aggdesc
, sizeof (aggdesc
)) != 0) 
15374                 lck_mtx_lock(&dtrace_lock
); 
15376                 if ((agg 
= dtrace_aggid2agg(state
, aggdesc
.dtagd_id
)) == NULL
) { 
15377                         lck_mtx_unlock(&dtrace_lock
); 
15381                 aggdesc
.dtagd_epid 
= agg
->dtag_ecb
->dte_epid
; 
15383                 nrecs 
= aggdesc
.dtagd_nrecs
; 
15384                 aggdesc
.dtagd_nrecs 
= 0; 
15386                 offs 
= agg
->dtag_base
; 
15387                 lrec 
= &agg
->dtag_action
.dta_rec
; 
15388                 aggdesc
.dtagd_size 
= lrec
->dtrd_offset 
+ lrec
->dtrd_size 
- offs
; 
15390                 for (act 
= agg
->dtag_first
; ; act 
= act
->dta_next
) { 
15391                         ASSERT(act
->dta_intuple 
|| 
15392                             DTRACEACT_ISAGG(act
->dta_kind
)); 
15395                          * If this action has a record size of zero, it 
15396                          * denotes an argument to the aggregating action. 
15397                          * Because the presence of this record doesn't (or 
15398                          * shouldn't) affect the way the data is interpreted, 
15399                          * we don't copy it out to save user-level the 
15400                          * confusion of dealing with a zero-length record. 
15402                         if (act
->dta_rec
.dtrd_size 
== 0) { 
15403                                 ASSERT(agg
->dtag_hasarg
); 
15407                         aggdesc
.dtagd_nrecs
++; 
15409                         if (act 
== &agg
->dtag_action
) 
15414                  * Now that we have the size, we need to allocate a temporary 
15415                  * buffer in which to store the complete description.  We need 
15416                  * the temporary buffer to be able to drop dtrace_lock() 
15417                  * across the copyout(), below. 
15419                 size 
= sizeof (dtrace_aggdesc_t
) + 
15420                     (aggdesc
.dtagd_nrecs 
* sizeof (dtrace_recdesc_t
)); 
15422                 buf 
= kmem_alloc(size
, KM_SLEEP
); 
15423                 dest 
= (uintptr_t)buf
; 
15425                 bcopy(&aggdesc
, (void *)dest
, sizeof (aggdesc
)); 
15426                 dest 
+= offsetof(dtrace_aggdesc_t
, dtagd_rec
[0]); 
15428                 for (act 
= agg
->dtag_first
; ; act 
= act
->dta_next
) { 
15429                         dtrace_recdesc_t rec 
= act
->dta_rec
; 
15432                          * See the comment in the above loop for why we pass 
15433                          * over zero-length records. 
15435                         if (rec
.dtrd_size 
== 0) { 
15436                                 ASSERT(agg
->dtag_hasarg
); 
15443                         rec
.dtrd_offset 
-= offs
; 
15444                         bcopy(&rec
, (void *)dest
, sizeof (rec
)); 
15445                         dest 
+= sizeof (dtrace_recdesc_t
); 
15447                         if (act 
== &agg
->dtag_action
) 
15451                 lck_mtx_unlock(&dtrace_lock
); 
15453                 if (copyout(buf
, (void *)arg
, dest 
- (uintptr_t)buf
) != 0) { 
15454                         kmem_free(buf
, size
); 
15458                 kmem_free(buf
, size
); 
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			lck_mtx_lock(&cpu_lock);
			lck_mtx_lock(&dtrace_lock);
			err = dtrace_enabling_matchstate(state, rv);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);

			return (err);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}

	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}

	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}

	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);

		return (0);
	}

	default:
		break;
	}

	return (ENOTTY);
}
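
/*
 * A minimal user-level sketch of the two-pass length negotiation that
 * DTRACEIOC_FORMAT implements above: the first call (dtfd_length == 0)
 * reports the required length back through the descriptor, and the
 * second call retrieves the string itself.  The dt_ioctl() wrapper is a
 * hypothetical stand-in for whatever shim hides the Darwin
 * argument-passing and errno-overloading conventions (see
 * _dtrace_ioctl() below); neither it nor fetch_format() is part of this
 * file.
 */
#if 0	/* illustrative only */
#include <sys/dtrace.h>
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>

extern int dt_ioctl(int fd, unsigned long cmd, void *data);	/* hypothetical */

static char *
fetch_format(int fd, uint16_t fmtnum)
{
	dtrace_fmtdesc_t fmt;

	bzero(&fmt, sizeof (fmt));
	fmt.dtfd_format = fmtnum;

	/* Pass 1: learn the required buffer length. */
	if (dt_ioctl(fd, DTRACEIOC_FORMAT, &fmt) != 0)
		return (NULL);

	if ((fmt.dtfd_string = malloc(fmt.dtfd_length)) == NULL)
		return (NULL);

	/* Pass 2: retrieve the format string itself. */
	if (dt_ioctl(fd, DTRACEIOC_FORMAT, &fmt) != 0) {
		free(fmt.dtfd_string);
		return (NULL);
	}

	return (fmt.dtfd_string);
}
#endif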
#if defined(__APPLE__)
#endif /* __APPLE__ */

#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * not have been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else /* __APPLE__ */

d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;

int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(p)
	int err, rv = 0;

	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}

int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper((int)cmd, data, &rv);
	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
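
/*
 * A minimal consumer-side sketch (hypothetical helper, not part of this
 * file) decoding the overloaded errno produced by _dtrace_ioctl() and
 * helper_ioctl() above: values below 4096 are genuine error codes, and
 * larger values carry a Solaris-style ioctl return value in the bits
 * above bit 11.
 */
#if 0	/* illustrative only */
#include <errno.h>

static int
decode_dtrace_errno(int *rvp)
{
	if (errno < 4096)
		return (errno);		/* genuine error code */

	*rvp = errno >> 12;		/* overloaded return value */
	return (0);
}
#endif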

#define HELPER_MAJOR  -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("helper_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR

/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
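
/*
 * Worked example of the propose-and-release dance above (illustrative;
 * assumes vmem_alloc() proposes the same number again on the subsequent
 * allocation): with minors 1 and 2 outstanding, the DEVFS_CLONE_ALLOC
 * case proposes 3 and immediately vmem_free()s it, so the open path can
 * vmem_alloc() that same 3 for real.  Holding DEVFS_LOCK across the
 * callback (see the comment above) keeps another cloner from claiming 3
 * in between.
 */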

#define DTRACE_MAJOR  -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

lck_attr_t* dtrace_lck_attr;
lck_grp_attr_t* dtrace_lck_grp_attr;
lck_grp_t* dtrace_lck_grp;

static int gMajDevNo;

void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu = NCPU;

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
					    1024 * sizeof(dtrace_probe_t),
					    sizeof(dtrace_probe_t),
					    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly.  Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * XXX Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_arg("dtrace_dof_mode", &dtrace_dof_mode)) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
			case DTRACE_DOF_MODE_NEVER:
			case DTRACE_DOF_MODE_LAZY_ON:
				/* valid modes, but nothing else we need to do */
				break;

			case DTRACE_DOF_MODE_LAZY_OFF:
			case DTRACE_DOF_MODE_NON_LAZY:
				/* Cannot wait for a dtrace_open to init fasttrap */
				fasttrap_init();
				break;

			default:
				/* Invalid, clamp to non lazy */
				dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
				fasttrap_init();
				break;
		}

		gDTraceInited = 1;

	} else
		panic("dtrace_init: called twice!\n");
}
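
/*
 * Illustrative boot-args usage for the PE_parse_boot_arg() hook in
 * dtrace_init() above; the numeric values are assumed to follow the
 * DTRACE_DOF_MODE_* constants in dtrace_impl.h:
 *
 *	nvram boot-args="dtrace_dof_mode=1"	# e.g., lazy dof on
 */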
void
dtrace_postinit(void)
{
	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
}
#undef DTRACE_MAJOR

/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

#endif /* __APPLE__ */