/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/thread.h>
#include <sys/kauth.h>

#include <sys/systm.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <machine/atomic.h>
#include <libkern/OSKextLibPrivate.h>
#include <kern/kern_types.h>
#include <kern/timer_call.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <kern/kalloc.h>

#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <mach/task.h>
#include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */
/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

void
dtrace_sprlock(proc_t *p)
{
	lck_mtx_lock(&p->p_dtrace_sprlock);
}

void
dtrace_sprunlock(proc_t *p)
{
	lck_mtx_unlock(&p->p_dtrace_sprlock);
}

/* Not called from probe context */
proc_t *
sprlock(pid_t pid)
{
	proc_t *p;

	if ((p = proc_find(pid)) == PROC_NULL) {
		return PROC_NULL;
	}

	task_suspend_internal(p->task);

	dtrace_sprlock(p);

	return p;
}

/* Not called from probe context */
void
sprunlock(proc_t *p)
{
	if (p != PROC_NULL) {
		dtrace_sprunlock(p);

		task_resume_internal(p->task);

		proc_rele(p);
	}
}

// These are not exported from vm_map.h.
extern kern_return_t vm_map_read_user(vm_map_t map, vm_map_address_t src_addr, void *dst_p, vm_size_t size);
extern kern_return_t vm_map_write_user(vm_map_t map, void *src_p, vm_map_address_t dst_addr, vm_size_t size);
/* Not called from probe context */
int
uread(proc_t *p, void *buf, user_size_t len, user_addr_t a)
{
	kern_return_t ret;

	ASSERT(p != PROC_NULL);
	ASSERT(p->task != NULL);

	task_t task = p->task;

	/*
	 * Grab a reference to the task vm_map_t to make sure
	 * the map isn't pulled out from under us.
	 *
	 * Because the proc_lock is not held at all times on all code
	 * paths leading here, it is possible for the proc to have
	 * exited. If the map is null, fail.
	 */
	vm_map_t map = get_task_map_reference(task);
	if (map) {
		ret = vm_map_read_user( map, (vm_map_address_t)a, buf, (vm_size_t)len);
		vm_map_deallocate(map);
	} else {
		ret = KERN_TERMINATED;
	}

	return (int)ret;
}
/* Not called from probe context */
int
uwrite(proc_t *p, void *buf, user_size_t len, user_addr_t a)
{
	kern_return_t ret;

	ASSERT(p->task != NULL);

	task_t task = p->task;

	/*
	 * Grab a reference to the task vm_map_t to make sure
	 * the map isn't pulled out from under us.
	 *
	 * Because the proc_lock is not held at all times on all code
	 * paths leading here, it is possible for the proc to have
	 * exited. If the map is null, fail.
	 */
	vm_map_t map = get_task_map_reference(task);
	if (map) {
		/* Find the memory permissions. */
		uint32_t nestingDepth = 999999;
		vm_region_submap_short_info_data_64_t info;
		mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		mach_vm_address_t address = (mach_vm_address_t)a;
		mach_vm_size_t sizeOfRegion = (mach_vm_size_t)len;

		ret = mach_vm_region_recurse(map, &address, &sizeOfRegion, &nestingDepth, (vm_region_recurse_info_t)&info, &count);
		if (ret != KERN_SUCCESS) {
			goto done;
		}

		vm_prot_t reprotect;

		if (!(info.protection & VM_PROT_WRITE)) {
			/* Save the original protection values for restoration later */
			reprotect = info.protection;

			if (info.max_protection & VM_PROT_WRITE) {
				/* The memory is not currently writable, but can be made writable. */
				ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, (reprotect & ~VM_PROT_EXECUTE) | VM_PROT_WRITE);
			} else {
				/*
				 * The memory is not currently writable, and cannot be made writable. We need to COW this memory.
				 *
				 * Strange, we can't just say "reprotect | VM_PROT_COPY", that fails.
				 */
				ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, VM_PROT_COPY | VM_PROT_READ | VM_PROT_WRITE);
			}

			if (ret != KERN_SUCCESS) {
				goto done;
			}
		} else {
			/* The memory was already writable. */
			reprotect = VM_PROT_NONE;
		}

		ret = vm_map_write_user( map,
		    buf,
		    (vm_map_address_t)a,
		    (vm_size_t)len);

		dtrace_flush_caches();

		if (ret != KERN_SUCCESS) {
			goto done;
		}

		if (reprotect != VM_PROT_NONE) {
			ASSERT(reprotect & VM_PROT_EXECUTE);
			ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, reprotect);
		}

done:
		vm_map_deallocate(map);
	} else {
		ret = KERN_TERMINATED;
	}

	return (int)ret;
}
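/*
 * Illustrative sketch (added for clarity, not part of the original file): a
 * caller such as an instruction-patching provider would typically pair
 * uread()/uwrite() with sprlock()/sprunlock() so the target process stays
 * suspended while its text is modified. The instruction width, the
 * FASTTRAP_INSTR opcode, and 'addr' below are placeholders, not definitions
 * from this file.
 *
 *	proc_t *p = sprlock(pid);
 *	if (p != PROC_NULL) {
 *		uint32_t instr, trap = FASTTRAP_INSTR;	// placeholder opcode
 *		if (uread(p, &instr, sizeof(instr), addr) == KERN_SUCCESS &&
 *		    uwrite(p, &trap, sizeof(trap), addr) == KERN_SUCCESS) {
 *			// original instruction saved in 'instr' for later restore
 *		}
 *		sprunlock(p);
 *	}
 */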
dtrace_cpu_t *cpu_list;
cpu_core_t *cpu_core; /* XXX TLB lockdown? */

/*
 * dtrace_CRED() can be called from probe context. We cannot simply call kauth_cred_get() since
 * that function may try to resolve a lazy credential binding, which entails taking the proc_lock.
 */
cred_t *
dtrace_CRED(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	if (uthread == NULL) {
		return NULL;
	} else {
		return uthread->uu_ucred; /* May return NOCRED which is defined to be 0 */
	}
}

int
PRIV_POLICY_CHOICE(void* cred, int priv, int all)
{
#pragma unused(priv, all)
	return kauth_cred_issuser(cred); /* XXX TODO: How is this different from PRIV_POLICY_ONLY? */
}

int
PRIV_POLICY_ONLY(void *cr, int priv, int boolean)
{
#pragma unused(priv, boolean)
	return kauth_cred_issuser(cr); /* XXX TODO: HAS_PRIVILEGE(cr, priv); */
}

uid_t
crgetuid(const cred_t *cr)
{
	cred_t copy_cr = *cr; return kauth_cred_getuid(&copy_cr);
}
typedef struct wrap_timer_call {
	/* node attributes */
	cyc_handler_t           hdlr;
	cyc_time_t              when;
	uint64_t                deadline;
	int                     cpuid;
	boolean_t               suspended;
	struct timer_call       call;

	/* next item in the linked list */
	LIST_ENTRY(wrap_timer_call) entries;
} wrap_timer_call_t;

#define WAKEUP_REAPER           0x7FFFFFFFFFFFFFFFLL
#define NEARLY_FOREVER          0x7FFFFFFFFFFFFFFELL

typedef struct cyc_list {
	cyc_omni_handler_t cyl_omni;
	wrap_timer_call_t cyl_wrap_by_cpus[];
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
} __attribute__ ((aligned(8))) cyc_list_t;
#else
} cyc_list_t;
#endif

/* CPU going online/offline notifications */
void (*dtrace_cpu_state_changed_hook)(int, boolean_t) = NULL;
void dtrace_cpu_state_changed(int, boolean_t);

void
dtrace_install_cpu_hooks(void)
{
	dtrace_cpu_state_changed_hook = dtrace_cpu_state_changed;
}
void
dtrace_cpu_state_changed(int cpuid, boolean_t is_running)
{
#pragma unused(cpuid)
	wrap_timer_call_t       *wrapTC = NULL;
	boolean_t               suspend = (is_running ? FALSE : TRUE);
	dtrace_icookie_t        s;

	/* Ensure that we're not going to leave the CPU */
	s = dtrace_interrupt_disable();
	assert(cpuid == cpu_number());

	LIST_FOREACH(wrapTC, &(cpu_list[cpu_number()].cpu_cyc_list), entries) {
		assert(wrapTC->cpuid == cpu_number());

		if (suspend) {
			assert(!wrapTC->suspended);
			/* If this fails, we'll panic anyway, so let's do this now. */
			if (!timer_call_cancel(&wrapTC->call)) {
				panic("timer_call_set_suspend() failed to cancel a timer call");
			}
			wrapTC->suspended = TRUE;
		} else {
			/* Rearm the timer, but ensure it was suspended first. */
			assert(wrapTC->suspended);
			clock_deadline_for_periodic_event(wrapTC->when.cyt_interval, mach_absolute_time(),
			    &wrapTC->deadline);
			timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
			    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
			wrapTC->suspended = FALSE;
		}
	}

	/* Restore the previous interrupt state. */
	dtrace_interrupt_enable(s);
}
static void
_timer_call_apply_cyclic( void *ignore, void *vTChdl )
{
#pragma unused(ignore)
	wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)vTChdl;

	(*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline));
	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL );
}

static cyclic_id_t
timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_time_t *when)
{
	uint64_t now;
	dtrace_icookie_t s;

	timer_call_setup( &(wrapTC->call), _timer_call_apply_cyclic, NULL );
	wrapTC->hdlr = *handler;
	wrapTC->when = *when;

	nanoseconds_to_absolutetime( wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval );

	now = mach_absolute_time();
	wrapTC->deadline = now;

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline));

	/* Insert the timer to the list of the running timers on this CPU, and start it. */
	s = dtrace_interrupt_disable();
	wrapTC->cpuid = cpu_number();
	LIST_INSERT_HEAD(&cpu_list[wrapTC->cpuid].cpu_cyc_list, wrapTC, entries);
	timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
	wrapTC->suspended = FALSE;
	dtrace_interrupt_enable(s);

	return (cyclic_id_t)wrapTC;
}
/*
 * Executed on the CPU the timer is running on.
 */
static void
timer_call_remove_cyclic(wrap_timer_call_t *wrapTC)
{
	assert(cpu_number() == wrapTC->cpuid);

	if (!timer_call_cancel(&wrapTC->call)) {
		panic("timer_call_remove_cyclic() failed to cancel a timer call");
	}

	LIST_REMOVE(wrapTC, entries);
}

static void *
timer_call_get_cyclic_arg(wrap_timer_call_t *wrapTC)
{
	return wrapTC ? wrapTC->hdlr.cyh_arg : NULL;
}

cyclic_id_t
cyclic_timer_add(cyc_handler_t *handler, cyc_time_t *when)
{
	wrap_timer_call_t *wrapTC = _MALLOC(sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);
	if (NULL == wrapTC) {
		return CYCLIC_NONE;
	} else {
		return timer_call_add_cyclic( wrapTC, handler, when );
	}
}

void
cyclic_timer_remove(cyclic_id_t cyclic)
{
	ASSERT( cyclic != CYCLIC_NONE );

	/* Removing a timer call must be done on the CPU the timer is running on. */
	wrap_timer_call_t *wrapTC = (wrap_timer_call_t *) cyclic;
	dtrace_xcall(wrapTC->cpuid, (dtrace_xcall_t) timer_call_remove_cyclic, (void*) cyclic);

	_FREE((void *)cyclic, M_TEMP);
}
static void
_cyclic_add_omni(cyc_list_t *cyc_list)
{
	cyc_time_t cT;
	cyc_handler_t cH;
	cyc_omni_handler_t *omni = &cyc_list->cyl_omni;

	(omni->cyo_online)(omni->cyo_arg, CPU, &cH, &cT);

	wrap_timer_call_t *wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()];
	timer_call_add_cyclic(wrapTC, &cH, &cT);
}

cyclic_id_list_t
cyclic_add_omni(cyc_omni_handler_t *omni)
{
	cyc_list_t *cyc_list =
	    _MALLOC(sizeof(cyc_list_t) + NCPU * sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);

	if (NULL == cyc_list) {
		return NULL;
	}

	cyc_list->cyl_omni = *omni;

	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_add_omni, (void *)cyc_list);

	return (cyclic_id_list_t)cyc_list;
}

static void
_cyclic_remove_omni(cyc_list_t *cyc_list)
{
	cyc_omni_handler_t *omni = &cyc_list->cyl_omni;
	void *oarg;
	wrap_timer_call_t *wrapTC;

	/*
	 * If the processor was offline when dtrace started, we did not allocate
	 * a cyclic timer for this CPU.
	 */
	if ((wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()]) != NULL) {
		oarg = timer_call_get_cyclic_arg(wrapTC);
		timer_call_remove_cyclic(wrapTC);
		(omni->cyo_offline)(omni->cyo_arg, CPU, oarg);
	}
}

void
cyclic_remove_omni(cyclic_id_list_t cyc_list)
{
	ASSERT(cyc_list != NULL);

	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_remove_omni, (void *)cyc_list);
	_FREE(cyc_list, M_TEMP);
}
typedef struct wrap_thread_call {
	thread_call_t   TChdl;
	cyc_handler_t   hdlr;
	cyc_time_t      when;
	uint64_t        deadline;
} wrap_thread_call_t;

/*
 * _cyclic_apply will run on some thread under kernel_task. That's OK for the
 * cleaner and the deadman, but too distant in time and place for the profile provider.
 */
static void
_cyclic_apply( void *ignore, void *vTChdl )
{
#pragma unused(ignore)
	wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)vTChdl;

	(*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline));
	(void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

	/* Did cyclic_remove request a wakeup call when this thread call was re-armed? */
	if (wrapTC->when.cyt_interval == WAKEUP_REAPER) {
		thread_wakeup((event_t)wrapTC);
	}
}
cyclic_id_t
cyclic_add(cyc_handler_t *handler, cyc_time_t *when)
{
	uint64_t now;

	wrap_thread_call_t *wrapTC = _MALLOC(sizeof(wrap_thread_call_t), M_TEMP, M_ZERO | M_WAITOK);
	if (NULL == wrapTC) {
		return CYCLIC_NONE;
	}

	wrapTC->TChdl = thread_call_allocate( _cyclic_apply, NULL );
	wrapTC->hdlr = *handler;
	wrapTC->when = *when;

	ASSERT(when->cyt_when == 0);
	ASSERT(when->cyt_interval < WAKEUP_REAPER);

	nanoseconds_to_absolutetime(wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval);

	now = mach_absolute_time();
	wrapTC->deadline = now;

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline));
	(void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

	return (cyclic_id_t)wrapTC;
}

static void
noop_cyh_func(void * ignore)
{
#pragma unused(ignore)
}

void
cyclic_remove(cyclic_id_t cyclic)
{
	wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)cyclic;

	ASSERT(cyclic != CYCLIC_NONE);

	while (!thread_call_cancel(wrapTC->TChdl)) {
		int ret = assert_wait(wrapTC, THREAD_UNINT);
		ASSERT(ret == THREAD_WAITING);

		wrapTC->when.cyt_interval = WAKEUP_REAPER;

		ret = thread_block(THREAD_CONTINUE_NULL);
		ASSERT(ret == THREAD_AWAKENED);
	}

	if (thread_call_free(wrapTC->TChdl)) {
		_FREE(wrapTC, M_TEMP);
	} else {
		/* Gut this cyclic and move on ... */
		wrapTC->hdlr.cyh_func = noop_cyh_func;
		wrapTC->when.cyt_interval = NEARLY_FOREVER;
	}
}
static int
ddi_driver_major(dev_info_t *devi)
{
	return (int)major(CAST_DOWN_EXPLICIT(int, devi));
}

int
ddi_create_minor_node(dev_info_t *dip, const char *name, int spec_type,
    minor_t minor_num, const char *node_type, int flag)
{
#pragma unused(spec_type,node_type,flag)
	dev_t dev = makedev( ddi_driver_major(dip), minor_num );

	if (NULL == devfs_make_node( dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, name, 0 )) {
		return DDI_FAILURE;
	}

	return DDI_SUCCESS;
}

void
ddi_remove_minor_node(dev_info_t *dip, char *name)
{
#pragma unused(dip,name)
/* XXX called from dtrace_detach, so NOTREACHED for now. */
}

major_t
getemajor( dev_t d )
{
	return (major_t) major(d);
}

minor_t
geteminor( dev_t d )
{
	return (minor_t) minor(d);
}
extern void Debugger(const char*);

void *
dt_kmem_alloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
{
#pragma unused(kmflag)

/*
 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
 * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
 */
	vm_size_t vsize = size;
	return kalloc_canblock(&vsize, TRUE, site);
}

void *
dt_kmem_zalloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
{
#pragma unused(kmflag)

/*
 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
 * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
 */
	vm_size_t vsize = size;
	void* buf = kalloc_canblock(&vsize, TRUE, site);

	if (!buf) {
		return NULL;
	}

	bzero(buf, size);

	return buf;
}
void
dt_kmem_free(void *buf, size_t size)
{
	/*
	 * DTrace relies on this, it's doing a lot of NULL frees.
	 * A null free causes the debug builds to panic.
	 */
	if (buf == NULL) {
		return;
	}

	kfree(buf, size);
}
/*
 * aligned dt_kmem allocator
 * align should be a power of two
 */

void*
dt_kmem_alloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *site)
{
	void *mem, **addr_to_free;
	intptr_t mem_aligned;
	size_t *size_to_free, hdr_size;

	/* Must be a power of two. */
	assert(align != 0);
	assert((align & (align - 1)) == 0);

	/*
	 * We are going to add a header to the allocation. It contains
	 * the address to free and the total size of the buffer.
	 */
	hdr_size = sizeof(size_t) + sizeof(void*);
	mem = dt_kmem_alloc_site(size + align + hdr_size, kmflag, site);
	if (mem == NULL) {
		return NULL;
	}

	mem_aligned = (intptr_t) (((intptr_t) mem + align + hdr_size) & ~(align - 1));

	/* Write the address to free in the header. */
	addr_to_free = (void**) (mem_aligned - sizeof(void*));
	*addr_to_free = mem;

	/* Write the size to free in the header. */
	size_to_free = (size_t*) (mem_aligned - hdr_size);
	*size_to_free = size + align + hdr_size;

	return (void*) mem_aligned;
}

void*
dt_kmem_zalloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *s)
{
	void* buf;

	buf = dt_kmem_alloc_aligned_site(size, align, kmflag, s);

	if (!buf) {
		return NULL;
	}

	bzero(buf, size);

	return buf;
}

void
dt_kmem_free_aligned(void* buf, size_t size)
{
#pragma unused(size)
	intptr_t ptr = (intptr_t) buf;
	void **addr_to_free = (void**) (ptr - sizeof(void*));
	size_t *size_to_free = (size_t*) (ptr - (sizeof(size_t) + sizeof(void*)));

	if (buf == NULL) {
		return;
	}

	dt_kmem_free(*addr_to_free, *size_to_free);
}
/*
 * dtrace wants to manage just a single block: dtrace_state_percpu_t * NCPU, and
 * doesn't specify constructor, destructor, or reclaim methods.
 * At present, it always zeroes the block it obtains from kmem_cache_alloc().
 * We'll manage this constricted use of kmem_cache with ordinary _MALLOC and _FREE.
 */
kmem_cache_t *
kmem_cache_create(
	const char *name,       /* descriptive name for this cache */
	size_t bufsize,         /* size of the objects it manages */
	size_t align,           /* required object alignment */
	int (*constructor)(void *, void *, int), /* object constructor */
	void (*destructor)(void *, void *), /* object destructor */
	void (*reclaim)(void *), /* memory reclaim callback */
	void *private,          /* pass-thru arg for constr/destr/reclaim */
	vmem_t *vmp,            /* vmem source for slab allocation */
	int cflags)             /* cache creation flags */
{
#pragma unused(name,align,constructor,destructor,reclaim,private,vmp,cflags)
	return (kmem_cache_t *)bufsize; /* A cookie that tracks the single object size. */
}

void *
kmem_cache_alloc(kmem_cache_t *cp, int kmflag)
{
#pragma unused(kmflag)
	size_t bufsize = (size_t)cp;
	return (void *)_MALLOC(bufsize, M_TEMP, M_WAITOK);
}

void
kmem_cache_free(kmem_cache_t *cp, void *buf)
{
#pragma unused(cp)
	_FREE(buf, M_TEMP);
}

void
kmem_cache_destroy(kmem_cache_t *cp)
{
#pragma unused(cp)
}
/*
 * vmem (Solaris "slab" allocator) used by DTrace solely to hand out resource ids
 */
typedef unsigned int u_daddr_t;
#include "blist.h"

/* By passing around blist *handles*, the underlying blist can be resized as needed. */
struct blist_hdl {
	blist_t blist;
};

vmem_t *
vmem_create(const char *name, void *base, size_t size, size_t quantum, void *ignore5,
    void *ignore6, vmem_t *source, size_t qcache_max, int vmflag)
{
#pragma unused(name,quantum,ignore5,ignore6,source,qcache_max,vmflag)
	blist_t bl;
	struct blist_hdl *p = _MALLOC(sizeof(struct blist_hdl), M_TEMP, M_WAITOK);

	ASSERT(quantum == 1);
	ASSERT(NULL == ignore5);
	ASSERT(NULL == ignore6);
	ASSERT(NULL == source);
	ASSERT(0 == qcache_max);
	ASSERT(vmflag & VMC_IDENTIFIER);

	size = MIN(128, size); /* Clamp to 128 initially, since the underlying data structure is pre-allocated */

	p->blist = bl = blist_create( size );
	blist_free(bl, 0, size);
	if (base) {
		blist_alloc( bl, (daddr_t)(uintptr_t)base );   /* Chomp off initial ID(s) */
	}

	return (vmem_t *)p;
}

void *
vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
#pragma unused(vmflag)
	struct blist_hdl *q = (struct blist_hdl *)vmp;
	blist_t bl = q->blist;
	daddr_t p;

	p = blist_alloc(bl, (daddr_t)size);

	if ((daddr_t)-1 == p) {
		blist_resize(&bl, (bl->bl_blocks) << 1, 1);
		q->blist = bl;
		p = blist_alloc(bl, (daddr_t)size);
		if ((daddr_t)-1 == p) {
			panic("vmem_alloc: failure after blist_resize!");
		}
	}

	return (void *)(uintptr_t)p;
}

void
vmem_free(vmem_t *vmp, void *vaddr, size_t size)
{
	struct blist_hdl *p = (struct blist_hdl *)vmp;

	blist_free( p->blist, (daddr_t)(uintptr_t)vaddr, (daddr_t)size );
}

void
vmem_destroy(vmem_t *vmp)
{
	struct blist_hdl *p = (struct blist_hdl *)vmp;

	blist_destroy( p->blist );
	_FREE( p, sizeof(struct blist_hdl));
}
/*
 * dtrace_gethrestime() provides the "walltimestamp", a value that is anchored at
 * January 1, 1970. Because it can be called from probe context, it must take no locks.
 */
hrtime_t
dtrace_gethrestime(void)
{
	clock_sec_t             secs;
	clock_nsec_t    nanosecs;
	uint64_t                secs64, ns64;

	clock_get_calendar_nanotime_nowait(&secs, &nanosecs);
	secs64 = (uint64_t)secs;
	ns64 = (uint64_t)nanosecs;

	ns64 = ns64 + (secs64 * 1000000000LL);

	return ns64;
}
/*
 * dtrace_gethrtime() provides high-resolution timestamps with machine-dependent origin.
 * Hence its primary use is to specify intervals.
 */

static uint64_t
dtrace_abs_to_nano(uint64_t elapsed)
{
	static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };

	/*
	 * If this is the first time we've run, get the timebase.
	 * We can use denom == 0 to indicate that sTimebaseInfo is
	 * uninitialised because it makes no sense to have a zero
	 * denominator in a fraction.
	 */

	if (sTimebaseInfo.denom == 0) {
		(void) clock_timebase_info(&sTimebaseInfo);
	}

	/*
	 * Convert to nanoseconds.
	 * return (elapsed * (uint64_t)sTimebaseInfo.numer)/(uint64_t)sTimebaseInfo.denom;
	 *
	 * Provided the final result is representable in 64 bits the following maneuver will
	 * deliver that result without intermediate overflow.
	 */
	if (sTimebaseInfo.denom == sTimebaseInfo.numer) {
		return elapsed;
	} else if (sTimebaseInfo.denom == 1) {
		return elapsed * (uint64_t)sTimebaseInfo.numer;
	} else {
		/* Decompose elapsed = eta32 * 2^32 + eps32: */
		uint64_t eta32 = elapsed >> 32;
		uint64_t eps32 = elapsed & 0x00000000ffffffffLL;

		uint32_t numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;

		/* Form product of elapsed64 (decomposed) and numer: */
		uint64_t mu64 = numer * eta32;
		uint64_t lambda64 = numer * eps32;

		/* Divide the constituents by denom: */
		uint64_t q32 = mu64 / denom;
		uint64_t r32 = mu64 - (q32 * denom); /* mu64 % denom */

		return (q32 << 32) + ((r32 << 32) + lambda64) / denom;
	}
}
hrtime_t
dtrace_gethrtime(void)
{
	static uint64_t start = 0;

	if (start == 0) {
		start = mach_absolute_time();
	}

	return dtrace_abs_to_nano(mach_absolute_time() - start);
}

/*
 * Atomicity and synchronization
 */
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
{
	if (OSCompareAndSwap((UInt32)cmp, (UInt32)new, (volatile UInt32 *)target )) {
		return cmp;
	} else {
		return ~cmp; /* Must return something *other* than cmp */
	}
}

void *
dtrace_casptr(void *target, void *cmp, void *new)
{
	if (OSCompareAndSwapPtr( cmp, new, (void**)target )) {
		return cmp;
	} else {
		return (void *)(~(uintptr_t)cmp); /* Must return something *other* than cmp */
	}
}

/*
 * Interrupt manipulation
 */
dtrace_icookie_t
dtrace_interrupt_disable(void)
{
	return (dtrace_icookie_t)ml_set_interrupts_enabled(FALSE);
}

void
dtrace_interrupt_enable(dtrace_icookie_t reenable)
{
	(void)ml_set_interrupts_enabled((boolean_t)reenable);
}
static void
dtrace_sync_func(void)
{
}

/*
 * dtrace_sync() is not called from probe context.
 */
void
dtrace_sync(void)
{
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}
/*
 * The dtrace_copyin/out/instr and dtrace_fuword* routines can be called from probe context.
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

static int
dtrace_copycheck(user_addr_t uaddr, uintptr_t kaddr, size_t size)
{
#pragma unused(kaddr)

	vm_offset_t recover = dtrace_set_thread_recover( current_thread(), 0 ); /* Snare any extant recovery point. */
	dtrace_set_thread_recover( current_thread(), recover ); /* Put it back. We *must not* re-enter and overwrite. */

	ASSERT(kaddr + size >= kaddr);

	if (uaddr + size < uaddr ||             /* Avoid address wrap. */
	    KERN_FAILURE == dtrace_copyio_preflight(uaddr)) {   /* Machine specific setup/constraints. */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
		return 0;
	}

	return 1;
}
void
dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

	if (dtrace_copycheck( src, dst, len )) {
		if (copyin((const user_addr_t)src, (char *)dst, (vm_size_t)len)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
		}
		dtrace_copyio_postflight(src);
	}
}

void
dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

	size_t actual;

	if (dtrace_copycheck( src, dst, len )) {
		/*  copyin as many as 'len' bytes. */
		int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);

		/*
		 * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
		 * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
		 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
		 * to the caller.
		 */
		if (error && error != ENAMETOOLONG) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
		}
		dtrace_copyio_postflight(src);
	}
}

void
dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

	if (dtrace_copycheck( dst, src, len )) {
		if (copyout((const void *)src, dst, (vm_size_t)len)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
		}
		dtrace_copyio_postflight(dst);
	}
}

void
dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

	size_t actual;

	if (dtrace_copycheck( dst, src, len )) {
		/*
		 * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
		 * not encountered. We raise CPU_DTRACE_BADADDR in that case.
		 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
		 * to the caller.
		 */
		if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
		}
		dtrace_copyio_postflight(dst);
	}
}
extern const int copysize_limit_panic;

int
dtrace_copy_maxsize(void)
{
	return copysize_limit_panic;
}

int
dtrace_buffer_copyout(const void *kaddr, user_addr_t uaddr, vm_size_t nbytes)
{
	int maxsize = dtrace_copy_maxsize();
	/*
	 * Partition the copyout in copysize_limit_panic-sized chunks
	 */
	while (nbytes >= (vm_size_t)maxsize) {
		if (copyout(kaddr, uaddr, maxsize) != 0) {
			return EFAULT;
		}

		nbytes -= maxsize;
		uaddr += maxsize;
		kaddr += maxsize;
	}
	if (nbytes > 0) {
		if (copyout(kaddr, uaddr, nbytes) != 0) {
			return EFAULT;
		}
	}

	return 0;
}
uint8_t
dtrace_fuword8(user_addr_t uaddr)
{
	uint8_t ret = 0;

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
		}
		dtrace_copyio_postflight(uaddr);
	}
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return ret;
}

uint16_t
dtrace_fuword16(user_addr_t uaddr)
{
	uint16_t ret = 0;

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
		}
		dtrace_copyio_postflight(uaddr);
	}
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return ret;
}

uint32_t
dtrace_fuword32(user_addr_t uaddr)
{
	uint32_t ret = 0;

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
		}
		dtrace_copyio_postflight(uaddr);
	}
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return ret;
}

uint64_t
dtrace_fuword64(user_addr_t uaddr)
{
	uint64_t ret = 0;

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
		}
		dtrace_copyio_postflight(uaddr);
	}
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return ret;
}
/*
 * Emulation of Solaris fuword / suword
 * Called from the fasttrap provider, so the use of copyin/out requires fewer safeguards.
 */

int
fuword8(user_addr_t uaddr, uint8_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint8_t)) != 0) {
		return -1;
	}

	return 0;
}

int
fuword16(user_addr_t uaddr, uint16_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint16_t)) != 0) {
		return -1;
	}

	return 0;
}

int
fuword32(user_addr_t uaddr, uint32_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t)) != 0) {
		return -1;
	}

	return 0;
}

int
fuword64(user_addr_t uaddr, uint64_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t)) != 0) {
		return -1;
	}

	return 0;
}

void
fuword32_noerr(user_addr_t uaddr, uint32_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t))) {
		*value = 0;
	}
}

void
fuword64_noerr(user_addr_t uaddr, uint64_t *value)
{
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t))) {
		*value = 0;
	}
}

int
suword64(user_addr_t addr, uint64_t value)
{
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
		return -1;
	}

	return 0;
}

int
suword32(user_addr_t addr, uint32_t value)
{
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
		return -1;
	}

	return 0;
}
extern boolean_t dtrace_tally_fault(user_addr_t);

boolean_t
dtrace_tally_fault(user_addr_t uaddr)
{
	DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
	cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
	return DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT) ? TRUE : FALSE;
}

#define TOTTY   0x02
extern int prf(const char *, va_list, int, struct tty *); /* bsd/kern/subr_prf.h */

int
vuprintf(const char *format, va_list ap)
{
	return prf(format, ap, TOTTY, NULL);
}

/* Not called from probe context */
void
cmn_err( int level, const char *format, ... )
{
#pragma unused(level)
	va_list alist;

	va_start(alist, format);
	vuprintf(format, alist);
	va_end(alist);
	uprintf("\n");
}
/*
 *  2002-01-24  gvdl    Initial implementation of strstr
 */

__private_extern__ const char *
strstr(const char *in, const char *str)
{
	char c;
	size_t len;

	if (!in || !str) {
		return in;
	}

	c = *str++;
	if (!c) {
		return (const char *) in;       // Trivial empty string case
	}

	len = strlen(str);
	do {
		char sc;

		do {
			sc = *in++;
			if (!sc) {
				return (char *) 0;
			}
		} while (sc != c);
	} while (strncmp(in, str, len) != 0);

	return (const char *) (in - 1);
}
const void *
bsearch(const void *key, const void *base0, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
{
	const char *base = base0;
	size_t lim;
	int cmp;
	const void *p;

	for (lim = nmemb; lim != 0; lim >>= 1) {
		p = base + (lim >> 1) * size;
		cmp = (*compar)(key, p);
		if (cmp == 0) {
			return p;
		}
		if (cmp > 0) {  /* key > p: move right */
			base = (const char *)p + size;
			lim--;
		}               /* else move left */
	}

	return NULL;
}
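/*
 * Illustrative sketch (added for clarity, not part of the original file): a
 * caller probes an already-sorted table with a comparator, e.g.
 *
 *	static int
 *	cmp_u32(const void *key, const void *elem)
 *	{
 *		uint32_t a = *(const uint32_t *)key, b = *(const uint32_t *)elem;
 *		return (a > b) - (a < b);
 *	}
 *
 *	uint32_t table[] = { 2, 3, 5, 7, 11 };	// must already be sorted
 *	uint32_t key = 7;
 *	const uint32_t *hit = bsearch(&key, table, 5, sizeof(table[0]), cmp_u32);
 */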
uintptr_t
dtrace_caller(int ignore)
{
#pragma unused(ignore)
	return -1; /* Just as in Solaris dtrace_asm.s */
}

int
dtrace_getstackdepth(int aframes)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0) {
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	} else {
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
	}

	minfp = fp;

	aframes++;

	for (;;) {
		depth++;

		nextfp = *(struct frame **)fp;

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			break;
		}

		fp = nextfp;
		minfp = fp;
	}

	if (depth <= aframes) {
		return 0;
	}

	return depth - aframes;
}

boolean_t
dtrace_addr_in_module(void* addr, struct modctl *ctl)
{
	return OSKextKextForAddress(addr) == (void*)ctl->mod_address;
}

void
dtrace_vtime_enable(void)
{
}

void
dtrace_vtime_disable(void)
{
}