/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/thread.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <machine/atomic.h>
#include <libkern/OSKextLibPrivate.h>
#include <kern/kern_types.h>
#include <kern/timer_call.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <kern/kalloc.h>
#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <mach/task.h>
#include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */
/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

KALLOC_HEAP_DEFINE(KHEAP_DTRACE, "dtrace", KHEAP_ID_DEFAULT);
void
dtrace_sprlock(proc_t *p)
{
        lck_mtx_lock(&p->p_dtrace_sprlock);
}

void
dtrace_sprunlock(proc_t *p)
{
        lck_mtx_unlock(&p->p_dtrace_sprlock);
}
/* Not called from probe context */
        if ((p = proc_find(pid)) == PROC_NULL) {

        task_suspend_internal(p->task);

/* Not called from probe context */
                task_resume_internal(p->task);
// These are not exported from vm_map.h.
extern kern_return_t vm_map_read_user(vm_map_t map, vm_map_address_t src_addr, void *dst_p, vm_size_t size);
extern kern_return_t vm_map_write_user(vm_map_t map, void *src_p, vm_map_address_t dst_addr, vm_size_t size);
/* Not called from probe context */
uread(proc_t *p, void *buf, user_size_t len, user_addr_t a)
        ASSERT(p != PROC_NULL);
        ASSERT(p->task != NULL);

        task_t task = p->task;

        /*
         * Grab a reference to the task vm_map_t to make sure
         * the map isn't pulled out from under us.
         *
         * Because the proc_lock is not held at all times on all code
         * paths leading here, it is possible for the proc to have
         * exited. If the map is null, fail.
         */
        vm_map_t map = get_task_map_reference(task);
                ret = vm_map_read_user( map, (vm_map_address_t)a, buf, (vm_size_t)len);
                vm_map_deallocate(map);
                ret = KERN_TERMINATED;
/* Not called from probe context */
uwrite(proc_t *p, void *buf, user_size_t len, user_addr_t a)
        ASSERT(p->task != NULL);

        task_t task = p->task;

        /*
         * Grab a reference to the task vm_map_t to make sure
         * the map isn't pulled out from under us.
         *
         * Because the proc_lock is not held at all times on all code
         * paths leading here, it is possible for the proc to have
         * exited. If the map is null, fail.
         */
        vm_map_t map = get_task_map_reference(task);
                /* Find the memory permissions. */
                uint32_t nestingDepth = 999999;
                vm_region_submap_short_info_data_64_t info;
                mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
                mach_vm_address_t address = (mach_vm_address_t)a;
                mach_vm_size_t sizeOfRegion = (mach_vm_size_t)len;

                ret = mach_vm_region_recurse(map, &address, &sizeOfRegion, &nestingDepth, (vm_region_recurse_info_t)&info, &count);
                if (ret != KERN_SUCCESS) {

                if (!(info.protection & VM_PROT_WRITE)) {
                        /* Save the original protection values for restoration later */
                        reprotect = info.protection;

                        if (info.max_protection & VM_PROT_WRITE) {
                                /* The memory is not currently writable, but can be made writable. */
                                ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, (reprotect & ~VM_PROT_EXECUTE) | VM_PROT_WRITE);
                                /*
                                 * The memory is not currently writable, and cannot be made writable. We need to COW this memory.
                                 *
                                 * Strange, we can't just say "reprotect | VM_PROT_COPY", that fails.
                                 */
                                ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, VM_PROT_COPY | VM_PROT_READ | VM_PROT_WRITE);

                        if (ret != KERN_SUCCESS) {

                        /* The memory was already writable. */
                        reprotect = VM_PROT_NONE;

                ret = vm_map_write_user( map,

                dtrace_flush_caches();

                if (ret != KERN_SUCCESS) {

                if (reprotect != VM_PROT_NONE) {
                        ASSERT(reprotect & VM_PROT_EXECUTE);
                        ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, reprotect);

                vm_map_deallocate(map);
                ret = KERN_TERMINATED;
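/*
 * Illustrative sketch (added commentary, not part of the original source):
 * a caller such as the fasttrap provider patches user text by pairing
 * uread() and uwrite(), relying on uwrite() to temporarily make the page
 * writable (or COW it) and to restore the original protections afterwards.
 * The instruction value below is a hypothetical placeholder.
 *
 *	uint32_t instr;
 *	if (uread(p, &instr, sizeof(instr), addr) == 0) {
 *		instr = FASTTRAP_PATCH_VALUE;	// hypothetical constant
 *		(void)uwrite(p, &instr, sizeof(instr), addr);
 *	}
 */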
dtrace_cpu_t *cpu_list;
cpu_core_t *cpu_core; /* XXX TLB lockdown? */
/*
 * dtrace_CRED() can be called from probe context. We cannot simply call kauth_cred_get() since
 * that function may try to resolve a lazy credential binding, which entails taking the proc_lock.
 */
        struct uthread *uthread = get_bsdthread_info(current_thread());

        if (uthread == NULL) {

                return uthread->uu_ucred; /* May return NOCRED which is defined to be 0 */
PRIV_POLICY_CHOICE(void* cred, int priv, int all)
#pragma unused(priv, all)
        return kauth_cred_issuser(cred); /* XXX TODO: How is this different from PRIV_POLICY_ONLY? */

PRIV_POLICY_ONLY(void *cr, int priv, int boolean)
#pragma unused(priv, boolean)
        return kauth_cred_issuser(cr); /* XXX TODO: HAS_PRIVILEGE(cr, priv); */

crgetuid(const cred_t *cr)
        cred_t copy_cr = *cr; return kauth_cred_getuid(&copy_cr);
typedef struct wrap_timer_call {
        /* node attributes */
        struct timer_call       call;

        /* next item in the linked list */
        LIST_ENTRY(wrap_timer_call) entries;

#define WAKEUP_REAPER           0x7FFFFFFFFFFFFFFFLL
#define NEARLY_FOREVER          0x7FFFFFFFFFFFFFFELL

typedef struct cyc_list {
        cyc_omni_handler_t cyl_omni;
        wrap_timer_call_t cyl_wrap_by_cpus[];
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
} __attribute__ ((aligned(8))) cyc_list_t;
/* CPU going online/offline notifications */
void (*dtrace_cpu_state_changed_hook)(int, boolean_t) = NULL;
void dtrace_cpu_state_changed(int, boolean_t);
dtrace_install_cpu_hooks(void)
        dtrace_cpu_state_changed_hook = dtrace_cpu_state_changed;
dtrace_cpu_state_changed(int cpuid, boolean_t is_running)
#pragma unused(cpuid)
        wrap_timer_call_t       *wrapTC = NULL;
        boolean_t               suspend = (is_running ? FALSE : TRUE);

        /* Ensure that we're not going to leave the CPU */
        s = dtrace_interrupt_disable();
        assert(cpuid == cpu_number());

        LIST_FOREACH(wrapTC, &(cpu_list[cpu_number()].cpu_cyc_list), entries) {
                assert(wrapTC->cpuid == cpu_number());
                        assert(!wrapTC->suspended);
                        /* If this fails, we'll panic anyway, so let's do this now. */
                        if (!timer_call_cancel(&wrapTC->call)) {
                                panic("timer_call_set_suspend() failed to cancel a timer call");
                        wrapTC->suspended = TRUE;

                        /* Rearm the timer, but ensure it was suspended first. */
                        assert(wrapTC->suspended);
                        clock_deadline_for_periodic_event(wrapTC->when.cyt_interval, mach_absolute_time(),
                        timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
                            TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
                        wrapTC->suspended = FALSE;

        /* Restore the previous interrupt state. */
        dtrace_interrupt_enable(s);
_timer_call_apply_cyclic( void *ignore, void *vTChdl )
#pragma unused(ignore)
        wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)vTChdl;

        (*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

        clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline));
        timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL );
timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_time_t *when)
        timer_call_setup( &(wrapTC->call), _timer_call_apply_cyclic, NULL );
        wrapTC->hdlr = *handler;
        wrapTC->when = *when;

        nanoseconds_to_absolutetime( wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval );

        now = mach_absolute_time();
        wrapTC->deadline = now;

        clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline));

        /* Insert the timer to the list of the running timers on this CPU, and start it. */
        s = dtrace_interrupt_disable();
        wrapTC->cpuid = cpu_number();
        LIST_INSERT_HEAD(&cpu_list[wrapTC->cpuid].cpu_cyc_list, wrapTC, entries);
        timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
            TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
        wrapTC->suspended = FALSE;
        dtrace_interrupt_enable(s);

        return (cyclic_id_t)wrapTC;
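/*
 * Minimal usage sketch (an assumption, not from the original file): a client
 * fills in a cyc_handler_t and a cyc_time_t (interval in nanoseconds) and
 * hands them to cyclic_timer_add(), which binds the wrapped timer call to the
 * current CPU; cyclic_timer_remove() tears it down again.
 *
 *	cyc_handler_t hdlr = { .cyh_func = my_tick, .cyh_arg = my_state };	// hypothetical names
 *	cyc_time_t when = { .cyt_when = 0, .cyt_interval = 10 * 1000 * 1000 };	// ~10ms period
 *	cyclic_id_t id = cyclic_timer_add(&hdlr, &when);
 *	...
 *	cyclic_timer_remove(id);
 */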
/*
 * Executed on the CPU the timer is running on.
 */
timer_call_remove_cyclic(wrap_timer_call_t *wrapTC)
        assert(cpu_number() == wrapTC->cpuid);

        if (!timer_call_cancel(&wrapTC->call)) {
                panic("timer_call_remove_cyclic() failed to cancel a timer call");

        LIST_REMOVE(wrapTC, entries);
timer_call_get_cyclic_arg(wrap_timer_call_t *wrapTC)
        return wrapTC ? wrapTC->hdlr.cyh_arg : NULL;
cyclic_timer_add(cyc_handler_t *handler, cyc_time_t *when)
        wrap_timer_call_t *wrapTC = _MALLOC(sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);
        if (NULL == wrapTC) {

                return timer_call_add_cyclic( wrapTC, handler, when );
cyclic_timer_remove(cyclic_id_t cyclic)
        ASSERT( cyclic != CYCLIC_NONE );

        /* Removing a timer call must be done on the CPU the timer is running on. */
        wrap_timer_call_t *wrapTC = (wrap_timer_call_t *) cyclic;
        dtrace_xcall(wrapTC->cpuid, (dtrace_xcall_t) timer_call_remove_cyclic, (void*) cyclic);

        _FREE((void *)cyclic, M_TEMP);
_cyclic_add_omni(cyc_list_t *cyc_list)
        cyc_omni_handler_t *omni = &cyc_list->cyl_omni;

        (omni->cyo_online)(omni->cyo_arg, CPU, &cH, &cT);

        wrap_timer_call_t *wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()];
        timer_call_add_cyclic(wrapTC, &cH, &cT);
cyclic_add_omni(cyc_omni_handler_t *omni)
        cyc_list_t *cyc_list =
            _MALLOC(sizeof(cyc_list_t) + NCPU * sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);

        if (NULL == cyc_list) {

        cyc_list->cyl_omni = *omni;

        dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_add_omni, (void *)cyc_list);

        return (cyclic_id_list_t)cyc_list;
_cyclic_remove_omni(cyc_list_t *cyc_list)
        cyc_omni_handler_t *omni = &cyc_list->cyl_omni;
        wrap_timer_call_t *wrapTC;

        /*
         * If the processor was offline when dtrace started, we did not allocate
         * a cyclic timer for this CPU.
         */
        if ((wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()]) != NULL) {
                oarg = timer_call_get_cyclic_arg(wrapTC);
                timer_call_remove_cyclic(wrapTC);
                (omni->cyo_offline)(omni->cyo_arg, CPU, oarg);
cyclic_remove_omni(cyclic_id_list_t cyc_list)
        ASSERT(cyc_list != NULL);

        dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_remove_omni, (void *)cyc_list);
        _FREE(cyc_list, M_TEMP);
typedef struct wrap_thread_call {
} wrap_thread_call_t;

/*
 * _cyclic_apply will run on some thread under kernel_task. That's OK for the
 * cleaner and the deadman, but too distant in time and place for the profile provider.
 */
_cyclic_apply( void *ignore, void *vTChdl )
#pragma unused(ignore)
        wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)vTChdl;

        (*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

        clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline));
        (void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

        /* Did cyclic_remove request a wakeup call when this thread call was re-armed? */
        if (wrapTC->when.cyt_interval == WAKEUP_REAPER) {
                thread_wakeup((event_t)wrapTC);
cyclic_add(cyc_handler_t *handler, cyc_time_t *when)
        wrap_thread_call_t *wrapTC = _MALLOC(sizeof(wrap_thread_call_t), M_TEMP, M_ZERO | M_WAITOK);
        if (NULL == wrapTC) {

        wrapTC->TChdl = thread_call_allocate( _cyclic_apply, NULL );
        wrapTC->hdlr = *handler;
        wrapTC->when = *when;

        ASSERT(when->cyt_when == 0);
        ASSERT(when->cyt_interval < WAKEUP_REAPER);

        nanoseconds_to_absolutetime(wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval);

        now = mach_absolute_time();
        wrapTC->deadline = now;

        clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline));
        (void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

        return (cyclic_id_t)wrapTC;
noop_cyh_func(void * ignore)
#pragma unused(ignore)
cyclic_remove(cyclic_id_t cyclic)
        wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)cyclic;

        ASSERT(cyclic != CYCLIC_NONE);

        while (!thread_call_cancel(wrapTC->TChdl)) {
                int ret = assert_wait(wrapTC, THREAD_UNINT);
                ASSERT(ret == THREAD_WAITING);

                wrapTC->when.cyt_interval = WAKEUP_REAPER;

                ret = thread_block(THREAD_CONTINUE_NULL);
                ASSERT(ret == THREAD_AWAKENED);

        if (thread_call_free(wrapTC->TChdl)) {
                _FREE(wrapTC, M_TEMP);

                /* Gut this cyclic and move on ... */
                wrapTC->hdlr.cyh_func = noop_cyh_func;
                wrapTC->when.cyt_interval = NEARLY_FOREVER;
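/*
 * Note (added commentary): cyclic_remove() cooperates with _cyclic_apply()
 * above. If thread_call_cancel() fails because the handler is mid-flight,
 * cyclic_remove() parks on assert_wait()/thread_block() and sets
 * cyt_interval to WAKEUP_REAPER; when _cyclic_apply() next re-arms the
 * thread call it notices that sentinel and issues thread_wakeup(), letting
 * the remover retry the cancel. If the thread call still cannot be freed,
 * the cyclic is "gutted" with noop_cyh_func and a NEARLY_FOREVER interval.
 */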
ddi_driver_major(dev_info_t     *devi)
        return (int)major(CAST_DOWN_EXPLICIT(int, devi));
ddi_create_minor_node(dev_info_t *dip, const char *name, int spec_type,
    minor_t minor_num, const char *node_type, int flag)
#pragma unused(spec_type,node_type,flag)
        dev_t dev = makedev( ddi_driver_major(dip), minor_num );

        if (NULL == devfs_make_node( dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, name, 0 )) {
ddi_remove_minor_node(dev_info_t *dip, char *name)
#pragma unused(dip,name)
/* XXX called from dtrace_detach, so NOTREACHED for now. */

        return (major_t) major(d);

        return (minor_t) minor(d);

extern void Debugger(const char*);
dt_kmem_alloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
#pragma unused(kmflag)

/*
 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
 * Requests larger than 8K with M_NOWAIT fail in kalloc_ext.
 */
        return kalloc_ext(KHEAP_DTRACE, size, Z_WAITOK, site).addr;
dt_kmem_zalloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
#pragma unused(kmflag)

/*
 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
 * Requests larger than 8K with M_NOWAIT fail in kalloc_ext.
 */
        return kalloc_ext(KHEAP_DTRACE, size, Z_WAITOK | Z_ZERO, site).addr;
dt_kmem_free(void *buf, size_t size)
        kheap_free(KHEAP_DTRACE, buf, size);
/*
 * aligned dt_kmem allocator
 * align should be a power of two
 */

dt_kmem_alloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *site)
        void *mem, **addr_to_free;
        intptr_t mem_aligned;
        size_t *size_to_free, hdr_size;

        /* Must be a power of two. */
        assert((align & (align - 1)) == 0);

        /*
         * We are going to add a header to the allocation. It contains
         * the address to free and the total size of the buffer.
         */
        hdr_size = sizeof(size_t) + sizeof(void*);
        mem = dt_kmem_alloc_site(size + align + hdr_size, kmflag, site);

        mem_aligned = (intptr_t) (((intptr_t) mem + align + hdr_size) & ~(align - 1));

        /* Write the address to free in the header. */
        addr_to_free = (void**) (mem_aligned - sizeof(void*));

        /* Write the size to free in the header. */
        size_to_free = (size_t*) (mem_aligned - hdr_size);
        *size_to_free = size + align + hdr_size;

        return (void*) mem_aligned;
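/*
 * Layout note (added commentary): the aligned allocator over-allocates by
 * align + hdr_size and returns a pointer aligned within that buffer, with a
 * small header stored immediately below the returned address:
 *
 *	mem                                            mem_aligned
 *	 |<- padding ->| size_to_free | addr_to_free |<- caller's size bytes ->|
 *
 * dt_kmem_free_aligned() reads addr_to_free and size_to_free back out of the
 * header to release the original allocation.
 */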
dt_kmem_zalloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *s)
        buf = dt_kmem_alloc_aligned_site(size, align, kmflag, s);
dt_kmem_free_aligned(void* buf, size_t size)
        intptr_t ptr = (intptr_t) buf;
        void **addr_to_free = (void**) (ptr - sizeof(void*));
        size_t *size_to_free = (size_t*) (ptr - (sizeof(size_t) + sizeof(void*)));

        dt_kmem_free(*addr_to_free, *size_to_free);
/*
 * dtrace wants to manage just a single block: dtrace_state_percpu_t * NCPU, and
 * doesn't specify constructor, destructor, or reclaim methods.
 * At present, it always zeroes the block it obtains from kmem_cache_alloc().
 * We'll manage this constricted use of kmem_cache with ordinary _MALLOC and _FREE.
 */
        const char *name,       /* descriptive name for this cache */
        size_t bufsize,         /* size of the objects it manages */
        size_t align,           /* required object alignment */
        int (*constructor)(void *, void *, int), /* object constructor */
        void (*destructor)(void *, void *), /* object destructor */
        void (*reclaim)(void *), /* memory reclaim callback */
        void *private,          /* pass-thru arg for constr/destr/reclaim */
        vmem_t *vmp,            /* vmem source for slab allocation */
        int cflags)     /* cache creation flags */
#pragma unused(name,align,constructor,destructor,reclaim,private,vmp,cflags)
        return (kmem_cache_t *)bufsize; /* A cookie that tracks the single object size. */
kmem_cache_alloc(kmem_cache_t *cp, int kmflag)
#pragma unused(kmflag)
        size_t bufsize = (size_t)cp;
        return (void *)_MALLOC(bufsize, M_TEMP, M_WAITOK);

kmem_cache_free(kmem_cache_t *cp, void *buf)

kmem_cache_destroy(kmem_cache_t *cp)
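/*
 * Usage sketch (an assumption, not from the original file): since the "cache"
 * is just a cookie carrying the object size, a round trip behaves like plain
 * allocation and release of fixed-size blocks.
 *
 *	kmem_cache_t *kc = kmem_cache_create("dtrace_state_cache",	// hypothetical name
 *	    sizeof(dtrace_state_percpu_t) * NCPU, 0,
 *	    NULL, NULL, NULL, NULL, NULL, 0);
 *	void *blk = kmem_cache_alloc(kc, KM_SLEEP);
 *	...
 *	kmem_cache_free(kc, blk);
 *	kmem_cache_destroy(kc);
 */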
/*
 * vmem (Solaris "slab" allocator) used by DTrace solely to hand out resource ids
 */
typedef unsigned int u_daddr_t;

/* By passing around blist *handles*, the underlying blist can be resized as needed. */
vmem_create(const char *name, void *base, size_t size, size_t quantum, void *ignore5,
    void *ignore6, vmem_t *source, size_t qcache_max, int vmflag)
#pragma unused(name,quantum,ignore5,ignore6,source,qcache_max,vmflag)
        struct blist_hdl *p = _MALLOC(sizeof(struct blist_hdl), M_TEMP, M_WAITOK);

        ASSERT(quantum == 1);
        ASSERT(NULL == ignore5);
        ASSERT(NULL == ignore6);
        ASSERT(NULL == source);
        ASSERT(0 == qcache_max);
        ASSERT(size <= INT32_MAX);
        ASSERT(vmflag & VMC_IDENTIFIER);

        size = MIN(128, size); /* Clamp to 128 initially, since the underlying data structure is pre-allocated */

        p->blist = bl = blist_create((daddr_t)size);
        blist_free(bl, 0, (daddr_t)size);
                blist_alloc( bl, (daddr_t)(uintptr_t)base );    /* Chomp off initial ID(s) */
vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
#pragma unused(vmflag)
        struct blist_hdl *q = (struct blist_hdl *)vmp;
        blist_t bl = q->blist;

        p = blist_alloc(bl, (daddr_t)size);

        if (p == SWAPBLK_NONE) {
                blist_resize(&bl, (bl->bl_blocks) << 1, 1);

                p = blist_alloc(bl, (daddr_t)size);
                if (p == SWAPBLK_NONE) {
                        panic("vmem_alloc: failure after blist_resize!");

        return (void *)(uintptr_t)p;
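/*
 * Usage sketch (an assumption, not from the original file): DTrace uses this
 * blist-backed shim purely as an identifier arena, e.g. to hand out minor
 * numbers:
 *
 *	vmem_t *arena = vmem_create("dtrace_minor", (void *)1, UINT32_MAX - 1, 1,
 *	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
 *	minor_t minor = (minor_t)(uintptr_t)vmem_alloc(arena, 1, VM_BESTFIT | VM_SLEEP);
 *	...
 *	vmem_free(arena, (void *)(uintptr_t)minor, 1);
 */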
vmem_free(vmem_t *vmp, void *vaddr, size_t size)
        struct blist_hdl *p = (struct blist_hdl *)vmp;

        blist_free( p->blist, (daddr_t)(uintptr_t)vaddr, (daddr_t)size );
vmem_destroy(vmem_t *vmp)
        struct blist_hdl *p = (struct blist_hdl *)vmp;

        blist_destroy( p->blist );
        _FREE( p, sizeof(struct blist_hdl));
/*
 * dtrace_gethrestime() provides the "walltimestamp", a value that is anchored at
 * January 1, 1970. Because it can be called from probe context, it must take no locks.
 */
dtrace_gethrestime(void)
        clock_nsec_t    nanosecs;
        uint64_t                secs64, ns64;

        clock_get_calendar_nanotime_nowait(&secs, &nanosecs);
        secs64 = (uint64_t)secs;
        ns64 = (uint64_t)nanosecs;

        ns64 = ns64 + (secs64 * 1000000000LL);
/*
 * dtrace_gethrtime() provides high-resolution timestamps with machine-dependent origin.
 * Hence its primary use is to specify intervals.
 */
dtrace_abs_to_nano(uint64_t elapsed)
        static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };

        /*
         * If this is the first time we've run, get the timebase.
         * We can use denom == 0 to indicate that sTimebaseInfo is
         * uninitialised because it makes no sense to have a zero
         * denominator in a fraction.
         */
        if (sTimebaseInfo.denom == 0) {
                (void) clock_timebase_info(&sTimebaseInfo);

        /*
         * Convert to nanoseconds.
         * return (elapsed * (uint64_t)sTimebaseInfo.numer)/(uint64_t)sTimebaseInfo.denom;
         *
         * Provided the final result is representable in 64 bits the following maneuver will
         * deliver that result without intermediate overflow.
         */
        if (sTimebaseInfo.denom == sTimebaseInfo.numer) {
        } else if (sTimebaseInfo.denom == 1) {
                return elapsed * (uint64_t)sTimebaseInfo.numer;
                /* Decompose elapsed = eta32 * 2^32 + eps32: */
                uint64_t eta32 = elapsed >> 32;
                uint64_t eps32 = elapsed & 0x00000000ffffffffLL;

                uint32_t numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;

                /* Form product of elapsed64 (decomposed) and numer: */
                uint64_t mu64 = numer * eta32;
                uint64_t lambda64 = numer * eps32;

                /* Divide the constituents by denom: */
                uint64_t q32 = mu64 / denom;
                uint64_t r32 = mu64 - (q32 * denom); /* mu64 % denom */

                return (q32 << 32) + ((r32 << 32) + lambda64) / denom;
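/*
 * Worked identity (added commentary) behind the branch above: writing
 * elapsed = eta32 * 2^32 + eps32 gives
 *
 *	elapsed * numer / denom
 *	    = (numer * eta32) * 2^32 / denom + (numer * eps32) / denom
 *	    = q32 * 2^32 + ((r32 << 32) + lambda64) / denom
 *
 * with mu64 = numer * eta32 = q32 * denom + r32, so every intermediate term
 * fits in 64 bits whenever the final result does.
 */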
dtrace_gethrtime(void)
        static uint64_t        start = 0;

                start = mach_absolute_time();

        return dtrace_abs_to_nano(mach_absolute_time() - start);
/*
 * Atomicity and synchronization
 */
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
        if (OSCompareAndSwap((UInt32)cmp, (UInt32)new, (volatile UInt32 *)target )) {

                return ~cmp; /* Must return something *other* than cmp */

dtrace_casptr(void *target, void *cmp, void *new)
        if (OSCompareAndSwapPtr( cmp, new, (void**)target )) {

                return (void *)(~(uintptr_t)cmp); /* Must return something *other* than cmp */
/*
 * Interrupt manipulation
 */
dtrace_interrupt_disable(void)
        return (dtrace_icookie_t)ml_set_interrupts_enabled(FALSE);

dtrace_interrupt_enable(dtrace_icookie_t reenable)
        (void)ml_set_interrupts_enabled((boolean_t)reenable);
dtrace_sync_func(void)

/*
 * dtrace_sync() is not called from probe context.
 */
        dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
/*
 * The dtrace_copyin/out/instr and dtrace_fuword* routines can be called from probe context.
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);
dtrace_copycheck(user_addr_t uaddr, uintptr_t kaddr, size_t size)
#pragma unused(kaddr)
        vm_offset_t recover = dtrace_set_thread_recover( current_thread(), 0 ); /* Snare any extant recovery point. */
        dtrace_set_thread_recover( current_thread(), recover ); /* Put it back. We *must not* re-enter and overwrite. */

        ASSERT(kaddr + size >= kaddr);

        if (uaddr + size < uaddr ||             /* Avoid address wrap. */
            KERN_FAILURE == dtrace_copyio_preflight(uaddr)) {   /* Machine specific setup/constraints. */
                DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
#pragma unused(flags)
        if (dtrace_copycheck( src, dst, len )) {
                if (copyin((const user_addr_t)src, (char *)dst, (vm_size_t)len)) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
                dtrace_copyio_postflight(src);
dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
#pragma unused(flags)
        if (dtrace_copycheck( src, dst, len )) {
                /*  copyin as many as 'len' bytes. */
                int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);

                /*
                 * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
                 * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
                 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
                 */
                if (error && error != ENAMETOOLONG) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
                dtrace_copyio_postflight(src);
dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
#pragma unused(flags)
        if (dtrace_copycheck( dst, src, len )) {
                if (copyout((const void *)src, dst, (vm_size_t)len)) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
                dtrace_copyio_postflight(dst);
dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
#pragma unused(flags)
        if (dtrace_copycheck( dst, src, len )) {
                /*
                 * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
                 * not encountered. We raise CPU_DTRACE_BADADDR in that case.
                 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
                 */
                if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
                dtrace_copyio_postflight(dst);
extern const int copysize_limit_panic;

dtrace_copy_maxsize(void)
        return copysize_limit_panic;
dtrace_buffer_copyout(const void *kaddr, user_addr_t uaddr, vm_size_t nbytes)
        int maxsize = dtrace_copy_maxsize();

        /*
         * Partition the copyout in copysize_limit_panic-sized chunks
         */
        while (nbytes >= (vm_size_t)maxsize) {
                if (copyout(kaddr, uaddr, maxsize) != 0) {

                if (copyout(kaddr, uaddr, nbytes) != 0) {
dtrace_fuword8(user_addr_t uaddr)
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
                if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
                dtrace_copyio_postflight(uaddr);
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
dtrace_fuword16(user_addr_t uaddr)
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
                if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
                dtrace_copyio_postflight(uaddr);
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
dtrace_fuword32(user_addr_t uaddr)
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
                if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
                dtrace_copyio_postflight(uaddr);
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
dtrace_fuword64(user_addr_t uaddr)
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
                if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
                dtrace_copyio_postflight(uaddr);
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
/*
 * Emulation of Solaris fuword / suword
 * Called from the fasttrap provider, so the use of copyin/out requires fewer safeguards.
 */
fuword8(user_addr_t uaddr, uint8_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint8_t)) != 0) {

fuword16(user_addr_t uaddr, uint16_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint16_t)) != 0) {

fuword32(user_addr_t uaddr, uint32_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t)) != 0) {

fuword64(user_addr_t uaddr, uint64_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t)) != 0) {

fuword32_noerr(user_addr_t uaddr, uint32_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t))) {

fuword64_noerr(user_addr_t uaddr, uint64_t *value)
        if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t))) {

suword64(user_addr_t addr, uint64_t value)
        if (copyout((const void *)&value, addr, sizeof(value)) != 0) {

suword32(user_addr_t addr, uint32_t value)
        if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
extern boolean_t dtrace_tally_fault(user_addr_t);

dtrace_tally_fault(user_addr_t uaddr)
        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        return DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT) ? TRUE : FALSE;
extern int prf(const char *, va_list, int, struct tty *); /* bsd/kern/subr_prf.h */

vuprintf(const char *format, va_list ap)
        return prf(format, ap, TOTTY, NULL);

/* Not called from probe context */
cmn_err( int level, const char *format, ... )
#pragma unused(level)
        va_start(alist, format);
        vuprintf(format, alist);
bsearch(const void *key, const void *base0, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
        const char *base = base0;

        for (lim = nmemb; lim != 0; lim >>= 1) {
                p = base + (lim >> 1) * size;
                cmp = (*compar)(key, p);

                if (cmp > 0) {  /* key > p: move right */
                        base = (const char *)p + size;
                }               /* else move left */
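/*
 * Usage sketch (an assumption, not from the original file):
 *
 *	static int
 *	cmp_u32(const void *key, const void *elem)
 *	{
 *		uint32_t a = *(const uint32_t *)key, b = *(const uint32_t *)elem;
 *		return (a > b) - (a < b);
 *	}
 *
 *	uint32_t table[] = { 1, 3, 5, 7 };	// must be sorted ascending
 *	uint32_t want = 5;
 *	uint32_t *hit = bsearch(&want, table, 4, sizeof(uint32_t), cmp_u32);
 */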
dtrace_caller(int ignore)
#pragma unused(ignore)
        return -1; /* Just as in Solaris dtrace_asm.s */
dtrace_getstackdepth(int aframes)
        struct frame *fp = (struct frame *)__builtin_frame_address(0);
        struct frame *nextfp, *minfp, *stacktop;

        if ((on_intr = CPU_ON_INTR(CPU)) != 0) {
                stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();

                stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

                nextfp = *(struct frame **)fp;

                if (nextfp <= minfp || nextfp >= stacktop) {
                                /*
                                 * Hop from interrupt stack to thread stack.
                                 */
                                vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

                                minfp = (struct frame *)kstack_base;
                                stacktop = (struct frame *)(kstack_base + kernel_stack_size);

        if (depth <= aframes) {

        return depth - aframes;
dtrace_addr_in_module(void* addr, struct modctl *ctl)
        return OSKextKextForAddress(addr) == (void*)ctl->mod_address;
dtrace_vtime_enable(void)

dtrace_vtime_disable(void)