/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/thread.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <machine/atomic.h>
#include <libkern/OSKextLibPrivate.h>
#include <kern/kern_types.h>
#include <kern/timer_call.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <kern/kalloc.h>
#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <mach/task.h>
#include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */
/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

void
dtrace_sprlock(proc_t *p)
{
    lck_mtx_lock(&p->p_dtrace_sprlock);
}

void
dtrace_sprunlock(proc_t *p)
{
    lck_mtx_unlock(&p->p_dtrace_sprlock);
}
/* Not called from probe context */
void *
sprlock(pid_t pid)
{
    proc_t *p;

    if ((p = proc_find(pid)) == PROC_NULL) {
        return PROC_NULL;
    }

    task_suspend_internal(p->task);
    dtrace_sprlock(p);
    proc_lock(p);

    return p;
}

/* Not called from probe context */
void
sprunlock(proc_t *p)
{
    if (p != PROC_NULL) {
        proc_unlock(p);
        dtrace_sprunlock(p);
        task_resume_internal(p->task);
        proc_rele(p);
    }
}
// These are not exported from vm_map.h.
extern kern_return_t vm_map_read_user(vm_map_t map, vm_map_address_t src_addr, void *dst_p, vm_size_t size);
extern kern_return_t vm_map_write_user(vm_map_t map, void *src_p, vm_map_address_t dst_addr, vm_size_t size);
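/*
 * uread()/uwrite() move data out of and into the target process's address
 * space by going through its vm_map with the calls declared above, rather
 * than copyin()/copyout() against the current map, so callers such as the
 * fasttrap provider can read and patch another task's instructions.
 */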
/* Not called from probe context */
int
uread(proc_t *p, void *buf, user_size_t len, user_addr_t a)
{
    kern_return_t ret;

    ASSERT(p != PROC_NULL);
    ASSERT(p->task != NULL);

    task_t task = p->task;

    /*
     * Grab a reference to the task vm_map_t to make sure
     * the map isn't pulled out from under us.
     *
     * Because the proc_lock is not held at all times on all code
     * paths leading here, it is possible for the proc to have
     * exited. If the map is null, fail.
     */
    vm_map_t map = get_task_map_reference(task);
    if (map) {
        ret = vm_map_read_user( map, (vm_map_address_t)a, buf, (vm_size_t)len);
        vm_map_deallocate(map);
    } else {
        ret = KERN_TERMINATED;
    }

    return (int)ret;
}
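/*
 * uwrite() must cope with target pages that are not currently writable: it
 * looks up the region's protections, temporarily adds VM_PROT_WRITE (or forces
 * a copy-on-write mapping when the maximum protection forbids writing), does
 * the write, flushes caches, and then restores the original protections.
 */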
/* Not called from probe context */
int
uwrite(proc_t *p, void *buf, user_size_t len, user_addr_t a)
{
    kern_return_t ret;
    vm_prot_t reprotect;

    ASSERT(p->task != NULL);

    task_t task = p->task;

    /*
     * Grab a reference to the task vm_map_t to make sure
     * the map isn't pulled out from under us.
     *
     * Because the proc_lock is not held at all times on all code
     * paths leading here, it is possible for the proc to have
     * exited. If the map is null, fail.
     */
    vm_map_t map = get_task_map_reference(task);
    if (map) {
        /* Find the memory permissions. */
        uint32_t nestingDepth = 999999;
        vm_region_submap_short_info_data_64_t info;
        mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
        mach_vm_address_t address = (mach_vm_address_t)a;
        mach_vm_size_t sizeOfRegion = (mach_vm_size_t)len;

        ret = mach_vm_region_recurse(map, &address, &sizeOfRegion, &nestingDepth, (vm_region_recurse_info_t)&info, &count);
        if (ret != KERN_SUCCESS) {
            goto done;
        }

        if (!(info.protection & VM_PROT_WRITE)) {
            /* Save the original protection values for restoration later */
            reprotect = info.protection;

            if (info.max_protection & VM_PROT_WRITE) {
                /* The memory is not currently writable, but can be made writable. */
                ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0,
                    (reprotect & ~VM_PROT_EXECUTE) | VM_PROT_WRITE);
            } else {
                /*
                 * The memory is not currently writable, and cannot be made writable. We need to COW this memory.
                 *
                 * Strange, we can't just say "reprotect | VM_PROT_COPY", that fails.
                 */
                ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0,
                    VM_PROT_COPY | VM_PROT_READ | VM_PROT_WRITE);
            }

            if (ret != KERN_SUCCESS) {
                goto done;
            }
        } else {
            /* The memory was already writable. */
            reprotect = VM_PROT_NONE;
        }

        ret = vm_map_write_user( map,
            buf,
            (vm_map_address_t)a,
            (vm_size_t)len);

        dtrace_flush_caches();

        if (ret != KERN_SUCCESS) {
            goto done;
        }

        if (reprotect != VM_PROT_NONE) {
            ASSERT(reprotect & VM_PROT_EXECUTE);
            ret = mach_vm_protect(map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, reprotect);
        }

done:
        vm_map_deallocate(map);
    } else {
        ret = KERN_TERMINATED;
    }

    return (int)ret;
}
dtrace_cpu_t *cpu_list;
cpu_core_t *cpu_core; /* XXX TLB lockdown? */
/*
 * dtrace_CRED() can be called from probe context. We cannot simply call kauth_cred_get() since
 * that function may try to resolve a lazy credential binding, which entails taking the proc_lock.
 */
cred_t *
dtrace_CRED(void)
{
    struct uthread *uthread = get_bsdthread_info(current_thread());

    if (uthread == NULL) {
        return NULL;
    } else {
        return uthread->uu_ucred; /* May return NOCRED which is defined to be 0 */
    }
}
int
PRIV_POLICY_CHOICE(void* cred, int priv, int all)
{
#pragma unused(priv, all)
    return kauth_cred_issuser(cred); /* XXX TODO: How is this different from PRIV_POLICY_ONLY? */
}

int
PRIV_POLICY_ONLY(void *cr, int priv, int boolean)
{
#pragma unused(priv, boolean)
    return kauth_cred_issuser(cr); /* XXX TODO: HAS_PRIVILEGE(cr, priv); */
}
uid_t
crgetuid(const cred_t *cr)
{
    cred_t copy_cr = *cr;
    return kauth_cred_getuid(&copy_cr);
}
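/*
 * "cyclic" emulation
 *
 * Solaris cyclics are emulated here in two flavors: per-CPU timer_calls
 * (cyclic_timer_add() and the cyclic_add_omni() family), and a single
 * thread_call (cyclic_add()) for consumers that can tolerate running on a
 * kernel_task thread.
 */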
typedef struct wrap_timer_call {
    /* node attributes */
    cyc_handler_t hdlr;
    cyc_time_t when;
    uint64_t deadline;
    int cpuid;
    boolean_t suspended;
    struct timer_call call;

    /* next item in the linked list */
    LIST_ENTRY(wrap_timer_call) entries;
} wrap_timer_call_t;

#define WAKEUP_REAPER 0x7FFFFFFFFFFFFFFFLL
#define NEARLY_FOREVER 0x7FFFFFFFFFFFFFFELL

typedef struct cyc_list {
    cyc_omni_handler_t cyl_omni;
    wrap_timer_call_t cyl_wrap_by_cpus[];
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
} __attribute__ ((aligned(8))) cyc_list_t;
#else
} cyc_list_t;
#endif
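/*
 * A cyc_list_t is allocated as a single block: the cyc_omni_handler_t header
 * followed by one wrap_timer_call_t per CPU in the flexible array member.
 * cyclic_add_omni() below allocates sizeof(cyc_list_t) + NCPU * sizeof(wrap_timer_call_t),
 * and each CPU indexes its own slot as cyl_wrap_by_cpus[cpu_number()].
 */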
/* CPU going online/offline notifications */
void (*dtrace_cpu_state_changed_hook)(int, boolean_t) = NULL;
void dtrace_cpu_state_changed(int, boolean_t);

void
dtrace_install_cpu_hooks(void)
{
    dtrace_cpu_state_changed_hook = dtrace_cpu_state_changed;
}
void
dtrace_cpu_state_changed(int cpuid, boolean_t is_running)
{
#pragma unused(cpuid)
    wrap_timer_call_t *wrapTC = NULL;
    boolean_t suspend = (is_running ? FALSE : TRUE);
    dtrace_icookie_t s;

    /* Ensure that we're not going to leave the CPU */
    s = dtrace_interrupt_disable();
    assert(cpuid == cpu_number());

    LIST_FOREACH(wrapTC, &(cpu_list[cpu_number()].cpu_cyc_list), entries) {
        assert(wrapTC->cpuid == cpu_number());

        if (suspend) {
            assert(!wrapTC->suspended);
            /* If this fails, we'll panic anyway, so let's do this now. */
            if (!timer_call_cancel(&wrapTC->call)) {
                panic("timer_call_set_suspend() failed to cancel a timer call");
            }
            wrapTC->suspended = TRUE;
        } else {
            /* Rearm the timer, but ensure it was suspended first. */
            assert(wrapTC->suspended);
            clock_deadline_for_periodic_event(wrapTC->when.cyt_interval, mach_absolute_time(),
                &wrapTC->deadline);
            timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
                TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
            wrapTC->suspended = FALSE;
        }
    }

    /* Restore the previous interrupt state. */
    dtrace_interrupt_enable(s);
}
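/*
 * timer_call workhorse: each firing invokes the cyclic handler, then computes
 * the next deadline and re-arms the one-shot timer_call, which is how a
 * periodic Solaris cyclic is approximated on top of timer_call_enter1().
 */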
static void
_timer_call_apply_cyclic( void *ignore, void *vTChdl )
{
#pragma unused(ignore)
    wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)vTChdl;

    (*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

    clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) );
    timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL );
}
static cyclic_id_t
timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_time_t *when)
{
    uint64_t now;
    dtrace_icookie_t s;

    timer_call_setup( &(wrapTC->call), _timer_call_apply_cyclic, NULL );
    wrapTC->hdlr = *handler;
    wrapTC->when = *when;

    nanoseconds_to_absolutetime( wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval );

    now = mach_absolute_time();
    wrapTC->deadline = now;

    clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) );

    /* Insert the timer to the list of the running timers on this CPU, and start it. */
    s = dtrace_interrupt_disable();
    wrapTC->cpuid = cpu_number();
    LIST_INSERT_HEAD(&cpu_list[wrapTC->cpuid].cpu_cyc_list, wrapTC, entries);
    timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline,
        TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
    wrapTC->suspended = FALSE;
    dtrace_interrupt_enable(s);

    return (cyclic_id_t)wrapTC;
}
/*
 * Executed on the CPU the timer is running on.
 */
static void
timer_call_remove_cyclic(wrap_timer_call_t *wrapTC)
{
    assert(cpu_number() == wrapTC->cpuid);

    if (!timer_call_cancel(&wrapTC->call)) {
        panic("timer_call_remove_cyclic() failed to cancel a timer call");
    }

    LIST_REMOVE(wrapTC, entries);
}
static void *
timer_call_get_cyclic_arg(wrap_timer_call_t *wrapTC)
{
    return wrapTC ? wrapTC->hdlr.cyh_arg : NULL;
}
cyclic_id_t
cyclic_timer_add(cyc_handler_t *handler, cyc_time_t *when)
{
    wrap_timer_call_t *wrapTC = _MALLOC(sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);
    if (NULL == wrapTC) {
        return CYCLIC_NONE;
    }

    return timer_call_add_cyclic( wrapTC, handler, when );
}
void
cyclic_timer_remove(cyclic_id_t cyclic)
{
    ASSERT( cyclic != CYCLIC_NONE );

    /* Removing a timer call must be done on the CPU the timer is running on. */
    wrap_timer_call_t *wrapTC = (wrap_timer_call_t *) cyclic;
    dtrace_xcall(wrapTC->cpuid, (dtrace_xcall_t) timer_call_remove_cyclic, (void*) cyclic);

    _FREE((void *)cyclic, M_TEMP);
}
static void
_cyclic_add_omni(cyc_list_t *cyc_list)
{
    cyc_time_t cT;
    cyc_handler_t cH;
    cyc_omni_handler_t *omni = &cyc_list->cyl_omni;

    (omni->cyo_online)(omni->cyo_arg, CPU, &cH, &cT);

    wrap_timer_call_t *wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()];
    timer_call_add_cyclic(wrapTC, &cH, &cT);
}
cyclic_id_list_t
cyclic_add_omni(cyc_omni_handler_t *omni)
{
    cyc_list_t *cyc_list =
        _MALLOC(sizeof(cyc_list_t) + NCPU * sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);

    if (NULL == cyc_list) {
        return NULL;
    }

    cyc_list->cyl_omni = *omni;

    dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_add_omni, (void *)cyc_list);

    return (cyclic_id_list_t)cyc_list;
}
static void
_cyclic_remove_omni(cyc_list_t *cyc_list)
{
    cyc_omni_handler_t *omni = &cyc_list->cyl_omni;
    void *oarg;
    wrap_timer_call_t *wrapTC;

    /*
     * If the processor was offline when dtrace started, we did not allocate
     * a cyclic timer for this CPU.
     */
    if ((wrapTC = &cyc_list->cyl_wrap_by_cpus[cpu_number()]) != NULL) {
        oarg = timer_call_get_cyclic_arg(wrapTC);
        timer_call_remove_cyclic(wrapTC);
        (omni->cyo_offline)(omni->cyo_arg, CPU, oarg);
    }
}
void
cyclic_remove_omni(cyclic_id_list_t cyc_list)
{
    ASSERT(cyc_list != NULL);

    dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_remove_omni, (void *)cyc_list);
    _FREE(cyc_list, M_TEMP);
}
typedef struct wrap_thread_call {
    thread_call_t TChdl;
    cyc_handler_t hdlr;
    cyc_time_t when;
    uint64_t deadline;
} wrap_thread_call_t;
/*
 * _cyclic_apply will run on some thread under kernel_task. That's OK for the
 * cleaner and the deadman, but too distant in time and place for the profile provider.
 */
static void
_cyclic_apply( void *ignore, void *vTChdl )
{
#pragma unused(ignore)
    wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)vTChdl;

    (*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

    clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) );
    (void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

    /* Did cyclic_remove request a wakeup call when this thread call was re-armed? */
    if (wrapTC->when.cyt_interval == WAKEUP_REAPER) {
        thread_wakeup((event_t)wrapTC);
    }
}
cyclic_id_t
cyclic_add(cyc_handler_t *handler, cyc_time_t *when)
{
    uint64_t now;

    wrap_thread_call_t *wrapTC = _MALLOC(sizeof(wrap_thread_call_t), M_TEMP, M_ZERO | M_WAITOK);
    if (NULL == wrapTC) {
        return CYCLIC_NONE;
    }

    wrapTC->TChdl = thread_call_allocate( _cyclic_apply, NULL );
    wrapTC->hdlr = *handler;
    wrapTC->when = *when;

    ASSERT(when->cyt_when == 0);
    ASSERT(when->cyt_interval < WAKEUP_REAPER);

    nanoseconds_to_absolutetime(wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval);

    now = mach_absolute_time();
    wrapTC->deadline = now;

    clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) );
    (void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

    return (cyclic_id_t)wrapTC;
}
static void
noop_cyh_func(void * ignore)
{
#pragma unused(ignore)
}
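/*
 * cyclic_remove() must not free the wrap_thread_call_t while _cyclic_apply()
 * might still re-arm it. If thread_call_cancel() cannot dequeue the call
 * (it is already running or about to run), WAKEUP_REAPER is stored in
 * cyt_interval and we block until _cyclic_apply() notices it and issues
 * thread_wakeup() after the final firing.
 */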
void
cyclic_remove(cyclic_id_t cyclic)
{
    wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)cyclic;

    ASSERT(cyclic != CYCLIC_NONE);

    while (!thread_call_cancel(wrapTC->TChdl)) {
        int ret = assert_wait(wrapTC, THREAD_UNINT);
        ASSERT(ret == THREAD_WAITING);

        wrapTC->when.cyt_interval = WAKEUP_REAPER;

        ret = thread_block(THREAD_CONTINUE_NULL);
        ASSERT(ret == THREAD_AWAKENED);
    }

    if (thread_call_free(wrapTC->TChdl)) {
        _FREE(wrapTC, M_TEMP);
    } else {
        /* Gut this cyclic and move on ... */
        wrapTC->hdlr.cyh_func = noop_cyh_func;
        wrapTC->when.cyt_interval = NEARLY_FOREVER;
    }
}
int
ddi_driver_major(dev_info_t *devi)
{
    return (int)major(CAST_DOWN_EXPLICIT(int, devi));
}

int
ddi_create_minor_node(dev_info_t *dip, const char *name, int spec_type,
    minor_t minor_num, const char *node_type, int flag)
{
#pragma unused(spec_type,node_type,flag)
    dev_t dev = makedev( ddi_driver_major(dip), minor_num );

    if (NULL == devfs_make_node( dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, name, 0 )) {
        return DDI_FAILURE;
    }

    return DDI_SUCCESS;
}
void
ddi_remove_minor_node(dev_info_t *dip, char *name)
{
#pragma unused(dip,name)
    /* XXX called from dtrace_detach, so NOTREACHED for now. */
}

major_t
getemajor( dev_t d )
{
    return (major_t) major(d);
}

minor_t
getminor( dev_t d )
{
    return (minor_t) minor(d);
}
extern void Debugger(const char*);
void *
dt_kmem_alloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
{
#pragma unused(kmflag)

    /*
     * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
     * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
     */
    vm_size_t vsize = size;
    return kalloc_canblock(&vsize, TRUE, site);
}
void *
dt_kmem_zalloc_site(size_t size, int kmflag, vm_allocation_site_t *site)
{
#pragma unused(kmflag)

    /*
     * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
     * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
     */
    vm_size_t vsize = size;
    void* buf = kalloc_canblock(&vsize, TRUE, site);

    if (!buf) {
        return NULL;
    }

    bzero(buf, size);

    return buf;
}
void
dt_kmem_free(void *buf, size_t size)
{
    /*
     * DTrace relies on this, it's doing a lot of NULL frees.
     * A NULL free causes the debug builds to panic.
     */
    if (buf == NULL) {
        return;
    }

    kfree(buf, size);
}
/*
 * aligned dt_kmem allocator
 * align should be a power of two
 */

void*
dt_kmem_alloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *site)
{
    void *mem, **addr_to_free;
    intptr_t mem_aligned;
    size_t *size_to_free, hdr_size;

    /* Must be a power of two. */
    assert(align != 0);
    assert((align & (align - 1)) == 0);

    /*
     * We are going to add a header to the allocation. It contains
     * the address to free and the total size of the buffer.
     */
    hdr_size = sizeof(size_t) + sizeof(void*);
    mem = dt_kmem_alloc_site(size + align + hdr_size, kmflag, site);
    if (mem == NULL) {
        return NULL;
    }

    mem_aligned = (intptr_t) (((intptr_t) mem + align + hdr_size) & ~(align - 1));

    /* Write the address to free in the header. */
    addr_to_free = (void**) (mem_aligned - sizeof(void*));
    *addr_to_free = mem;

    /* Write the size to free in the header. */
    size_to_free = (size_t*) (mem_aligned - hdr_size);
    *size_to_free = size + align + hdr_size;

    return (void*) mem_aligned;
}
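/*
 * Resulting buffer layout (hdr_size = sizeof(size_t) + sizeof(void*)):
 *
 *   mem ... padding ... [ total size ][ original mem ] mem_aligned -> caller's buffer
 *                        ^size_to_free  ^addr_to_free
 *
 * dt_kmem_free_aligned() recovers addr_to_free and size_to_free from just
 * below the aligned pointer it is handed.
 */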
void*
dt_kmem_zalloc_aligned_site(size_t size, size_t align, int kmflag, vm_allocation_site_t *s)
{
    void* buf;

    buf = dt_kmem_alloc_aligned_site(size, align, kmflag, s);

    if (!buf) {
        return NULL;
    }

    bzero(buf, size);

    return buf;
}
void
dt_kmem_free_aligned(void* buf, size_t size)
{
#pragma unused(size)
    intptr_t ptr = (intptr_t) buf;
    void **addr_to_free = (void**) (ptr - sizeof(void*));
    size_t *size_to_free = (size_t*) (ptr - (sizeof(size_t) + sizeof(void*)));

    if (buf == NULL) {
        return;
    }

    dt_kmem_free(*addr_to_free, *size_to_free);
}
/*
 * dtrace wants to manage just a single block: dtrace_state_percpu_t * NCPU, and
 * doesn't specify constructor, destructor, or reclaim methods.
 * At present, it always zeroes the block it obtains from kmem_cache_alloc().
 * We'll manage this constricted use of kmem_cache with ordinary _MALLOC and _FREE.
 */
kmem_cache_t *
kmem_cache_create(
    const char *name,                        /* descriptive name for this cache */
    size_t bufsize,                          /* size of the objects it manages */
    size_t align,                            /* required object alignment */
    int (*constructor)(void *, void *, int), /* object constructor */
    void (*destructor)(void *, void *),      /* object destructor */
    void (*reclaim)(void *),                 /* memory reclaim callback */
    void *private,                           /* pass-thru arg for constr/destr/reclaim */
    vmem_t *vmp,                             /* vmem source for slab allocation */
    int cflags)                              /* cache creation flags */
{
#pragma unused(name,align,constructor,destructor,reclaim,private,vmp,cflags)
    return (kmem_cache_t *)bufsize; /* A cookie that tracks the single object size. */
}

void *
kmem_cache_alloc(kmem_cache_t *cp, int kmflag)
{
#pragma unused(kmflag)
    size_t bufsize = (size_t)cp;
    return (void *)_MALLOC(bufsize, M_TEMP, M_WAITOK);
}

void
kmem_cache_free(kmem_cache_t *cp, void *buf)
{
#pragma unused(cp)
    _FREE(buf, M_TEMP);
}

void
kmem_cache_destroy(kmem_cache_t *cp)
{
#pragma unused(cp)
}
/*
 * vmem (Solaris "slab" allocator) used by DTrace solely to hand out resource ids
 */
typedef unsigned int u_daddr_t;
#include "blist.h"

/* By passing around blist *handles*, the underlying blist can be resized as needed. */
struct blist_hdl {
    blist_t blist;
};

vmem_t *
vmem_create(const char *name, void *base, size_t size, size_t quantum, void *ignore5,
    void *ignore6, vmem_t *source, size_t qcache_max, int vmflag)
{
#pragma unused(name,quantum,ignore5,ignore6,source,qcache_max,vmflag)
    blist_t bl;
    struct blist_hdl *p = _MALLOC(sizeof(struct blist_hdl), M_TEMP, M_WAITOK);

    ASSERT(quantum == 1);
    ASSERT(NULL == ignore5);
    ASSERT(NULL == ignore6);
    ASSERT(NULL == source);
    ASSERT(0 == qcache_max);
    ASSERT(vmflag & VMC_IDENTIFIER);

    size = MIN(128, size); /* Clamp to 128 initially, since the underlying data structure is pre-allocated */

    p->blist = bl = blist_create( size );
    blist_free(bl, 0, size);
    if (base) {
        blist_alloc( bl, (daddr_t)(uintptr_t)base ); /* Chomp off initial ID(s) */
    }

    return (vmem_t *)p;
}
void *
vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
#pragma unused(vmflag)
    struct blist_hdl *q = (struct blist_hdl *)vmp;
    blist_t bl = q->blist;
    daddr_t p;

    p = blist_alloc(bl, (daddr_t)size);

    if ((daddr_t)-1 == p) {
        blist_resize(&bl, (bl->bl_blocks) << 1, 1);
        q->blist = bl;
        p = blist_alloc(bl, (daddr_t)size);
        if ((daddr_t)-1 == p) {
            panic("vmem_alloc: failure after blist_resize!");
        }
    }

    return (void *)(uintptr_t)p;
}
void
vmem_free(vmem_t *vmp, void *vaddr, size_t size)
{
    struct blist_hdl *p = (struct blist_hdl *)vmp;

    blist_free( p->blist, (daddr_t)(uintptr_t)vaddr, (daddr_t)size );
}
void
vmem_destroy(vmem_t *vmp)
{
    struct blist_hdl *p = (struct blist_hdl *)vmp;

    blist_destroy( p->blist );
    _FREE( p, sizeof(struct blist_hdl) );
}
/*
 * dtrace_gethrestime() provides the "walltimestamp", a value that is anchored at
 * January 1, 1970. Because it can be called from probe context, it must take no locks.
 */
hrtime_t
dtrace_gethrestime(void)
{
    clock_sec_t secs;
    clock_nsec_t nanosecs;
    uint64_t secs64, ns64;

    clock_get_calendar_nanotime_nowait(&secs, &nanosecs);
    secs64 = (uint64_t)secs;
    ns64 = (uint64_t)nanosecs;

    ns64 = ns64 + (secs64 * 1000000000LL);
    return ns64;
}
/*
 * dtrace_gethrtime() provides high-resolution timestamps with machine-dependent origin.
 * Hence its primary use is to specify intervals.
 */
static uint64_t
dtrace_abs_to_nano(uint64_t elapsed)
{
    static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };

    /*
     * If this is the first time we've run, get the timebase.
     * We can use denom == 0 to indicate that sTimebaseInfo is
     * uninitialised because it makes no sense to have a zero
     * denominator in a fraction.
     */
    if (sTimebaseInfo.denom == 0) {
        (void) clock_timebase_info(&sTimebaseInfo);
    }

    /*
     * Convert to nanoseconds.
     * return (elapsed * (uint64_t)sTimebaseInfo.numer)/(uint64_t)sTimebaseInfo.denom;
     *
     * Provided the final result is representable in 64 bits the following maneuver will
     * deliver that result without intermediate overflow.
     */
    if (sTimebaseInfo.denom == sTimebaseInfo.numer) {
        return elapsed;
    } else if (sTimebaseInfo.denom == 1) {
        return elapsed * (uint64_t)sTimebaseInfo.numer;
    } else {
        /* Decompose elapsed = eta32 * 2^32 + eps32: */
        uint64_t eta32 = elapsed >> 32;
        uint64_t eps32 = elapsed & 0x00000000ffffffffLL;

        uint32_t numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;

        /* Form product of elapsed64 (decomposed) and numer: */
        uint64_t mu64 = numer * eta32;
        uint64_t lambda64 = numer * eps32;

        /* Divide the constituents by denom: */
        uint64_t q32 = mu64 / denom;
        uint64_t r32 = mu64 - (q32 * denom); /* mu64 % denom */

        return (q32 << 32) + ((r32 << 32) + lambda64) / denom;
    }
}
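/*
 * The decomposition above uses the identity (with elapsed = eta32 * 2^32 + eps32):
 *
 *   elapsed * numer / denom
 *     = (numer * eta32 * 2^32 + numer * eps32) / denom
 *     = (mu64 * 2^32 + lambda64) / denom
 *     = q32 * 2^32 + (r32 * 2^32 + lambda64) / denom,   where mu64 = q32 * denom + r32
 *
 * so the returned expression produces the same truncated quotient without ever
 * forming the full 128-bit product elapsed * numer.
 */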
hrtime_t
dtrace_gethrtime(void)
{
    static uint64_t start = 0;

    if (start == 0) {
        start = mach_absolute_time();
    }

    return dtrace_abs_to_nano(mach_absolute_time() - start);
}
/*
 * Atomicity and synchronization
 */
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
{
    if (OSCompareAndSwap((UInt32)cmp, (UInt32)new, (volatile UInt32 *)target)) {
        return cmp;
    }

    return ~cmp; /* Must return something *other* than cmp */
}

void *
dtrace_casptr(void *target, void *cmp, void *new)
{
    if (OSCompareAndSwapPtr( cmp, new, (void**)target )) {
        return cmp;
    }

    return (void *)(~(uintptr_t)cmp); /* Must return something *other* than cmp */
}
/*
 * Interrupt manipulation
 */
dtrace_icookie_t
dtrace_interrupt_disable(void)
{
    return (dtrace_icookie_t)ml_set_interrupts_enabled(FALSE);
}

void
dtrace_interrupt_enable(dtrace_icookie_t reenable)
{
    (void)ml_set_interrupts_enabled((boolean_t)reenable);
}

static void
dtrace_sync_func(void)
{
}

/*
 * dtrace_sync() is not called from probe context.
 */
void
dtrace_sync(void)
{
    dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}
/*
 * The dtrace_copyin/out/instr and dtrace_fuword* routines can be called from probe context.
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

static int
dtrace_copycheck(user_addr_t uaddr, uintptr_t kaddr, size_t size)
{
#pragma unused(kaddr)

    vm_offset_t recover = dtrace_set_thread_recover( current_thread(), 0 ); /* Snare any extant recovery point. */
    dtrace_set_thread_recover( current_thread(), recover ); /* Put it back. We *must not* re-enter and overwrite. */

    ASSERT(kaddr + size >= kaddr);

    if (uaddr + size < uaddr ||                           /* Avoid address wrap. */
        KERN_FAILURE == dtrace_copyio_preflight(uaddr)) { /* Machine specific setup/constraints. */
        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        return 0;
    }
    return 1;
}
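/*
 * Each of the dtrace_copy*() routines below follows the same pattern: validate
 * with dtrace_copycheck(), and if the underlying copy fails, record the fault
 * by setting CPU_DTRACE_BADADDR and stashing the offending address in
 * cpuc_dtrace_illval rather than letting the error propagate; the machine
 * layer then cleans up via dtrace_copyio_postflight().
 */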
void
dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

    if (dtrace_copycheck( src, dst, len )) {
        if (copyin((const user_addr_t)src, (char *)dst, (vm_size_t)len)) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
        }
        dtrace_copyio_postflight(src);
    }
}
void
dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

    size_t actual;

    if (dtrace_copycheck( src, dst, len )) {
        /* copyin as many as 'len' bytes. */
        int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);

        /*
         * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
         * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
         * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
         * to the caller.
         */
        if (error && error != ENAMETOOLONG) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;
        }
        dtrace_copyio_postflight(src);
    }
}
void
dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

    if (dtrace_copycheck( dst, src, len )) {
        if (copyout((const void *)src, dst, (vm_size_t)len)) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
        }
        dtrace_copyio_postflight(dst);
    }
}
void
dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags)
{
#pragma unused(flags)

    size_t actual;

    if (dtrace_copycheck( dst, src, len )) {
        /*
         * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
         * not encountered. We raise CPU_DTRACE_BADADDR in that case.
         * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
         * to the caller.
         */
        if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;
        }
        dtrace_copyio_postflight(dst);
    }
}
extern const int copysize_limit_panic;

int
dtrace_copy_maxsize(void)
{
    return copysize_limit_panic;
}
int
dtrace_buffer_copyout(const void *kaddr, user_addr_t uaddr, vm_size_t nbytes)
{
    int maxsize = dtrace_copy_maxsize();
    /*
     * Partition the copyout in copysize_limit_panic-sized chunks
     */
    while (nbytes >= (vm_size_t)maxsize) {
        if (copyout(kaddr, uaddr, maxsize) != 0) {
            return EFAULT;
        }

        nbytes -= maxsize;
        uaddr += maxsize;
        kaddr += maxsize;
    }

    if (nbytes > 0) {
        if (copyout(kaddr, uaddr, nbytes) != 0) {
            return EFAULT;
        }
    }

    return 0;
}
uint8_t
dtrace_fuword8(user_addr_t uaddr)
{
    uint8_t ret = 0;

    DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
    if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
        if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        }
        dtrace_copyio_postflight(uaddr);
    }
    DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

    return ret;
}

uint16_t
dtrace_fuword16(user_addr_t uaddr)
{
    uint16_t ret = 0;

    DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
    if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
        if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        }
        dtrace_copyio_postflight(uaddr);
    }
    DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

    return ret;
}

uint32_t
dtrace_fuword32(user_addr_t uaddr)
{
    uint32_t ret = 0;

    DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
    if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
        if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        }
        dtrace_copyio_postflight(uaddr);
    }
    DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

    return ret;
}

uint64_t
dtrace_fuword64(user_addr_t uaddr)
{
    uint64_t ret = 0;

    DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
    if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
        if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
            cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        }
        dtrace_copyio_postflight(uaddr);
    }
    DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

    return ret;
}
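/*
 * Note that each dtrace_fuword*() above brackets the user access with
 * CPU_DTRACE_NOFAULT and, on failure, reports the bad address through
 * CPU_DTRACE_BADADDR / cpuc_dtrace_illval while returning the zero-initialized
 * value, so a probe never takes an unhandled fault on a bogus user pointer.
 */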
/*
 * Emulation of Solaris fuword / suword
 * Called from the fasttrap provider, so the use of copyin/out requires fewer safeguards.
 */

int
fuword8(user_addr_t uaddr, uint8_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint8_t)) != 0) {
        return -1;
    }

    return 0;
}

int
fuword16(user_addr_t uaddr, uint16_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint16_t)) != 0) {
        return -1;
    }

    return 0;
}

int
fuword32(user_addr_t uaddr, uint32_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t)) != 0) {
        return -1;
    }

    return 0;
}

int
fuword64(user_addr_t uaddr, uint64_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t)) != 0) {
        return -1;
    }

    return 0;
}

void
fuword32_noerr(user_addr_t uaddr, uint32_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t))) {
        *value = 0;
    }
}

void
fuword64_noerr(user_addr_t uaddr, uint64_t *value)
{
    if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t))) {
        *value = 0;
    }
}

int
suword64(user_addr_t addr, uint64_t value)
{
    if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
        return -1;
    }

    return 0;
}

int
suword32(user_addr_t addr, uint32_t value)
{
    if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
        return -1;
    }

    return 0;
}
extern boolean_t dtrace_tally_fault(user_addr_t);

boolean_t
dtrace_tally_fault(user_addr_t uaddr)
{
    DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
    cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
    return DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT) ? TRUE : FALSE;
}
extern int prf(const char *, va_list, int, struct tty *); /* bsd/kern/subr_prf.h */

int
vuprintf(const char *format, va_list ap)
{
    return prf(format, ap, TOTTY, NULL);
}
/* Not called from probe context */
void
cmn_err( int level, const char *format, ... )
{
#pragma unused(level)
    va_list alist;

    va_start(alist, format);
    vuprintf(format, alist);
    va_end(alist);
}
/*
 * History:
 *  2002-01-24  gvdl    Initial implementation of strstr
 */
__private_extern__ const char *
strstr(const char *in, const char *str)
{
    char c;
    size_t len;

    c = *str++;
    if (!c) {
        return (const char *) in;       // Trivial empty string case
    }

    len = strlen(str);
    do {
        char sc;

        do {
            sc = *in++;
            if (!sc) {
                return (char *) 0;
            }
        } while (sc != c);
    } while (strncmp(in, str, len) != 0);

    return (const char *) (in - 1);
}
const void*
bsearch(const void *key, const void *base0, size_t nmemb, size_t size, int (*compar)(const void *, const void *))
{
    const char *base = base0;
    size_t lim;
    int cmp;
    const void *p;

    for (lim = nmemb; lim != 0; lim >>= 1) {
        p = base + (lim >> 1) * size;
        cmp = (*compar)(key, p);
        if (cmp == 0) {
            return p;
        }
        if (cmp > 0) {  /* key > p: move right */
            base = (const char *)p + size;
            lim--;
        }               /* else move left */
    }
    return NULL;
}
uintptr_t
dtrace_caller(int ignore)
{
#pragma unused(ignore)
    return -1; /* Just as in Solaris dtrace_asm.s */
}
int
dtrace_getstackdepth(int aframes)
{
    struct frame *fp = (struct frame *)__builtin_frame_address(0);
    struct frame *nextfp, *minfp, *stacktop;
    int depth = 0;
    int on_intr;

    if ((on_intr = CPU_ON_INTR(CPU)) != 0) {
        stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
    } else {
        stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
    }

    minfp = fp;

    aframes++;

    for (;;) {
        depth++;

        nextfp = *(struct frame **)fp;

        if (nextfp <= minfp || nextfp >= stacktop) {
            if (on_intr) {
                /*
                 * Hop from interrupt stack to thread stack.
                 */
                vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

                minfp = (struct frame *)kstack_base;
                stacktop = (struct frame *)(kstack_base + kernel_stack_size);

                on_intr = 0;
                continue;
            }
            break;
        }

        fp = nextfp;
        minfp = fp;
    }

    if (depth <= aframes) {
        return 0;
    }

    return depth - aframes;
}
boolean_t
dtrace_addr_in_module(void* addr, struct modctl *ctl)
{
    return OSKextKextForAddress(addr) == (void*)ctl->mod_address;
}
void
dtrace_vtime_enable(void)
{
}

void
dtrace_vtime_disable(void)
{
}