/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * APPLE NOTE: This file is compiled even if dtrace is unconfig'd. A symbol
 * from this file (_dtrace_register_anon_DOF) always needs to be exported for
 * an external kext to link against.
 */
#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
#include <kern/thread.h>
#include <mach/thread_status.h>

#include <sys/malloc.h>

#include <sys/proc_internal.h>
#include <sys/kauth.h>

#include <sys/systm.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <kern/queue.h>
#include <miscfs/devfs/devfs.h>
#include <kern/kalloc.h>

#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <mach/task.h>
#include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */

#define proc_t struct proc
/* Not called from probe context */
	if ((p = proc_find(pid)) == PROC_NULL) {

	task_suspend(p->task);

	lck_mtx_lock(&p->p_dtrace_sprlock);

/* Not called from probe context */
	lck_mtx_unlock(&p->p_dtrace_sprlock);

	task_resume(p->task);
// These are not exported from vm_map.h.
extern kern_return_t vm_map_read_user(vm_map_t map, vm_map_address_t src_addr, void *dst_p, vm_size_t size);
extern kern_return_t vm_map_write_user(vm_map_t map, void *src_p, vm_map_address_t dst_addr, vm_size_t size);
/* Not called from probe context */
uread(proc_t *p, void *buf, user_size_t len, user_addr_t a)
	ASSERT(p != PROC_NULL);
	ASSERT(p->task != NULL);

	task_t task = p->task;

	/*
	 * Grab a reference to the task vm_map_t to make sure
	 * the map isn't pulled out from under us.
	 *
	 * Because the proc_lock is not held at all times on all code
	 * paths leading here, it is possible for the proc to have
	 * exited. If the map is null, fail.
	 */
	vm_map_t map = get_task_map_reference(task);
		ret = vm_map_read_user( map, (vm_map_address_t)a, buf, (vm_size_t)len);
		vm_map_deallocate(map);
		ret = KERN_TERMINATED;
/* Not called from probe context */
uwrite(proc_t *p, void *buf, user_size_t len, user_addr_t a)
	ASSERT(p->task != NULL);

	task_t task = p->task;

	/*
	 * Grab a reference to the task vm_map_t to make sure
	 * the map isn't pulled out from under us.
	 *
	 * Because the proc_lock is not held at all times on all code
	 * paths leading here, it is possible for the proc to have
	 * exited. If the map is null, fail.
	 */
	vm_map_t map = get_task_map_reference(task);

		/* Find the memory permissions. */
		uint32_t nestingDepth = 999999;
		vm_region_submap_short_info_data_64_t info;
		mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		mach_vm_address_t address = (mach_vm_address_t)a;
		mach_vm_size_t sizeOfRegion = (mach_vm_size_t)len;

		ret = mach_vm_region_recurse(map, &address, &sizeOfRegion, &nestingDepth, (vm_region_recurse_info_t)&info, &count);
		if (ret != KERN_SUCCESS)

		if (!(info.protection & VM_PROT_WRITE)) {
			/* Save the original protection values for restoration later */
			reprotect = info.protection;

			if (info.max_protection & VM_PROT_WRITE) {
				/* The memory is not currently writable, but can be made writable. */
				ret = mach_vm_protect (map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, reprotect | VM_PROT_WRITE);

				/*
				 * The memory is not currently writable, and cannot be made writable. We need to COW this memory.
				 *
				 * Strange, we can't just say "reprotect | VM_PROT_COPY", that fails.
				 */
				ret = mach_vm_protect (map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, VM_PROT_COPY | VM_PROT_READ | VM_PROT_WRITE);

				if (ret != KERN_SUCCESS)

			/* The memory was already writable. */
			reprotect = VM_PROT_NONE;

		ret = vm_map_write_user( map,

		if (ret != KERN_SUCCESS)

		if (reprotect != VM_PROT_NONE) {
			ASSERT(reprotect & VM_PROT_EXECUTE);
			ret = mach_vm_protect (map, (mach_vm_offset_t)a, (mach_vm_size_t)len, 0, reprotect);

		vm_map_deallocate(map);
		ret = KERN_TERMINATED;
cpu_core_t *cpu_core; /* XXX TLB lockdown? */

/*
 * dtrace_CRED() can be called from probe context. We cannot simply call kauth_cred_get() since
 * that function may try to resolve a lazy credential binding, which entails taking the proc_lock.
 */
	struct uthread *uthread = get_bsdthread_info(current_thread());

	return uthread->uu_ucred; /* May return NOCRED which is defined to be 0 */
#define HAS_ALLPRIVS(cr) priv_isfullset(&CR_OEPRIV(cr))
#define HAS_PRIVILEGE(cr, pr) ((pr) == PRIV_ALL ? \
					PRIV_ISASSERT(&CR_OEPRIV(cr), pr))

int PRIV_POLICY_CHOICE(void* cred, int priv, int all)
#pragma unused(priv, all)
	return kauth_cred_issuser(cred); /* XXX TODO: How is this different from PRIV_POLICY_ONLY? */

PRIV_POLICY_ONLY(void *cr, int priv, int boolean)
#pragma unused(priv, boolean)
	return kauth_cred_issuser(cr); /* XXX TODO: HAS_PRIVILEGE(cr, priv); */

crgetgid(const cred_t *cr) { return cr->cr_groups[0]; }

crgetuid(const cred_t *cr) { return cr->cr_uid; }
/* osfmk/kern/timer_call.h */
typedef void *call_entry_param_t;
typedef void (*call_entry_func_t)(
		call_entry_param_t param0,
		call_entry_param_t param1);

typedef struct call_entry {
	queue_chain_t q_link;
	call_entry_func_t func;
	call_entry_param_t param0;
	call_entry_param_t param1;

typedef struct call_entry *timer_call_t;
typedef void *timer_call_param_t;
typedef void (*timer_call_func_t)(
		timer_call_param_t param0,
		timer_call_param_t param1);

		timer_call_func_t func,
		timer_call_param_t param0);

		timer_call_param_t param1,

typedef struct wrap_timer_call {
	struct call_entry call;

#define WAKEUP_REAPER 0x7FFFFFFFFFFFFFFFLL
#define NEARLY_FOREVER 0x7FFFFFFFFFFFFFFELL
_timer_call_apply_cyclic( void *ignore, void *vTChdl )
#pragma unused(ignore)
	wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)vTChdl;

	(*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) );
	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline );

	/* Did timer_call_remove_cyclic request a wakeup call when this timer call was re-armed? */
	if (wrapTC->when.cyt_interval == WAKEUP_REAPER)
		thread_wakeup((event_t)wrapTC);
timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_time_t *when)
	timer_call_setup( &(wrapTC->call), _timer_call_apply_cyclic, NULL );
	wrapTC->hdlr = *handler;
	wrapTC->when = *when;

	nanoseconds_to_absolutetime( wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval );

	now = mach_absolute_time();
	wrapTC->deadline = now;

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) );
	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline );

	return (cyclic_id_t)wrapTC;
timer_call_remove_cyclic(cyclic_id_t cyclic)
	wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)cyclic;

	while (!timer_call_cancel(&(wrapTC->call))) {
		int ret = assert_wait(wrapTC, THREAD_UNINT);
		ASSERT(ret == THREAD_WAITING);

		wrapTC->when.cyt_interval = WAKEUP_REAPER;

		ret = thread_block(THREAD_CONTINUE_NULL);
		ASSERT(ret == THREAD_AWAKENED);
timer_call_get_cyclic_arg(cyclic_id_t cyclic)
	wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)cyclic;

	return (wrapTC ? wrapTC->hdlr.cyh_arg : NULL);
cyclic_timer_add(cyc_handler_t *handler, cyc_time_t *when)
	wrap_timer_call_t *wrapTC = _MALLOC(sizeof(wrap_timer_call_t), M_TEMP, M_ZERO | M_WAITOK);

	return timer_call_add_cyclic( wrapTC, handler, when );

cyclic_timer_remove(cyclic_id_t cyclic)
	ASSERT( cyclic != CYCLIC_NONE );

	timer_call_remove_cyclic( cyclic );
	_FREE((void *)cyclic, M_TEMP);
_cyclic_add_omni(cyclic_id_list_t cyc_list)
	wrap_timer_call_t *wrapTC;
	cyc_omni_handler_t *omni = (cyc_omni_handler_t *)cyc_list;

	(omni->cyo_online)(omni->cyo_arg, CPU, &cH, &cT);

	t = (char *)cyc_list;
	t += sizeof(cyc_omni_handler_t);
	cyc_list = (cyclic_id_list_t)t;

	t += sizeof(cyclic_id_t)*NCPU;
	t += (sizeof(wrap_timer_call_t))*cpu_number();
	wrapTC = (wrap_timer_call_t *)t;

	cyc_list[cpu_number()] = timer_call_add_cyclic(wrapTC, &cH, &cT);
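/*
 * Layout of the single allocation behind a cyc_list (a sketch inferred from the
 * pointer arithmetic above and the _MALLOC in cyclic_add_omni below):
 *
 *   +--------------------+----------------------+----------------------------+
 *   | cyc_omni_handler_t | cyclic_id_t x NCPU   | wrap_timer_call_t x NCPU   |
 *   +--------------------+----------------------+----------------------------+
 *   ^ cyc_list as passed   ^ cyc_list rebased      ^ per-CPU wrapTC slots
 *
 * dtrace_xcall(DTRACE_CPUALL, ...) runs _cyclic_add_omni on every CPU; each CPU
 * fills in its own cyclic_id_t slot and arms its own wrap_timer_call_t.
 */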
cyclic_add_omni(cyc_omni_handler_t *omni)
	cyclic_id_list_t cyc_list =
		_MALLOC( (sizeof(wrap_timer_call_t))*NCPU +
				 sizeof(cyclic_id_t)*NCPU +
				 sizeof(cyc_omni_handler_t), M_TEMP, M_ZERO | M_WAITOK );
	if (NULL == cyc_list)
		return (cyclic_id_list_t)CYCLIC_NONE;

	*(cyc_omni_handler_t *)cyc_list = *omni;
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_add_omni, (void *)cyc_list);
_cyclic_remove_omni(cyclic_id_list_t cyc_list)
	cyc_omni_handler_t *omni = (cyc_omni_handler_t *)cyc_list;

	t = (char *)cyc_list;
	t += sizeof(cyc_omni_handler_t);
	cyc_list = (cyclic_id_list_t)t;

	cid = cyc_list[cpu_number()];
	oarg = timer_call_get_cyclic_arg(cid);

	timer_call_remove_cyclic( cid );
	(omni->cyo_offline)(omni->cyo_arg, CPU, oarg);

cyclic_remove_omni(cyclic_id_list_t cyc_list)
	ASSERT( cyc_list != (cyclic_id_list_t)CYCLIC_NONE );

	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)_cyclic_remove_omni, (void *)cyc_list);
	_FREE(cyc_list, M_TEMP);
typedef struct wrap_thread_call {
} wrap_thread_call_t;

/*
 * _cyclic_apply will run on some thread under kernel_task. That's OK for the
 * cleaner and the deadman, but too distant in time and place for the profile provider.
 */
_cyclic_apply( void *ignore, void *vTChdl )
#pragma unused(ignore)
	wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)vTChdl;

	(*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) );
	(void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

	/* Did cyclic_remove request a wakeup call when this thread call was re-armed? */
	if (wrapTC->when.cyt_interval == WAKEUP_REAPER)
		thread_wakeup((event_t)wrapTC);
cyclic_add(cyc_handler_t *handler, cyc_time_t *when)
	wrap_thread_call_t *wrapTC = _MALLOC(sizeof(wrap_thread_call_t), M_TEMP, M_ZERO | M_WAITOK);

	wrapTC->TChdl = thread_call_allocate( _cyclic_apply, NULL );
	wrapTC->hdlr = *handler;
	wrapTC->when = *when;

	ASSERT(when->cyt_when == 0);
	ASSERT(when->cyt_interval < WAKEUP_REAPER);

	nanoseconds_to_absolutetime(wrapTC->when.cyt_interval, (uint64_t *)&wrapTC->when.cyt_interval);

	now = mach_absolute_time();
	wrapTC->deadline = now;

	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) );
	(void)thread_call_enter1_delayed( wrapTC->TChdl, (void *)wrapTC, wrapTC->deadline );

	return (cyclic_id_t)wrapTC;
noop_cyh_func(void * ignore)
#pragma unused(ignore)

cyclic_remove(cyclic_id_t cyclic)
	wrap_thread_call_t *wrapTC = (wrap_thread_call_t *)cyclic;

	ASSERT(cyclic != CYCLIC_NONE);

	while (!thread_call_cancel(wrapTC->TChdl)) {
		int ret = assert_wait(wrapTC, THREAD_UNINT);
		ASSERT(ret == THREAD_WAITING);

		wrapTC->when.cyt_interval = WAKEUP_REAPER;

		ret = thread_block(THREAD_CONTINUE_NULL);
		ASSERT(ret == THREAD_AWAKENED);

	if (thread_call_free(wrapTC->TChdl))
		_FREE(wrapTC, M_TEMP);
		/* Gut this cyclic and move on ... */
		wrapTC->hdlr.cyh_func = noop_cyh_func;
		wrapTC->when.cyt_interval = NEARLY_FOREVER;
/*
 * timeout / untimeout (converted to dtrace_timeout / dtrace_untimeout due to name collision)
 */

dtrace_timeout(void (*func)(void *, void *), void* arg, uint64_t nanos)
	thread_call_t call = thread_call_allocate(func, NULL);

	nanoseconds_to_absolutetime(nanos, &nanos);

	/*
	 * This method does not use clock_deadline_for_periodic_event() because it is a one-shot,
	 * and clock drift on later invocations is not a worry.
	 */
	uint64_t deadline = mach_absolute_time() + nanos;

	thread_call_enter_delayed(call, deadline);
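/*
 * Usage sketch (illustrative only; the callback name is hypothetical, not taken
 * from this file): arrange a one-shot callback one second (1000000000 ns) out:
 *
 *	(void)dtrace_timeout(my_oneshot_fn, NULL, 1000000000ULL);
 *
 * Unlike the cyclic wrappers above, nothing re-arms the underlying thread call,
 * so the callback fires at most once.
 */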
ddi_report_dev(dev_info_t *devi)
#define NSOFT_STATES 32 /* XXX No more than 32 clients at a time, please. */
static void *soft[NSOFT_STATES];

ddi_soft_state_init(void **state_p, size_t size, size_t n_items)
#pragma unused(n_items)
	for (i = 0; i < NSOFT_STATES; ++i) soft[i] = _MALLOC(size, M_TEMP, M_ZERO | M_WAITOK);
	*(size_t *)state_p = size;

ddi_soft_state_zalloc(void *state, int item)
#pragma unused(state)
	if (item < NSOFT_STATES)

ddi_get_soft_state(void *state, int item)
#pragma unused(state)
	ASSERT(item < NSOFT_STATES);

ddi_soft_state_free(void *state, int item)
	ASSERT(item < NSOFT_STATES);
	bzero( soft[item], (size_t)state );

ddi_soft_state_fini(void **state_p)
#pragma unused(state_p)
	for (i = 0; i < NSOFT_STATES; ++i) _FREE( soft[i], M_TEMP );
static unsigned int gRegisteredProps = 0;
	char name[32];		/* enough for "dof-data-" + digits */

kern_return_t _dtrace_register_anon_DOF(char *, uchar_t *, uint_t);

_dtrace_register_anon_DOF(char *name, uchar_t *data, uint_t nelements)
	if (gRegisteredProps < sizeof(gPropTable)/sizeof(gPropTable[0])) {
		int *p = (int *)_MALLOC(nelements*sizeof(int), M_TEMP, M_WAITOK);

		strlcpy(gPropTable[gRegisteredProps].name, name, sizeof(gPropTable[0].name));
		gPropTable[gRegisteredProps].nelements = nelements;
		gPropTable[gRegisteredProps].data = p;

		while (nelements-- > 0) {
			*p++ = (int)(*data++);
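/*
 * Usage sketch (illustrative; the property-name digit and the caller are
 * assumptions, not taken from this file): a kext carrying anonymous DOF
 * registers its payload early in boot, e.g.
 *
 *	_dtrace_register_anon_DOF("dof-data-0", dof_bytes, dof_byte_count);
 *
 * The bytes are widened to one int per byte by the loop above and later handed
 * back, by name, through ddi_prop_lookup_int_array() below.
 */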
ddi_prop_lookup_int_array(dev_t match_dev, dev_info_t *dip, uint_t flags,
    char *name, int **data, uint_t *nelements)
#pragma unused(match_dev,dip,flags)
	for (i = 0; i < gRegisteredProps; ++i)
		if (0 == strncmp(name, gPropTable[i].name,
					sizeof(gPropTable[i].name))) {
			*data = gPropTable[i].data;
			*nelements = gPropTable[i].nelements;
ddi_prop_free(void *buf)

ddi_driver_major(dev_info_t *devi) { return (int)major(devi); }

ddi_create_minor_node(dev_info_t *dip, const char *name, int spec_type,
    minor_t minor_num, const char *node_type, int flag)
#pragma unused(spec_type,node_type,flag)
	dev_t dev = makedev( (uint32_t)dip, minor_num );

	if (NULL == devfs_make_node( dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, name, 0 ))

ddi_remove_minor_node(dev_info_t *dip, char *name)
#pragma unused(dip,name)
	/* XXX called from dtrace_detach, so NOTREACHED for now. */
	return (major_t) major(d);

	return (minor_t) minor(d);

makedevice(major_t major, minor_t minor)
	return makedev( major, minor );

int ddi_getprop(dev_t dev, dev_info_t *dip, int flags, const char *name, int defvalue)
#pragma unused(dev, dip, flags, name)
/*
 * Kernel Debug Interface
 */
kdi_dtrace_set(kdi_dtrace_set_t ignore)
#pragma unused(ignore)
	return 0; /* Success */

extern void Debugger(const char*);

debug_enter(char *c) { Debugger(c); }
dt_kmem_alloc(size_t size, int kmflag)
#pragma unused(kmflag)
	/*
	 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
	 * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
	 */
#if defined(DTRACE_MEMORY_ZONES)
	return dtrace_alloc(size);

dt_kmem_zalloc(size_t size, int kmflag)
#pragma unused(kmflag)
	/*
	 * We ignore the M_NOWAIT bit in kmflag (all of kmflag, in fact).
	 * Requests larger than 8K with M_NOWAIT fail in kalloc_canblock.
	 */
#if defined(DTRACE_MEMORY_ZONES)
	void* buf = dtrace_alloc(size);
	void* buf = kalloc(size);
dt_kmem_free(void *buf, size_t size)
	/*
	 * DTrace relies on this; it does a lot of NULL frees.
	 * A NULL free causes the debug builds to panic.
	 */
	if (buf == NULL) return;

#if defined(DTRACE_MEMORY_ZONES)
	dtrace_free(buf, size);
/*
 * aligned kmem allocator
 * align should be a power of two
 */

void* dt_kmem_alloc_aligned(size_t size, size_t align, int kmflag)
	buf = dt_kmem_alloc(align + sizeof(void*) + size, kmflag);

	p += sizeof(void*);		/* now we have enough room to store the backup */
	p = P2ROUNDUP(p, align);	/* and now we're aligned */

	buf_backup = (void**)(p - sizeof(void*));
	*buf_backup = buf;		/* back up the address we need to free */
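/*
 * Resulting layout (a sketch derived from the arithmetic above; the picture
 * assumes align is a power of two, e.g. 16):
 *
 *   buf                                   p (returned to the caller)
 *   |---- padding ----|---- saved buf ----|---- size bytes, aligned ----|
 *                      ^
 *                      buf_backup: the void* immediately before p, which
 *                      dt_kmem_free_aligned() reads back to recover the
 *                      original allocation before freeing it.
 */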
void* dt_kmem_zalloc_aligned(size_t size, size_t align, int kmflag)
	buf = dt_kmem_alloc_aligned(size, align, kmflag);

void dt_kmem_free_aligned(void* buf, size_t size)
	buf_backup = (void**)(p);

	dt_kmem_free(*buf_backup, size + ((char*)buf - (char*)*buf_backup));
/*
 * dtrace wants to manage just a single block: dtrace_state_percpu_t * NCPU, and
 * doesn't specify constructor, destructor, or reclaim methods.
 * At present, it always zeroes the block it obtains from kmem_cache_alloc().
 * We'll manage this constricted use of kmem_cache with ordinary _MALLOC and _FREE.
 */
	char *name,		/* descriptive name for this cache */
	size_t bufsize,		/* size of the objects it manages */
	size_t align,		/* required object alignment */
	int (*constructor)(void *, void *, int), /* object constructor */
	void (*destructor)(void *, void *),	/* object destructor */
	void (*reclaim)(void *), /* memory reclaim callback */
	void *private,		/* pass-thru arg for constr/destr/reclaim */
	vmem_t *vmp,		/* vmem source for slab allocation */
	int cflags)		/* cache creation flags */
#pragma unused(name,align,constructor,destructor,reclaim,private,vmp,cflags)
	return (kmem_cache_t *)bufsize; /* A cookie that tracks the single object size. */

kmem_cache_alloc(kmem_cache_t *cp, int kmflag)
#pragma unused(kmflag)
	size_t bufsize = (size_t)cp;
	return (void *)_MALLOC(bufsize, M_TEMP, M_WAITOK);

kmem_cache_free(kmem_cache_t *cp, void *buf)

kmem_cache_destroy(kmem_cache_t *cp)
extern void thread_call_setup(thread_call_t, thread_call_func_t, thread_call_param_t); /* XXX MACH_KERNEL_PRIVATE */

_taskq_apply( task_func_t func, thread_call_param_t arg )

taskq_create(const char *name, int nthreads, pri_t pri, int minalloc,
    int maxalloc, uint_t flags)
#pragma unused(name,nthreads,pri,minalloc,maxalloc,flags)
	return (taskq_t *)thread_call_allocate( (thread_call_func_t)_taskq_apply, NULL );

taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
#pragma unused(flags)
	thread_call_setup( (thread_call_t) tq, (thread_call_func_t)_taskq_apply, (thread_call_param_t)func );
	thread_call_enter1( (thread_call_t) tq, (thread_call_param_t)arg );
	return (taskqid_t) tq /* for lack of anything better */;

taskq_destroy(taskq_t *tq)
	thread_call_cancel( (thread_call_t) tq );
	thread_call_free( (thread_call_t) tq );
/*
 * vmem (Solaris "slab" allocator) used by DTrace solely to hand out resource ids
 */
typedef unsigned int u_daddr_t;

/* By passing around blist *handles*, the underlying blist can be resized as needed. */

vmem_create(const char *name, void *base, size_t size, size_t quantum, void *ignore5,
    void *ignore6, vmem_t *source, size_t qcache_max, int vmflag)
#pragma unused(name,quantum,ignore5,ignore6,source,qcache_max,vmflag)
	struct blist_hdl *p = _MALLOC(sizeof(struct blist_hdl), M_TEMP, M_WAITOK);

	ASSERT(quantum == 1);
	ASSERT(NULL == ignore5);
	ASSERT(NULL == ignore6);
	ASSERT(NULL == source);
	ASSERT(0 == qcache_max);
	ASSERT(vmflag & VMC_IDENTIFIER);

	size = MIN(128, size); /* Clamp to 128 initially, since the underlying data structure is pre-allocated */

	p->blist = bl = blist_create( size );
	blist_free(bl, 0, size);
	if (base) blist_alloc( bl, (daddr_t)base ); /* Chomp off initial ID(s) */
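/*
 * Usage sketch (illustrative; the arena name and flags shown are assumptions,
 * not taken from this file): DTrace uses these arenas purely as id allocators,
 * along the lines of
 *
 *	vmem_t *arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
 *	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
 *	void *id = vmem_alloc(arena, 1, VM_BESTFIT | VM_SLEEP);
 *
 * so every "allocation" is just the next free id drawn from the backing blist,
 * and the blist grows on demand in vmem_alloc() below.
 */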
vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
#pragma unused(vmflag)
	struct blist_hdl *q = (struct blist_hdl *)vmp;
	blist_t bl = q->blist;

	p = blist_alloc(bl, (daddr_t)size);

	if ((daddr_t)-1 == p) {
		blist_resize(&bl, (bl->bl_blocks) << 1, 1);

		p = blist_alloc(bl, (daddr_t)size);
		if ((daddr_t)-1 == p)
			panic("vmem_alloc: failure after blist_resize!");
vmem_free(vmem_t *vmp, void *vaddr, size_t size)
	struct blist_hdl *p = (struct blist_hdl *)vmp;

	blist_free( p->blist, (daddr_t)vaddr, (daddr_t)size );

vmem_destroy(vmem_t *vmp)
	struct blist_hdl *p = (struct blist_hdl *)vmp;

	blist_destroy( p->blist );
	_FREE( p, sizeof(struct blist_hdl) );
/*
 * dtrace_gethrestime() provides the "walltimestamp", a value that is anchored at
 * January 1, 1970. Because it can be called from probe context, it must take no locks.
 */

dtrace_gethrestime(void)
	uint32_t secs, nanosecs;
	uint64_t secs64, ns64;

	clock_get_calendar_nanotime_nowait(&secs, &nanosecs);
	secs64 = (uint64_t)secs;
	ns64 = (uint64_t)nanosecs;

	ns64 = ns64 + (secs64 * 1000000000LL);
/*
 * dtrace_gethrtime() provides high-resolution timestamps with machine-dependent origin.
 * Hence its primary use is to specify intervals.
 */

dtrace_abs_to_nano(uint64_t elapsed)
	static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };

	/*
	 * If this is the first time we've run, get the timebase.
	 * We can use denom == 0 to indicate that sTimebaseInfo is
	 * uninitialised because it makes no sense to have a zero
	 * denominator in a fraction.
	 */
	if ( sTimebaseInfo.denom == 0 ) {
		(void) clock_timebase_info(&sTimebaseInfo);

	/*
	 * Convert to nanoseconds.
	 * return (elapsed * (uint64_t)sTimebaseInfo.numer)/(uint64_t)sTimebaseInfo.denom;
	 *
	 * Provided the final result is representable in 64 bits the following maneuver will
	 * deliver that result without intermediate overflow.
	 */
	if (sTimebaseInfo.denom == sTimebaseInfo.numer)
	else if (sTimebaseInfo.denom == 1)
		return elapsed * (uint64_t)sTimebaseInfo.numer;

		/* Decompose elapsed = eta32 * 2^32 + eps32: */
		uint64_t eta32 = elapsed >> 32;
		uint64_t eps32 = elapsed & 0x00000000ffffffffLL;

		uint32_t numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;

		/* Form product of elapsed64 (decomposed) and numer: */
		uint64_t mu64 = numer * eta32;
		uint64_t lambda64 = numer * eps32;

		/* Divide the constituents by denom: */
		uint64_t q32 = mu64/denom;
		uint64_t r32 = mu64 - (q32 * denom); /* mu64 % denom */

		return (q32 << 32) + ((r32 << 32) + lambda64)/denom;
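		/*
		 * Why the rearrangement avoids intermediate overflow (algebra on
		 * the variables above):
		 *
		 *	elapsed * numer / denom
		 *	  = (eta32 * 2^32 + eps32) * numer / denom
		 *	  = (mu64 * 2^32 + lambda64) / denom
		 *	  = ((q32 * denom + r32) * 2^32 + lambda64) / denom
		 *	  = q32 * 2^32 + (r32 * 2^32 + lambda64) / denom
		 *
		 * which is exactly (q32 << 32) + ((r32 << 32) + lambda64)/denom.
		 * Each factor is at most 32 bits wide, so, as the comment above
		 * notes, the intermediates stay within 64 bits whenever the final
		 * result does.
		 */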
dtrace_gethrtime(void)
	static uint64_t start = 0;

		start = mach_absolute_time();

	return dtrace_abs_to_nano(mach_absolute_time() - start);
/*
 * Atomicity and synchronization
 */
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
	if (OSCompareAndSwap( cmp, new, (unsigned long *)target ))

	return ~cmp; /* Must return something *other* than cmp */

dtrace_casptr(void *target, void *cmp, void *new)
#if defined(__LP64__)
#error dtrace_casptr implementation missing for LP64
	if (OSCompareAndSwap( (uint32_t)cmp, (uint32_t)new, (unsigned long *)target ))

	return (void *)(~(uintptr_t)cmp); /* Must return something *other* than cmp */
/*
 * Interrupt manipulation
 */
dtrace_interrupt_disable(void)
	return (dtrace_icookie_t)ml_set_interrupts_enabled(FALSE);

dtrace_interrupt_enable(dtrace_icookie_t reenable)
	(void)ml_set_interrupts_enabled((boolean_t)reenable);

dtrace_sync_func(void) {}

/*
 * dtrace_sync() is not called from probe context.
 */
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
/*
 * The dtrace_copyin/out/instr and dtrace_fuword* routines can be called from probe context.
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

dtrace_copycheck(user_addr_t uaddr, uintptr_t kaddr, size_t size)
#pragma unused(kaddr)
	vm_offset_t recover = dtrace_set_thread_recover( current_thread(), 0 ); /* Snare any extant recovery point. */
	dtrace_set_thread_recover( current_thread(), recover ); /* Put it back. We *must not* re-enter and overwrite. */

	ASSERT(kaddr + size >= kaddr);

	if (ml_at_interrupt_context() ||	/* Avoid possible copyio page fault on int stack, which panics! */
	    0 != recover ||			/* Avoid reentrancy into copyio facility. */
	    uaddr + size < uaddr ||		/* Avoid address wrap. */
	    KERN_FAILURE == dtrace_copyio_preflight(uaddr))	/* Machine specific setup/constraints. */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len)
	if (dtrace_copycheck( src, dst, len )) {
		if (copyin((const user_addr_t)src, (char *)dst, (vm_size_t)len)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;

		dtrace_copyio_postflight(src);
dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len)
	if (dtrace_copycheck( src, dst, len )) {
		/* copyin as many as 'len' bytes. */
		int error = copyinstr((const user_addr_t)src, (char *)dst, (vm_size_t)len, &actual);

		/*
		 * ENAMETOOLONG is returned when 'len' bytes have been copied in but the NUL terminator was
		 * not encountered. That does not require raising CPU_DTRACE_BADADDR, and we press on.
		 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
		 */
		if (error && error != ENAMETOOLONG) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = src;

		dtrace_copyio_postflight(src);
dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len)
	if (dtrace_copycheck( dst, src, len )) {
		if (copyout((const void *)src, dst, (vm_size_t)len)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;

		dtrace_copyio_postflight(dst);
dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len)
	if (dtrace_copycheck( dst, src, len )) {
		/*
		 * ENAMETOOLONG is returned when 'len' bytes have been copied out but the NUL terminator was
		 * not encountered. We raise CPU_DTRACE_BADADDR in that case.
		 * Note that we do *not* stuff a NUL terminator when returning ENAMETOOLONG, that's left
		 */
		if (copyoutstr((const void *)src, dst, (size_t)len, &actual)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = dst;

		dtrace_copyio_postflight(dst);
dtrace_fuword8(user_addr_t uaddr)
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;

		dtrace_copyio_postflight(uaddr);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

dtrace_fuword16(user_addr_t uaddr)
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;

		dtrace_copyio_postflight(uaddr);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

dtrace_fuword32(user_addr_t uaddr)
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;

		dtrace_copyio_postflight(uaddr);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

dtrace_fuword64(user_addr_t uaddr)
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	if (dtrace_copycheck( uaddr, (uintptr_t)&ret, sizeof(ret))) {
		if (copyin((const user_addr_t)uaddr, (char *)&ret, sizeof(ret))) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;

		dtrace_copyio_postflight(uaddr);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
/*
 * Emulation of Solaris fuword / suword
 * Called from the fasttrap provider, so the use of copyin/out requires fewer safeguards.
 */
fuword8(user_addr_t uaddr, uint8_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint8_t)) != 0) {

fuword16(user_addr_t uaddr, uint16_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint16_t)) != 0) {

fuword32(user_addr_t uaddr, uint32_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t)) != 0) {

fuword64(user_addr_t uaddr, uint64_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t)) != 0) {

fuword8_noerr(user_addr_t uaddr, uint8_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint8_t))) {

fuword16_noerr(user_addr_t uaddr, uint16_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint16_t))) {

fuword32_noerr(user_addr_t uaddr, uint32_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint32_t))) {

fuword64_noerr(user_addr_t uaddr, uint64_t *value)
	if (copyin((const user_addr_t)uaddr, (char *)value, sizeof(uint64_t))) {

suword64(user_addr_t addr, uint64_t value)
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {

suword32(user_addr_t addr, uint32_t value)
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {

suword16(user_addr_t addr, uint16_t value)
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {

suword8(user_addr_t addr, uint8_t value)
	if (copyout((const void *)&value, addr, sizeof(value)) != 0) {
extern boolean_t dtrace_tally_fault(user_addr_t);

dtrace_tally_fault(user_addr_t uaddr)
	DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
	cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
	return( DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT) ? TRUE : FALSE );

dtrace_vpanic(const char *format, va_list alist)
	vuprintf( format, alist );
	panic("dtrace_vpanic");
extern int prf(const char *, va_list, int, struct tty *); /* bsd/kern/subr_prf.h */

vuprintf(const char *format, va_list ap)
	return prf(format, ap, TOTTY, NULL);

/* Not called from probe context */
void cmn_err( int level, const char *format, ... )
#pragma unused(level)
	va_start(alist, format);
	vuprintf(format, alist);
/*
 * 2002-01-24 gvdl Initial implementation of strstr
 */

__private_extern__ char *
strstr(const char *in, const char *str)
		return (char *) in;	// Trivial empty string case

	} while (strncmp(in, str, len) != 0);

	return (char *) (in - 1);
dtrace_caller(int ignore)
#pragma unused(ignore)
	return -1; /* Just as in Solaris dtrace_asm.s */

dtrace_getstackdepth(int aframes)
	struct frame *fp = (struct frame *)dtrace_getfp();
	struct frame *nextfp, *minfp, *stacktop;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE);

		nextfp = *(struct frame **)fp;

		if (nextfp <= minfp || nextfp >= stacktop) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + KERNEL_STACK_SIZE);

	if (depth <= aframes)

	return (depth - aframes);
dtrace_vtime_enable(void) {}

dtrace_vtime_disable(void) {}

#else /* else ! CONFIG_DTRACE */

#include <sys/types.h>
#include <mach/vm_types.h>
#include <mach/kmod.h>

/*
 * This exists to prevent build errors when dtrace is unconfigured.
 */

kern_return_t _dtrace_register_anon_DOF(char *, unsigned char *, uint32_t);

kern_return_t _dtrace_register_anon_DOF(char *arg1, unsigned char *arg2, uint32_t arg3) {
#pragma unused(arg1, arg2, arg3)
	return KERN_FAILURE;

#endif /* CONFIG_DTRACE */