4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */
30 #define _KERNEL /* Solaris vs. Darwin */
34 #include <kern/cpu_data.h>
35 #include <kern/thread.h>
36 #include <kern/assert.h>
37 #include <mach/thread_status.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
43 #include <sys/ioctl.h>
45 #include <sys/fcntl.h>
46 #include <miscfs/devfs/devfs.h>
48 #include <sys/dtrace.h>
49 #include <sys/dtrace_impl.h>
51 #include <sys/dtrace_glue.h>
53 #include <machine/pal_routines.h>
55 #if defined(__x86_64__)
56 extern x86_saved_state_t
*find_kern_regs(thread_t
);
57 #elif defined (__arm__) || defined(__arm64__)
58 extern struct arm_saved_state
*find_kern_regs(thread_t
);
60 #error Unknown architecture
64 #define ASSERT(x) do {} while(0)
66 extern void profile_init(void);
68 static dev_info_t
*profile_devi
;
69 static dtrace_provider_id_t profile_id
;
72 * Regardless of platform, the stack frames look like this in the case of the
81 * On x86, there are five frames from the generic interrupt code; further, the
82 * interrupted instruction appears as its own stack frame, giving us a total of
85 * On SPARC, the picture is further complicated because the compiler
86 * optimizes away tail-calls -- so the following frames are optimized away:
91 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
92 * frame cannot be tail-call eliminated, yielding four frames in this case.
94 * All of the above constraints lead to the mess below. Yes, the profile
95 * provider should ideally figure this out on-the-fly by hitting one of its own
96 * probes and then walking its own stack trace. This is complicated, however,
97 * and the static definition doesn't seem to be overly brittle. Still, we
98 * allow for a manual override in case we get it completely wrong.
101 #if defined(__x86_64__)
102 #define PROF_ARTIFICIAL_FRAMES 9
103 #elif defined(__arm__) || defined(__arm64__)
104 #define PROF_ARTIFICIAL_FRAMES 8
106 #error Unknown architecture
109 #define PROF_NAMELEN 15
111 #define PROF_PROFILE 0
113 #define PROF_PREFIX_PROFILE "profile-"
114 #define PROF_PREFIX_TICK "tick-"
116 typedef struct profile_probe
{
117 char prof_name
[PROF_NAMELEN
];
120 hrtime_t prof_interval
;
121 cyclic_id_t prof_cyclic
;
124 typedef struct profile_probe_percpu
{
125 hrtime_t profc_expected
;
126 hrtime_t profc_interval
;
127 profile_probe_t
*profc_probe
;
128 } profile_probe_percpu_t
;
130 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
131 int profile_aframes
= 0; /* override */
133 static int profile_rates
[] = {
134 97, 199, 499, 997, 1999,
140 static int profile_ticks
[] = {
141 1, 10, 100, 500, 1000,
/*
 * profile_max defines the upper bound on the number of profile probes that
 * can exist (this is to prevent malicious or clumsy users from exhausting
 * system resources by creating a slew of profile probes). At mod load time,
 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
 * present in the profile.conf file.
 */
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */

/* Ceiling on live probes; assigned in profile_attach(). */
static uint32_t profile_max;

/* Count of live probes; adjusted atomically by profile_create()/profile_destroy(). */
static uint32_t profile_total;
158 profile_fire(void *arg
)
160 profile_probe_percpu_t
*pcpu
= arg
;
161 profile_probe_t
*prof
= pcpu
->profc_probe
;
164 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
165 pcpu
->profc_expected
+= pcpu
->profc_interval
;
167 #if defined(__x86_64__)
168 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
170 if (NULL
!= kern_regs
) {
171 /* Kernel was interrupted. */
172 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, late
, 0, 0);
175 pal_register_cache_state(current_thread(), VALID
);
176 /* Possibly a user interrupt */
177 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
179 if (NULL
== tagged_regs
) {
180 /* Too bad, so sad, no useful interrupt state. */
181 dtrace_probe(prof
->prof_id
, 0xcafebabe,
182 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
183 } else if (is_saved_state64(tagged_regs
)) {
184 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
186 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, late
, 0, 0);
188 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
190 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, late
, 0, 0);
193 #elif defined(__arm__)
195 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
197 // We should only come in here from interrupt context, so we should always have valid kernel regs
198 assert(NULL
!= arm_kern_regs
);
200 if (arm_kern_regs
->cpsr
& 0xF) {
201 /* Kernel was interrupted. */
202 dtrace_probe(prof
->prof_id
, arm_kern_regs
->pc
, 0x0, late
, 0, 0);
204 /* Possibly a user interrupt */
205 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
207 if (NULL
== arm_user_regs
) {
208 /* Too bad, so sad, no useful interrupt state. */
209 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
211 dtrace_probe(prof
->prof_id
, 0x0, arm_user_regs
->pc
, late
, 0, 0);
215 #elif defined(__arm64__)
217 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
219 // We should only come in here from interrupt context, so we should always have valid kernel regs
220 assert(NULL
!= arm_kern_regs
);
222 if (saved_state64(arm_kern_regs
)->cpsr
& 0xF) {
223 /* Kernel was interrupted. */
224 dtrace_probe(prof
->prof_id
, saved_state64(arm_kern_regs
)->pc
, 0x0, late
, 0, 0);
226 /* Possibly a user interrupt */
227 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
229 if (NULL
== arm_user_regs
) {
230 /* Too bad, so sad, no useful interrupt state. */
231 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
233 dtrace_probe(prof
->prof_id
, 0x0, get_saved_state_pc(arm_user_regs
), late
, 0, 0);
238 #error Unknown architecture
243 profile_tick(void *arg
)
245 profile_probe_t
*prof
= arg
;
247 #if defined(__x86_64__)
248 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
250 if (NULL
!= kern_regs
) {
251 /* Kernel was interrupted. */
252 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, 0, 0, 0);
254 pal_register_cache_state(current_thread(), VALID
);
255 /* Possibly a user interrupt */
256 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
258 if (NULL
== tagged_regs
) {
259 /* Too bad, so sad, no useful interrupt state. */
260 dtrace_probe(prof
->prof_id
, 0xcafebabe,
261 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
262 } else if (is_saved_state64(tagged_regs
)) {
263 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
265 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
267 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
269 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
272 #elif defined(__arm__)
274 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
276 if (NULL
!= arm_kern_regs
) {
277 /* Kernel was interrupted. */
278 dtrace_probe(prof
->prof_id
, arm_kern_regs
->pc
, 0x0, 0, 0, 0);
280 /* Possibly a user interrupt */
281 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
283 if (NULL
== arm_user_regs
) {
284 /* Too bad, so sad, no useful interrupt state. */
285 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
287 dtrace_probe(prof
->prof_id
, 0x0, arm_user_regs
->pc
, 0, 0, 0);
291 #elif defined(__arm64__)
293 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
295 if (NULL
!= arm_kern_regs
) {
296 /* Kernel was interrupted. */
297 dtrace_probe(prof
->prof_id
, saved_state64(arm_kern_regs
)->pc
, 0x0, 0, 0, 0);
299 /* Possibly a user interrupt */
300 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
302 if (NULL
== arm_user_regs
) {
303 /* Too bad, so sad, no useful interrupt state. */
304 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
306 dtrace_probe(prof
->prof_id
, 0x0, get_saved_state_pc(arm_user_regs
), 0, 0, 0);
312 #error Unknown architecture
317 profile_create(hrtime_t interval
, const char *name
, int kind
)
319 profile_probe_t
*prof
;
321 if (interval
< profile_interval_min
)
324 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
327 atomic_add_32(&profile_total
, 1);
328 if (profile_total
> profile_max
) {
329 atomic_add_32(&profile_total
, -1);
333 if (PROF_TICK
== kind
)
334 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
336 prof
= kmem_zalloc(sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
), KM_SLEEP
);
338 (void) strlcpy(prof
->prof_name
, name
, sizeof(prof
->prof_name
));
339 prof
->prof_interval
= interval
;
340 prof
->prof_cyclic
= CYCLIC_NONE
;
341 prof
->prof_kind
= kind
;
342 prof
->prof_id
= dtrace_probe_create(profile_id
,
344 profile_aframes
? profile_aframes
: PROF_ARTIFICIAL_FRAMES
, prof
);
349 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
351 #pragma unused(arg) /* __APPLE__ */
352 int i
, j
, rate
, kind
;
353 hrtime_t val
= 0, mult
= 1, len
;
354 const char *name
, *suffix
= NULL
;
360 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
361 { PROF_PREFIX_TICK
, PROF_TICK
},
369 { "ns", NANOSEC
/ NANOSEC
},
370 { "nsec", NANOSEC
/ NANOSEC
},
371 { "us", NANOSEC
/ MICROSEC
},
372 { "usec", NANOSEC
/ MICROSEC
},
373 { "ms", NANOSEC
/ MILLISEC
},
374 { "msec", NANOSEC
/ MILLISEC
},
375 { "s", NANOSEC
/ SEC
},
376 { "sec", NANOSEC
/ SEC
},
377 { "m", NANOSEC
* (hrtime_t
)60 },
378 { "min", NANOSEC
* (hrtime_t
)60 },
379 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
380 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
381 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
382 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
388 char n
[PROF_NAMELEN
];
391 * If no description was provided, provide all of our probes.
393 for (i
= 0; i
< (int)(sizeof (profile_rates
) / sizeof (int)); i
++) {
394 if ((rate
= profile_rates
[i
]) == 0)
397 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
398 PROF_PREFIX_PROFILE
, rate
);
399 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
402 for (i
= 0; i
< (int)(sizeof (profile_ticks
) / sizeof (int)); i
++) {
403 if ((rate
= profile_ticks
[i
]) == 0)
406 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
407 PROF_PREFIX_TICK
, rate
);
408 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
414 name
= desc
->dtpd_name
;
416 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
417 len
= strlen(types
[i
].prefix
);
419 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
424 if (types
[i
].prefix
== NULL
)
427 kind
= types
[i
].kind
;
428 j
= strlen(name
) - len
;
431 * We need to start before any time suffix.
433 for (j
= strlen(name
); j
>= len
; j
--) {
434 if (name
[j
] >= '0' && name
[j
] <= '9')
439 ASSERT(suffix
!= NULL
);
442 * Now determine the numerical value present in the probe name.
444 for (; j
>= len
; j
--) {
445 if (name
[j
] < '0' || name
[j
] > '9')
448 val
+= (name
[j
] - '0') * mult
;
449 mult
*= (hrtime_t
)10;
456 * Look-up the suffix to determine the multiplier.
458 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
459 /* APPLE NOTE: Darwin employs size bounded string operations */
460 if (strncasecmp(suffixes
[i
].name
, suffix
, strlen(suffixes
[i
].name
) + 1) == 0) {
461 mult
= suffixes
[i
].mult
;
466 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
471 * The default is frequency-per-second.
478 profile_create(val
, name
, kind
);
483 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
485 #pragma unused(arg,id) /* __APPLE__ */
486 profile_probe_t
*prof
= parg
;
488 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
490 if (prof
->prof_kind
== PROF_TICK
)
491 kmem_free(prof
, sizeof (profile_probe_t
));
493 kmem_free(prof
, sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
));
495 ASSERT(profile_total
>= 1);
496 atomic_add_32(&profile_total
, -1);
501 profile_online(void *arg
, dtrace_cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
503 #pragma unused(cpu) /* __APPLE__ */
504 profile_probe_t
*prof
= arg
;
505 profile_probe_percpu_t
*pcpu
;
507 pcpu
= ((profile_probe_percpu_t
*)(&(prof
[1]))) + cpu_number();
508 pcpu
->profc_probe
= prof
;
510 hdlr
->cyh_func
= profile_fire
;
511 hdlr
->cyh_arg
= pcpu
;
512 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
514 when
->cyt_interval
= prof
->prof_interval
;
515 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
517 pcpu
->profc_expected
= when
->cyt_when
;
518 pcpu
->profc_interval
= when
->cyt_interval
;
523 profile_offline(void *arg
, dtrace_cpu_t
*cpu
, void *oarg
)
525 profile_probe_percpu_t
*pcpu
= oarg
;
527 ASSERT(pcpu
->profc_probe
== arg
);
528 #pragma unused(pcpu,arg,cpu) /* __APPLE__ */
533 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
535 #pragma unused(arg,id) /* __APPLE__ */
536 profile_probe_t
*prof
= parg
;
537 cyc_omni_handler_t omni
;
541 ASSERT(prof
->prof_interval
!= 0);
542 ASSERT(MUTEX_HELD(&cpu_lock
));
544 if (prof
->prof_kind
== PROF_TICK
) {
545 hdlr
.cyh_func
= profile_tick
;
547 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
549 when
.cyt_interval
= prof
->prof_interval
;
550 #if !defined(__APPLE__)
551 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
554 #endif /* __APPLE__ */
556 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
557 omni
.cyo_online
= profile_online
;
558 omni
.cyo_offline
= profile_offline
;
562 if (prof
->prof_kind
== PROF_TICK
) {
563 prof
->prof_cyclic
= cyclic_timer_add(&hdlr
, &when
);
565 prof
->prof_cyclic
= (cyclic_id_t
)cyclic_add_omni(&omni
); /* cast puns cyclic_id_list_t with cyclic_id_t */
573 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
575 profile_probe_t
*prof
= parg
;
577 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
578 ASSERT(MUTEX_HELD(&cpu_lock
));
580 #pragma unused(arg,id)
581 if (prof
->prof_kind
== PROF_TICK
) {
582 cyclic_timer_remove(prof
->prof_cyclic
);
584 cyclic_remove_omni((cyclic_id_list_t
)prof
->prof_cyclic
); /* cast puns cyclic_id_list_t with cyclic_id_t */
586 prof
->prof_cyclic
= CYCLIC_NONE
;
590 profile_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
, int aframes
)
592 #pragma unused(arg, id, parg, argno, aframes)
594 * All the required arguments for the profile probe are passed directly
595 * to dtrace_probe, and we do not go through dtrace_getarg which doesn't
596 * know how to hop to the kernel stack from the interrupt stack like
603 profile_getargdesc(void *arg
, dtrace_id_t id
, void *parg
, dtrace_argdesc_t
*desc
)
605 #pragma unused(arg, id)
606 profile_probe_t
*prof
= parg
;
607 const char *argdesc
= NULL
;
608 switch (desc
->dtargd_ndx
) {
613 argdesc
= "user_addr_t";
616 if (prof
->prof_kind
== PROF_PROFILE
) {
617 argdesc
= "hrtime_t";
622 strlcpy(desc
->dtargd_native
, argdesc
, DTRACE_ARGTYPELEN
);
625 desc
->dtargd_ndx
= DTRACE_ARGNONE
;
630 * APPLE NOTE: profile_usermode call not supported.
633 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
635 #pragma unused(arg,id,parg)
636 return 1; /* XXX_BOGUS */
639 static dtrace_pattr_t profile_attr
= {
640 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
641 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
642 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
643 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
644 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
647 static dtrace_pops_t profile_pops
= {
661 profile_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
667 return (DDI_SUCCESS
);
669 return (DDI_FAILURE
);
672 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
673 DDI_PSEUDO
, 0) == DDI_FAILURE
||
674 dtrace_register("profile", &profile_attr
,
675 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
676 &profile_pops
, NULL
, &profile_id
) != 0) {
677 ddi_remove_minor_node(devi
, NULL
);
678 return (DDI_FAILURE
);
681 profile_max
= PROFILE_MAX_DEFAULT
;
683 ddi_report_dev(devi
);
685 return (DDI_SUCCESS
);
689 * APPLE NOTE: profile_detach not implemented
691 #if !defined(__APPLE__)
693 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
699 return (DDI_SUCCESS
);
701 return (DDI_FAILURE
);
704 if (dtrace_unregister(profile_id
) != 0)
705 return (DDI_FAILURE
);
707 ddi_remove_minor_node(devi
, NULL
);
708 return (DDI_SUCCESS
);
710 #endif /* __APPLE__ */
712 d_open_t _profile_open
;
714 int _profile_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
716 #pragma unused(dev,flags,devtype,p)
720 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
723 * A struct describing which functions will get invoked for certain
726 static struct cdevsw profile_cdevsw
=
728 _profile_open
, /* open */
729 eno_opcl
, /* close */
730 eno_rdwrt
, /* read */
731 eno_rdwrt
, /* write */
732 eno_ioctl
, /* ioctl */
733 (stop_fcn_t
*)nulldev
, /* stop */
734 (reset_fcn_t
*)nulldev
, /* reset */
736 eno_select
, /* select */
738 eno_strat
, /* strategy */
744 static int gProfileInited
= 0;
746 void profile_init( void )
748 if (0 == gProfileInited
)
750 int majdevno
= cdevsw_add(PROFILE_MAJOR
, &profile_cdevsw
);
753 printf("profile_init: failed to allocate a major number!\n");
758 profile_attach( (dev_info_t
*)(uintptr_t)majdevno
, DDI_ATTACH
);
762 panic("profile_init: called twice!\n");