4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */
30 #define _KERNEL /* Solaris vs. Darwin */
34 #include <kern/cpu_data.h>
35 #include <kern/thread.h>
36 #include <kern/assert.h>
37 #include <mach/thread_status.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
43 #include <sys/ioctl.h>
45 #include <sys/fcntl.h>
46 #include <miscfs/devfs/devfs.h>
48 #include <sys/dtrace.h>
49 #include <sys/dtrace_impl.h>
51 #include <sys/dtrace_glue.h>
53 #include <machine/pal_routines.h>
/*
 * Architecture-specific declaration of find_kern_regs(). It takes a thread_t
 * and returns a pointer to x86_saved_state_t on x86_64, or to
 * struct arm_saved_state on arm/arm64. Any other architecture is rejected at
 * compile time by the #error below.
 */
55 #if defined(__x86_64__)
56 extern x86_saved_state_t
*find_kern_regs(thread_t
);
57 #elif defined (__arm__) || defined(__arm64__)
58 extern struct arm_saved_state
*find_kern_regs(thread_t
);
60 #error Unknown architecture
/*
 * ASSERT(x) expands to an empty do/while, so every ASSERT() in this file
 * compiles to nothing and its argument is never evaluated.
 */
64 #define ASSERT(x) do {} while(0)
/* Entry point defined at the bottom of this file. */
66 extern void profile_init(void);
/*
 * Provider id; its address is passed to dtrace_register() in
 * profile_attach(), and it is later handed to dtrace_probe_lookup() and
 * dtrace_probe_create().
 */
68 static dtrace_provider_id_t profile_id
;
71 * Regardless of platform, the stack frames look like this in the case of the
80 * On x86, there are five frames from the generic interrupt code; further, the
81 * interrupted instruction appears as its own stack frame, giving us a total of
84 * On SPARC, the picture is further complicated because the compiler
85 * optimizes away tail-calls -- so the following frames are optimized away:
90 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
91 * frame cannot be tail-call eliminated, yielding four frames in this case.
93 * All of the above constraints lead to the mess below. Yes, the profile
94 * provider should ideally figure this out on-the-fly by hitting one of its own
95 * probes and then walking its own stack trace. This is complicated, however,
96 * and the static definition doesn't seem to be overly brittle. Still, we
97 * allow for a manual override in case we get it completely wrong.
/*
 * PROF_ARTIFICIAL_FRAMES is the per-architecture default frame count that
 * profile_create() passes to dtrace_probe_create() when profile_aframes is 0.
 */
100 #if defined(__x86_64__)
101 #define PROF_ARTIFICIAL_FRAMES 9
102 #elif defined(__arm__) || defined(__arm64__)
103 #define PROF_ARTIFICIAL_FRAMES 8
105 #error Unknown architecture
/* Size in bytes of the prof_name buffer and of the scratch name buffer in profile_provide(). */
108 #define PROF_NAMELEN 15
/* Probe kind for per-CPU "profile-" probes (compared against prof_kind). */
110 #define PROF_PROFILE 0
/* Probe-name prefixes that profile_provide() matches to pick the probe kind. */
112 #define PROF_PREFIX_PROFILE "profile-"
113 #define PROF_PREFIX_TICK "tick-"
/*
 * Per-probe state for the profile provider. The fields visible here are the
 * probe name buffer (PROF_NAMELEN bytes), the firing interval, and the cyclic
 * handle. The handle is CYCLIC_NONE after profile_create() and again after
 * profile_disable().
 * NOTE(review): other code in this file also reads prof_id and prof_kind, and
 * the closing typedef name is not visible in this listing -- confirm against
 * the complete definition.
 */
115 typedef struct profile_probe
{
116 char prof_name
[PROF_NAMELEN
];
119 hrtime_t prof_interval
;
120 cyclic_id_t prof_cyclic
;
/*
 * Per-CPU state for a PROF_PROFILE probe. profile_create() allocates NCPU of
 * these directly after the profile_probe_t, and profile_online() fills one in.
 *   profc_expected - time at which the next firing is expected; profile_fire()
 *                    subtracts it from the current time to compute lateness.
 *   profc_interval - firing interval, added to profc_expected on each firing.
 *   profc_probe    - back-pointer to the owning profile_probe_t.
 */
123 typedef struct profile_probe_percpu
{
124 hrtime_t profc_expected
;
125 hrtime_t profc_interval
;
126 profile_probe_t
*profc_probe
;
127 } profile_probe_percpu_t
;
/* Smallest interval profile_create() accepts; it compares the requested interval against this. */
129 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
/* When non-zero, used in place of PROF_ARTIFICIAL_FRAMES in profile_create(). */
130 int profile_aframes
= 0; /* override */
/*
 * Rates used by profile_provide() when no probe description is given: each
 * entry names a "profile-<rate>" probe created with interval NANOSEC / rate.
 */
132 static int profile_rates
[] = {
133 97, 199, 499, 997, 1999,
/*
 * Same as above for "tick-<rate>" probes.
 */
139 static int profile_ticks
[] = {
140 1, 10, 100, 500, 1000,
146 * profile_max defines the upper bound on the number of profile probes that
147 * can exist (this is to prevent malicious or clumsy users from exhausting
148 * system resources by creating a slew of profile probes). At mod load time,
149 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
150 * present in the profile.conf file.
/* Value assigned to profile_max by profile_attach(). */
152 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
/* Upper bound checked by profile_create() after it increments profile_total. */
153 static uint32_t profile_max
; /* maximum number of profile probes */
/* Incremented in profile_create() and decremented in profile_destroy() via atomic_add_32(). */
154 static uint32_t profile_total
; /* current number of profile probes */
/*
 * Cyclic handler for per-CPU "profile-" probes; installed as cyh_func by
 * profile_online().
 *
 * arg is the profile_probe_percpu_t for the current CPU. The handler computes
 * how late it is running (current dtrace_gethrtime() minus profc_expected),
 * advances profc_expected by profc_interval, and then calls dtrace_probe()
 * with a program counter taken from the interrupted context:
 *   - interrupted kernel code: the kernel PC is the first value after the
 *     probe id and the second value is 0;
 *   - interrupted user code: the first value is 0 and the user PC is second;
 *   - no usable register state: the placeholder 0xcafebabe is passed first.
 * In every case the lateness is passed as the third value.
 *
 * NOTE(review): this listing omits lines (return type, braces, the
 * declaration of 'late', and the else keywords between the branches);
 * the branch structure described here is inferred from the inline comments.
 */
157 profile_fire(void *arg
)
159 profile_probe_percpu_t
*pcpu
= arg
;
160 profile_probe_t
*prof
= pcpu
->profc_probe
;
/* Lateness relative to the expected firing time, then schedule the next expectation. */
163 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
164 pcpu
->profc_expected
+= pcpu
->profc_interval
;
166 #if defined(__x86_64__)
167 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
169 if (NULL
!= kern_regs
) {
170 /* Kernel was interrupted. */
171 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, late
, 0, 0);
174 pal_register_cache_state(current_thread(), VALID
);
175 /* Possibly a user interrupt */
176 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
178 if (NULL
== tagged_regs
) {
179 /* Too bad, so sad, no useful interrupt state. */
180 dtrace_probe(prof
->prof_id
, 0xcafebabe,
181 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
182 } else if (is_saved_state64(tagged_regs
)) {
/* 64-bit user state: report rip. */
183 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
185 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, late
, 0, 0);
/* 32-bit user state: report eip. */
187 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
189 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, late
, 0, 0);
192 #elif defined(__arm__)
194 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
196 // We should only come in here from interrupt context, so we should always have valid kernel regs
197 assert(NULL
!= arm_kern_regs
);
/* Non-zero low four cpsr bits are treated as "kernel was interrupted". */
199 if (arm_kern_regs
->cpsr
& 0xF) {
200 /* Kernel was interrupted. */
201 dtrace_probe(prof
->prof_id
, arm_kern_regs
->pc
, 0x0, late
, 0, 0);
203 /* Possibly a user interrupt */
204 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
206 if (NULL
== arm_user_regs
) {
207 /* Too bad, so sad, no useful interrupt state. */
208 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
210 dtrace_probe(prof
->prof_id
, 0x0, arm_user_regs
->pc
, late
, 0, 0);
214 #elif defined(__arm64__)
216 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
218 // We should only come in here from interrupt context, so we should always have valid kernel regs
219 assert(NULL
!= arm_kern_regs
);
/* Same test as the 32-bit ARM branch, applied to the 64-bit saved state. */
221 if (saved_state64(arm_kern_regs
)->cpsr
& 0xF) {
222 /* Kernel was interrupted. */
223 dtrace_probe(prof
->prof_id
, saved_state64(arm_kern_regs
)->pc
, 0x0, late
, 0, 0);
225 /* Possibly a user interrupt */
226 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
228 if (NULL
== arm_user_regs
) {
229 /* Too bad, so sad, no useful interrupt state. */
230 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
232 dtrace_probe(prof
->prof_id
, 0x0, get_saved_state_pc(arm_user_regs
), late
, 0, 0);
237 #error Unknown architecture
/*
 * Cyclic handler for "tick-" probes; installed as cyh_func by
 * profile_enable() when prof_kind is PROF_TICK.
 *
 * arg is the profile_probe_t itself. The handler calls dtrace_probe() with
 * the interrupted kernel PC as the first value after the probe id, or with 0
 * first and the interrupted user PC second, or with the placeholder
 * 0xcafebabe when no user register state is available. Unlike
 * profile_fire(), no lateness is reported: the third value is always 0.
 *
 * NOTE(review): on arm/arm64 this function decides "kernel was interrupted"
 * from a non-NULL find_kern_regs() result, whereas profile_fire() asserts
 * the pointer is non-NULL and tests the cpsr mode bits. Confirm that the
 * difference is intended.
 * NOTE(review): this listing omits lines (return type, braces, else
 * keywords); the branch structure is inferred from the inline comments.
 */
242 profile_tick(void *arg
)
244 profile_probe_t
*prof
= arg
;
246 #if defined(__x86_64__)
247 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
249 if (NULL
!= kern_regs
) {
250 /* Kernel was interrupted. */
251 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, 0, 0, 0);
253 pal_register_cache_state(current_thread(), VALID
);
254 /* Possibly a user interrupt */
255 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
257 if (NULL
== tagged_regs
) {
258 /* Too bad, so sad, no useful interrupt state. */
259 dtrace_probe(prof
->prof_id
, 0xcafebabe,
260 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
261 } else if (is_saved_state64(tagged_regs
)) {
/* 64-bit user state: report rip. */
262 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
264 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
/* 32-bit user state: report eip. */
266 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
268 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
271 #elif defined(__arm__)
273 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
275 if (NULL
!= arm_kern_regs
) {
276 /* Kernel was interrupted. */
277 dtrace_probe(prof
->prof_id
, arm_kern_regs
->pc
, 0x0, 0, 0, 0);
279 /* Possibly a user interrupt */
280 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
282 if (NULL
== arm_user_regs
) {
283 /* Too bad, so sad, no useful interrupt state. */
284 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
286 dtrace_probe(prof
->prof_id
, 0x0, arm_user_regs
->pc
, 0, 0, 0);
290 #elif defined(__arm64__)
292 arm_saved_state_t
*arm_kern_regs
= (arm_saved_state_t
*) find_kern_regs(current_thread());
294 if (NULL
!= arm_kern_regs
) {
295 /* Kernel was interrupted. */
296 dtrace_probe(prof
->prof_id
, saved_state64(arm_kern_regs
)->pc
, 0x0, 0, 0, 0);
298 /* Possibly a user interrupt */
299 arm_saved_state_t
*arm_user_regs
= (arm_saved_state_t
*)find_user_regs(current_thread());
301 if (NULL
== arm_user_regs
) {
302 /* Too bad, so sad, no useful interrupt state. */
303 dtrace_probe(prof
->prof_id
, 0xcafebabe, 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
305 dtrace_probe(prof
->prof_id
, 0x0, get_saved_state_pc(arm_user_regs
), 0, 0, 0);
311 #error Unknown architecture
/*
 * Create one profile-provider probe.
 *
 *   interval - firing interval; compared against profile_interval_min.
 *   name     - probe name; looked up first, then copied into prof_name
 *              (bounded by sizeof prof_name, i.e. PROF_NAMELEN).
 *   kind     - PROF_TICK or PROF_PROFILE; selects the allocation size.
 *
 * Visible steps: check the interval, check whether a probe of this name
 * already exists for this provider, bump profile_total and undo the bump if
 * it exceeds profile_max, allocate zeroed state, fill it in, and register
 * the probe with dtrace_probe_create().
 *
 * NOTE(review): the statements guarded by the three checks (presumably early
 * returns) are not visible in this listing -- confirm.
 */
316 profile_create(hrtime_t interval
, const char *name
, int kind
)
318 profile_probe_t
*prof
;
320 if (interval
< profile_interval_min
)
323 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
/* Reserve a slot first, then back out if the limit was exceeded. */
326 atomic_add_32(&profile_total
, 1);
327 if (profile_total
> profile_max
) {
328 atomic_add_32(&profile_total
, -1);
/*
 * Tick probes need only the base structure; the other allocation appends
 * NCPU per-CPU records, which profile_online() locates at &prof[1].
 * profile_destroy() frees with the same two sizes.
 */
332 if (PROF_TICK
== kind
)
333 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
335 prof
= kmem_zalloc(sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
), KM_SLEEP
);
337 (void) strlcpy(prof
->prof_name
, name
, sizeof(prof
->prof_name
));
338 prof
->prof_interval
= interval
;
339 prof
->prof_cyclic
= CYCLIC_NONE
;
340 prof
->prof_kind
= kind
;
/* profile_aframes, when non-zero, overrides the built-in frame count. */
341 prof
->prof_id
= dtrace_probe_create(profile_id
,
343 profile_aframes
? profile_aframes
: PROF_ARTIFICIAL_FRAMES
, prof
);
/*
 * Provider "provide" callback (installed as dtps_provide in profile_pops).
 *
 * Two modes are visible:
 *   1. Default probes: for each entry of profile_rates[] and profile_ticks[]
 *      a name "<prefix><rate>" is formatted into a PROF_NAMELEN buffer and
 *      profile_create() is called with interval NANOSEC / rate.
 *   2. Named probe: desc->dtpd_name is matched against the prefix table to
 *      pick the kind, a run of decimal digits after the prefix is converted
 *      to val, any trailing text is looked up case-insensitively in the
 *      suffix table to get a multiplier, and profile_create() is called with
 *      the resulting value.
 *
 * NOTE(review): this listing omits lines -- the declarations of the
 * types[]/suffixes[] tables, the test that selects between the two modes,
 * and the continue/break/return statements under the if tests. The comments
 * below describe only what is visible.
 */
348 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
350 #pragma unused(arg) /* __APPLE__ */
351 int i
, j
, rate
, kind
;
352 hrtime_t val
= 0, mult
= 1, len
;
353 const char *name
, *suffix
= NULL
;
/* Prefix table entries: probe-name prefix paired with probe kind. */
359 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
360 { PROF_PREFIX_TICK
, PROF_TICK
},
/* Suffix table entries: time-unit suffix paired with a multiplier that converts the unit to nanoseconds. */
368 { "ns", NANOSEC
/ NANOSEC
},
369 { "nsec", NANOSEC
/ NANOSEC
},
370 { "us", NANOSEC
/ MICROSEC
},
371 { "usec", NANOSEC
/ MICROSEC
},
372 { "ms", NANOSEC
/ MILLISEC
},
373 { "msec", NANOSEC
/ MILLISEC
},
374 { "s", NANOSEC
/ SEC
},
375 { "sec", NANOSEC
/ SEC
},
376 { "m", NANOSEC
* (hrtime_t
)60 },
377 { "min", NANOSEC
* (hrtime_t
)60 },
378 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
379 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
380 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
381 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
387 char n
[PROF_NAMELEN
];
390 * If no description was provided, provide all of our probes.
392 for (i
= 0; i
< (int)(sizeof (profile_rates
) / sizeof (int)); i
++) {
393 if ((rate
= profile_rates
[i
]) == 0)
396 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
397 PROF_PREFIX_PROFILE
, rate
);
398 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
401 for (i
= 0; i
< (int)(sizeof (profile_ticks
) / sizeof (int)); i
++) {
402 if ((rate
= profile_ticks
[i
]) == 0)
405 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
406 PROF_PREFIX_TICK
, rate
);
407 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
413 name
= desc
->dtpd_name
;
/* Find the prefix table entry whose prefix starts the probe name; len keeps that prefix's length. */
415 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
416 len
= strlen(types
[i
].prefix
);
418 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
423 if (types
[i
].prefix
== NULL
)
426 kind
= types
[i
].kind
;
/* NOTE(review): this value of j is overwritten by the loop initializer just below. */
427 j
= strlen(name
) - len
;
430 * We need to start before any time suffix.
/* Scan backwards from the terminating NUL towards the end of the prefix, testing for a digit. */
432 for (j
= strlen(name
); j
>= len
; j
--) {
433 if (name
[j
] >= '0' && name
[j
] <= '9')
438 ASSERT(suffix
!= NULL
);
441 * Now determine the numerical value present in the probe name.
/*
 * Digits are consumed right to left; mult is the current power of ten.
 * NOTE(review): no bound on the digit count is visible here, so a long digit
 * run could overflow val/mult -- confirm the name length limit upstream.
 */
443 for (; j
>= len
; j
--) {
444 if (name
[j
] < '0' || name
[j
] > '9')
447 val
+= (name
[j
] - '0') * mult
;
448 mult
*= (hrtime_t
)10;
455 * Look-up the suffix to determine the multiplier.
/* mult is reset to 0 here; it stays 0 unless a table entry matches. */
457 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
458 /* APPLE NOTE: Darwin employs size bounded string operations */
/* Comparing strlen + 1 bytes includes the terminating NUL, so only whole-string matches succeed. */
459 if (strncasecmp(suffixes
[i
].name
, suffix
, strlen(suffixes
[i
].name
) + 1) == 0) {
460 mult
= suffixes
[i
].mult
;
/* Reached the end of the table with a non-empty suffix: the suffix is unknown. */
465 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
470 * The default is frequency-per-second.
477 profile_create(val
, name
, kind
);
/*
 * Provider "destroy" callback (dtps_destroy in profile_pops).
 *
 * parg is the profile_probe_t allocated by profile_create(). The memory is
 * released with the same size that profile_create() used for the probe's
 * kind (base structure only for PROF_TICK, base plus NCPU per-CPU records
 * otherwise), and profile_total is decremented. arg and id are unused.
 * The ASSERT() lines are no-ops here because ASSERT is defined empty in
 * this file.
 */
482 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
484 #pragma unused(arg,id) /* __APPLE__ */
485 profile_probe_t
*prof
= parg
;
487 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
489 if (prof
->prof_kind
== PROF_TICK
)
490 kmem_free(prof
, sizeof (profile_probe_t
));
492 kmem_free(prof
, sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
));
494 ASSERT(profile_total
>= 1);
495 atomic_add_32(&profile_total
, -1);
/*
 * Omni-cyclic "online" callback (assigned to cyo_online in profile_enable()).
 *
 *   arg  - the profile_probe_t being enabled.
 *   cpu  - marked unused; the per-CPU slot is chosen with cpu_number().
 *   hdlr - filled in: handler profile_fire, handler argument the per-CPU
 *          record, level CY_HIGH_LEVEL.
 *   when - filled in: interval from prof_interval, first expiry at
 *          dtrace_gethrtime() + interval.
 *
 * The per-CPU record's expected time and interval are initialized from
 * 'when' so that profile_fire() can compute lateness.
 * NOTE(review): the slot index comes from cpu_number() rather than the 'cpu'
 * argument -- presumably this callback runs on the CPU being brought online;
 * confirm.
 */
500 profile_online(void *arg
, dtrace_cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
502 #pragma unused(cpu) /* __APPLE__ */
503 profile_probe_t
*prof
= arg
;
504 profile_probe_percpu_t
*pcpu
;
/* The per-CPU array starts immediately after the profile_probe_t (see the allocation in profile_create()). */
506 pcpu
= ((profile_probe_percpu_t
*)(&(prof
[1]))) + cpu_number();
507 pcpu
->profc_probe
= prof
;
509 hdlr
->cyh_func
= profile_fire
;
510 hdlr
->cyh_arg
= pcpu
;
511 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
513 when
->cyt_interval
= prof
->prof_interval
;
514 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
516 pcpu
->profc_expected
= when
->cyt_when
;
517 pcpu
->profc_interval
= when
->cyt_interval
;
/*
 * Omni-cyclic "offline" callback (assigned to cyo_offline in
 * profile_enable()). The only visible statement is an ASSERT that the
 * per-CPU record (oarg) points back at the probe (arg). ASSERT is defined
 * empty in this file, so no work is done here, and all parameters are
 * marked unused.
 */
522 profile_offline(void *arg
, dtrace_cpu_t
*cpu
, void *oarg
)
524 profile_probe_percpu_t
*pcpu
= oarg
;
526 ASSERT(pcpu
->profc_probe
== arg
);
527 #pragma unused(pcpu,arg,cpu) /* __APPLE__ */
/*
 * Provider "enable" callback (dtps_enable in profile_pops).
 *
 * parg is the profile_probe_t to arm. For PROF_TICK probes a single cyclic
 * timer is set up with profile_tick as handler at CY_HIGH_LEVEL and the
 * probe's interval, and added with cyclic_timer_add(). For PROF_PROFILE
 * probes an omni cyclic is added with profile_online/profile_offline as its
 * per-CPU callbacks. The resulting handle is stored in prof_cyclic.
 * arg and id are unused.
 *
 * NOTE(review): the declarations of hdlr/when, the handler-argument
 * assignments, the else keywords and the return statement are not visible in
 * this listing.
 */
532 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
534 #pragma unused(arg,id) /* __APPLE__ */
535 profile_probe_t
*prof
= parg
;
536 cyc_omni_handler_t omni
;
540 ASSERT(prof
->prof_interval
!= 0);
541 ASSERT(MUTEX_HELD(&cpu_lock
));
543 if (prof
->prof_kind
== PROF_TICK
) {
544 hdlr
.cyh_func
= profile_tick
;
546 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
548 when
.cyt_interval
= prof
->prof_interval
;
/* The absolute first-expiry time is computed here only on non-Apple builds. */
549 #if !defined(__APPLE__)
550 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
553 #endif /* __APPLE__ */
555 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
556 omni
.cyo_online
= profile_online
;
557 omni
.cyo_offline
= profile_offline
;
561 if (prof
->prof_kind
== PROF_TICK
) {
562 prof
->prof_cyclic
= cyclic_timer_add(&hdlr
, &when
);
564 prof
->prof_cyclic
= (cyclic_id_t
)cyclic_add_omni(&omni
); /* cast puns cyclic_id_list_t with cyclic_id_t */
/*
 * Provider "disable" callback (dtps_disable in profile_pops).
 *
 * Removes the cyclic that profile_enable() stored in prof_cyclic --
 * cyclic_timer_remove() for PROF_TICK probes, cyclic_remove_omni() for the
 * rest -- and resets prof_cyclic to CYCLIC_NONE. arg and id are unused.
 * The ASSERT() lines are no-ops because ASSERT is defined empty in this file.
 */
572 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
574 profile_probe_t
*prof
= parg
;
576 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
577 ASSERT(MUTEX_HELD(&cpu_lock
));
579 #pragma unused(arg,id)
580 if (prof
->prof_kind
== PROF_TICK
) {
581 cyclic_timer_remove(prof
->prof_cyclic
);
583 cyclic_remove_omni((cyclic_id_list_t
)prof
->prof_cyclic
); /* cast puns cyclic_id_list_t with cyclic_id_t */
585 prof
->prof_cyclic
= CYCLIC_NONE
;
/*
 * Provider "getargval" callback (dtps_getargval in profile_pops). Every
 * parameter is marked unused; per the comment below, probe arguments are
 * supplied directly through dtrace_probe() instead of being fetched here.
 * NOTE(review): the return statement is not visible in this listing.
 */
589 profile_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
, int aframes
)
591 #pragma unused(arg, id, parg, argno, aframes)
593 * All the required arguments for the profile probe are passed directly
594 * to dtrace_probe, and we do not go through dtrace_getarg which doesn't
595 * know how to hop to the kernel stack from the interrupt stack like
/*
 * Provider "getargdesc" callback (dtps_getargdesc in profile_pops).
 *
 * Chooses a native type name based on desc->dtargd_ndx -- "user_addr_t", or
 * "hrtime_t" (the latter only for PROF_PROFILE probes) -- and copies it into
 * desc->dtargd_native, bounded by DTRACE_ARGTYPELEN. dtargd_ndx is set to
 * DTRACE_ARGNONE on the remaining path.
 *
 * NOTE(review): the case labels and the condition separating the strlcpy()
 * from the DTRACE_ARGNONE assignment are not visible in this listing.
 * argdesc starts as NULL, so confirm the copy is guarded.
 */
602 profile_getargdesc(void *arg
, dtrace_id_t id
, void *parg
, dtrace_argdesc_t
*desc
)
604 #pragma unused(arg, id)
605 profile_probe_t
*prof
= parg
;
606 const char *argdesc
= NULL
;
607 switch (desc
->dtargd_ndx
) {
612 argdesc
= "user_addr_t";
615 if (prof
->prof_kind
== PROF_PROFILE
) {
616 argdesc
= "hrtime_t";
621 strlcpy(desc
->dtargd_native
, argdesc
, DTRACE_ARGTYPELEN
);
624 desc
->dtargd_ndx
= DTRACE_ARGNONE
;
629 * APPLE NOTE: profile_usermode call not supported.
/*
 * Provider "usermode" callback (dtps_usermode in profile_pops). It ignores
 * all of its parameters and always returns 1; the XXX_BOGUS marker flags
 * this as a placeholder.
 */
632 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
634 #pragma unused(arg,id,parg)
635 return 1; /* XXX_BOGUS */
/*
 * Attribute table passed to dtrace_register() in profile_attach(). Each of
 * the five rows holds two stability levels and a class.
 * NOTE(review): presumably the rows describe provider, module, function,
 * name and args in that order, per the dtrace_pattr_t layout -- confirm
 * against sys/dtrace.h.
 */
638 static dtrace_pattr_t profile_attr
= {
639 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
640 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
641 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
642 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
643 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
/*
 * Provider operations vector passed to dtrace_register() in
 * profile_attach(). It wires the callbacks defined in this file into the
 * DTrace framework; provide_module and suspend are left NULL.
 */
646 static dtrace_pops_t profile_pops
= {
647 .dtps_provide
= profile_provide
,
648 .dtps_provide_module
= NULL
,
649 .dtps_enable
= profile_enable
,
650 .dtps_disable
= profile_disable
,
651 .dtps_suspend
= NULL
,
653 .dtps_getargdesc
= profile_getargdesc
,
654 .dtps_getargval
= profile_getarg
,
655 .dtps_usermode
= profile_usermode
,
656 .dtps_destroy
= profile_destroy
/*
 * Attach the profile provider: create the "profile" minor node and register
 * the provider with DTrace (kernel and user privilege flags, profile_attr,
 * profile_pops), storing the provider id in profile_id.
 *
 * If either step fails, the minor node is removed and DDI_FAILURE is
 * returned. Otherwise profile_max is set to PROFILE_MAX_DEFAULT and
 * DDI_SUCCESS is returned.
 */
660 profile_attach(dev_info_t
*devi
)
662 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
663 DDI_PSEUDO
, 0) == DDI_FAILURE
||
664 dtrace_register("profile", &profile_attr
,
665 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
666 &profile_pops
, NULL
, &profile_id
) != 0) {
667 ddi_remove_minor_node(devi
, NULL
);
668 return (DDI_FAILURE
);
671 profile_max
= PROFILE_MAX_DEFAULT
;
673 return (DDI_SUCCESS
);
677 * APPLE NOTE: profile_detach not implemented
679 #if !defined(__APPLE__)
/*
 * Detach the profile provider (compiled only when __APPLE__ is not defined).
 * After handling cmd, it unregisters the provider with dtrace_unregister()
 * and returns DDI_FAILURE if that fails; otherwise it removes the minor node
 * and returns DDI_SUCCESS.
 * NOTE(review): the switch on cmd that selects the first two returns is not
 * visible in this listing.
 */
681 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
687 return (DDI_SUCCESS
);
689 return (DDI_FAILURE
);
692 if (dtrace_unregister(profile_id
) != 0)
693 return (DDI_FAILURE
);
695 ddi_remove_minor_node(devi
, NULL
);
696 return (DDI_SUCCESS
);
698 #endif /* __APPLE__ */
/*
 * Open entry point for the profile character device; it is the first entry
 * of profile_cdevsw. All four parameters are marked unused.
 * NOTE(review): the function body and return value are not visible in this
 * listing.
 */
700 d_open_t _profile_open
;
702 int _profile_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
704 #pragma unused(dev,flags,devtype,p)
/* Major number requested from cdevsw_add() in profile_init(). */
708 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
711 * A struct describing which functions will get invoked for certain
/*
 * Character-device switch for the profile device. Only open has a
 * file-local handler (_profile_open); stop and reset use nulldev, and the
 * remaining visible entries use the eno_* handlers.
 */
714 static struct cdevsw profile_cdevsw
=
716 _profile_open
, /* open */
717 eno_opcl
, /* close */
718 eno_rdwrt
, /* read */
719 eno_rdwrt
, /* write */
720 eno_ioctl
, /* ioctl */
721 (stop_fcn_t
*)nulldev
, /* stop */
722 (reset_fcn_t
*)nulldev
, /* reset */
724 eno_select
, /* select */
726 eno_strat
, /* strategy */
/*
 * Module entry point: registers profile_cdevsw with cdevsw_add(), requesting
 * PROFILE_MAJOR, and then calls profile_attach() with the returned major
 * number smuggled through the dev_info_t pointer argument.
 * NOTE(review): the condition guarding the failure printf (and whatever
 * follows it) is not visible in this listing.
 */
732 void profile_init( void )
734 int majdevno
= cdevsw_add(PROFILE_MAJOR
, &profile_cdevsw
);
737 printf("profile_init: failed to allocate a major number!\n");
741 profile_attach( (dev_info_t
*)(uintptr_t)majdevno
);