4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */
30 #define _KERNEL /* Solaris vs. Darwin */
34 #include <kern/cpu_data.h>
35 #include <kern/thread.h>
36 #include <kern/assert.h>
37 #include <mach/thread_status.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
43 #include <sys/ioctl.h>
45 #include <sys/fcntl.h>
46 #include <miscfs/devfs/devfs.h>
48 #include <sys/dtrace.h>
49 #include <sys/dtrace_impl.h>
51 #include <sys/dtrace_glue.h>
53 #include <machine/pal_routines.h>
/*
 * Declaration of find_kern_regs(), which takes a thread_t and returns a
 * pointer to an x86_saved_state_t. profile_fire() and profile_tick() call it
 * with current_thread() and treat a NULL result as "no kernel state".
 * The declaration is guarded by __x86_64__; the #error line is presumably the
 * fallback arm for other architectures (the #else/#endif lines are not
 * visible here -- TODO confirm).
 */
55 #if defined(__x86_64__)
56 extern x86_saved_state_t
*find_kern_regs(thread_t
);
58 #error Unknown architecture
/*
 * ASSERT(x) expands to an empty statement: the argument is not evaluated, so
 * the ASSERT() calls that follow this definition perform no check at run time.
 */
62 #define ASSERT(x) do {} while(0)
/* Initialization entry point; defined at the end of this file. */
64 extern void profile_init(void);
/*
 * Device-info pointer for the provider.
 * NOTE(review): no other visible line reads or writes this variable --
 * confirm whether it is still needed.
 */
66 static dev_info_t
*profile_devi
;
/*
 * Provider id. profile_attach() passes its address to dtrace_register();
 * profile_create() passes it to dtrace_probe_lookup() and
 * dtrace_probe_create().
 */
67 static dtrace_provider_id_t profile_id
;
70 * Regardless of platform, the stack frames look like this in the case of the
79 * On x86, there are five frames from the generic interrupt code; further, the
80 * interrupted instruction appears as its own stack frame, giving us a total of
83 * On SPARC, the picture is further complicated because the compiler
84 * optimizes away tail-calls -- so the following frames are optimized away:
89 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
90 * frame cannot be tail-call eliminated, yielding four frames in this case.
92 * All of the above constraints lead to the mess below. Yes, the profile
93 * provider should ideally figure this out on-the-fly by hitting one of its own
94 * probes and then walking its own stack trace. This is complicated, however,
95 * and the static definition doesn't seem to be overly brittle. Still, we
96 * allow for a manual override in case we get it completely wrong.
/*
 * Artificial-frame count for x86_64. profile_create() passes this value to
 * dtrace_probe_create() whenever the profile_aframes override is 0.
 * The #error line is presumably the arm for other architectures (the
 * #else/#endif lines are not visible here -- TODO confirm).
 */
99 #if defined(__x86_64__)
100 #define PROF_ARTIFICIAL_FRAMES 9
102 #error Unknown architecture
/*
 * Size of the probe-name buffers: the prof_name member and the scratch
 * buffer that profile_provide() formats default probe names into.
 */
105 #define PROF_NAMELEN 15
/*
 * Probe kind for "profile-" probes. PROF_TICK, the other kind used in this
 * file, is not defined in the visible lines.
 */
107 #define PROF_PROFILE 0
/* Name prefixes; profile_provide() uses them to build and to recognize probe names. */
109 #define PROF_PREFIX_PROFILE "profile-"
110 #define PROF_PREFIX_TICK "tick-"
/*
 * Per-probe state. Only part of this definition is visible here: the rest of
 * the file also uses prof_id and prof_kind members and the profile_probe_t
 * typedef name, whose declarations are not shown.
 */
112 typedef struct profile_probe
{
/* Probe name; profile_create() fills it with strlcpy(). */
113 char prof_name
[PROF_NAMELEN
];
/* Firing interval; copied into cyt_interval by profile_enable() and profile_online(). */
116 hrtime_t prof_interval
;
/*
 * Handle of the cyclic driving this probe. profile_create() and
 * profile_disable() set it to CYCLIC_NONE; profile_enable() stores the
 * handle returned by cyclic_timer_add() or cyclic_add_omni().
 */
117 cyclic_id_t prof_cyclic
;
/*
 * Per-CPU state for a probe that is not of kind PROF_TICK. profile_create()
 * allocates NCPU of these in the same allocation as the profile_probe_t, and
 * profile_online() selects a slot by indexing from &prof[1] with cpu_number().
 */
120 typedef struct profile_probe_percpu
{
/* Time the next firing is expected; profile_fire() advances it by profc_interval on every call. */
121 hrtime_t profc_expected
;
/* Copy of the cyclic interval, recorded by profile_online(). */
122 hrtime_t profc_interval
;
/* Back-pointer to the owning probe, set by profile_online(). */
123 profile_probe_t
*profc_probe
;
124 } profile_probe_percpu_t
;
/*
 * Lower bound on a probe interval; profile_create() compares every requested
 * interval against it.
 */
126 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
/*
 * Manual override for the artificial-frame count: when non-zero,
 * profile_create() passes it to dtrace_probe_create() instead of
 * PROF_ARTIFICIAL_FRAMES.
 */
127 int profile_aframes
= 0; /* override */
/*
 * Rates for the default "profile-<rate>" probes. profile_provide() walks the
 * whole array and creates each probe with an interval of NANOSEC / rate.
 * Only the first row of the initializer is visible here.
 */
129 static int profile_rates
[] = {
130 97, 199, 499, 997, 1999,
/*
 * Rates for the default "tick-<rate>" probes. profile_provide() walks the
 * whole array and creates each probe with an interval of NANOSEC / rate.
 * Only the first row of the initializer is visible here.
 */
136 static int profile_ticks
[] = {
137 1, 10, 100, 500, 1000,
143 * profile_max defines the upper bound on the number of profile probes that
144 * can exist (this is to prevent malicious or clumsy users from exhausting
145 * system resources by creating a slew of profile probes). At mod load time,
146 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
147 * present in the profile.conf file.
/* Value profile_attach() assigns to profile_max. */
149 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
/* Limit that profile_create() compares profile_total against. */
150 static uint32_t profile_max
; /* maximum number of profile probes */
/* Adjusted with atomic_add_32() in profile_create() (+1, and -1 when over the limit) and profile_destroy() (-1). */
151 static uint32_t profile_total
; /* current number of profile probes */
/*
 * Cyclic handler installed by profile_online() (as cyh_func) with the
 * per-CPU slot as its argument.
 *
 * arg is a profile_probe_percpu_t. The handler measures how far the current
 * time is past profc_expected, advances profc_expected by one interval, and
 * fires the probe. The arguments following the probe id are:
 *   - kernel saved state found:   interrupted kernel rip, 0, lateness
 *   - user saved state, 64-bit:   0, user rip, lateness
 *   - user saved state, 32-bit:   0, user eip, lateness
 *   - no saved state at all:      0xcafebabe placeholder, 0, lateness
 *
 * The declaration of `late` and the else/#else/#endif lines that join the
 * branches are not visible in this listing.
 */
154 profile_fire(void *arg
)
156 profile_probe_percpu_t
*pcpu
= arg
;
157 profile_probe_t
*prof
= pcpu
->profc_probe
;
/* Lateness of this firing relative to the time recorded for it. */
160 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
/* Schedule the expectation for the next firing. */
161 pcpu
->profc_expected
+= pcpu
->profc_interval
;
163 #if defined(__x86_64__)
164 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
166 if (NULL
!= kern_regs
) {
167 /* Kernel was interrupted. */
168 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, late
, 0, 0);
171 pal_register_cache_state(current_thread(), VALID
);
172 /* Possibly a user interrupt */
173 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
175 if (NULL
== tagged_regs
) {
176 /* Too bad, so sad, no useful interrupt state. */
177 dtrace_probe(prof
->prof_id
, 0xcafebabe,
178 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
179 } else if (is_saved_state64(tagged_regs
)) {
180 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
182 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, late
, 0, 0);
184 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
186 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, late
, 0, 0);
190 #error Unknown architecture
/*
 * Cyclic handler installed by profile_enable() for PROF_TICK probes.
 *
 * arg is the profile_probe_t itself (no per-CPU slot is involved). The
 * branch structure mirrors profile_fire(), except that no lateness is
 * computed: the third probe argument is always 0. The arguments following
 * the probe id are:
 *   - kernel saved state found:   interrupted kernel rip, 0, 0
 *   - user saved state, 64-bit:   0, user rip, 0
 *   - user saved state, 32-bit:   0, user eip, 0
 *   - no saved state at all:      0xcafebabe placeholder, 0, 0
 *
 * The else/#else/#endif lines that join the branches are not visible in
 * this listing.
 */
195 profile_tick(void *arg
)
197 profile_probe_t
*prof
= arg
;
199 #if defined(__x86_64__)
200 x86_saved_state_t
*kern_regs
= find_kern_regs(current_thread());
202 if (NULL
!= kern_regs
) {
203 /* Kernel was interrupted. */
204 dtrace_probe(prof
->prof_id
, saved_state64(kern_regs
)->isf
.rip
, 0x0, 0, 0, 0);
206 pal_register_cache_state(current_thread(), VALID
);
207 /* Possibly a user interrupt */
208 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
210 if (NULL
== tagged_regs
) {
211 /* Too bad, so sad, no useful interrupt state. */
212 dtrace_probe(prof
->prof_id
, 0xcafebabe,
213 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
214 } else if (is_saved_state64(tagged_regs
)) {
215 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
217 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
219 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
221 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
225 #error Unknown architecture
/*
 * Create one probe called `name`, of the given kind, firing every `interval`.
 *
 * Visible checks, in order:
 *   1. interval is compared against profile_interval_min;
 *   2. the name is looked up with dtrace_probe_lookup();
 *   3. profile_total is incremented and compared against profile_max, and
 *      decremented again when it exceeds the limit.
 * The statements executed when a check trips are not visible here --
 * presumably early returns; TODO confirm.
 *
 * Allocation: a PROF_TICK probe gets a bare profile_probe_t; the other
 * allocation adds NCPU profile_probe_percpu_t slots after it. The `else`
 * joining the two allocations is not visible in this listing.
 * profile_destroy() frees with the same two sizes.
 *
 * NOTE(review): the limit test reads profile_total in a separate step from
 * the atomic increment -- confirm that callers are serialized.
 */
230 profile_create(hrtime_t interval
, const char *name
, int kind
)
232 profile_probe_t
*prof
;
234 if (interval
< profile_interval_min
)
237 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
/* Reserve a slot in the probe count, then back it out if that exceeded the limit. */
240 atomic_add_32(&profile_total
, 1);
241 if (profile_total
> profile_max
) {
242 atomic_add_32(&profile_total
, -1);
246 if (PROF_TICK
== kind
)
247 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
249 prof
= kmem_zalloc(sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
), KM_SLEEP
);
251 (void) strlcpy(prof
->prof_name
, name
, sizeof(prof
->prof_name
));
252 prof
->prof_interval
= interval
;
/* No cyclic exists until profile_enable() runs. */
253 prof
->prof_cyclic
= CYCLIC_NONE
;
254 prof
->prof_kind
= kind
;
/* The profile_aframes override wins over the built-in frame count when it is non-zero. */
255 prof
->prof_id
= dtrace_probe_create(profile_id
,
257 profile_aframes
? profile_aframes
: PROF_ARTIFICIAL_FRAMES
, prof
);
/*
 * Provide probes for this provider. arg is unused.
 *
 * The visible code has two parts:
 *   1. Default probes: one loop over profile_rates[] and one over
 *      profile_ticks[] format "profile-<rate>" / "tick-<rate>" into a
 *      PROF_NAMELEN buffer and call profile_create() with an interval of
 *      NANOSEC / rate. Per the original comment this is the path taken when
 *      no description was provided.
 *   2. Named probe: desc->dtpd_name is matched against the prefixes in
 *      types[] to pick the kind, scanned backwards from its end to separate
 *      a trailing suffix from the digits, the digits are accumulated into
 *      val, the suffix is looked up in suffixes[] to get a multiplier, and
 *      profile_create(val, name, kind) is called.
 *
 * Not visible in this listing: the declarations of types[] and suffixes[]
 * and their terminating rows, the guard that selects between the two parts,
 * the statements inside several `if` bodies (what happens on a match or a
 * mismatch), where `suffix` is assigned, and how val and mult are combined
 * before the final profile_create() call.
 */
262 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
264 #pragma unused(arg) /* __APPLE__ */
265 int i
, j
, rate
, kind
;
266 hrtime_t val
= 0, mult
= 1, len
;
267 const char *name
, *suffix
= NULL
;
/* Rows pairing a name prefix with its probe kind (read below as types[i].prefix / types[i].kind). */
273 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
274 { PROF_PREFIX_TICK
, PROF_TICK
},
/*
 * Rows pairing a time suffix with its multiplier (read below as
 * suffixes[i].name / suffixes[i].mult). Presumably each multiplier is the
 * number of nanoseconds per unit -- TODO confirm against the definitions of
 * NANOSEC, MICROSEC, MILLISEC and SEC.
 */
282 { "ns", NANOSEC
/ NANOSEC
},
283 { "nsec", NANOSEC
/ NANOSEC
},
284 { "us", NANOSEC
/ MICROSEC
},
285 { "usec", NANOSEC
/ MICROSEC
},
286 { "ms", NANOSEC
/ MILLISEC
},
287 { "msec", NANOSEC
/ MILLISEC
},
288 { "s", NANOSEC
/ SEC
},
289 { "sec", NANOSEC
/ SEC
},
290 { "m", NANOSEC
* (hrtime_t
)60 },
291 { "min", NANOSEC
* (hrtime_t
)60 },
292 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
293 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
294 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
295 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
/* Scratch buffer for the default probe names built below. */
301 char n
[PROF_NAMELEN
];
304 * If no description was provided, provide all of our probes.
306 for (i
= 0; i
< (int)(sizeof (profile_rates
) / sizeof (int)); i
++) {
307 if ((rate
= profile_rates
[i
]) == 0)
310 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
311 PROF_PREFIX_PROFILE
, rate
);
312 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
315 for (i
= 0; i
< (int)(sizeof (profile_ticks
) / sizeof (int)); i
++) {
316 if ((rate
= profile_ticks
[i
]) == 0)
319 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
320 PROF_PREFIX_TICK
, rate
);
321 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
327 name
= desc
->dtpd_name
;
/* Find the types[] row whose prefix starts the probe name; len keeps that prefix's length. */
329 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
330 len
= strlen(types
[i
].prefix
);
332 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
/* Reaching the NULL-prefix terminator means no prefix matched. */
337 if (types
[i
].prefix
== NULL
)
340 kind
= types
[i
].kind
;
/*
 * NOTE(review): the initializer of the loop that follows assigns j again; in
 * the visible lines this value is never read.
 */
341 j
= strlen(name
) - len
;
344 * We need to start before any time suffix.
/* NOTE(review): j is an int while len is an hrtime_t, so this comparison mixes types. */
346 for (j
= strlen(name
); j
>= len
; j
--) {
347 if (name
[j
] >= '0' && name
[j
] <= '9')
/* Compiled out: ASSERT() expands to nothing in this file. */
352 ASSERT(suffix
!= NULL
);
355 * Now determine the numerical value present in the probe name.
/* Walks right to left, so mult is the decimal place value of the digit at name[j]. */
357 for (; j
>= len
; j
--) {
358 if (name
[j
] < '0' || name
[j
] > '9')
361 val
+= (name
[j
] - '0') * mult
;
362 mult
*= (hrtime_t
)10;
369 * Look-up the suffix to determine the multiplier.
371 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
372 /* APPLE NOTE: Darwin employs size bounded string operations */
/*
 * The bound is strlen + 1, so the terminating NUL takes part in the
 * comparison: this is a whole-string, case-insensitive match, not a prefix
 * match.
 */
373 if (strncasecmp(suffixes
[i
].name
, suffix
, strlen(suffixes
[i
].name
) + 1) == 0) {
374 mult
= suffixes
[i
].mult
;
/* True when the table was exhausted without a match and the suffix is not empty. */
379 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
384 * The default is frequency-per-second.
391 profile_create(val
, name
, kind
);
/*
 * Free a probe and drop the probe count.
 *
 * parg is the profile_probe_t; arg and id are unused. The two kmem_free()
 * sizes match the two kmem_zalloc() sizes in profile_create(): a bare
 * profile_probe_t for PROF_TICK, and profile_probe_t plus NCPU per-CPU slots
 * otherwise (the `else` joining the two calls is not visible in this
 * listing). Both ASSERT() calls expand to nothing in this file.
 */
396 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
398 #pragma unused(arg,id) /* __APPLE__ */
399 profile_probe_t
*prof
= parg
;
401 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
403 if (prof
->prof_kind
== PROF_TICK
)
404 kmem_free(prof
, sizeof (profile_probe_t
));
406 kmem_free(prof
, sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
));
408 ASSERT(profile_total
>= 1);
409 atomic_add_32(&profile_total
, -1);
/*
 * Online callback of the omni cyclic (profile_enable() installs it as
 * cyo_online).
 *
 * arg is the profile_probe_t; cpu is unused. Fills *hdlr so profile_fire()
 * is called with the chosen per-CPU slot, fills *when so the first firing
 * is one prof_interval after the current dtrace_gethrtime(), and records the
 * same expected time and interval in the slot for profile_fire() to use.
 */
414 profile_online(void *arg
, dtrace_cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
416 #pragma unused(cpu) /* __APPLE__ */
417 profile_probe_t
*prof
= arg
;
418 profile_probe_percpu_t
*pcpu
;
/*
 * &prof[1] is the first byte after the profile_probe_t, where
 * profile_create() placed the per-CPU array; cpu_number() indexes into it.
 */
420 pcpu
= ((profile_probe_percpu_t
*)(&(prof
[1]))) + cpu_number();
421 pcpu
->profc_probe
= prof
;
423 hdlr
->cyh_func
= profile_fire
;
424 hdlr
->cyh_arg
= pcpu
;
425 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
427 when
->cyt_interval
= prof
->prof_interval
;
428 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
430 pcpu
->profc_expected
= when
->cyt_when
;
431 pcpu
->profc_interval
= when
->cyt_interval
;
/*
 * Offline callback of the omni cyclic (profile_enable() installs it as
 * cyo_offline).
 *
 * oarg is read as the profile_probe_percpu_t slot. The only visible statement
 * is an ASSERT(), which expands to nothing in this file, so the visible body
 * has no run-time effect; pcpu, arg and cpu are all marked unused.
 */
436 profile_offline(void *arg
, dtrace_cpu_t
*cpu
, void *oarg
)
438 profile_probe_percpu_t
*pcpu
= oarg
;
440 ASSERT(pcpu
->profc_probe
== arg
);
441 #pragma unused(pcpu,arg,cpu) /* __APPLE__ */
/*
 * Enable a probe by creating the cyclic that drives it.
 *
 * parg is the profile_probe_t; arg and id are unused.
 *   - PROF_TICK: a single timer created with cyclic_timer_add(), whose
 *     handler is profile_tick().
 *   - otherwise (asserted to be PROF_PROFILE): an omni cyclic created with
 *     cyclic_add_omni(), with profile_online()/profile_offline() as its
 *     online/offline callbacks.
 * The returned handle is stored in prof_cyclic so profile_disable() can
 * remove it.
 *
 * Not visible in this listing: the declarations of hdlr and when, the
 * assignment of the handler/omni argument, the else lines joining the
 * branches, whatever sets cyt_when when __APPLE__ is defined, and the return.
 * All ASSERT() calls expand to nothing in this file.
 */
446 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
448 #pragma unused(arg,id) /* __APPLE__ */
449 profile_probe_t
*prof
= parg
;
450 cyc_omni_handler_t omni
;
454 ASSERT(prof
->prof_interval
!= 0);
455 ASSERT(MUTEX_HELD(&cpu_lock
));
457 if (prof
->prof_kind
== PROF_TICK
) {
458 hdlr
.cyh_func
= profile_tick
;
460 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
462 when
.cyt_interval
= prof
->prof_interval
;
/* This start-time computation is compiled only when __APPLE__ is not defined. */
463 #if !defined(__APPLE__)
464 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
467 #endif /* __APPLE__ */
469 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
470 omni
.cyo_online
= profile_online
;
471 omni
.cyo_offline
= profile_offline
;
475 if (prof
->prof_kind
== PROF_TICK
) {
476 prof
->prof_cyclic
= cyclic_timer_add(&hdlr
, &when
);
478 prof
->prof_cyclic
= (cyclic_id_t
)cyclic_add_omni(&omni
); /* cast puns cyclic_id_list_t with cyclic_id_t */
/*
 * Disable a probe by removing its cyclic.
 *
 * parg is the profile_probe_t; arg and id are unused. The removal call
 * matches how profile_enable() created the cyclic: cyclic_timer_remove() for
 * PROF_TICK, cyclic_remove_omni() otherwise (the `else` joining the two is
 * not visible in this listing). prof_cyclic is then reset to CYCLIC_NONE.
 * Both ASSERT() calls expand to nothing in this file.
 */
486 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
488 profile_probe_t
*prof
= parg
;
490 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
491 ASSERT(MUTEX_HELD(&cpu_lock
));
493 #pragma unused(arg,id)
494 if (prof
->prof_kind
== PROF_TICK
) {
495 cyclic_timer_remove(prof
->prof_cyclic
);
497 cyclic_remove_omni((cyclic_id_list_t
)prof
->prof_cyclic
); /* cast puns cyclic_id_list_t with cyclic_id_t */
499 prof
->prof_cyclic
= CYCLIC_NONE
;
503 * APPLE NOTE: profile_usermode call not supported.
/*
 * Returns the constant 1 regardless of its arguments, all of which are
 * unused. The original author marks the value XXX_BOGUS, i.e. it is a
 * placeholder rather than a computed answer.
 */
506 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
508 #pragma unused(arg,id,parg)
509 return 1; /* XXX_BOGUS */
/*
 * Stability attributes that profile_attach() passes to dtrace_register().
 * Five rows of three values each; presumably the rows are the provider,
 * module, function, name and args attributes in the order dtrace_pattr_t
 * declares them -- TODO confirm against sys/dtrace.h.
 */
512 static dtrace_pattr_t profile_attr
= {
513 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
514 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
515 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
516 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
517 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
/*
 * Provider operations table that profile_attach() passes to
 * dtrace_register(). Its initializer entries are not visible in this listing.
 */
520 static dtrace_pops_t profile_pops
= {
/*
 * Attach the provider: create the "profile" minor node and register with
 * DTrace, storing the provider id in profile_id.
 *
 * If either ddi_create_minor_node() or dtrace_register() fails, the minor
 * node is removed and DDI_FAILURE is returned. Otherwise profile_max is set
 * to PROFILE_MAX_DEFAULT, the device is reported, and DDI_SUCCESS is
 * returned.
 *
 * The two returns at the top presumably belong to a dispatch on `cmd` whose
 * switch/case lines are not visible in this listing -- TODO confirm.
 */
534 profile_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
540 return (DDI_SUCCESS
);
542 return (DDI_FAILURE
);
545 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
546 DDI_PSEUDO
, 0) == DDI_FAILURE
||
547 dtrace_register("profile", &profile_attr
,
548 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
549 &profile_pops
, NULL
, &profile_id
) != 0) {
550 ddi_remove_minor_node(devi
, NULL
);
551 return (DDI_FAILURE
);
554 profile_max
= PROFILE_MAX_DEFAULT
;
556 ddi_report_dev(devi
);
558 return (DDI_SUCCESS
);
562 * APPLE NOTE: profile_detach not implemented
/*
 * Detach the provider. Compiled only when __APPLE__ is not defined.
 *
 * Unregisters profile_id from DTrace; if that fails DDI_FAILURE is returned
 * and the minor node is left in place, otherwise the minor node is removed
 * and DDI_SUCCESS is returned.
 *
 * The two returns at the top presumably belong to a dispatch on `cmd` whose
 * switch/case lines are not visible in this listing -- TODO confirm.
 */
564 #if !defined(__APPLE__)
566 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
572 return (DDI_SUCCESS
);
574 return (DDI_FAILURE
);
577 if (dtrace_unregister(profile_id
) != 0)
578 return (DDI_FAILURE
);
580 ddi_remove_minor_node(devi
, NULL
);
581 return (DDI_SUCCESS
);
583 #endif /* __APPLE__ */
/*
 * Open entry point placed in the first slot of profile_cdevsw. It is first
 * declared through the d_open_t typedef and then defined; all four
 * parameters are marked unused. The statements of the body are not visible
 * in this listing.
 */
585 d_open_t _profile_open
;
587 int _profile_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
589 #pragma unused(dev,flags,devtype,p)
/* Major-number argument that profile_init() passes to cdevsw_add(). */
593 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
596 * A struct describing which functions will get invoked for certain
/*
 * Character-device switch that profile_init() registers with cdevsw_add().
 * Of the visible slots only open points at a function defined in this file
 * (_profile_open); the rest are eno_* or nulldev entries. Some slots of the
 * initializer are not visible in this listing.
 */
599 static struct cdevsw profile_cdevsw
=
601 _profile_open
, /* open */
602 eno_opcl
, /* close */
603 eno_rdwrt
, /* read */
604 eno_rdwrt
, /* write */
605 eno_ioctl
, /* ioctl */
606 (stop_fcn_t
*)nulldev
, /* stop */
607 (reset_fcn_t
*)nulldev
, /* reset */
609 eno_select
, /* select */
611 eno_strat
, /* strategy */
/* Guard tested by profile_init(); the line that changes it is not visible in this listing. */
617 static int gProfileInited
= 0;
/*
 * One-time initialization of the provider.
 *
 * While gProfileInited is 0: registers profile_cdevsw with cdevsw_add(),
 * asking for PROFILE_MAJOR, and calls profile_attach() with the returned
 * major number cast to a dev_info_t pointer and DDI_ATTACH. The printf()
 * reports a failure to obtain a major number; the condition guarding it is
 * not visible here. The panic message indicates that the remaining arm
 * handles a second call (its else line is not visible -- TODO confirm).
 */
619 void profile_init( void )
621 if (0 == gProfileInited
)
623 int majdevno
= cdevsw_add(PROFILE_MAJOR
, &profile_cdevsw
);
626 printf("profile_init: failed to allocate a major number!\n");
/* The major number stands in for the dev_info_t pointer that profile_attach() expects. */
631 profile_attach( (dev_info_t
*)(uintptr_t)majdevno
, DDI_ATTACH
);
635 panic("profile_init: called twice!\n");