4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)profile.c 1.6 06/03/24 SMI" */
28 #if !defined(__APPLE__)
29 #include <sys/errno.h>
31 #include <sys/modctl.h>
33 #include <sys/systm.h>
35 #include <sys/sunddi.h>
36 #include <sys/cpuvar.h>
38 #include <sys/strsubr.h>
39 #include <sys/dtrace.h>
40 #include <sys/cyclic.h>
41 #include <sys/atomic.h>
45 #define _KERNEL /* Solaris vs. Darwin */
49 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
50 #include <kern/cpu_data.h>
51 #include <kern/thread.h>
52 #include <mach/thread_status.h>
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/errno.h>
58 #include <sys/ioctl.h>
60 #include <sys/fcntl.h>
61 #include <miscfs/devfs/devfs.h>
63 #include <sys/dtrace.h>
64 #include <sys/dtrace_impl.h>
66 #include <sys/dtrace_glue.h>
68 #if defined(__ppc__) || defined(__ppc64__)
69 extern struct savearea
*find_kern_regs(thread_t
);
70 #elif defined(__i386__) || defined(__x86_64__)
71 extern x86_saved_state32_t
*find_kern_regs(thread_t
);
73 #error Unknown architecture
77 #define ASSERT(x) do {} while(0)
79 extern void profile_init(void);
80 #endif /* __APPLE__ */
82 static dev_info_t
*profile_devi
;
83 static dtrace_provider_id_t profile_id
;
86 * Regardless of platform, there are five artificial frames in the case of the
95 * On amd64, there are two frames associated with locore: one in locore, and
96 * another in common interrupt dispatch code. (i386 has not been modified to
97 * use this common layer.) Further, on i386, the interrupted instruction
98 * appears as its own stack frame. All of this means that we need to add one
99 * frame for amd64, and then take one away for both amd64 and i386.
101 * On SPARC, the picture is further complicated because the compiler
102 * optimizes away tail-calls -- so the following frames are optimized away:
107 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
108 * frame cannot be tail-call eliminated, yielding four frames in this case.
110 * All of the above constraints lead to the mess below. Yes, the profile
111 * provider should ideally figure this out on-the-fly by hitting one of its own
112 * probes and then walking its own stack trace. This is complicated, however,
113 * and the static definition doesn't seem to be overly brittle. Still, we
114 * allow for a manual override in case we get it completely wrong.
116 #if !defined(__APPLE__)
119 #define PROF_ARTIFICIAL_FRAMES 7
122 #define PROF_ARTIFICIAL_FRAMES 6
126 #define PROF_ARTIFICIAL_FRAMES 4
128 #define PROF_ARTIFICIAL_FRAMES 3
134 #else /* is Mac OS X */
136 #if defined(__ppc__) || defined(__ppc64__)
137 #define PROF_ARTIFICIAL_FRAMES 8
138 #elif defined(__i386__) || defined(__x86_64__)
139 #define PROF_ARTIFICIAL_FRAMES 9
141 #error Unknown architecture
144 #endif /* __APPLE__ */
146 #define PROF_NAMELEN 15
148 #define PROF_PROFILE 0
150 #define PROF_PREFIX_PROFILE "profile-"
151 #define PROF_PREFIX_TICK "tick-"
153 typedef struct profile_probe
{
154 char prof_name
[PROF_NAMELEN
];
157 hrtime_t prof_interval
;
158 cyclic_id_t prof_cyclic
;
161 typedef struct profile_probe_percpu
{
162 hrtime_t profc_expected
;
163 hrtime_t profc_interval
;
164 profile_probe_t
*profc_probe
;
165 } profile_probe_percpu_t
;
167 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
168 int profile_aframes
= 0; /* override */
170 static int profile_rates
[] = {
171 97, 199, 499, 997, 1999,
177 static int profile_ticks
[] = {
178 1, 10, 100, 500, 1000,
184 * profile_max defines the upper bound on the number of profile probes that
185 * can exist (this is to prevent malicious or clumsy users from exhausting
186 * system resources by creating a slew of profile probes). At mod load time,
187 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
188 * present in the profile.conf file.
190 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
191 static uint32_t profile_max
; /* maximum number of profile probes */
192 static uint32_t profile_total
; /* current number of profile probes */
195 profile_fire(void *arg
)
197 profile_probe_percpu_t
*pcpu
= arg
;
198 profile_probe_t
*prof
= pcpu
->profc_probe
;
201 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
202 pcpu
->profc_expected
+= pcpu
->profc_interval
;
204 #if !defined(__APPLE__)
205 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
206 CPU
->cpu_profile_upc
, late
, 0, 0);
208 #if defined(__ppc__) || defined(__ppc64__)
210 struct savearea
*sv
= find_kern_regs(current_thread());
213 if (USERMODE(sv
->save_srr1
)) {
214 dtrace_probe(prof
->prof_id
, 0x0, sv
->save_srr0
, late
, 0, 0);
216 dtrace_probe(prof
->prof_id
, sv
->save_srr0
, 0x0, late
, 0, 0);
219 dtrace_probe(prof
->prof_id
, 0xcafebabe,
220 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
223 #elif defined(__i386__) || defined(__x86_64__)
225 x86_saved_state32_t
*kern_regs
= find_kern_regs(current_thread());
227 if (NULL
!= kern_regs
) {
228 /* Kernel was interrupted. */
229 dtrace_probe(prof
->prof_id
, kern_regs
->eip
, 0x0, 0, 0, 0);
231 /* Possibly a user interrupt */
232 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
234 if (NULL
== tagged_regs
) {
235 /* Too bad, so sad, no useful interrupt state. */
236 dtrace_probe(prof
->prof_id
, 0xcafebabe,
237 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
238 } else if (is_saved_state64(tagged_regs
)) {
239 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
241 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
243 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
245 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
250 #error Unknown architecture
252 #endif /* __APPLE__ */
256 profile_tick(void *arg
)
258 profile_probe_t
*prof
= arg
;
260 #if !defined(__APPLE__)
261 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
262 CPU
->cpu_profile_upc
, 0, 0, 0);
264 #if defined(__ppc__) || defined(__ppc64__)
266 struct savearea
*sv
= find_kern_regs(current_thread());
269 if (USERMODE(sv
->save_srr1
)) {
270 dtrace_probe(prof
->prof_id
, 0x0, sv
->save_srr0
, 0, 0, 0);
272 dtrace_probe(prof
->prof_id
, sv
->save_srr0
, 0x0, 0, 0, 0);
275 dtrace_probe(prof
->prof_id
, 0xcafebabe,
276 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
279 #elif defined(__i386__) || defined(__x86_64__)
281 x86_saved_state32_t
*kern_regs
= find_kern_regs(current_thread());
283 if (NULL
!= kern_regs
) {
284 /* Kernel was interrupted. */
285 dtrace_probe(prof
->prof_id
, kern_regs
->eip
, 0x0, 0, 0, 0);
287 /* Possibly a user interrupt */
288 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
290 if (NULL
== tagged_regs
) {
291 /* Too bad, so sad, no useful interrupt state. */
292 dtrace_probe(prof
->prof_id
, 0xcafebabe,
293 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
294 } else if (is_saved_state64(tagged_regs
)) {
295 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
297 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
299 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
301 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
306 #error Unknown architecture
308 #endif /* __APPLE__ */
312 profile_create(hrtime_t interval
, const char *name
, int kind
)
314 profile_probe_t
*prof
;
316 if (interval
< profile_interval_min
)
319 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
322 atomic_add_32(&profile_total
, 1);
323 if (profile_total
> profile_max
) {
324 atomic_add_32(&profile_total
, -1);
328 #if !defined(__APPLE__)
329 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
331 if (PROF_TICK
== kind
)
332 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
334 prof
= kmem_zalloc(sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
), KM_SLEEP
);
335 #endif /* __APPLE__ */
336 (void) strlcpy(prof
->prof_name
, name
, sizeof(prof
->prof_name
));
337 prof
->prof_interval
= interval
;
338 prof
->prof_cyclic
= CYCLIC_NONE
;
339 prof
->prof_kind
= kind
;
340 prof
->prof_id
= dtrace_probe_create(profile_id
,
342 profile_aframes
? profile_aframes
: PROF_ARTIFICIAL_FRAMES
, prof
);
347 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
349 int i
, j
, rate
, kind
;
350 hrtime_t val
= 0, mult
= 1, len
;
351 const char *name
, *suffix
= NULL
;
357 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
358 { PROF_PREFIX_TICK
, PROF_TICK
},
366 { "ns", NANOSEC
/ NANOSEC
},
367 { "nsec", NANOSEC
/ NANOSEC
},
368 { "us", NANOSEC
/ MICROSEC
},
369 { "usec", NANOSEC
/ MICROSEC
},
370 { "ms", NANOSEC
/ MILLISEC
},
371 { "msec", NANOSEC
/ MILLISEC
},
372 { "s", NANOSEC
/ SEC
},
373 { "sec", NANOSEC
/ SEC
},
374 { "m", NANOSEC
* (hrtime_t
)60 },
375 { "min", NANOSEC
* (hrtime_t
)60 },
376 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
377 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
378 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
379 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
381 #if !defined(__APPLE__)
385 #endif /* __APPLE__ */
389 char n
[PROF_NAMELEN
];
392 * If no description was provided, provide all of our probes.
394 for (i
= 0; i
< sizeof (profile_rates
) / sizeof (int); i
++) {
395 if ((rate
= profile_rates
[i
]) == 0)
398 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
399 PROF_PREFIX_PROFILE
, rate
);
400 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
403 for (i
= 0; i
< sizeof (profile_ticks
) / sizeof (int); i
++) {
404 if ((rate
= profile_ticks
[i
]) == 0)
407 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
408 PROF_PREFIX_TICK
, rate
);
409 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
415 name
= desc
->dtpd_name
;
417 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
418 len
= strlen(types
[i
].prefix
);
420 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
425 if (types
[i
].prefix
== NULL
)
428 kind
= types
[i
].kind
;
429 j
= strlen(name
) - len
;
432 * We need to start before any time suffix.
434 for (j
= strlen(name
); j
>= len
; j
--) {
435 if (name
[j
] >= '0' && name
[j
] <= '9')
440 ASSERT(suffix
!= NULL
);
443 * Now determine the numerical value present in the probe name.
445 for (; j
>= len
; j
--) {
446 if (name
[j
] < '0' || name
[j
] > '9')
449 val
+= (name
[j
] - '0') * mult
;
450 mult
*= (hrtime_t
)10;
457 * Look-up the suffix to determine the multiplier.
459 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
460 if (strcasecmp(suffixes
[i
].name
, suffix
) == 0) {
461 mult
= suffixes
[i
].mult
;
466 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
471 * The default is frequency-per-second.
478 profile_create(val
, name
, kind
);
483 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
485 profile_probe_t
*prof
= parg
;
487 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
488 #if !defined(__APPLE__)
489 kmem_free(prof
, sizeof (profile_probe_t
));
491 if (prof
->prof_kind
== PROF_TICK
)
492 kmem_free(prof
, sizeof (profile_probe_t
));
494 kmem_free(prof
, sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
));
495 #endif /* __APPLE__ */
497 ASSERT(profile_total
>= 1);
498 atomic_add_32(&profile_total
, -1);
503 profile_online(void *arg
, cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
505 profile_probe_t
*prof
= arg
;
506 profile_probe_percpu_t
*pcpu
;
508 #if !defined(__APPLE__)
509 pcpu
= kmem_zalloc(sizeof (profile_probe_percpu_t
), KM_SLEEP
);
511 pcpu
= ((profile_probe_percpu_t
*)(&(prof
[1]))) + cpu_number();
512 #endif /* __APPLE__ */
513 pcpu
->profc_probe
= prof
;
515 hdlr
->cyh_func
= profile_fire
;
516 hdlr
->cyh_arg
= pcpu
;
517 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
519 when
->cyt_interval
= prof
->prof_interval
;
520 #if !defined(__APPLE__)
521 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
524 #endif /* __APPLE__ */
526 pcpu
->profc_expected
= when
->cyt_when
;
527 pcpu
->profc_interval
= when
->cyt_interval
;
532 profile_offline(void *arg
, cpu_t
*cpu
, void *oarg
)
534 profile_probe_percpu_t
*pcpu
= oarg
;
536 ASSERT(pcpu
->profc_probe
== arg
);
537 #if !defined(__APPLE__)
538 kmem_free(pcpu
, sizeof (profile_probe_percpu_t
));
539 #endif /* __APPLE__ */
544 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
546 profile_probe_t
*prof
= parg
;
547 cyc_omni_handler_t omni
;
551 ASSERT(prof
->prof_interval
!= 0);
552 ASSERT(MUTEX_HELD(&cpu_lock
));
554 if (prof
->prof_kind
== PROF_TICK
) {
555 hdlr
.cyh_func
= profile_tick
;
557 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
559 when
.cyt_interval
= prof
->prof_interval
;
560 #if !defined(__APPLE__)
561 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
564 #endif /* __APPLE__ */
566 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
567 omni
.cyo_online
= profile_online
;
568 omni
.cyo_offline
= profile_offline
;
572 #if !defined(__APPLE__)
573 if (prof
->prof_kind
== PROF_TICK
) {
574 prof
->prof_cyclic
= cyclic_add(&hdlr
, &when
);
576 prof
->prof_cyclic
= cyclic_add_omni(&omni
);
579 if (prof
->prof_kind
== PROF_TICK
) {
580 prof
->prof_cyclic
= cyclic_timer_add(&hdlr
, &when
);
582 prof
->prof_cyclic
= (cyclic_id_t
)cyclic_add_omni(&omni
); /* cast puns cyclic_id_list_t with cyclic_id_t */
584 #endif /* __APPLE__ */
589 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
591 profile_probe_t
*prof
= parg
;
593 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
594 ASSERT(MUTEX_HELD(&cpu_lock
));
596 #if !defined(__APPLE__)
597 cyclic_remove(prof
->prof_cyclic
);
599 if (prof
->prof_kind
== PROF_TICK
) {
600 cyclic_timer_remove(prof
->prof_cyclic
);
602 cyclic_remove_omni((cyclic_id_list_t
)prof
->prof_cyclic
); /* cast puns cyclic_id_list_t with cyclic_id_t */
604 #endif /* __APPLE__ */
605 prof
->prof_cyclic
= CYCLIC_NONE
;
608 #if !defined(__APPLE__)
611 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
613 return (CPU
->cpu_profile_pc
== 0);
617 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
619 #pragma unused(arg,id,parg)
620 return 1; /* XXX_BOGUS */
622 #endif /* __APPLE__ */
624 static dtrace_pattr_t profile_attr
= {
625 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
626 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
627 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
628 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
629 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
632 static dtrace_pops_t profile_pops
= {
646 profile_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
652 return (DDI_SUCCESS
);
654 return (DDI_FAILURE
);
657 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
658 DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
659 dtrace_register("profile", &profile_attr
,
660 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
661 &profile_pops
, NULL
, &profile_id
) != 0) {
662 ddi_remove_minor_node(devi
, NULL
);
663 return (DDI_FAILURE
);
666 #if !defined(__APPLE__)
667 profile_max
= ddi_getprop(DDI_DEV_T_ANY
, devi
, DDI_PROP_DONTPASS
,
668 "profile-max-probes", PROFILE_MAX_DEFAULT
);
670 profile_max
= PROFILE_MAX_DEFAULT
;
671 #endif /* __APPLE__ */
673 ddi_report_dev(devi
);
675 return (DDI_SUCCESS
);
678 #if !defined(__APPLE__)
680 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
686 return (DDI_SUCCESS
);
688 return (DDI_FAILURE
);
691 if (dtrace_unregister(profile_id
) != 0)
692 return (DDI_FAILURE
);
694 ddi_remove_minor_node(devi
, NULL
);
695 return (DDI_SUCCESS
);
700 profile_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
705 case DDI_INFO_DEVT2DEVINFO
:
706 *result
= (void *)profile_devi
;
709 case DDI_INFO_DEVT2INSTANCE
:
721 profile_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
726 static struct cb_ops profile_cb_ops
= {
727 profile_open
, /* open */
729 nulldev
, /* strategy */
739 ddi_prop_op
, /* cb_prop_op */
741 D_NEW
| D_MP
/* Driver compatibility flag */
744 static struct dev_ops profile_ops
= {
745 DEVO_REV
, /* devo_rev, */
747 profile_info
, /* get_dev_info */
748 nulldev
, /* identify */
750 profile_attach
, /* attach */
751 profile_detach
, /* detach */
753 &profile_cb_ops
, /* driver operations */
754 NULL
, /* bus operations */
755 nodev
/* dev power */
759 * Module linkage information for the kernel.
761 static struct modldrv modldrv
= {
762 &mod_driverops
, /* module type (this is a pseudo driver) */
763 "Profile Interrupt Tracing", /* name of module */
764 &profile_ops
, /* driver ops */
767 static struct modlinkage modlinkage
= {
776 return (mod_install(&modlinkage
));
780 _info(struct modinfo
*modinfop
)
782 return (mod_info(&modlinkage
, modinfop
));
788 return (mod_remove(&modlinkage
));
791 d_open_t _profile_open
;
793 int _profile_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
795 #pragma unused(dev,flags,devtype,p)
799 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
802 * A struct describing which functions will get invoked for certain
805 static struct cdevsw profile_cdevsw
=
807 _profile_open
, /* open */
808 eno_opcl
, /* close */
809 eno_rdwrt
, /* read */
810 eno_rdwrt
, /* write */
811 eno_ioctl
, /* ioctl */
812 (stop_fcn_t
*)nulldev
, /* stop */
813 (reset_fcn_t
*)nulldev
, /* reset */
815 eno_select
, /* select */
817 eno_strat
, /* strategy */
823 static int gProfileInited
= 0;
825 void profile_init( void )
827 if (0 == gProfileInited
)
829 int majdevno
= cdevsw_add(PROFILE_MAJOR
, &profile_cdevsw
);
832 printf("profile_init: failed to allocate a major number!\n");
837 profile_attach( (dev_info_t
*)majdevno
, DDI_ATTACH
);
841 panic("profile_init: called twice!\n");
844 #endif /* __APPLE__ */