4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* #pragma ident "@(#)profile.c 1.6 06/03/24 SMI" */
28 #if !defined(__APPLE__)
29 #include <sys/errno.h>
31 #include <sys/modctl.h>
33 #include <sys/systm.h>
35 #include <sys/sunddi.h>
36 #include <sys/cpuvar.h>
38 #include <sys/strsubr.h>
39 #include <sys/dtrace.h>
40 #include <sys/cyclic.h>
41 #include <sys/atomic.h>
45 #define _KERNEL /* Solaris vs. Darwin */
49 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
50 #include <kern/cpu_data.h>
51 #include <kern/thread.h>
52 #include <mach/thread_status.h>
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/errno.h>
58 #include <sys/ioctl.h>
60 #include <sys/fcntl.h>
61 #include <miscfs/devfs/devfs.h>
63 #include <sys/dtrace.h>
64 #include <sys/dtrace_impl.h>
66 #include <sys/dtrace_glue.h>
68 #if defined(__ppc__) || defined(__ppc64__)
69 extern struct savearea
*find_kern_regs(thread_t
);
70 #elif defined(__i386__) || defined(__x86_64__)
71 extern x86_saved_state32_t
*find_kern_regs(thread_t
);
73 #error Unknown architecture
77 #define ASSERT(x) do {} while(0)
79 extern void profile_init(void);
80 #endif /* __APPLE__ */
82 static dev_info_t
*profile_devi
;
83 static dtrace_provider_id_t profile_id
;
86 * Regardless of platform, there are five artificial frames in the case of the
95 * On amd64, there are two frames associated with locore: one in locore, and
96 * another in common interrupt dispatch code. (i386 has not been modified to
97 * use this common layer.) Further, on i386, the interrupted instruction
98 * appears as its own stack frame. All of this means that we need to add one
99 * frame for amd64, and then take one away for both amd64 and i386.
101 * On SPARC, the picture is further complicated because the compiler
102 * optimizes away tail-calls -- so the following frames are optimized away:
107 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
108 * frame cannot be tail-call eliminated, yielding four frames in this case.
110 * All of the above constraints lead to the mess below. Yes, the profile
111 * provider should ideally figure this out on-the-fly by hitting one of its own
112 * probes and then walking its own stack trace. This is complicated, however,
113 * and the static definition doesn't seem to be overly brittle. Still, we
114 * allow for a manual override in case we get it completely wrong.
116 #if !defined(__APPLE__)
119 #define PROF_ARTIFICIAL_FRAMES 7
122 #define PROF_ARTIFICIAL_FRAMES 6
126 #define PROF_ARTIFICIAL_FRAMES 4
128 #define PROF_ARTIFICIAL_FRAMES 3
134 #else /* is Mac OS X */
136 #if defined(__ppc__) || defined(__ppc64__)
137 #define PROF_ARTIFICIAL_FRAMES 8
138 #elif defined(__i386__) || defined(__x86_64__)
139 #define PROF_ARTIFICIAL_FRAMES 9
141 #error Unknown architecture
144 #endif /* __APPLE__ */
146 #define PROF_NAMELEN 15
148 #define PROF_PROFILE 0
150 #define PROF_PREFIX_PROFILE "profile-"
151 #define PROF_PREFIX_TICK "tick-"
153 typedef struct profile_probe
{
154 char prof_name
[PROF_NAMELEN
];
157 hrtime_t prof_interval
;
158 cyclic_id_t prof_cyclic
;
/*
 * Per-CPU bookkeeping for a profile probe.  profile_online() fills one of
 * these in for a CPU and hands it to the cyclic handler as cyh_arg;
 * profile_fire() receives it back as its argument.
 */
161 typedef struct profile_probe_percpu
{
/*
 * Time at which the next firing is expected.  Seeded from cyt_when in
 * profile_online(); profile_fire() subtracts it from dtrace_gethrtime()
 * to compute how late the firing is, then advances it by profc_interval.
 */
162 hrtime_t profc_expected
;
/* Firing period, copied from cyt_interval in profile_online(). */
163 hrtime_t profc_interval
;
/* Back-pointer to the owning probe; profile_fire() reads prof_id through it. */
164 profile_probe_t
*profc_probe
;
165 } profile_probe_percpu_t
;
/*
 * Smallest probe interval accepted: profile_create() compares the requested
 * interval against this value (NANOSEC / 5000, i.e. a 5000 Hz ceiling).
 * NOTE(review): the statement taken when the interval is too small is not
 * visible here -- presumably the request is rejected; confirm.
 */
167 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
/*
 * Manual override for the artificial-frame count: when non-zero,
 * profile_create() passes this to dtrace_probe_create() in place of
 * PROF_ARTIFICIAL_FRAMES.
 */
168 int profile_aframes
= 0; /* override */
170 static int profile_rates
[] = {
171 97, 199, 499, 997, 1999,
177 static int profile_ticks
[] = {
178 1, 10, 100, 500, 1000,
184 * profile_max defines the upper bound on the number of profile probes that
185 * can exist (this is to prevent malicious or clumsy users from exhausting
186 * system resources by creating a slew of profile probes). At mod load time,
187 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
188 * present in the profile.conf file.
190 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
191 static uint32_t profile_max
; /* maximum number of profile probes */
192 static uint32_t profile_total
; /* current number of profile probes */
195 profile_fire(void *arg
)
197 profile_probe_percpu_t
*pcpu
= arg
;
198 profile_probe_t
*prof
= pcpu
->profc_probe
;
201 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
202 pcpu
->profc_expected
+= pcpu
->profc_interval
;
204 #if !defined(__APPLE__)
205 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
206 CPU
->cpu_profile_upc
, late
, 0, 0);
208 #if defined(__ppc__) || defined(__ppc64__)
209 struct savearea
*sv
= find_kern_regs(current_thread());
212 if (USERMODE(sv
->save_srr1
)) {
213 dtrace_probe(prof
->prof_id
, 0x0, sv
->save_srr0
, late
, 0, 0);
215 dtrace_probe(prof
->prof_id
, sv
->save_srr0
, 0x0, late
, 0, 0);
218 dtrace_probe(prof
->prof_id
, 0xcafebabe,
219 0x0, late
, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
221 #elif defined(__i386__) || defined(__x86_64__)
222 x86_saved_state32_t
*kern_regs
= find_kern_regs(current_thread());
224 if (NULL
!= kern_regs
) {
225 /* Kernel was interrupted. */
226 dtrace_probe(prof
->prof_id
, kern_regs
->eip
, 0x0, 0, 0, 0);
228 /* Possibly a user interrupt */
229 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
231 if (NULL
== tagged_regs
) {
232 /* Too bad, so sad, no useful interrupt state. */
233 dtrace_probe(prof
->prof_id
, 0xcafebabe,
234 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
235 } else if (is_saved_state64(tagged_regs
)) {
236 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
238 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
240 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
242 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
246 #error Unknown architecture
248 #endif /* __APPLE__ */
252 profile_tick(void *arg
)
254 profile_probe_t
*prof
= arg
;
256 #if !defined(__APPLE__)
257 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
258 CPU
->cpu_profile_upc
, 0, 0, 0);
260 #if defined(__ppc__) || defined(__ppc64__)
261 struct savearea
*sv
= find_kern_regs(current_thread());
264 if (USERMODE(sv
->save_srr1
)) {
265 dtrace_probe(prof
->prof_id
, 0x0, sv
->save_srr0
, 0, 0, 0);
267 dtrace_probe(prof
->prof_id
, sv
->save_srr0
, 0x0, 0, 0, 0);
270 dtrace_probe(prof
->prof_id
, 0xcafebabe,
271 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
273 #elif defined(__i386__) || defined(__x86_64__)
274 x86_saved_state32_t
*kern_regs
= find_kern_regs(current_thread());
276 if (NULL
!= kern_regs
) {
277 /* Kernel was interrupted. */
278 dtrace_probe(prof
->prof_id
, kern_regs
->eip
, 0x0, 0, 0, 0);
280 /* Possibly a user interrupt */
281 x86_saved_state_t
*tagged_regs
= (x86_saved_state_t
*)find_user_regs(current_thread());
283 if (NULL
== tagged_regs
) {
284 /* Too bad, so sad, no useful interrupt state. */
285 dtrace_probe(prof
->prof_id
, 0xcafebabe,
286 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
287 } else if (is_saved_state64(tagged_regs
)) {
288 x86_saved_state64_t
*regs
= saved_state64(tagged_regs
);
290 dtrace_probe(prof
->prof_id
, 0x0, regs
->isf
.rip
, 0, 0, 0);
292 x86_saved_state32_t
*regs
= saved_state32(tagged_regs
);
294 dtrace_probe(prof
->prof_id
, 0x0, regs
->eip
, 0, 0, 0);
298 #error Unknown architecture
300 #endif /* __APPLE__ */
304 profile_create(hrtime_t interval
, const char *name
, int kind
)
306 profile_probe_t
*prof
;
308 if (interval
< profile_interval_min
)
311 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
314 atomic_add_32(&profile_total
, 1);
315 if (profile_total
> profile_max
) {
316 atomic_add_32(&profile_total
, -1);
320 #if !defined(__APPLE__)
321 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
323 if (PROF_TICK
== kind
)
324 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
326 prof
= kmem_zalloc(sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
), KM_SLEEP
);
327 #endif /* __APPLE__ */
328 (void) strlcpy(prof
->prof_name
, name
, sizeof(prof
->prof_name
));
329 prof
->prof_interval
= interval
;
330 prof
->prof_cyclic
= CYCLIC_NONE
;
331 prof
->prof_kind
= kind
;
332 prof
->prof_id
= dtrace_probe_create(profile_id
,
334 profile_aframes
? profile_aframes
: PROF_ARTIFICIAL_FRAMES
, prof
);
339 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
341 int i
, j
, rate
, kind
;
342 hrtime_t val
= 0, mult
= 1, len
;
343 const char *name
, *suffix
= NULL
;
349 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
350 { PROF_PREFIX_TICK
, PROF_TICK
},
358 { "ns", NANOSEC
/ NANOSEC
},
359 { "nsec", NANOSEC
/ NANOSEC
},
360 { "us", NANOSEC
/ MICROSEC
},
361 { "usec", NANOSEC
/ MICROSEC
},
362 { "ms", NANOSEC
/ MILLISEC
},
363 { "msec", NANOSEC
/ MILLISEC
},
364 { "s", NANOSEC
/ SEC
},
365 { "sec", NANOSEC
/ SEC
},
366 { "m", NANOSEC
* (hrtime_t
)60 },
367 { "min", NANOSEC
* (hrtime_t
)60 },
368 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
369 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
370 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
371 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
373 #if !defined(__APPLE__)
377 #endif /* __APPLE__ */
381 char n
[PROF_NAMELEN
];
384 * If no description was provided, provide all of our probes.
386 for (i
= 0; i
< sizeof (profile_rates
) / sizeof (int); i
++) {
387 if ((rate
= profile_rates
[i
]) == 0)
390 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
391 PROF_PREFIX_PROFILE
, rate
);
392 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
395 for (i
= 0; i
< sizeof (profile_ticks
) / sizeof (int); i
++) {
396 if ((rate
= profile_ticks
[i
]) == 0)
399 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
400 PROF_PREFIX_TICK
, rate
);
401 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
407 name
= desc
->dtpd_name
;
409 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
410 len
= strlen(types
[i
].prefix
);
412 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
417 if (types
[i
].prefix
== NULL
)
420 kind
= types
[i
].kind
;
421 j
= strlen(name
) - len
;
424 * We need to start before any time suffix.
426 for (j
= strlen(name
); j
>= len
; j
--) {
427 if (name
[j
] >= '0' && name
[j
] <= '9')
432 ASSERT(suffix
!= NULL
);
435 * Now determine the numerical value present in the probe name.
437 for (; j
>= len
; j
--) {
438 if (name
[j
] < '0' || name
[j
] > '9')
441 val
+= (name
[j
] - '0') * mult
;
442 mult
*= (hrtime_t
)10;
449 * Look-up the suffix to determine the multiplier.
451 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
452 if (strcasecmp(suffixes
[i
].name
, suffix
) == 0) {
453 mult
= suffixes
[i
].mult
;
458 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
463 * The default is frequency-per-second.
470 profile_create(val
, name
, kind
);
475 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
477 profile_probe_t
*prof
= parg
;
479 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
480 #if !defined(__APPLE__)
481 kmem_free(prof
, sizeof (profile_probe_t
));
483 if (prof
->prof_kind
== PROF_TICK
)
484 kmem_free(prof
, sizeof (profile_probe_t
));
486 kmem_free(prof
, sizeof (profile_probe_t
) + NCPU
*sizeof(profile_probe_percpu_t
));
487 #endif /* __APPLE__ */
489 ASSERT(profile_total
>= 1);
490 atomic_add_32(&profile_total
, -1);
495 profile_online(void *arg
, cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
497 profile_probe_t
*prof
= arg
;
498 profile_probe_percpu_t
*pcpu
;
500 #if !defined(__APPLE__)
501 pcpu
= kmem_zalloc(sizeof (profile_probe_percpu_t
), KM_SLEEP
);
503 pcpu
= ((profile_probe_percpu_t
*)(&(prof
[1]))) + cpu_number();
504 #endif /* __APPLE__ */
505 pcpu
->profc_probe
= prof
;
507 hdlr
->cyh_func
= profile_fire
;
508 hdlr
->cyh_arg
= pcpu
;
509 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
511 when
->cyt_interval
= prof
->prof_interval
;
512 #if !defined(__APPLE__)
513 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
516 #endif /* __APPLE__ */
518 pcpu
->profc_expected
= when
->cyt_when
;
519 pcpu
->profc_interval
= when
->cyt_interval
;
524 profile_offline(void *arg
, cpu_t
*cpu
, void *oarg
)
526 profile_probe_percpu_t
*pcpu
= oarg
;
528 ASSERT(pcpu
->profc_probe
== arg
);
529 #if !defined(__APPLE__)
530 kmem_free(pcpu
, sizeof (profile_probe_percpu_t
));
531 #endif /* __APPLE__ */
536 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
538 profile_probe_t
*prof
= parg
;
539 cyc_omni_handler_t omni
;
543 ASSERT(prof
->prof_interval
!= 0);
544 ASSERT(MUTEX_HELD(&cpu_lock
));
546 if (prof
->prof_kind
== PROF_TICK
) {
547 hdlr
.cyh_func
= profile_tick
;
549 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
551 when
.cyt_interval
= prof
->prof_interval
;
552 #if !defined(__APPLE__)
553 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
556 #endif /* __APPLE__ */
558 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
559 omni
.cyo_online
= profile_online
;
560 omni
.cyo_offline
= profile_offline
;
564 #if !defined(__APPLE__)
565 if (prof
->prof_kind
== PROF_TICK
) {
566 prof
->prof_cyclic
= cyclic_add(&hdlr
, &when
);
568 prof
->prof_cyclic
= cyclic_add_omni(&omni
);
571 if (prof
->prof_kind
== PROF_TICK
) {
572 prof
->prof_cyclic
= cyclic_timer_add(&hdlr
, &when
);
574 prof
->prof_cyclic
= (cyclic_id_t
)cyclic_add_omni(&omni
); /* cast puns cyclic_id_list_t with cyclic_id_t */
576 #endif /* __APPLE__ */
581 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
583 profile_probe_t
*prof
= parg
;
585 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
586 ASSERT(MUTEX_HELD(&cpu_lock
));
588 #if !defined(__APPLE__)
589 cyclic_remove(prof
->prof_cyclic
);
591 if (prof
->prof_kind
== PROF_TICK
) {
592 cyclic_timer_remove(prof
->prof_cyclic
);
594 cyclic_remove_omni((cyclic_id_list_t
)prof
->prof_cyclic
); /* cast puns cyclic_id_list_t with cyclic_id_t */
596 #endif /* __APPLE__ */
597 prof
->prof_cyclic
= CYCLIC_NONE
;
600 #if !defined(__APPLE__)
603 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
605 return (CPU
->cpu_profile_pc
== 0);
609 profile_usermode(void *arg
, dtrace_id_t id
, void *parg
)
611 #pragma unused(arg,id,parg)
612 return 1; /* XXX_BOGUS */
614 #endif /* __APPLE__ */
616 static dtrace_pattr_t profile_attr
= {
617 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
618 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
619 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
620 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
621 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
624 static dtrace_pops_t profile_pops
= {
638 profile_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
644 return (DDI_SUCCESS
);
646 return (DDI_FAILURE
);
649 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
650 DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
651 dtrace_register("profile", &profile_attr
,
652 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
653 &profile_pops
, NULL
, &profile_id
) != 0) {
654 ddi_remove_minor_node(devi
, NULL
);
655 return (DDI_FAILURE
);
658 #if !defined(__APPLE__)
659 profile_max
= ddi_getprop(DDI_DEV_T_ANY
, devi
, DDI_PROP_DONTPASS
,
660 "profile-max-probes", PROFILE_MAX_DEFAULT
);
662 profile_max
= PROFILE_MAX_DEFAULT
;
663 #endif /* __APPLE__ */
665 ddi_report_dev(devi
);
667 return (DDI_SUCCESS
);
670 #if !defined(__APPLE__)
672 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
678 return (DDI_SUCCESS
);
680 return (DDI_FAILURE
);
683 if (dtrace_unregister(profile_id
) != 0)
684 return (DDI_FAILURE
);
686 ddi_remove_minor_node(devi
, NULL
);
687 return (DDI_SUCCESS
);
692 profile_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
697 case DDI_INFO_DEVT2DEVINFO
:
698 *result
= (void *)profile_devi
;
701 case DDI_INFO_DEVT2INSTANCE
:
713 profile_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
718 static struct cb_ops profile_cb_ops
= {
719 profile_open
, /* open */
721 nulldev
, /* strategy */
731 ddi_prop_op
, /* cb_prop_op */
733 D_NEW
| D_MP
/* Driver compatibility flag */
736 static struct dev_ops profile_ops
= {
737 DEVO_REV
, /* devo_rev, */
739 profile_info
, /* get_dev_info */
740 nulldev
, /* identify */
742 profile_attach
, /* attach */
743 profile_detach
, /* detach */
745 &profile_cb_ops
, /* driver operations */
746 NULL
, /* bus operations */
747 nodev
/* dev power */
751 * Module linkage information for the kernel.
753 static struct modldrv modldrv
= {
754 &mod_driverops
, /* module type (this is a pseudo driver) */
755 "Profile Interrupt Tracing", /* name of module */
756 &profile_ops
, /* driver ops */
759 static struct modlinkage modlinkage
= {
768 return (mod_install(&modlinkage
));
772 _info(struct modinfo
*modinfop
)
774 return (mod_info(&modlinkage
, modinfop
));
780 return (mod_remove(&modlinkage
));
783 d_open_t _profile_open
;
785 int _profile_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
787 #pragma unused(dev,flags,devtype,p)
791 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
794 * A struct describing which functions will get invoked for certain
797 static struct cdevsw profile_cdevsw
=
799 _profile_open
, /* open */
800 eno_opcl
, /* close */
801 eno_rdwrt
, /* read */
802 eno_rdwrt
, /* write */
803 eno_ioctl
, /* ioctl */
804 (stop_fcn_t
*)nulldev
, /* stop */
805 (reset_fcn_t
*)nulldev
, /* reset */
807 eno_select
, /* select */
809 eno_strat
, /* strategy */
815 static int gProfileInited
= 0;
817 void profile_init( void )
819 if (0 == gProfileInited
)
821 int majdevno
= cdevsw_add(PROFILE_MAJOR
, &profile_cdevsw
);
824 printf("profile_init: failed to allocate a major number!\n");
829 profile_attach( (dev_info_t
*)majdevno
, DDI_ATTACH
);
833 panic("profile_init: called twice!\n");
836 #endif /* __APPLE__ */