]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/profile_prvd.c
xnu-2782.40.9.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / profile_prvd.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
b0d623f7 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
b0d623f7 26/* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */
2d21ac55 27
2d21ac55
A
/*
 * Solaris vs. Darwin: make sure _KERNEL is defined whenever KERNEL is,
 * without redefining it if it is already set.
 */
#if defined(KERNEL) && !defined(_KERNEL)
#define _KERNEL
#endif
33
2d21ac55
A
34#include <kern/cpu_data.h>
35#include <kern/thread.h>
6d2010ae 36#include <kern/assert.h>
2d21ac55
A
37#include <mach/thread_status.h>
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/errno.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/conf.h>
45#include <sys/fcntl.h>
46#include <miscfs/devfs/devfs.h>
47
48#include <sys/dtrace.h>
49#include <sys/dtrace_impl.h>
50
51#include <sys/dtrace_glue.h>
52
6d2010ae
A
53#include <machine/pal_routines.h>
54
39236c6e 55#if defined(__x86_64__)
b0d623f7 56extern x86_saved_state_t *find_kern_regs(thread_t);
2d21ac55
A
57#else
58#error Unknown architecture
59#endif
60
61#undef ASSERT
62#define ASSERT(x) do {} while(0)
63
64extern void profile_init(void);
2d21ac55
A
65
66static dev_info_t *profile_devi;
67static dtrace_provider_id_t profile_id;
68
/*
 * Regardless of platform, the stack frames look like this in the case of the
 * profile provider:
 *
 *	profile_fire
 *	cyclic_expire
 *	cyclic_fire
 *	[ cbe ]
 *	[ interrupt code ]
 *
 * On x86, there are five frames from the generic interrupt code; further, the
 * interrupted instruction appears as its own stack frame, giving us a total of
 * 10.
 *
 * On SPARC, the picture is further complicated because the compiler
 * optimizes away tail-calls -- so the following frames are optimized away:
 *
 *	profile_fire
 *	cyclic_expire
 *
 * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
 * frame cannot be tail-call eliminated, yielding four frames in this case.
 *
 * All of the above constraints lead to the mess below.  Yes, the profile
 * provider should ideally figure this out on-the-fly by hitting one of its own
 * probes and then walking its own stack trace.  This is complicated, however,
 * and the static definition doesn't seem to be overly brittle.  Still, we
 * allow for a manual override in case we get it completely wrong.
 *
 * NOTE(review): the text above arrives at 10 frames for x86, while the
 * constant defined below for x86_64 is 9 -- confirm which one is intended.
 */

#if !defined(__x86_64__)
#error Unknown architecture
#else
#define PROF_ARTIFICIAL_FRAMES	9
#endif
104
2d21ac55
A
/* Size, in bytes, of the probe-name buffers used by this provider. */
#define PROF_NAMELEN		15

/* Probe kinds, stored in profile_probe_t.prof_kind. */
#define PROF_PROFILE		0
#define PROF_TICK		1

/* Probe-name prefixes that select each kind. */
#define PROF_PREFIX_PROFILE	"profile-"
#define PROF_PREFIX_TICK	"tick-"
111
112typedef struct profile_probe {
113 char prof_name[PROF_NAMELEN];
114 dtrace_id_t prof_id;
115 int prof_kind;
116 hrtime_t prof_interval;
117 cyclic_id_t prof_cyclic;
118} profile_probe_t;
119
120typedef struct profile_probe_percpu {
121 hrtime_t profc_expected;
122 hrtime_t profc_interval;
123 profile_probe_t *profc_probe;
124} profile_probe_percpu_t;
125
126hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */
127int profile_aframes = 0; /* override */
128
/*
 * Rates used to name the default profile-<rate> probes.  The table has 20
 * slots; slots left at zero are skipped by profile_provide().
 */
static int profile_rates[20] = {
	97, 199, 499, 997, 1999,
	4001, 4999
	/* remaining 13 slots are implicitly zero */
};
135
/*
 * Rates used to name the default tick-<rate> probes.  The table has 15
 * slots; slots left at zero are skipped by profile_provide().
 */
static int profile_ticks[15] = {
	1, 10, 100, 500, 1000,
	5000
	/* remaining 9 slots are implicitly zero */
};
141
142/*
143 * profile_max defines the upper bound on the number of profile probes that
144 * can exist (this is to prevent malicious or clumsy users from exhausting
145 * system resources by creating a slew of profile probes). At mod load time,
146 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
147 * present in the profile.conf file.
148 */
#define PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */

static uint32_t	profile_max;	/* maximum number of profile probes */
static uint32_t	profile_total;	/* current number of profile probes */
152
153static void
154profile_fire(void *arg)
155{
156 profile_probe_percpu_t *pcpu = arg;
157 profile_probe_t *prof = pcpu->profc_probe;
158 hrtime_t late;
159
160 late = dtrace_gethrtime() - pcpu->profc_expected;
161 pcpu->profc_expected += pcpu->profc_interval;
162
39236c6e 163#if defined(__x86_64__)
b0d623f7 164 x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
2d21ac55
A
165
166 if (NULL != kern_regs) {
167 /* Kernel was interrupted. */
fe8ab488 168 dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, late, 0, 0);
39236c6e 169
2d21ac55 170 } else {
6d2010ae 171 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
172 /* Possibly a user interrupt */
173 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
174
175 if (NULL == tagged_regs) {
176 /* Too bad, so sad, no useful interrupt state. */
177 dtrace_probe(prof->prof_id, 0xcafebabe,
fe8ab488 178 0x0, late, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
2d21ac55
A
179 } else if (is_saved_state64(tagged_regs)) {
180 x86_saved_state64_t *regs = saved_state64(tagged_regs);
181
fe8ab488 182 dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, late, 0, 0);
2d21ac55
A
183 } else {
184 x86_saved_state32_t *regs = saved_state32(tagged_regs);
185
fe8ab488 186 dtrace_probe(prof->prof_id, 0x0, regs->eip, late, 0, 0);
316670eb 187 }
2d21ac55
A
188 }
189#else
190#error Unknown architecture
191#endif
2d21ac55
A
192}
193
194static void
195profile_tick(void *arg)
196{
197 profile_probe_t *prof = arg;
198
39236c6e 199#if defined(__x86_64__)
b0d623f7 200 x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
2d21ac55
A
201
202 if (NULL != kern_regs) {
203 /* Kernel was interrupted. */
b0d623f7 204 dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0);
2d21ac55 205 } else {
6d2010ae 206 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
207 /* Possibly a user interrupt */
208 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
209
210 if (NULL == tagged_regs) {
211 /* Too bad, so sad, no useful interrupt state. */
212 dtrace_probe(prof->prof_id, 0xcafebabe,
213 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
214 } else if (is_saved_state64(tagged_regs)) {
215 x86_saved_state64_t *regs = saved_state64(tagged_regs);
216
217 dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, 0, 0, 0);
218 } else {
219 x86_saved_state32_t *regs = saved_state32(tagged_regs);
220
221 dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0);
316670eb 222 }
2d21ac55
A
223 }
224#else
225#error Unknown architecture
226#endif
2d21ac55
A
227}
228
229static void
230profile_create(hrtime_t interval, const char *name, int kind)
231{
232 profile_probe_t *prof;
233
234 if (interval < profile_interval_min)
235 return;
236
237 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
238 return;
239
240 atomic_add_32(&profile_total, 1);
241 if (profile_total > profile_max) {
242 atomic_add_32(&profile_total, -1);
243 return;
244 }
245
2d21ac55
A
246 if (PROF_TICK == kind)
247 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
248 else
249 prof = kmem_zalloc(sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t), KM_SLEEP);
fe8ab488 250
2d21ac55
A
251 (void) strlcpy(prof->prof_name, name, sizeof(prof->prof_name));
252 prof->prof_interval = interval;
253 prof->prof_cyclic = CYCLIC_NONE;
254 prof->prof_kind = kind;
255 prof->prof_id = dtrace_probe_create(profile_id,
256 NULL, NULL, name,
257 profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
258}
259
260/*ARGSUSED*/
261static void
262profile_provide(void *arg, const dtrace_probedesc_t *desc)
263{
b0d623f7 264#pragma unused(arg) /* __APPLE__ */
2d21ac55
A
265 int i, j, rate, kind;
266 hrtime_t val = 0, mult = 1, len;
267 const char *name, *suffix = NULL;
268
b0d623f7
A
269 const struct {
270 const char *prefix;
271 int kind;
272 } types[] = {
273 { PROF_PREFIX_PROFILE, PROF_PROFILE },
274 { PROF_PREFIX_TICK, PROF_TICK },
2d21ac55 275 { NULL, 0 }
2d21ac55
A
276 };
277
b0d623f7
A
278 const struct {
279 const char *name;
280 hrtime_t mult;
281 } suffixes[] = {
282 { "ns", NANOSEC / NANOSEC },
283 { "nsec", NANOSEC / NANOSEC },
284 { "us", NANOSEC / MICROSEC },
285 { "usec", NANOSEC / MICROSEC },
286 { "ms", NANOSEC / MILLISEC },
287 { "msec", NANOSEC / MILLISEC },
288 { "s", NANOSEC / SEC },
289 { "sec", NANOSEC / SEC },
290 { "m", NANOSEC * (hrtime_t)60 },
291 { "min", NANOSEC * (hrtime_t)60 },
292 { "h", NANOSEC * (hrtime_t)(60 * 60) },
293 { "hour", NANOSEC * (hrtime_t)(60 * 60) },
294 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) },
295 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) },
296 { "hz", 0 },
297 { NULL, 0 }
298 };
b0d623f7 299
2d21ac55
A
300 if (desc == NULL) {
301 char n[PROF_NAMELEN];
302
303 /*
304 * If no description was provided, provide all of our probes.
305 */
b0d623f7 306 for (i = 0; i < (int)(sizeof (profile_rates) / sizeof (int)); i++) {
2d21ac55
A
307 if ((rate = profile_rates[i]) == 0)
308 continue;
309
310 (void) snprintf(n, PROF_NAMELEN, "%s%d",
311 PROF_PREFIX_PROFILE, rate);
312 profile_create(NANOSEC / rate, n, PROF_PROFILE);
313 }
314
b0d623f7 315 for (i = 0; i < (int)(sizeof (profile_ticks) / sizeof (int)); i++) {
2d21ac55
A
316 if ((rate = profile_ticks[i]) == 0)
317 continue;
318
319 (void) snprintf(n, PROF_NAMELEN, "%s%d",
320 PROF_PREFIX_TICK, rate);
321 profile_create(NANOSEC / rate, n, PROF_TICK);
322 }
323
324 return;
325 }
326
327 name = desc->dtpd_name;
328
329 for (i = 0; types[i].prefix != NULL; i++) {
330 len = strlen(types[i].prefix);
331
332 if (strncmp(name, types[i].prefix, len) != 0)
333 continue;
334 break;
335 }
336
337 if (types[i].prefix == NULL)
338 return;
339
340 kind = types[i].kind;
341 j = strlen(name) - len;
342
343 /*
344 * We need to start before any time suffix.
345 */
346 for (j = strlen(name); j >= len; j--) {
347 if (name[j] >= '0' && name[j] <= '9')
348 break;
349 suffix = &name[j];
350 }
351
352 ASSERT(suffix != NULL);
353
354 /*
355 * Now determine the numerical value present in the probe name.
356 */
357 for (; j >= len; j--) {
358 if (name[j] < '0' || name[j] > '9')
359 return;
360
361 val += (name[j] - '0') * mult;
362 mult *= (hrtime_t)10;
363 }
364
365 if (val == 0)
366 return;
367
368 /*
369 * Look-up the suffix to determine the multiplier.
370 */
371 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
fe8ab488 372 /* APPLE NOTE: Darwin employs size bounded string operations */
b0d623f7
A
373 if (strncasecmp(suffixes[i].name, suffix, strlen(suffixes[i].name) + 1) == 0) {
374 mult = suffixes[i].mult;
375 break;
376 }
2d21ac55
A
377 }
378
379 if (suffixes[i].name == NULL && *suffix != '\0')
380 return;
381
382 if (mult == 0) {
383 /*
384 * The default is frequency-per-second.
385 */
386 val = NANOSEC / val;
387 } else {
388 val *= mult;
389 }
390
391 profile_create(val, name, kind);
392}
393
394/*ARGSUSED*/
395static void
396profile_destroy(void *arg, dtrace_id_t id, void *parg)
397{
b0d623f7 398#pragma unused(arg,id) /* __APPLE__ */
2d21ac55
A
399 profile_probe_t *prof = parg;
400
401 ASSERT(prof->prof_cyclic == CYCLIC_NONE);
fe8ab488 402
2d21ac55
A
403 if (prof->prof_kind == PROF_TICK)
404 kmem_free(prof, sizeof (profile_probe_t));
405 else
406 kmem_free(prof, sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t));
2d21ac55
A
407
408 ASSERT(profile_total >= 1);
409 atomic_add_32(&profile_total, -1);
410}
411
412/*ARGSUSED*/
413static void
6d2010ae 414profile_online(void *arg, dtrace_cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
2d21ac55 415{
b0d623f7 416#pragma unused(cpu) /* __APPLE__ */
2d21ac55
A
417 profile_probe_t *prof = arg;
418 profile_probe_percpu_t *pcpu;
419
2d21ac55 420 pcpu = ((profile_probe_percpu_t *)(&(prof[1]))) + cpu_number();
2d21ac55
A
421 pcpu->profc_probe = prof;
422
423 hdlr->cyh_func = profile_fire;
424 hdlr->cyh_arg = pcpu;
425 hdlr->cyh_level = CY_HIGH_LEVEL;
426
427 when->cyt_interval = prof->prof_interval;
2d21ac55 428 when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
2d21ac55
A
429
430 pcpu->profc_expected = when->cyt_when;
431 pcpu->profc_interval = when->cyt_interval;
432}
433
434/*ARGSUSED*/
435static void
6d2010ae 436profile_offline(void *arg, dtrace_cpu_t *cpu, void *oarg)
2d21ac55
A
437{
438 profile_probe_percpu_t *pcpu = oarg;
439
440 ASSERT(pcpu->profc_probe == arg);
b0d623f7 441#pragma unused(pcpu,arg,cpu) /* __APPLE__ */
2d21ac55
A
442}
443
444/*ARGSUSED*/
6d2010ae 445static int
2d21ac55
A
446profile_enable(void *arg, dtrace_id_t id, void *parg)
447{
b0d623f7 448#pragma unused(arg,id) /* __APPLE__ */
2d21ac55
A
449 profile_probe_t *prof = parg;
450 cyc_omni_handler_t omni;
451 cyc_handler_t hdlr;
452 cyc_time_t when;
453
454 ASSERT(prof->prof_interval != 0);
455 ASSERT(MUTEX_HELD(&cpu_lock));
456
457 if (prof->prof_kind == PROF_TICK) {
458 hdlr.cyh_func = profile_tick;
459 hdlr.cyh_arg = prof;
460 hdlr.cyh_level = CY_HIGH_LEVEL;
461
462 when.cyt_interval = prof->prof_interval;
463#if !defined(__APPLE__)
464 when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
465#else
466 when.cyt_when = 0;
467#endif /* __APPLE__ */
468 } else {
469 ASSERT(prof->prof_kind == PROF_PROFILE);
470 omni.cyo_online = profile_online;
471 omni.cyo_offline = profile_offline;
472 omni.cyo_arg = prof;
473 }
474
2d21ac55
A
475 if (prof->prof_kind == PROF_TICK) {
476 prof->prof_cyclic = cyclic_timer_add(&hdlr, &when);
477 } else {
478 prof->prof_cyclic = (cyclic_id_t)cyclic_add_omni(&omni); /* cast puns cyclic_id_list_t with cyclic_id_t */
479 }
fe8ab488 480
6d2010ae 481 return(0);
2d21ac55
A
482}
483
484/*ARGSUSED*/
485static void
486profile_disable(void *arg, dtrace_id_t id, void *parg)
487{
488 profile_probe_t *prof = parg;
489
490 ASSERT(prof->prof_cyclic != CYCLIC_NONE);
491 ASSERT(MUTEX_HELD(&cpu_lock));
492
b0d623f7 493#pragma unused(arg,id)
2d21ac55
A
494 if (prof->prof_kind == PROF_TICK) {
495 cyclic_timer_remove(prof->prof_cyclic);
496 } else {
497 cyclic_remove_omni((cyclic_id_list_t)prof->prof_cyclic); /* cast puns cyclic_id_list_t with cyclic_id_t */
498 }
2d21ac55
A
499 prof->prof_cyclic = CYCLIC_NONE;
500}
501
fe8ab488
A
502/*
503 * APPLE NOTE: profile_usermode call not supported.
504 */
2d21ac55
A
505static int
506profile_usermode(void *arg, dtrace_id_t id, void *parg)
507{
508#pragma unused(arg,id,parg)
509 return 1; /* XXX_BOGUS */
510}
2d21ac55
A
511
512static dtrace_pattr_t profile_attr = {
513{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
514{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
515{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
516{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
517{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
518};
519
520static dtrace_pops_t profile_pops = {
521 profile_provide,
522 NULL,
523 profile_enable,
524 profile_disable,
525 NULL,
526 NULL,
527 NULL,
528 NULL,
529 profile_usermode,
530 profile_destroy
531};
532
533static int
534profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
535{
536 switch (cmd) {
537 case DDI_ATTACH:
538 break;
539 case DDI_RESUME:
540 return (DDI_SUCCESS);
541 default:
542 return (DDI_FAILURE);
543 }
544
b0d623f7
A
545 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
546 DDI_PSEUDO, 0) == DDI_FAILURE ||
547 dtrace_register("profile", &profile_attr,
548 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL,
549 &profile_pops, NULL, &profile_id) != 0) {
550 ddi_remove_minor_node(devi, NULL);
551 return (DDI_FAILURE);
552 }
553
2d21ac55 554 profile_max = PROFILE_MAX_DEFAULT;
2d21ac55
A
555
556 ddi_report_dev(devi);
557 profile_devi = devi;
558 return (DDI_SUCCESS);
559}
560
fe8ab488
A
561/*
562 * APPLE NOTE: profile_detach not implemented
563 */
2d21ac55
A
564#if !defined(__APPLE__)
565static int
566profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
567{
568 switch (cmd) {
569 case DDI_DETACH:
570 break;
571 case DDI_SUSPEND:
572 return (DDI_SUCCESS);
573 default:
574 return (DDI_FAILURE);
575 }
576
577 if (dtrace_unregister(profile_id) != 0)
578 return (DDI_FAILURE);
579
580 ddi_remove_minor_node(devi, NULL);
581 return (DDI_SUCCESS);
582}
fe8ab488 583#endif /* __APPLE__ */
2d21ac55 584
2d21ac55
A
585d_open_t _profile_open;
586
587int _profile_open(dev_t dev, int flags, int devtype, struct proc *p)
588{
589#pragma unused(dev,flags,devtype,p)
590 return 0;
591}
592
593#define PROFILE_MAJOR -24 /* let the kernel pick the device number */
594
595/*
596 * A struct describing which functions will get invoked for certain
597 * actions.
598 */
599static struct cdevsw profile_cdevsw =
600{
601 _profile_open, /* open */
602 eno_opcl, /* close */
603 eno_rdwrt, /* read */
604 eno_rdwrt, /* write */
605 eno_ioctl, /* ioctl */
606 (stop_fcn_t *)nulldev, /* stop */
607 (reset_fcn_t *)nulldev, /* reset */
608 NULL, /* tty's */
609 eno_select, /* select */
610 eno_mmap, /* mmap */
611 eno_strat, /* strategy */
612 eno_getc, /* getc */
613 eno_putc, /* putc */
614 0 /* type */
615};
616
617static int gProfileInited = 0;
618
619void profile_init( void )
620{
621 if (0 == gProfileInited)
622 {
623 int majdevno = cdevsw_add(PROFILE_MAJOR, &profile_cdevsw);
624
625 if (majdevno < 0) {
626 printf("profile_init: failed to allocate a major number!\n");
627 gProfileInited = 0;
628 return;
629 }
630
b0d623f7 631 profile_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
2d21ac55
A
632
633 gProfileInited = 1;
634 } else
635 panic("profile_init: called twice!\n");
636}
637#undef PROFILE_MAJOR