]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/profile_prvd.c
xnu-3789.51.2.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / profile_prvd.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */
27
28 #ifdef KERNEL
29 #ifndef _KERNEL
30 #define _KERNEL /* Solaris vs. Darwin */
31 #endif
32 #endif
33
34 #include <kern/cpu_data.h>
35 #include <kern/thread.h>
36 #include <kern/assert.h>
37 #include <mach/thread_status.h>
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <miscfs/devfs/devfs.h>
47
48 #include <sys/dtrace.h>
49 #include <sys/dtrace_impl.h>
50
51 #include <sys/dtrace_glue.h>
52
53 #include <machine/pal_routines.h>
54
55 #if defined(__x86_64__)
56 extern x86_saved_state_t *find_kern_regs(thread_t);
57 #else
58 #error Unknown architecture
59 #endif
60
61 #undef ASSERT
62 #define ASSERT(x) do {} while(0)
63
64 extern void profile_init(void);
65
66 static dev_info_t *profile_devi;
67 static dtrace_provider_id_t profile_id;
68
69 /*
70 * Regardless of platform, the stack frames look like this in the case of the
71 * profile provider:
72 *
73 * profile_fire
74 * cyclic_expire
75 * cyclic_fire
76 * [ cbe ]
77 * [ interrupt code ]
78 *
79 * On x86, there are five frames from the generic interrupt code; further, the
80 * interrupted instruction appears as its own stack frame, giving us a total of
81 * 10.
82 *
83 * On SPARC, the picture is further complicated because the compiler
84 * optimizes away tail-calls -- so the following frames are optimized away:
85 *
86 * profile_fire
87 * cyclic_expire
88 *
89 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
90 * frame cannot be tail-call eliminated, yielding four frames in this case.
91 *
92 * All of the above constraints lead to the mess below. Yes, the profile
93 * provider should ideally figure this out on-the-fly by hitting one of its own
94 * probes and then walking its own stack trace. This is complicated, however,
95 * and the static definition doesn't seem to be overly brittle. Still, we
96 * allow for a manual override in case we get it completely wrong.
97 */
98
99 #if defined(__x86_64__)
100 #define PROF_ARTIFICIAL_FRAMES 9
101 #else
102 #error Unknown architecture
103 #endif
104
105 #define PROF_NAMELEN 15
106
107 #define PROF_PROFILE 0
108 #define PROF_TICK 1
109 #define PROF_PREFIX_PROFILE "profile-"
110 #define PROF_PREFIX_TICK "tick-"
111
112 typedef struct profile_probe {
113 char prof_name[PROF_NAMELEN];
114 dtrace_id_t prof_id;
115 int prof_kind;
116 hrtime_t prof_interval;
117 cyclic_id_t prof_cyclic;
118 } profile_probe_t;
119
120 typedef struct profile_probe_percpu {
121 hrtime_t profc_expected;
122 hrtime_t profc_interval;
123 profile_probe_t *profc_probe;
124 } profile_probe_percpu_t;
125
126 hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */
127 int profile_aframes = 0; /* override */
128
/*
 * Default profile-N rates (firings per second) provided when no probe
 * description is given.  The table has 20 slots; unlisted slots are zero
 * and zero entries are skipped by profile_provide().
 */
static int profile_rates[20] = {
	97, 199, 499, 997, 1999, 4001, 4999
};
135
/*
 * Default tick-N rates (firings per second) provided when no probe
 * description is given.  The table has 15 slots; unlisted slots are zero
 * and zero entries are skipped by profile_provide().
 */
static int profile_ticks[15] = {
	1, 10, 100, 500, 1000, 5000
};
141
142 /*
143 * profile_max defines the upper bound on the number of profile probes that
144 * can exist (this is to prevent malicious or clumsy users from exhausting
145 * system resources by creating a slew of profile probes). At mod load time,
146 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
147 * present in the profile.conf file.
148 */
149 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
150 static uint32_t profile_max; /* maximum number of profile probes */
151 static uint32_t profile_total; /* current number of profile probes */
152
153 static void
154 profile_fire(void *arg)
155 {
156 profile_probe_percpu_t *pcpu = arg;
157 profile_probe_t *prof = pcpu->profc_probe;
158 hrtime_t late;
159
160 late = dtrace_gethrtime() - pcpu->profc_expected;
161 pcpu->profc_expected += pcpu->profc_interval;
162
163 #if defined(__x86_64__)
164 x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
165
166 if (NULL != kern_regs) {
167 /* Kernel was interrupted. */
168 dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, late, 0, 0);
169
170 } else {
171 pal_register_cache_state(current_thread(), VALID);
172 /* Possibly a user interrupt */
173 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
174
175 if (NULL == tagged_regs) {
176 /* Too bad, so sad, no useful interrupt state. */
177 dtrace_probe(prof->prof_id, 0xcafebabe,
178 0x0, late, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
179 } else if (is_saved_state64(tagged_regs)) {
180 x86_saved_state64_t *regs = saved_state64(tagged_regs);
181
182 dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, late, 0, 0);
183 } else {
184 x86_saved_state32_t *regs = saved_state32(tagged_regs);
185
186 dtrace_probe(prof->prof_id, 0x0, regs->eip, late, 0, 0);
187 }
188 }
189 #else
190 #error Unknown architecture
191 #endif
192 }
193
194 static void
195 profile_tick(void *arg)
196 {
197 profile_probe_t *prof = arg;
198
199 #if defined(__x86_64__)
200 x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
201
202 if (NULL != kern_regs) {
203 /* Kernel was interrupted. */
204 dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0);
205 } else {
206 pal_register_cache_state(current_thread(), VALID);
207 /* Possibly a user interrupt */
208 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
209
210 if (NULL == tagged_regs) {
211 /* Too bad, so sad, no useful interrupt state. */
212 dtrace_probe(prof->prof_id, 0xcafebabe,
213 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
214 } else if (is_saved_state64(tagged_regs)) {
215 x86_saved_state64_t *regs = saved_state64(tagged_regs);
216
217 dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, 0, 0, 0);
218 } else {
219 x86_saved_state32_t *regs = saved_state32(tagged_regs);
220
221 dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0);
222 }
223 }
224 #else
225 #error Unknown architecture
226 #endif
227 }
228
229 static void
230 profile_create(hrtime_t interval, const char *name, int kind)
231 {
232 profile_probe_t *prof;
233
234 if (interval < profile_interval_min)
235 return;
236
237 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
238 return;
239
240 atomic_add_32(&profile_total, 1);
241 if (profile_total > profile_max) {
242 atomic_add_32(&profile_total, -1);
243 return;
244 }
245
246 if (PROF_TICK == kind)
247 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
248 else
249 prof = kmem_zalloc(sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t), KM_SLEEP);
250
251 (void) strlcpy(prof->prof_name, name, sizeof(prof->prof_name));
252 prof->prof_interval = interval;
253 prof->prof_cyclic = CYCLIC_NONE;
254 prof->prof_kind = kind;
255 prof->prof_id = dtrace_probe_create(profile_id,
256 NULL, NULL, name,
257 profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
258 }
259
260 /*ARGSUSED*/
261 static void
262 profile_provide(void *arg, const dtrace_probedesc_t *desc)
263 {
264 #pragma unused(arg) /* __APPLE__ */
265 int i, j, rate, kind;
266 hrtime_t val = 0, mult = 1, len;
267 const char *name, *suffix = NULL;
268
269 const struct {
270 const char *prefix;
271 int kind;
272 } types[] = {
273 { PROF_PREFIX_PROFILE, PROF_PROFILE },
274 { PROF_PREFIX_TICK, PROF_TICK },
275 { NULL, 0 }
276 };
277
278 const struct {
279 const char *name;
280 hrtime_t mult;
281 } suffixes[] = {
282 { "ns", NANOSEC / NANOSEC },
283 { "nsec", NANOSEC / NANOSEC },
284 { "us", NANOSEC / MICROSEC },
285 { "usec", NANOSEC / MICROSEC },
286 { "ms", NANOSEC / MILLISEC },
287 { "msec", NANOSEC / MILLISEC },
288 { "s", NANOSEC / SEC },
289 { "sec", NANOSEC / SEC },
290 { "m", NANOSEC * (hrtime_t)60 },
291 { "min", NANOSEC * (hrtime_t)60 },
292 { "h", NANOSEC * (hrtime_t)(60 * 60) },
293 { "hour", NANOSEC * (hrtime_t)(60 * 60) },
294 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) },
295 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) },
296 { "hz", 0 },
297 { NULL, 0 }
298 };
299
300 if (desc == NULL) {
301 char n[PROF_NAMELEN];
302
303 /*
304 * If no description was provided, provide all of our probes.
305 */
306 for (i = 0; i < (int)(sizeof (profile_rates) / sizeof (int)); i++) {
307 if ((rate = profile_rates[i]) == 0)
308 continue;
309
310 (void) snprintf(n, PROF_NAMELEN, "%s%d",
311 PROF_PREFIX_PROFILE, rate);
312 profile_create(NANOSEC / rate, n, PROF_PROFILE);
313 }
314
315 for (i = 0; i < (int)(sizeof (profile_ticks) / sizeof (int)); i++) {
316 if ((rate = profile_ticks[i]) == 0)
317 continue;
318
319 (void) snprintf(n, PROF_NAMELEN, "%s%d",
320 PROF_PREFIX_TICK, rate);
321 profile_create(NANOSEC / rate, n, PROF_TICK);
322 }
323
324 return;
325 }
326
327 name = desc->dtpd_name;
328
329 for (i = 0; types[i].prefix != NULL; i++) {
330 len = strlen(types[i].prefix);
331
332 if (strncmp(name, types[i].prefix, len) != 0)
333 continue;
334 break;
335 }
336
337 if (types[i].prefix == NULL)
338 return;
339
340 kind = types[i].kind;
341 j = strlen(name) - len;
342
343 /*
344 * We need to start before any time suffix.
345 */
346 for (j = strlen(name); j >= len; j--) {
347 if (name[j] >= '0' && name[j] <= '9')
348 break;
349 suffix = &name[j];
350 }
351
352 ASSERT(suffix != NULL);
353
354 /*
355 * Now determine the numerical value present in the probe name.
356 */
357 for (; j >= len; j--) {
358 if (name[j] < '0' || name[j] > '9')
359 return;
360
361 val += (name[j] - '0') * mult;
362 mult *= (hrtime_t)10;
363 }
364
365 if (val == 0)
366 return;
367
368 /*
369 * Look-up the suffix to determine the multiplier.
370 */
371 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
372 /* APPLE NOTE: Darwin employs size bounded string operations */
373 if (strncasecmp(suffixes[i].name, suffix, strlen(suffixes[i].name) + 1) == 0) {
374 mult = suffixes[i].mult;
375 break;
376 }
377 }
378
379 if (suffixes[i].name == NULL && *suffix != '\0')
380 return;
381
382 if (mult == 0) {
383 /*
384 * The default is frequency-per-second.
385 */
386 val = NANOSEC / val;
387 } else {
388 val *= mult;
389 }
390
391 profile_create(val, name, kind);
392 }
393
394 /*ARGSUSED*/
395 static void
396 profile_destroy(void *arg, dtrace_id_t id, void *parg)
397 {
398 #pragma unused(arg,id) /* __APPLE__ */
399 profile_probe_t *prof = parg;
400
401 ASSERT(prof->prof_cyclic == CYCLIC_NONE);
402
403 if (prof->prof_kind == PROF_TICK)
404 kmem_free(prof, sizeof (profile_probe_t));
405 else
406 kmem_free(prof, sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t));
407
408 ASSERT(profile_total >= 1);
409 atomic_add_32(&profile_total, -1);
410 }
411
412 /*ARGSUSED*/
413 static void
414 profile_online(void *arg, dtrace_cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
415 {
416 #pragma unused(cpu) /* __APPLE__ */
417 profile_probe_t *prof = arg;
418 profile_probe_percpu_t *pcpu;
419
420 pcpu = ((profile_probe_percpu_t *)(&(prof[1]))) + cpu_number();
421 pcpu->profc_probe = prof;
422
423 hdlr->cyh_func = profile_fire;
424 hdlr->cyh_arg = pcpu;
425 hdlr->cyh_level = CY_HIGH_LEVEL;
426
427 when->cyt_interval = prof->prof_interval;
428 when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
429
430 pcpu->profc_expected = when->cyt_when;
431 pcpu->profc_interval = when->cyt_interval;
432 }
433
434 /*ARGSUSED*/
435 static void
436 profile_offline(void *arg, dtrace_cpu_t *cpu, void *oarg)
437 {
438 profile_probe_percpu_t *pcpu = oarg;
439
440 ASSERT(pcpu->profc_probe == arg);
441 #pragma unused(pcpu,arg,cpu) /* __APPLE__ */
442 }
443
444 /*ARGSUSED*/
445 static int
446 profile_enable(void *arg, dtrace_id_t id, void *parg)
447 {
448 #pragma unused(arg,id) /* __APPLE__ */
449 profile_probe_t *prof = parg;
450 cyc_omni_handler_t omni;
451 cyc_handler_t hdlr;
452 cyc_time_t when;
453
454 ASSERT(prof->prof_interval != 0);
455 ASSERT(MUTEX_HELD(&cpu_lock));
456
457 if (prof->prof_kind == PROF_TICK) {
458 hdlr.cyh_func = profile_tick;
459 hdlr.cyh_arg = prof;
460 hdlr.cyh_level = CY_HIGH_LEVEL;
461
462 when.cyt_interval = prof->prof_interval;
463 #if !defined(__APPLE__)
464 when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
465 #else
466 when.cyt_when = 0;
467 #endif /* __APPLE__ */
468 } else {
469 ASSERT(prof->prof_kind == PROF_PROFILE);
470 omni.cyo_online = profile_online;
471 omni.cyo_offline = profile_offline;
472 omni.cyo_arg = prof;
473 }
474
475 if (prof->prof_kind == PROF_TICK) {
476 prof->prof_cyclic = cyclic_timer_add(&hdlr, &when);
477 } else {
478 prof->prof_cyclic = (cyclic_id_t)cyclic_add_omni(&omni); /* cast puns cyclic_id_list_t with cyclic_id_t */
479 }
480
481 return(0);
482 }
483
484 /*ARGSUSED*/
485 static void
486 profile_disable(void *arg, dtrace_id_t id, void *parg)
487 {
488 profile_probe_t *prof = parg;
489
490 ASSERT(prof->prof_cyclic != CYCLIC_NONE);
491 ASSERT(MUTEX_HELD(&cpu_lock));
492
493 #pragma unused(arg,id)
494 if (prof->prof_kind == PROF_TICK) {
495 cyclic_timer_remove(prof->prof_cyclic);
496 } else {
497 cyclic_remove_omni((cyclic_id_list_t)prof->prof_cyclic); /* cast puns cyclic_id_list_t with cyclic_id_t */
498 }
499 prof->prof_cyclic = CYCLIC_NONE;
500 }
501
502 /*
503 * APPLE NOTE: profile_usermode call not supported.
504 */
505 static int
506 profile_usermode(void *arg, dtrace_id_t id, void *parg)
507 {
508 #pragma unused(arg,id,parg)
509 return 1; /* XXX_BOGUS */
510 }
511
512 static dtrace_pattr_t profile_attr = {
513 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
514 { DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
515 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
516 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
517 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
518 };
519
520 static dtrace_pops_t profile_pops = {
521 profile_provide,
522 NULL,
523 profile_enable,
524 profile_disable,
525 NULL,
526 NULL,
527 NULL,
528 NULL,
529 profile_usermode,
530 profile_destroy
531 };
532
533 static int
534 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
535 {
536 switch (cmd) {
537 case DDI_ATTACH:
538 break;
539 case DDI_RESUME:
540 return (DDI_SUCCESS);
541 default:
542 return (DDI_FAILURE);
543 }
544
545 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
546 DDI_PSEUDO, 0) == DDI_FAILURE ||
547 dtrace_register("profile", &profile_attr,
548 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL,
549 &profile_pops, NULL, &profile_id) != 0) {
550 ddi_remove_minor_node(devi, NULL);
551 return (DDI_FAILURE);
552 }
553
554 profile_max = PROFILE_MAX_DEFAULT;
555
556 ddi_report_dev(devi);
557 profile_devi = devi;
558 return (DDI_SUCCESS);
559 }
560
561 /*
562 * APPLE NOTE: profile_detach not implemented
563 */
564 #if !defined(__APPLE__)
565 static int
566 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
567 {
568 switch (cmd) {
569 case DDI_DETACH:
570 break;
571 case DDI_SUSPEND:
572 return (DDI_SUCCESS);
573 default:
574 return (DDI_FAILURE);
575 }
576
577 if (dtrace_unregister(profile_id) != 0)
578 return (DDI_FAILURE);
579
580 ddi_remove_minor_node(devi, NULL);
581 return (DDI_SUCCESS);
582 }
583 #endif /* __APPLE__ */
584
585 d_open_t _profile_open;
586
587 int _profile_open(dev_t dev, int flags, int devtype, struct proc *p)
588 {
589 #pragma unused(dev,flags,devtype,p)
590 return 0;
591 }
592
593 #define PROFILE_MAJOR -24 /* let the kernel pick the device number */
594
595 /*
596 * A struct describing which functions will get invoked for certain
597 * actions.
598 */
599 static struct cdevsw profile_cdevsw =
600 {
601 _profile_open, /* open */
602 eno_opcl, /* close */
603 eno_rdwrt, /* read */
604 eno_rdwrt, /* write */
605 eno_ioctl, /* ioctl */
606 (stop_fcn_t *)nulldev, /* stop */
607 (reset_fcn_t *)nulldev, /* reset */
608 NULL, /* tty's */
609 eno_select, /* select */
610 eno_mmap, /* mmap */
611 eno_strat, /* strategy */
612 eno_getc, /* getc */
613 eno_putc, /* putc */
614 0 /* type */
615 };
616
617 static int gProfileInited = 0;
618
619 void profile_init( void )
620 {
621 if (0 == gProfileInited)
622 {
623 int majdevno = cdevsw_add(PROFILE_MAJOR, &profile_cdevsw);
624
625 if (majdevno < 0) {
626 printf("profile_init: failed to allocate a major number!\n");
627 gProfileInited = 0;
628 return;
629 }
630
631 profile_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
632
633 gProfileInited = 1;
634 } else
635 panic("profile_init: called twice!\n");
636 }
637 #undef PROFILE_MAJOR