]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-1699.24.8.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined(__i386__) || defined (__x86_64__)
51 #define SYSCALL_CLASS_SHIFT 24
52 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56 typedef x86_saved_state_t savearea_t;
57 #endif
58
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/proc.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
64 #include <sys/conf.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
67
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
70 #include "systrace.h"
71 #include <sys/stat.h>
72 #include <sys/systm.h>
73 #include <sys/conf.h>
74 #include <sys/user.h>
75
76 #include <machine/pal_routines.h>
77
78 #if defined(__i386__) || defined (__x86_64__)
79 #define SYSTRACE_ARTIFICIAL_FRAMES 2
80 #define MACHTRACE_ARTIFICIAL_FRAMES 3
81 #else
82 #error Unknown Architecture
83 #endif
84
85 #include <sys/sysent.h>
86 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
87 #define NSYSCALL nsysent /* and is less than 500 or so */
88
89 extern const char *syscallnames[];
90
91 #include <sys/dtrace_glue.h>
92 #define casptr dtrace_casptr
93 #define membar_enter dtrace_membar_producer
94
95 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
96 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
97
98 systrace_sysent_t *systrace_sysent = NULL;
99 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
100 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
101
/*
 * No-op with the same parameter list as the systrace_probe function
 * pointer: a probe id followed by eight 64-bit arguments, all discarded.
 * systrace_attach() installs it in systrace_probe when registration fails.
 */
void
systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7)
{
#pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
}
108
109 int32_t
110 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
111 {
112 boolean_t flavor;
113 unsigned short code;
114
115 systrace_sysent_t *sy;
116 dtrace_id_t id;
117 int32_t rval;
118 #if 0 /* XXX */
119 proc_t *p;
120 #endif
121 syscall_arg_t *ip = (syscall_arg_t *)uap;
122
123 #if defined(__i386__) || defined (__x86_64__)
124 #pragma unused(flavor)
125 {
126 pal_register_cache_state(current_thread(), VALID);
127 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
128
129 if (is_saved_state64(tagged_regs)) {
130 x86_saved_state64_t *regs = saved_state64(tagged_regs);
131 code = regs->rax & SYSCALL_NUMBER_MASK;
132 /*
133 * Check for indirect system call... system call number
134 * passed as 'arg0'
135 */
136 if (code == 0) {
137 code = regs->rdi;
138 }
139 } else {
140 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
141
142 if (code == 0) {
143 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
144 code = fuword(params);
145 }
146 }
147 }
148 #else
149 #error Unknown Architecture
150 #endif
151
152 // Bounds "check" the value of code a la unix_syscall
153 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
154
155 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
156 if (ip)
157 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7));
158 else
159 (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
160 }
161
162 #if 0 /* XXX */
163 /*
164 * We want to explicitly allow DTrace consumers to stop a process
165 * before it actually executes the meat of the syscall.
166 */
167 p = ttoproc(curthread);
168 mutex_enter(&p->p_lock);
169 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
170 curthread->t_dtrace_stop = 0;
171 stop(PR_REQUESTED, 0);
172 }
173 mutex_exit(&p->p_lock);
174 #endif
175
176 rval = (*sy->stsy_underlying)(pp, uap, rv);
177
178 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
179 uint64_t munged_rv0, munged_rv1;
180 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
181
182 if (uthread)
183 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
184
185 /*
186 * "Decode" rv for use in the call to dtrace_probe()
187 */
188 if (rval == ERESTART) {
189 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
190 munged_rv1 = -1LL;
191 } else if (rval != EJUSTRETURN) {
192 if (rval) {
193 munged_rv0 = -1LL; /* Mimic what libc will do. */
194 munged_rv1 = -1LL;
195 } else {
196 switch (sy->stsy_return_type) {
197 case _SYSCALL_RET_INT_T:
198 munged_rv0 = rv[0];
199 munged_rv1 = rv[1];
200 break;
201 case _SYSCALL_RET_UINT_T:
202 munged_rv0 = ((u_int)rv[0]);
203 munged_rv1 = ((u_int)rv[1]);
204 break;
205 case _SYSCALL_RET_OFF_T:
206 case _SYSCALL_RET_UINT64_T:
207 munged_rv0 = *(u_int64_t *)rv;
208 munged_rv1 = 0LL;
209 break;
210 case _SYSCALL_RET_ADDR_T:
211 case _SYSCALL_RET_SIZE_T:
212 case _SYSCALL_RET_SSIZE_T:
213 munged_rv0 = *(user_addr_t *)rv;
214 munged_rv1 = 0LL;
215 break;
216 case _SYSCALL_RET_NONE:
217 munged_rv0 = 0LL;
218 munged_rv1 = 0LL;
219 break;
220 default:
221 munged_rv0 = 0LL;
222 munged_rv1 = 0LL;
223 break;
224 }
225 }
226 } else {
227 munged_rv0 = 0LL;
228 munged_rv1 = 0LL;
229 }
230
231 /*
232 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
233 *
234 * "This is a bit of an historical artifact. At first, the syscall provider just
235 * had its return value in arg0, and the fbt and pid providers had their return
236 * values in arg1 (so that we could use arg0 for the offset of the return site).
237 *
238 * We inevitably started writing scripts where we wanted to see the return
239 * values from probes in all three providers, and we made this script easier
240 * to write by replicating the syscall return values in arg1 to match fbt and
241 * pid. We debated briefly about removing the return value from arg0, but
242 * decided that it would be less confusing to have the same data in two places
243 * than to have some non-helpful, non-intuitive value in arg0.
244 *
245 * This change was made 4/23/2003 according to the DTrace project's putback log."
246 */
247 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
248 }
249
250 return (rval);
251 }
252
253 void
254 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
255 {
256 systrace_sysent_t *sy;
257 dtrace_id_t id;
258
259 // Bounds "check" the value of code a la unix_syscall_return
260 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
261
262 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
263 uint64_t munged_rv0, munged_rv1;
264 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
265
266 if (uthread)
267 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
268
269 /*
270 * "Decode" rv for use in the call to dtrace_probe()
271 */
272 if (rval == ERESTART) {
273 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
274 munged_rv1 = -1LL;
275 } else if (rval != EJUSTRETURN) {
276 if (rval) {
277 munged_rv0 = -1LL; /* Mimic what libc will do. */
278 munged_rv1 = -1LL;
279 } else {
280 switch (sy->stsy_return_type) {
281 case _SYSCALL_RET_INT_T:
282 munged_rv0 = rv[0];
283 munged_rv1 = rv[1];
284 break;
285 case _SYSCALL_RET_UINT_T:
286 munged_rv0 = ((u_int)rv[0]);
287 munged_rv1 = ((u_int)rv[1]);
288 break;
289 case _SYSCALL_RET_OFF_T:
290 case _SYSCALL_RET_UINT64_T:
291 munged_rv0 = *(u_int64_t *)rv;
292 munged_rv1 = 0LL;
293 break;
294 case _SYSCALL_RET_ADDR_T:
295 case _SYSCALL_RET_SIZE_T:
296 case _SYSCALL_RET_SSIZE_T:
297 munged_rv0 = *(user_addr_t *)rv;
298 munged_rv1 = 0LL;
299 break;
300 case _SYSCALL_RET_NONE:
301 munged_rv0 = 0LL;
302 munged_rv1 = 0LL;
303 break;
304 default:
305 munged_rv0 = 0LL;
306 munged_rv1 = 0LL;
307 break;
308 }
309 }
310 } else {
311 munged_rv0 = 0LL;
312 munged_rv1 = 0LL;
313 }
314
315 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
316 }
317 }
318 #endif /* __APPLE__ */
319
320 #define SYSTRACE_SHIFT 16
321 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
322 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
323 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
324 #define SYSTRACE_RETURN(id) (id)
325
326 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
327 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
328 #endif
329
330 static dev_info_t *systrace_devi;
331 static dtrace_provider_id_t systrace_id;
332
333 #if !defined (__APPLE__)
/*
 * Non-Darwin variant.  Allocates the interposition table on first use
 * (zero-filled, NSYSCALL entries) and records, for every system call slot
 * not already pointing at a DTrace interposer, the original handler in
 * stsy_underlying.
 *
 *	actual		the system call table to snapshot
 *	interposed	in/out: address of the interposition table pointer
 */
static void
systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
{
	systrace_sysent_t *sysent = *interposed;
	int i;

	if (sysent == NULL) {
		*interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
		    NSYSCALL, KM_SLEEP);
	}

	for (i = 0; i < NSYSCALL; i++) {
		struct sysent *a = &actual[i];
		systrace_sysent_t *s = &sysent[i];

		if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
			continue;

		/* Already interposed: keep the previously saved handler. */
		if (a->sy_callc == dtrace_systrace_syscall)
			continue;

#ifdef _SYSCALL32_IMPL
		if (a->sy_callc == dtrace_systrace_syscall32)
			continue;
#endif

		s->stsy_underlying = a->sy_callc;
	}
}
363 #else
364 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
365 static void
366 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
367 {
368
369 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
370 from bsd/sys/sysent.h */
371 int i;
372
373 if (ssysent == NULL) {
374 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
375 NSYSCALL, KM_SLEEP);
376 }
377
378 for (i = 0; i < NSYSCALL; i++) {
379 struct sysent *a = &actual[i];
380 systrace_sysent_t *s = &ssysent[i];
381
382 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
383 continue;
384
385 if (a->sy_callc == dtrace_systrace_syscall)
386 continue;
387
388 #ifdef _SYSCALL32_IMPL
389 if (a->sy_callc == dtrace_systrace_syscall32)
390 continue;
391 #endif
392
393 s->stsy_underlying = a->sy_callc;
394 s->stsy_return_type = a->sy_return_type;
395 }
396 }
397
398 #endif /* __APPLE__ */
399
400 /*ARGSUSED*/
401 static void
402 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
403 {
404 #pragma unused(arg) /* __APPLE__ */
405 int i;
406
407 if (desc != NULL)
408 return;
409
410 systrace_init(sysent, &systrace_sysent);
411 #ifdef _SYSCALL32_IMPL
412 systrace_init(sysent32, &systrace_sysent32);
413 #endif
414
415 for (i = 0; i < NSYSCALL; i++) {
416 if (systrace_sysent[i].stsy_underlying == NULL)
417 continue;
418
419 if (dtrace_probe_lookup(systrace_id, NULL,
420 syscallnames[i], "entry") != 0)
421 continue;
422
423 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
424 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
425 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
426 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
427 "return", SYSTRACE_ARTIFICIAL_FRAMES,
428 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
429
430 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
431 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
432 #ifdef _SYSCALL32_IMPL
433 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
434 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
435 #endif
436 }
437 }
438 #if defined(__APPLE__)
439 #undef systrace_init
440 #endif
441
/*
 * Provider "destroy" callback.  Performs no work; it only asserts that the
 * probe identified by parg (system call number plus entry/return flag, as
 * encoded by SYSTRACE_ENTRY/SYSTRACE_RETURN) is no longer enabled.
 */
/*ARGSUSED*/
static void
systrace_destroy(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg,id) /* __APPLE__ */

	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);

#pragma unused(sysnum)  /* __APPLE__ */
	/*
	 * There's nothing to do here but assert that we have actually been
	 * disabled.
	 */
	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
		ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
		ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
#endif
	} else {
		ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
		ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
#endif
	}
}
467
/*
 * Provider "enable" callback.  Records probe id `id` in the entry or
 * return slot selected by parg and, if neither probe of this system call
 * was enabled before, redirects the system call table entry to
 * dtrace_systrace_syscall.  Always returns 0.
 */
/*ARGSUSED*/
static int
systrace_enable(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg) /* __APPLE__ */

	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
	/* Sampled before the id is stored below: was this system call already interposed? */
	int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
	    systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);

	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
		systrace_sysent[sysnum].stsy_entry = id;
#ifdef _SYSCALL32_IMPL
		systrace_sysent32[sysnum].stsy_entry = id;
#endif
	} else {
		systrace_sysent[sysnum].stsy_return = id;
#ifdef _SYSCALL32_IMPL
		systrace_sysent32[sysnum].stsy_return = id;
#endif
	}

	if (enabled) {
		ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
		return(0);
	}

	/*
	 * Swap the handler only if the slot still holds the saved underlying
	 * handler (casptr is dtrace_casptr; presumably a compare-and-swap --
	 * confirm in dtrace_glue.h).  The id was stored first, so the
	 * interposer finds it as soon as it starts being called.
	 */
	(void) casptr(&sysent[sysnum].sy_callc,
	    (void *)systrace_sysent[sysnum].stsy_underlying,
	    (void *)dtrace_systrace_syscall);
#ifdef _SYSCALL32_IMPL
	(void) casptr(&sysent32[sysnum].sy_callc,
	    (void *)systrace_sysent32[sysnum].stsy_underlying,
	    (void *)dtrace_systrace_syscall32);
#endif
	return (0);
}
505
/*
 * Provider "disable" callback.  If at most one probe of this system call
 * is currently enabled, puts the saved underlying handler back into the
 * system call table; then marks the entry or return slot selected by parg
 * as not enabled.
 */
/*ARGSUSED*/
static void
systrace_disable(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg,id) /* __APPLE__ */

	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
	/*
	 * True when either slot is already DTRACE_IDNONE.  Assuming the probe
	 * being disabled is itself enabled, no probe remains afterwards.
	 */
	int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
	    systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);

	if (disable) {
		/* Restore the handler before the probe id is cleared below. */
		(void) casptr(&sysent[sysnum].sy_callc,
		    (void *)dtrace_systrace_syscall,
		    (void *)systrace_sysent[sysnum].stsy_underlying);

#ifdef _SYSCALL32_IMPL
		(void) casptr(&sysent32[sysnum].sy_callc,
		    (void *)dtrace_systrace_syscall32,
		    (void *)systrace_sysent32[sysnum].stsy_underlying);
#endif
	}

	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
		systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
		systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
#endif
	} else {
		systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
		systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
#endif
	}
}
540
/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.  Each row is { stability, stability, class }.
 * NOTE(review): rows are presumably provider, module, function, name and
 * arguments, in dtrace_pattr_t order -- confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
548
/*
 * Provider operations passed to dtrace_register() for the "syscall"
 * provider.  Only provide, enable, disable and destroy are implemented;
 * every other slot is NULL.
 */
static dtrace_pops_t systrace_pops = {
	systrace_provide,
	NULL,
	systrace_enable,
	systrace_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	systrace_destroy
};
561
/*
 * Attach the systrace pseudo device and register the "syscall" provider.
 *
 *	devi	device handle, remembered in systrace_devi on success
 *	cmd	DDI_ATTACH performs the attach; DDI_RESUME succeeds without
 *		doing anything; any other command fails
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.  On failure systrace_probe is reset
 * to the no-op systrace_stub and the minor node is removed.
 */
static int
systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	systrace_probe = (void (*)())dtrace_probe;
	membar_enter();

	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
	    &systrace_pops, NULL, &systrace_id) != 0) {
		systrace_probe = systrace_stub;
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}
#else
	/* Route probe firings to dtrace_probe before the provider becomes visible. */
	systrace_probe = (void(*))&dtrace_probe;
	membar_enter();

	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
	    DDI_PSEUDO, 0) == DDI_FAILURE ||
	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
	    &systrace_pops, NULL, &systrace_id) != 0) {
		systrace_probe = systrace_stub;
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}
#endif /* __APPLE__ */

	ddi_report_dev(devi);
	systrace_devi = devi;

	return (DDI_SUCCESS);
}
605
606 #if !defined(__APPLE__)
/*
 * Detach the systrace pseudo device (non-Darwin builds only).
 * DDI_SUSPEND succeeds without doing anything; commands other than
 * DDI_DETACH fail.  A detach unregisters the provider, removes the minor
 * node and resets systrace_probe to the no-op stub; it fails if
 * dtrace_unregister() reports an error.
 */
static int
systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(systrace_id) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);
	systrace_probe = systrace_stub;
	return (DDI_SUCCESS);
}
626
627 /*ARGSUSED*/
628 static int
629 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
630 {
631 int error;
632
633 switch (infocmd) {
634 case DDI_INFO_DEVT2DEVINFO:
635 *result = (void *)systrace_devi;
636 error = DDI_SUCCESS;
637 break;
638 case DDI_INFO_DEVT2INSTANCE:
639 *result = (void *)0;
640 error = DDI_SUCCESS;
641 break;
642 default:
643 error = DDI_FAILURE;
644 }
645 return (error);
646 }
647
/*
 * Open entry point (non-Darwin builds only): ignores every argument and
 * always returns 0.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}
654
/*
 * Character/block entry points of the pseudo device (non-Darwin builds
 * only).  Only open is backed by a function in this file; the remaining
 * slots hold the generic nodev/nulldev/nochpoll/ddi_prop_op handlers.
 */
static struct cb_ops systrace_cb_ops = {
	systrace_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
672
/*
 * Device operations of the pseudo device (non-Darwin builds only); wires
 * up systrace_info, systrace_attach, systrace_detach and the cb_ops table.
 */
static struct dev_ops systrace_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	systrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	systrace_attach,	/* attach */
	systrace_detach,	/* detach */
	nodev,			/* reset */
	&systrace_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
686
/*
 * Module linkage information for the kernel (non-Darwin builds only):
 * describes this module as a driver named "System Call Tracing" whose
 * operations are systrace_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"System Call Tracing",	/* name of module */
	&systrace_ops,		/* driver ops */
};
695
/* Linkage record handed to mod_install/mod_info/mod_remove; lists modldrv only. */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
701
/* Module load entry point: returns the result of mod_install() on modlinkage. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
707
/* Module information entry point: returns the result of mod_info() for modlinkage. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
713
/* Module unload entry point: returns the result of mod_remove() on modlinkage. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
719 #else
720 typedef kern_return_t (*mach_call_t)(void *);
721
722 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
723 typedef void mach_munge_t(const void *, void *);
724
/*
 * Local declaration of a Mach trap table entry.
 * NOTE(review): this duplicates a kernel-private layout (see the XXX note
 * on kern/syscall_sw.h in this file) and must match the definition the
 * kernel was built with -- confirm when that header changes.
 */
typedef struct {
	int			mach_trap_arg_count;
	int			(*mach_trap_function)(void);	/* handler; swapped by machtrace_enable/disable */
#if 0 /* no active architectures use mungers for mach traps */
	mach_munge_t		*mach_trap_arg_munge32; /* system call arguments for 32-bit */
	mach_munge_t		*mach_trap_arg_munge64; /* system call arguments for 64-bit */
#endif
#if	MACH_ASSERT
	const char*		mach_trap_name;
#endif /* MACH_ASSERT */
} mach_trap_t;
736
737 extern mach_trap_t mach_trap_table[];
738 extern int mach_trap_count;
739
740 extern const char *mach_syscall_name_table[];
741
/* XXX From osfmk/i386/bsd_i386.c */
/*
 * Argument block handed to a Mach trap handler: nine consecutive
 * syscall_arg_t slots.  dtrace_machtrace_syscall() reads the first five
 * as an array when firing the entry probe.
 */
struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
754
755 #undef NSYSCALL
756 #define NSYSCALL mach_trap_count
757
758 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
759 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
760 #endif
761
762 typedef systrace_sysent_t machtrace_sysent_t;
763
764 static machtrace_sysent_t *machtrace_sysent = NULL;
765
766 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
767 uint64_t, uint64_t, uint64_t);
768
769 static dev_info_t *machtrace_devi;
770 static dtrace_provider_id_t machtrace_id;
771
772 static kern_return_t
773 dtrace_machtrace_syscall(struct mach_call_args *args)
774 {
775 boolean_t flavor;
776 unsigned short code;
777
778 machtrace_sysent_t *sy;
779 dtrace_id_t id;
780 kern_return_t rval;
781 #if 0 /* XXX */
782 proc_t *p;
783 #endif
784 syscall_arg_t *ip = (syscall_arg_t *)args;
785 mach_call_t mach_call;
786
787 #if defined(__i386__) || defined (__x86_64__)
788 #pragma unused(flavor)
789 {
790 pal_register_cache_state(current_thread(), VALID);
791 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
792
793 if (is_saved_state64(tagged_regs)) {
794 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
795 } else {
796 code = -saved_state32(tagged_regs)->eax;
797 }
798 }
799 #else
800 #error Unknown Architecture
801 #endif
802
803 sy = &machtrace_sysent[code];
804
805 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
806 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
807
808 #if 0 /* XXX */
809 /*
810 * We want to explicitly allow DTrace consumers to stop a process
811 * before it actually executes the meat of the syscall.
812 */
813 p = ttoproc(curthread);
814 mutex_enter(&p->p_lock);
815 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
816 curthread->t_dtrace_stop = 0;
817 stop(PR_REQUESTED, 0);
818 }
819 mutex_exit(&p->p_lock);
820 #endif
821
822 mach_call = (mach_call_t)(*sy->stsy_underlying);
823 rval = mach_call(args);
824
825 if ((id = sy->stsy_return) != DTRACE_IDNONE)
826 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
827
828 return (rval);
829 }
830
831 static void
832 machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
833 {
834 machtrace_sysent_t *msysent = *interposed;
835 int i;
836
837 if (msysent == NULL) {
838 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
839 NSYSCALL, KM_SLEEP);
840 }
841
842 for (i = 0; i < NSYSCALL; i++) {
843 mach_trap_t *a = &actual[i];
844 machtrace_sysent_t *s = &msysent[i];
845
846 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
847 continue;
848
849 if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
850 continue;
851
852 s->stsy_underlying = (sy_call_t *)a->mach_trap_function;
853 }
854 }
855
856 /*ARGSUSED*/
857 static void
858 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
859 {
860 #pragma unused(arg) /* __APPLE__ */
861
862 int i;
863
864 if (desc != NULL)
865 return;
866
867 machtrace_init(mach_trap_table, &machtrace_sysent);
868
869 for (i = 0; i < NSYSCALL; i++) {
870
871 if (machtrace_sysent[i].stsy_underlying == NULL)
872 continue;
873
874 if (dtrace_probe_lookup(machtrace_id, NULL,
875 mach_syscall_name_table[i], "entry") != 0)
876 continue;
877
878 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
879 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
880 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
881 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
882 "return", MACHTRACE_ARTIFICIAL_FRAMES,
883 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
884
885 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
886 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
887 }
888 }
889
/*
 * Provider "destroy" callback for Mach traps.  Performs no work; it only
 * asserts that the probe identified by parg (trap number plus entry/return
 * flag) is no longer enabled.
 */
/*ARGSUSED*/
static void
machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg,id) /* __APPLE__ */
	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);

#pragma unused(sysnum) /* __APPLE__ */

	/*
	 * There's nothing to do here but assert that we have actually been
	 * disabled.
	 */
	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
		ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
	} else {
		ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
	}
}
909
910 /*ARGSUSED*/
911 static int
912 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
913 {
914 #pragma unused(arg) /* __APPLE__ */
915
916 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
917 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
918 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
919
920 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
921 machtrace_sysent[sysnum].stsy_entry = id;
922 } else {
923 machtrace_sysent[sysnum].stsy_return = id;
924 }
925
926 if (enabled) {
927 ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
928 return(0);
929 }
930
931 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
932 (void *)machtrace_sysent[sysnum].stsy_underlying,
933 (void *)dtrace_machtrace_syscall);
934 return(0);
935 }
936
/*
 * Provider "disable" callback for Mach traps.  If at most one probe of
 * this trap is currently enabled, puts the saved underlying handler back
 * into the Mach trap table; then marks the entry or return slot selected
 * by parg as not enabled.
 */
/*ARGSUSED*/
static void
machtrace_disable(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg,id) /* __APPLE__ */

	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
	/*
	 * True when either slot is already DTRACE_IDNONE.  Assuming the probe
	 * being disabled is itself enabled, no probe remains afterwards.
	 */
	int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
	    machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);

	if (disable) {
		/* Restore the handler before the probe id is cleared below. */
		(void) casptr(&mach_trap_table[sysnum].mach_trap_function,
		    (void *)dtrace_machtrace_syscall,
		    (void *)machtrace_sysent[sysnum].stsy_underlying);

	}

	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
		machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
	} else {
		machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
	}
}
960
/*
 * Stability attributes passed to dtrace_register() for the "mach_trap"
 * provider.  Each row is { stability, stability, class }; the values are
 * the same as in systrace_attr.
 * NOTE(review): rows are presumably provider, module, function, name and
 * arguments, in dtrace_pattr_t order -- confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t machtrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
968
/*
 * Provider operations passed to dtrace_register() for the "mach_trap"
 * provider.  Only provide, enable, disable and destroy are implemented;
 * every other slot is NULL.
 */
static dtrace_pops_t machtrace_pops = {
	machtrace_provide,
	NULL,
	machtrace_enable,
	machtrace_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	machtrace_destroy
};
981
982 static int
983 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
984 {
985 switch (cmd) {
986 case DDI_ATTACH:
987 break;
988 case DDI_RESUME:
989 return (DDI_SUCCESS);
990 default:
991 return (DDI_FAILURE);
992 }
993
994 #if !defined(__APPLE__)
995 machtrace_probe = (void (*)())dtrace_probe;
996 membar_enter();
997
998 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
999 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1000 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1001 &machtrace_pops, NULL, &machtrace_id) != 0) {
1002 machtrace_probe = systrace_stub;
1003 #else
1004 machtrace_probe = dtrace_probe;
1005 membar_enter();
1006
1007 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1008 DDI_PSEUDO, 0) == DDI_FAILURE ||
1009 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1010 &machtrace_pops, NULL, &machtrace_id) != 0) {
1011 machtrace_probe = (void (*))&systrace_stub;
1012 #endif /* __APPLE__ */
1013 ddi_remove_minor_node(devi, NULL);
1014 return (DDI_FAILURE);
1015 }
1016
1017 ddi_report_dev(devi);
1018 machtrace_devi = devi;
1019
1020 return (DDI_SUCCESS);
1021 }
1022
d_open_t _systrace_open;

/*
 * Open entry point of the systrace character device: ignores every
 * argument and always returns 0.
 */
int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
1030
1031 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1032
/*
 * A struct describing which functions will get invoked for certain
 * actions.  Only open is backed by a function in this file
 * (_systrace_open); the other slots hold the generic eno_* and nulldev
 * handlers.
 */
static struct cdevsw systrace_cdevsw =
{
	_systrace_open,		/* open */
	eno_opcl,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	eno_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
1054
1055 static int gSysTraceInited = 0;
1056
1057 void systrace_init( void );
1058
1059 void systrace_init( void )
1060 {
1061 if (0 == gSysTraceInited) {
1062 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1063
1064 if (majdevno < 0) {
1065 printf("systrace_init: failed to allocate a major number!\n");
1066 gSysTraceInited = 0;
1067 return;
1068 }
1069
1070 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1071 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1072
1073 gSysTraceInited = 1;
1074 } else
1075 panic("systrace_init: called twice!\n");
1076 }
1077 #undef SYSTRACE_MAJOR
1078 #endif /* __APPLE__ */