]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
35601e943fec93c912b68554450657e5e7abda5f
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.5 06/03/24 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined(__i386__) || defined (__x86_64__)
51 #define SYSCALL_CLASS_SHIFT 24
52 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56 typedef x86_saved_state_t savearea_t;
57 #endif
58
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/proc.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
64 #include <sys/conf.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
67
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
70 #include "systrace.h"
71 #include <sys/stat.h>
72 #include <sys/systm.h>
73 #include <sys/conf.h>
74 #include <sys/user.h>
75
76 #if defined (__ppc__) || defined (__ppc64__)
77 #define SYSTRACE_ARTIFICIAL_FRAMES 3
78 #define MACHTRACE_ARTIFICIAL_FRAMES 4
79 #elif defined(__i386__) || defined (__x86_64__)
80 #define SYSTRACE_ARTIFICIAL_FRAMES 2
81 #define MACHTRACE_ARTIFICIAL_FRAMES 3
82 #else
83 #error Unknown Architecture
84 #endif
85
86 #include <sys/sysent.h>
87 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
88 #define NSYSCALL nsysent /* and is less than 500 or so */
89
90 extern const char *syscallnames[];
91
92 #include <sys/dtrace_glue.h>
93 #define casptr dtrace_casptr
94 #define membar_enter dtrace_membar_producer
95
96 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
97 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
98
99 systrace_sysent_t *systrace_sysent = NULL;
100 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
101 uint64_t, uint64_t, uint64_t);
102
103 void
104 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
105 uint64_t arg2, uint64_t arg3, uint64_t arg4)
106 {
107 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
108 }
109
110 int32_t
111 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
112 {
113 boolean_t flavor;
114 unsigned short code;
115
116 systrace_sysent_t *sy;
117 dtrace_id_t id;
118 int32_t rval;
119 #if 0 /* XXX */
120 proc_t *p;
121 #endif
122 syscall_arg_t *ip = (syscall_arg_t *)uap;
123
124 #if defined (__ppc__) || defined (__ppc64__)
125 {
126 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
127
128 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
129
130 if (flavor)
131 code = regs->save_r3;
132 else
133 code = regs->save_r0;
134 }
135 #elif defined(__i386__) || defined (__x86_64__)
136 #pragma unused(flavor)
137 {
138 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
139
140 if (is_saved_state64(tagged_regs)) {
141 x86_saved_state64_t *regs = saved_state64(tagged_regs);
142 code = regs->rax & SYSCALL_NUMBER_MASK;
143 /*
144 * Check for indirect system call... system call number
145 * passed as 'arg0'
146 */
147 if (code == 0) {
148 code = regs->rdi;
149 }
150 } else {
151 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
152 /*
153 * TODO: handle indirect system calls
154 */
155 }
156 }
157 #else
158 #error Unknown Architecture
159 #endif
160
161 // Bounds "check" the value of code a la unix_syscall
162 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
163
164 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
165 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
166
167 #if 0 /* XXX */
168 /*
169 * We want to explicitly allow DTrace consumers to stop a process
170 * before it actually executes the meat of the syscall.
171 */
172 p = ttoproc(curthread);
173 mutex_enter(&p->p_lock);
174 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
175 curthread->t_dtrace_stop = 0;
176 stop(PR_REQUESTED, 0);
177 }
178 mutex_exit(&p->p_lock);
179 #endif
180
181 rval = (*sy->stsy_underlying)(pp, uap, rv);
182
183 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
184 uint64_t munged_rv;
185 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
186
187 if (uthread)
188 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
189
190 /*
191 * "Decode" rv for use in the call to dtrace_probe()
192 */
193 if (rval == ERESTART) {
194 munged_rv = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
195 } else if (rval != EJUSTRETURN) {
196 if (rval) {
197 munged_rv = -1LL; /* Mimic what libc will do. */
198 } else {
199 switch (sy->stsy_return_type) {
200 case _SYSCALL_RET_INT_T:
201 munged_rv = rv[0];
202 break;
203 case _SYSCALL_RET_UINT_T:
204 munged_rv = ((u_int)rv[0]);
205 break;
206 case _SYSCALL_RET_OFF_T:
207 munged_rv = *(u_int64_t *)rv;
208 break;
209 case _SYSCALL_RET_ADDR_T:
210 case _SYSCALL_RET_SIZE_T:
211 case _SYSCALL_RET_SSIZE_T:
212 munged_rv = *(user_addr_t *)rv;
213 break;
214 case _SYSCALL_RET_NONE:
215 munged_rv = 0LL;
216 break;
217 default:
218 munged_rv = 0LL;
219 break;
220 }
221 }
222 } else
223 munged_rv = 0LL;
224
225 (*systrace_probe)(id, munged_rv, munged_rv, (uint64_t)rval, 0, 0);
226 }
227
228 return (rval);
229 }
230
231 void
232 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
233 {
234 systrace_sysent_t *sy;
235 dtrace_id_t id;
236
237 // Bounds "check" the value of code a la unix_syscall_return
238 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
239
240 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
241 uint64_t munged_rv;
242 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
243
244 if (uthread)
245 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
246
247 /*
248 * "Decode" rv for use in the call to dtrace_probe()
249 */
250 if (rval == ERESTART) {
251 munged_rv = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
252 } else if (rval != EJUSTRETURN) {
253 if (rval) {
254 munged_rv = -1LL; /* Mimic what libc will do. */
255 } else {
256 switch (sy->stsy_return_type) {
257 case _SYSCALL_RET_INT_T:
258 munged_rv = rv[0];
259 break;
260 case _SYSCALL_RET_UINT_T:
261 munged_rv = ((u_int)rv[0]);
262 break;
263 case _SYSCALL_RET_OFF_T:
264 munged_rv = *(u_int64_t *)rv;
265 break;
266 case _SYSCALL_RET_ADDR_T:
267 case _SYSCALL_RET_SIZE_T:
268 case _SYSCALL_RET_SSIZE_T:
269 munged_rv = *(user_addr_t *)rv;
270 break;
271 case _SYSCALL_RET_NONE:
272 munged_rv = 0LL;
273 break;
274 default:
275 munged_rv = 0LL;
276 break;
277 }
278 }
279 } else
280 munged_rv = 0LL;
281
282 (*systrace_probe)(id, munged_rv, munged_rv, (uint64_t)rval, 0, 0);
283 }
284 }
285 #endif /* __APPLE__ */
286
287 #define SYSTRACE_SHIFT 16
288 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
289 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
290 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
291 #define SYSTRACE_RETURN(id) (id)
292
293 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
294 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
295 #endif
296
297 static dev_info_t *systrace_devi;
298 static dtrace_provider_id_t systrace_id;
299
300 #if defined(__APPLE__)
301 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
302 #endif
303 static void
304 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
305 {
306 systrace_sysent_t *sysent = *interposed;
307 int i;
308
309 if (sysent == NULL) {
310 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
311 NSYSCALL, KM_SLEEP);
312 }
313
314 for (i = 0; i < NSYSCALL; i++) {
315 struct sysent *a = &actual[i];
316 systrace_sysent_t *s = &sysent[i];
317
318 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
319 continue;
320
321 if (a->sy_callc == dtrace_systrace_syscall)
322 continue;
323
324 #ifdef _SYSCALL32_IMPL
325 if (a->sy_callc == dtrace_systrace_syscall32)
326 continue;
327 #endif
328
329 s->stsy_underlying = a->sy_callc;
330 #if defined(__APPLE__)
331 s->stsy_return_type = a->sy_return_type;
332 #endif
333 }
334 }
335
336 /*ARGSUSED*/
337 static void
338 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
339 {
340 int i;
341
342 if (desc != NULL)
343 return;
344
345 systrace_init(sysent, &systrace_sysent);
346 #ifdef _SYSCALL32_IMPL
347 systrace_init(sysent32, &systrace_sysent32);
348 #endif
349
350 for (i = 0; i < NSYSCALL; i++) {
351 if (systrace_sysent[i].stsy_underlying == NULL)
352 continue;
353
354 if (dtrace_probe_lookup(systrace_id, NULL,
355 syscallnames[i], "entry") != 0)
356 continue;
357
358 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
359 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
360 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
361 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
362 "return", SYSTRACE_ARTIFICIAL_FRAMES,
363 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
364
365 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
366 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
367 #ifdef _SYSCALL32_IMPL
368 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
369 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
370 #endif
371 }
372 }
373 #if defined(__APPLE__)
374 #undef systrace_init
375 #endif
376
377 /*ARGSUSED*/
378 static void
379 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
380 {
381 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
382
383 /*
384 * There's nothing to do here but assert that we have actually been
385 * disabled.
386 */
387 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
388 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
389 #ifdef _SYSCALL32_IMPL
390 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
391 #endif
392 } else {
393 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
394 #ifdef _SYSCALL32_IMPL
395 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
396 #endif
397 }
398 }
399
400 /*ARGSUSED*/
401 static void
402 systrace_enable(void *arg, dtrace_id_t id, void *parg)
403 {
404 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
405 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
406 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
407
408 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
409 systrace_sysent[sysnum].stsy_entry = id;
410 #ifdef _SYSCALL32_IMPL
411 systrace_sysent32[sysnum].stsy_entry = id;
412 #endif
413 } else {
414 systrace_sysent[sysnum].stsy_return = id;
415 #ifdef _SYSCALL32_IMPL
416 systrace_sysent32[sysnum].stsy_return = id;
417 #endif
418 }
419
420 if (enabled) {
421 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
422 return;
423 }
424
425 (void) casptr(&sysent[sysnum].sy_callc,
426 (void *)systrace_sysent[sysnum].stsy_underlying,
427 (void *)dtrace_systrace_syscall);
428 #ifdef _SYSCALL32_IMPL
429 (void) casptr(&sysent32[sysnum].sy_callc,
430 (void *)systrace_sysent32[sysnum].stsy_underlying,
431 (void *)dtrace_systrace_syscall32);
432 #endif
433 }
434
435 /*ARGSUSED*/
436 static void
437 systrace_disable(void *arg, dtrace_id_t id, void *parg)
438 {
439 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
440 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
441 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
442
443 if (disable) {
444 (void) casptr(&sysent[sysnum].sy_callc,
445 (void *)dtrace_systrace_syscall,
446 (void *)systrace_sysent[sysnum].stsy_underlying);
447
448 #ifdef _SYSCALL32_IMPL
449 (void) casptr(&sysent32[sysnum].sy_callc,
450 (void *)dtrace_systrace_syscall32,
451 (void *)systrace_sysent32[sysnum].stsy_underlying);
452 #endif
453 }
454
455 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
456 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
457 #ifdef _SYSCALL32_IMPL
458 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
459 #endif
460 } else {
461 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
462 #ifdef _SYSCALL32_IMPL
463 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
464 #endif
465 }
466 }
467
468 static dtrace_pattr_t systrace_attr = {
469 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
470 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
471 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
472 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
473 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
474 };
475
476 static dtrace_pops_t systrace_pops = {
477 systrace_provide,
478 NULL,
479 systrace_enable,
480 systrace_disable,
481 NULL,
482 NULL,
483 NULL,
484 NULL,
485 NULL,
486 systrace_destroy
487 };
488
489 static int
490 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
491 {
492 switch (cmd) {
493 case DDI_ATTACH:
494 break;
495 case DDI_RESUME:
496 return (DDI_SUCCESS);
497 default:
498 return (DDI_FAILURE);
499 }
500
501 systrace_probe = dtrace_probe;
502 membar_enter();
503
504 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
505 DDI_PSEUDO, NULL) == DDI_FAILURE ||
506 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
507 &systrace_pops, NULL, &systrace_id) != 0) {
508 systrace_probe = systrace_stub;
509 ddi_remove_minor_node(devi, NULL);
510 return (DDI_FAILURE);
511 }
512
513 ddi_report_dev(devi);
514 systrace_devi = devi;
515
516 return (DDI_SUCCESS);
517 }
518
519 #if !defined(__APPLE__)
520 static int
521 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
522 {
523 switch (cmd) {
524 case DDI_DETACH:
525 break;
526 case DDI_SUSPEND:
527 return (DDI_SUCCESS);
528 default:
529 return (DDI_FAILURE);
530 }
531
532 if (dtrace_unregister(systrace_id) != 0)
533 return (DDI_FAILURE);
534
535 ddi_remove_minor_node(devi, NULL);
536 systrace_probe = systrace_stub;
537 return (DDI_SUCCESS);
538 }
539
540 /*ARGSUSED*/
541 static int
542 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
543 {
544 int error;
545
546 switch (infocmd) {
547 case DDI_INFO_DEVT2DEVINFO:
548 *result = (void *)systrace_devi;
549 error = DDI_SUCCESS;
550 break;
551 case DDI_INFO_DEVT2INSTANCE:
552 *result = (void *)0;
553 error = DDI_SUCCESS;
554 break;
555 default:
556 error = DDI_FAILURE;
557 }
558 return (error);
559 }
560
561 /*ARGSUSED*/
562 static int
563 systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
564 {
565 return (0);
566 }
567
568 static struct cb_ops systrace_cb_ops = {
569 systrace_open, /* open */
570 nodev, /* close */
571 nulldev, /* strategy */
572 nulldev, /* print */
573 nodev, /* dump */
574 nodev, /* read */
575 nodev, /* write */
576 nodev, /* ioctl */
577 nodev, /* devmap */
578 nodev, /* mmap */
579 nodev, /* segmap */
580 nochpoll, /* poll */
581 ddi_prop_op, /* cb_prop_op */
582 0, /* streamtab */
583 D_NEW | D_MP /* Driver compatibility flag */
584 };
585
586 static struct dev_ops systrace_ops = {
587 DEVO_REV, /* devo_rev, */
588 0, /* refcnt */
589 systrace_info, /* get_dev_info */
590 nulldev, /* identify */
591 nulldev, /* probe */
592 systrace_attach, /* attach */
593 systrace_detach, /* detach */
594 nodev, /* reset */
595 &systrace_cb_ops, /* driver operations */
596 NULL, /* bus operations */
597 nodev /* dev power */
598 };
599
600 /*
601 * Module linkage information for the kernel.
602 */
603 static struct modldrv modldrv = {
604 &mod_driverops, /* module type (this is a pseudo driver) */
605 "System Call Tracing", /* name of module */
606 &systrace_ops, /* driver ops */
607 };
608
609 static struct modlinkage modlinkage = {
610 MODREV_1,
611 (void *)&modldrv,
612 NULL
613 };
614
615 int
616 _init(void)
617 {
618 return (mod_install(&modlinkage));
619 }
620
621 int
622 _info(struct modinfo *modinfop)
623 {
624 return (mod_info(&modlinkage, modinfop));
625 }
626
627 int
628 _fini(void)
629 {
630 return (mod_remove(&modlinkage));
631 }
632 #else
633 typedef kern_return_t (*mach_call_t)(void *);
634
635 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
636 typedef void mach_munge_t(const void *, void *);
637
638 typedef struct {
639 int mach_trap_arg_count;
640 int (*mach_trap_function)(void);
641 #if defined(__i386__)
642 boolean_t mach_trap_stack;
643 #else
644 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
645 mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */
646 #endif
647 #if !MACH_ASSERT
648 int mach_trap_unused;
649 #else
650 const char* mach_trap_name;
651 #endif /* !MACH_ASSERT */
652 } mach_trap_t;
653
654 #define MACH_TRAP_TABLE_COUNT 128
655
656 extern mach_trap_t mach_trap_table[];
657 extern int mach_trap_count;
658
659 #define MACH_TRAP(name, foo, bar, baz) #name
660
661 /* XXX From osfmk/kern/syscall_sw.c */
662 static const char * mach_name_table[MACH_TRAP_TABLE_COUNT] = {
663 /* 0 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
664 /* 1 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
665 /* 2 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
666 /* 3 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
667 /* 4 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
668 /* 5 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
669 /* 6 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
670 /* 7 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
671 /* 8 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
672 /* 9 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
673 /* 10 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
674 /* 11 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
675 /* 12 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
676 /* 13 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
677 /* 14 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
678 /* 15 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
679 /* 16 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
680 /* 17 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
681 /* 18 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
682 /* 19 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
683 /* 20 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
684 /* 21 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
685 /* 22 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
686 /* 23 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
687 /* 24 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
688 /* 25 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
689 /* 26 */ MACH_TRAP(mach_reply_port, 0, NULL, NULL),
690 /* 27 */ MACH_TRAP(thread_self_trap, 0, NULL, NULL),
691 /* 28 */ MACH_TRAP(task_self_trap, 0, NULL, NULL),
692 /* 29 */ MACH_TRAP(host_self_trap, 0, NULL, NULL),
693 /* 30 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
694 /* 31 */ MACH_TRAP(mach_msg_trap, 7, munge_wwwwwww, munge_ddddddd),
695 /* 32 */ MACH_TRAP(mach_msg_overwrite_trap, 8, munge_wwwwwwww, munge_dddddddd),
696 /* 33 */ MACH_TRAP(semaphore_signal_trap, 1, munge_w, munge_d),
697 /* 34 */ MACH_TRAP(semaphore_signal_all_trap, 1, munge_w, munge_d),
698 /* 35 */ MACH_TRAP(semaphore_signal_thread_trap, 2, munge_ww, munge_dd),
699 /* 36 */ MACH_TRAP(semaphore_wait_trap, 1, munge_w, munge_d),
700 /* 37 */ MACH_TRAP(semaphore_wait_signal_trap, 2, munge_ww, munge_dd),
701 /* 38 */ MACH_TRAP(semaphore_timedwait_trap, 3, munge_www, munge_ddd),
702 /* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, munge_wwww, munge_dddd),
703 /* 40 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
704 /* 41 */ MACH_TRAP(init_process, 0, NULL, NULL),
705 /* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
706 /* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd),
707 /* 44 */ MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd),
708 /* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd),
709 /* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd),
710 /* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
711 /* 48 */ MACH_TRAP(macx_swapon, 4, munge_wwww, munge_dddd),
712 /* 49 */ MACH_TRAP(macx_swapoff, 2, munge_ww, munge_dd),
713 /* 50 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
714 /* 51 */ MACH_TRAP(macx_triggers, 4, munge_wwww, munge_dddd),
715 /* 52 */ MACH_TRAP(macx_backing_store_suspend, 1, munge_w, munge_d),
716 /* 53 */ MACH_TRAP(macx_backing_store_recovery, 1, munge_w, munge_d),
717 /* 54 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
718 /* 55 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
719 /* 56 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
720 /* 57 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
721 /* 58 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
722 /* 59 */ MACH_TRAP(swtch_pri, 0, NULL, NULL),
723 /* 60 */ MACH_TRAP(swtch, 0, NULL, NULL),
724 /* 61 */ MACH_TRAP(thread_switch, 3, munge_www, munge_ddd),
725 /* 62 */ MACH_TRAP(clock_sleep_trap, 5, munge_wwwww, munge_ddddd),
726 /* 63 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
727 /* traps 64 - 95 reserved (debo) */
728 /* 64 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
729 /* 65 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
730 /* 66 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
731 /* 67 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
732 /* 68 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
733 /* 69 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
734 /* 70 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
735 /* 71 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
736 /* 72 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
737 /* 73 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
738 /* 74 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
739 /* 75 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
740 /* 76 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
741 /* 77 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
742 /* 78 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
743 /* 79 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
744 /* 80 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
745 /* 81 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
746 /* 82 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
747 /* 83 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
748 /* 84 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
749 /* 85 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
750 /* 86 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
751 /* 87 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
752 /* 88 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
753 /* 89 */ MACH_TRAP(mach_timebase_info_trap, 1, munge_w, munge_d),
754 /* 90 */ MACH_TRAP(mach_wait_until_trap, 2, munge_l, munge_d),
755 /* 91 */ MACH_TRAP(mk_timer_create_trap, 0, NULL, NULL),
756 /* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, munge_w, munge_d),
757 /* 93 */ MACH_TRAP(mk_timer_arm_trap, 3, munge_wl, munge_dd),
758 /* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, munge_ww, munge_dd),
759 /* 95 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
760 /* traps 64 - 95 reserved (debo) */
761 /* 96 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
762 /* 97 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
763 /* 98 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
764 /* 99 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
765 /* traps 100-107 reserved for iokit (esb) */
766 /* 100 */ MACH_TRAP(iokit_user_client_trap, 8, munge_wwwwwwww, munge_dddddddd),
767 /* 101 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
768 /* 102 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
769 /* 103 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
770 /* 104 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
771 /* 105 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
772 /* 106 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
773 /* 107 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
774 /* traps 108-127 unused */
775 /* 108 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
776 /* 109 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
777 /* 110 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
778 /* 111 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
779 /* 112 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
780 /* 113 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
781 /* 114 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
782 /* 115 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
783 /* 116 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
784 /* 117 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
785 /* 118 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
786 /* 119 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
787 /* 120 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
788 /* 121 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
789 /* 122 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
790 /* 123 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
791 /* 124 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
792 /* 125 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
793 /* 126 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
794 /* 127 */ MACH_TRAP(kern_invalid, 0, NULL, NULL),
795 };
796
797 /* XXX From osfmk/i386/bsd_i386.c */
798 struct mach_call_args {
799 syscall_arg_t arg1;
800 syscall_arg_t arg2;
801 syscall_arg_t arg3;
802 syscall_arg_t arg4;
803 syscall_arg_t arg5;
804 syscall_arg_t arg6;
805 syscall_arg_t arg7;
806 syscall_arg_t arg8;
807 syscall_arg_t arg9;
808 };
809
810 #undef NSYSCALL
811 #define NSYSCALL mach_trap_count
812
813 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
814 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
815 #endif
816
817 typedef systrace_sysent_t machtrace_sysent_t;
818
819 static machtrace_sysent_t *machtrace_sysent = NULL;
820
821 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
822 uint64_t, uint64_t, uint64_t);
823
824 static dev_info_t *machtrace_devi;
825 static dtrace_provider_id_t machtrace_id;
826
827 static kern_return_t
828 dtrace_machtrace_syscall(struct mach_call_args *args)
829 {
830 boolean_t flavor;
831 unsigned short code;
832
833 machtrace_sysent_t *sy;
834 dtrace_id_t id;
835 kern_return_t rval;
836 #if 0 /* XXX */
837 proc_t *p;
838 #endif
839 syscall_arg_t *ip = (syscall_arg_t *)args;
840 mach_call_t mach_call;
841
842 #if defined (__ppc__) || defined (__ppc64__)
843 {
844 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
845
846 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
847
848 if (flavor)
849 code = -regs->save_r3;
850 else
851 code = -regs->save_r0;
852 }
853 #elif defined(__i386__) || defined (__x86_64__)
854 #pragma unused(flavor)
855 {
856 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
857
858 if (is_saved_state64(tagged_regs)) {
859 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
860 } else {
861 code = -saved_state32(tagged_regs)->eax;
862 }
863 }
864 #else
865 #error Unknown Architecture
866 #endif
867
868 sy = &machtrace_sysent[code];
869
870 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
871 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
872
873 #if 0 /* XXX */
874 /*
875 * We want to explicitly allow DTrace consumers to stop a process
876 * before it actually executes the meat of the syscall.
877 */
878 p = ttoproc(curthread);
879 mutex_enter(&p->p_lock);
880 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
881 curthread->t_dtrace_stop = 0;
882 stop(PR_REQUESTED, 0);
883 }
884 mutex_exit(&p->p_lock);
885 #endif
886
887 mach_call = (mach_call_t)(*sy->stsy_underlying);
888 rval = mach_call(args);
889
890 if ((id = sy->stsy_return) != DTRACE_IDNONE)
891 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
892
893 return (rval);
894 }
895
896 static void
897 machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
898 {
899 machtrace_sysent_t *msysent = *interposed;
900 int i;
901
902 if (msysent == NULL) {
903 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
904 NSYSCALL, KM_SLEEP);
905 }
906
907 for (i = 0; i < NSYSCALL; i++) {
908 mach_trap_t *a = &actual[i];
909 machtrace_sysent_t *s = &msysent[i];
910
911 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
912 continue;
913
914 if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
915 continue;
916
917 s->stsy_underlying = a->mach_trap_function;
918 }
919 }
920
921 /*ARGSUSED*/
922 static void
923 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
924 {
925 int i;
926
927 if (desc != NULL)
928 return;
929
930 machtrace_init(mach_trap_table, &machtrace_sysent);
931
932 for (i = 0; i < NSYSCALL; i++) {
933
934 if (machtrace_sysent[i].stsy_underlying == NULL)
935 continue;
936
937 if (dtrace_probe_lookup(machtrace_id, NULL,
938 mach_name_table[i], "entry") != 0)
939 continue;
940
941 (void) dtrace_probe_create(machtrace_id, NULL, mach_name_table[i],
942 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
943 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
944 (void) dtrace_probe_create(machtrace_id, NULL, mach_name_table[i],
945 "return", MACHTRACE_ARTIFICIAL_FRAMES,
946 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
947
948 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
949 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
950 }
951 }
952
953 /*ARGSUSED*/
954 static void
955 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
956 {
957 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
958
959 /*
960 * There's nothing to do here but assert that we have actually been
961 * disabled.
962 */
963 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
964 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
965 } else {
966 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
967 }
968 }
969
970 /*ARGSUSED*/
971 static void
972 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
973 {
974 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
975 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
976 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
977
978 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
979 machtrace_sysent[sysnum].stsy_entry = id;
980 } else {
981 machtrace_sysent[sysnum].stsy_return = id;
982 }
983
984 if (enabled) {
985 ASSERT(sysent[sysnum].sy_callc == dtrace_machtrace_syscall);
986 return;
987 }
988
989 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
990 (void *)machtrace_sysent[sysnum].stsy_underlying,
991 (void *)dtrace_machtrace_syscall);
992 }
993
994 /*ARGSUSED*/
995 static void
996 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
997 {
998 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
999 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
1000 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
1001
1002 if (disable) {
1003 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
1004 (void *)dtrace_machtrace_syscall,
1005 (void *)machtrace_sysent[sysnum].stsy_underlying);
1006
1007 }
1008
1009 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1010 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
1011 } else {
1012 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
1013 }
1014 }
1015
1016 static dtrace_pattr_t machtrace_attr = {
1017 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
1018 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
1019 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1020 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
1021 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1022 };
1023
1024 static dtrace_pops_t machtrace_pops = {
1025 machtrace_provide,
1026 NULL,
1027 machtrace_enable,
1028 machtrace_disable,
1029 NULL,
1030 NULL,
1031 NULL,
1032 NULL,
1033 NULL,
1034 machtrace_destroy
1035 };
1036
1037 static int
1038 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1039 {
1040 switch (cmd) {
1041 case DDI_ATTACH:
1042 break;
1043 case DDI_RESUME:
1044 return (DDI_SUCCESS);
1045 default:
1046 return (DDI_FAILURE);
1047 }
1048
1049 machtrace_probe = dtrace_probe;
1050 membar_enter();
1051
1052 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1053 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1054 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1055 &machtrace_pops, NULL, &machtrace_id) != 0) {
1056 machtrace_probe = systrace_stub;
1057 ddi_remove_minor_node(devi, NULL);
1058 return (DDI_FAILURE);
1059 }
1060
1061 ddi_report_dev(devi);
1062 machtrace_devi = devi;
1063
1064 return (DDI_SUCCESS);
1065 }
1066
1067 d_open_t _systrace_open;
1068
1069 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1070 {
1071 #pragma unused(dev,flags,devtype,p)
1072 return 0;
1073 }
1074
1075 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1076
1077 /*
1078 * A struct describing which functions will get invoked for certain
1079 * actions.
1080 */
1081 static struct cdevsw systrace_cdevsw =
1082 {
1083 _systrace_open, /* open */
1084 eno_opcl, /* close */
1085 eno_rdwrt, /* read */
1086 eno_rdwrt, /* write */
1087 eno_ioctl, /* ioctl */
1088 (stop_fcn_t *)nulldev, /* stop */
1089 (reset_fcn_t *)nulldev, /* reset */
1090 NULL, /* tty's */
1091 eno_select, /* select */
1092 eno_mmap, /* mmap */
1093 eno_strat, /* strategy */
1094 eno_getc, /* getc */
1095 eno_putc, /* putc */
1096 0 /* type */
1097 };
1098
1099 static int gSysTraceInited = 0;
1100
1101 void systrace_init( void );
1102
1103 void systrace_init( void )
1104 {
1105 if (0 == gSysTraceInited) {
1106 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1107
1108 if (majdevno < 0) {
1109 printf("systrace_init: failed to allocate a major number!\n");
1110 gSysTraceInited = 0;
1111 return;
1112 }
1113
1114 systrace_attach( (dev_info_t *)majdevno, DDI_ATTACH );
1115 machtrace_attach( (dev_info_t *)majdevno, DDI_ATTACH );
1116
1117 gSysTraceInited = 1;
1118 } else
1119 panic("systrace_init: called twice!\n");
1120 }
1121 #undef SYSTRACE_MAJOR
1122 #endif /* __APPLE__ */