/*
 * Retrieved from git.saurik.com (apple/xnu.git), blob
 * 6761beec99c3d04066d44b58f0e784c32c8f0e80:
 * bsd/dev/dtrace/systrace.c
 */
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
#if defined(__i386__) || defined (__x86_64__)
/*
 * Layout of the raw system call number found in the saved user registers:
 * the top eight bits (from bit 24 up) carry the call class and the
 * remaining low bits carry the call number itself.
 */
#define SYSCALL_CLASS_SHIFT 24
#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
/* Mask applied to eax for 32-bit callers: only the low 16 bits are kept. */
#define I386_SYSCALL_NUMBER_MASK (0xFFFF)

/* Solaris code refers to the saved register state as savearea_t. */
typedef x86_saved_state_t savearea_t;
#endif
58
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/proc.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
64 #include <sys/conf.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
67
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
70 #include "systrace.h"
71 #include <sys/stat.h>
72 #include <sys/systm.h>
73 #include <sys/conf.h>
74 #include <sys/user.h>
75
76 #include <machine/pal_routines.h>
77
/*
 * Frame counts handed to dtrace_probe_create() for the syscall and mach_trap
 * probes respectively.
 * NOTE(review): presumably the number of provider-internal stack frames
 * DTrace should skip when walking the stack -- confirm against the DTrace
 * provider API before adding or removing call levels around the probe calls.
 */
#if defined(__i386__) || defined (__x86_64__)
#define SYSTRACE_ARTIFICIAL_FRAMES 2
#define MACHTRACE_ARTIFICIAL_FRAMES 3
#else
#error Unknown Architecture
#endif

/*
 * Name glue so that the shared Solaris-derived code below compiles against
 * Darwin's sysent table.
 */
#include <sys/sysent.h>
#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
#define NSYSCALL nsysent /* and is less than 500 or so */

/* Probe function names, indexed by system call number. */
extern const char *syscallnames[];

#include <sys/dtrace_glue.h>
#define casptr dtrace_casptr
#define membar_enter dtrace_membar_producer

/* Both macros ignore their argument and yield a constant. */
#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
97
/* Lock attribute and lock group used to initialise dtrace_systrace_lock. */
extern lck_attr_t* dtrace_lck_attr;
extern lck_grp_t* dtrace_lck_grp;
/*
 * Held while a sysent[] or mach_trap_table[] handler slot is rewritten by
 * the enable/disable routines of both providers in this file.
 */
static lck_mtx_t dtrace_systrace_lock; /* probe state lock */

/* Per-syscall interposition table; allocated on first use by systrace_init(). */
systrace_sysent_t *systrace_sysent = NULL;
/*
 * Function through which the syscall hooks fire their probes. It is pointed
 * at dtrace_probe by systrace_attach(), or at systrace_stub if provider
 * registration fails.
 */
void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);

/* Defined at the end of the file; referenced by systrace_pops. */
static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);
106
107 void
108 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
109 uint64_t arg2, uint64_t arg3, uint64_t arg4)
110 {
111 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
112 }
113
114 int32_t
115 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
116 {
117 boolean_t flavor;
118 unsigned short code;
119
120 systrace_sysent_t *sy;
121 dtrace_id_t id;
122 int32_t rval;
123 #if 0 /* XXX */
124 proc_t *p;
125 #endif
126 syscall_arg_t *ip = (syscall_arg_t *)uap;
127
128 #if defined(__i386__) || defined (__x86_64__)
129 #pragma unused(flavor)
130 {
131 pal_register_cache_state(current_thread(), VALID);
132 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
133
134 if (is_saved_state64(tagged_regs)) {
135 x86_saved_state64_t *regs = saved_state64(tagged_regs);
136 code = regs->rax & SYSCALL_NUMBER_MASK;
137 /*
138 * Check for indirect system call... system call number
139 * passed as 'arg0'
140 */
141 if (code == 0) {
142 code = regs->rdi;
143 }
144 } else {
145 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
146
147 if (code == 0) {
148 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
149 code = fuword(params);
150 }
151 }
152 }
153 #else
154 #error Unknown Architecture
155 #endif
156
157 // Bounds "check" the value of code a la unix_syscall
158 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
159
160 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
161 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
162 if (uthread)
163 uthread->t_dtrace_syscall_args = (void *)ip;
164
165 if (ip)
166 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
167 else
168 (*systrace_probe)(id, 0, 0, 0, 0, 0);
169
170 if (uthread)
171 uthread->t_dtrace_syscall_args = (void *)0;
172 }
173
174 #if 0 /* XXX */
175 /*
176 * We want to explicitly allow DTrace consumers to stop a process
177 * before it actually executes the meat of the syscall.
178 */
179 p = ttoproc(curthread);
180 mutex_enter(&p->p_lock);
181 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
182 curthread->t_dtrace_stop = 0;
183 stop(PR_REQUESTED, 0);
184 }
185 mutex_exit(&p->p_lock);
186 #endif
187
188 rval = (*sy->stsy_underlying)(pp, uap, rv);
189
190 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
191 uint64_t munged_rv0, munged_rv1;
192 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
193
194 if (uthread)
195 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
196
197 /*
198 * "Decode" rv for use in the call to dtrace_probe()
199 */
200 if (rval == ERESTART) {
201 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
202 munged_rv1 = -1LL;
203 } else if (rval != EJUSTRETURN) {
204 if (rval) {
205 munged_rv0 = -1LL; /* Mimic what libc will do. */
206 munged_rv1 = -1LL;
207 } else {
208 switch (sy->stsy_return_type) {
209 case _SYSCALL_RET_INT_T:
210 munged_rv0 = rv[0];
211 munged_rv1 = rv[1];
212 break;
213 case _SYSCALL_RET_UINT_T:
214 munged_rv0 = ((u_int)rv[0]);
215 munged_rv1 = ((u_int)rv[1]);
216 break;
217 case _SYSCALL_RET_OFF_T:
218 case _SYSCALL_RET_UINT64_T:
219 munged_rv0 = *(u_int64_t *)rv;
220 munged_rv1 = 0LL;
221 break;
222 case _SYSCALL_RET_ADDR_T:
223 case _SYSCALL_RET_SIZE_T:
224 case _SYSCALL_RET_SSIZE_T:
225 munged_rv0 = *(user_addr_t *)rv;
226 munged_rv1 = 0LL;
227 break;
228 case _SYSCALL_RET_NONE:
229 munged_rv0 = 0LL;
230 munged_rv1 = 0LL;
231 break;
232 default:
233 munged_rv0 = 0LL;
234 munged_rv1 = 0LL;
235 break;
236 }
237 }
238 } else {
239 munged_rv0 = 0LL;
240 munged_rv1 = 0LL;
241 }
242
243 /*
244 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
245 *
246 * "This is a bit of an historical artifact. At first, the syscall provider just
247 * had its return value in arg0, and the fbt and pid providers had their return
248 * values in arg1 (so that we could use arg0 for the offset of the return site).
249 *
250 * We inevitably started writing scripts where we wanted to see the return
251 * values from probes in all three providers, and we made this script easier
252 * to write by replicating the syscall return values in arg1 to match fbt and
253 * pid. We debated briefly about removing the return value from arg0, but
254 * decided that it would be less confusing to have the same data in two places
255 * than to have some non-helpful, non-intuitive value in arg0.
256 *
257 * This change was made 4/23/2003 according to the DTrace project's putback log."
258 */
259 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
260 }
261
262 return (rval);
263 }
264
265 void
266 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
267 {
268 systrace_sysent_t *sy;
269 dtrace_id_t id;
270
271 // Bounds "check" the value of code a la unix_syscall_return
272 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
273
274 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
275 uint64_t munged_rv0, munged_rv1;
276 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
277
278 if (uthread)
279 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
280
281 /*
282 * "Decode" rv for use in the call to dtrace_probe()
283 */
284 if (rval == ERESTART) {
285 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
286 munged_rv1 = -1LL;
287 } else if (rval != EJUSTRETURN) {
288 if (rval) {
289 munged_rv0 = -1LL; /* Mimic what libc will do. */
290 munged_rv1 = -1LL;
291 } else {
292 switch (sy->stsy_return_type) {
293 case _SYSCALL_RET_INT_T:
294 munged_rv0 = rv[0];
295 munged_rv1 = rv[1];
296 break;
297 case _SYSCALL_RET_UINT_T:
298 munged_rv0 = ((u_int)rv[0]);
299 munged_rv1 = ((u_int)rv[1]);
300 break;
301 case _SYSCALL_RET_OFF_T:
302 case _SYSCALL_RET_UINT64_T:
303 munged_rv0 = *(u_int64_t *)rv;
304 munged_rv1 = 0LL;
305 break;
306 case _SYSCALL_RET_ADDR_T:
307 case _SYSCALL_RET_SIZE_T:
308 case _SYSCALL_RET_SSIZE_T:
309 munged_rv0 = *(user_addr_t *)rv;
310 munged_rv1 = 0LL;
311 break;
312 case _SYSCALL_RET_NONE:
313 munged_rv0 = 0LL;
314 munged_rv1 = 0LL;
315 break;
316 default:
317 munged_rv0 = 0LL;
318 munged_rv1 = 0LL;
319 break;
320 }
321 }
322 } else {
323 munged_rv0 = 0LL;
324 munged_rv1 = 0LL;
325 }
326
327 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
328 }
329 }
330 #endif /* __APPLE__ */
331
/*
 * Encoding of the per-probe private argument ("parg") handed to
 * dtrace_probe_create(): the low SYSTRACE_SHIFT bits hold the call number
 * and the bit just above them is set for an entry probe and clear for a
 * return probe.
 */
#define SYSTRACE_SHIFT 16
#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id) (id)

/*
 * NOTE(review): on Darwin NSYSCALL expands to nsysent. If that is a variable
 * rather than a macro, the preprocessor evaluates it as 0 and this check can
 * never fire -- confirm whether a run-time check is wanted instead.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif

/* Device handle recorded by systrace_attach(). */
static dev_info_t *systrace_devi;
/* Provider id filled in by dtrace_register() for the "syscall" provider. */
static dtrace_provider_id_t systrace_id;
344
345 #if !defined (__APPLE__)
346 static void
347 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
348 {
349 systrace_sysent_t *sysent = *interposed;
350 int i;
351
352 if (sysent == NULL) {
353 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
354 NSYSCALL, KM_SLEEP);
355 }
356
357 for (i = 0; i < NSYSCALL; i++) {
358 struct sysent *a = &actual[i];
359 systrace_sysent_t *s = &sysent[i];
360
361 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
362 continue;
363
364 if (a->sy_callc == dtrace_systrace_syscall)
365 continue;
366
367 #ifdef _SYSCALL32_IMPL
368 if (a->sy_callc == dtrace_systrace_syscall32)
369 continue;
370 #endif
371
372 s->stsy_underlying = a->sy_callc;
373 }
374 }
375 #else
376 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
377 static void
378 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
379 {
380
381 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
382 from bsd/sys/sysent.h */
383 int i;
384
385 if (ssysent == NULL) {
386 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
387 NSYSCALL, KM_SLEEP);
388 }
389
390 for (i = 0; i < NSYSCALL; i++) {
391 struct sysent *a = &actual[i];
392 systrace_sysent_t *s = &ssysent[i];
393
394 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
395 continue;
396
397 if (a->sy_callc == dtrace_systrace_syscall)
398 continue;
399
400 #ifdef _SYSCALL32_IMPL
401 if (a->sy_callc == dtrace_systrace_syscall32)
402 continue;
403 #endif
404
405 s->stsy_underlying = a->sy_callc;
406 s->stsy_return_type = a->sy_return_type;
407 }
408 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
409 }
410
411 #endif /* __APPLE__ */
412
413 /*ARGSUSED*/
414 static void
415 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
416 {
417 #pragma unused(arg) /* __APPLE__ */
418 int i;
419
420 if (desc != NULL)
421 return;
422
423 systrace_init(sysent, &systrace_sysent);
424 #ifdef _SYSCALL32_IMPL
425 systrace_init(sysent32, &systrace_sysent32);
426 #endif
427
428 for (i = 0; i < NSYSCALL; i++) {
429 if (systrace_sysent[i].stsy_underlying == NULL)
430 continue;
431
432 if (dtrace_probe_lookup(systrace_id, NULL,
433 syscallnames[i], "entry") != 0)
434 continue;
435
436 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
437 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
438 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
440 "return", SYSTRACE_ARTIFICIAL_FRAMES,
441 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
442
443 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
444 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
445 #ifdef _SYSCALL32_IMPL
446 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
447 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
448 #endif
449 }
450 }
451 #if defined(__APPLE__)
452 #undef systrace_init
453 #endif
454
455 /*ARGSUSED*/
456 static void
457 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
458 {
459 #pragma unused(arg,id) /* __APPLE__ */
460
461 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
462
463 #pragma unused(sysnum) /* __APPLE__ */
464 /*
465 * There's nothing to do here but assert that we have actually been
466 * disabled.
467 */
468 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
469 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
470 #ifdef _SYSCALL32_IMPL
471 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
472 #endif
473 } else {
474 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
475 #ifdef _SYSCALL32_IMPL
476 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
477 #endif
478 }
479 }
480
481 /*ARGSUSED*/
482 static int
483 systrace_enable(void *arg, dtrace_id_t id, void *parg)
484 {
485 #pragma unused(arg) /* __APPLE__ */
486
487 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
488 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
489 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
490
491 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
492 systrace_sysent[sysnum].stsy_entry = id;
493 #ifdef _SYSCALL32_IMPL
494 systrace_sysent32[sysnum].stsy_entry = id;
495 #endif
496 } else {
497 systrace_sysent[sysnum].stsy_return = id;
498 #ifdef _SYSCALL32_IMPL
499 systrace_sysent32[sysnum].stsy_return = id;
500 #endif
501 }
502
503 if (enabled) {
504 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
505 return(0);
506 }
507 #ifdef _SYSCALL32_IMPL
508 (void) casptr(&sysent32[sysnum].sy_callc,
509 (void *)systrace_sysent32[sysnum].stsy_underlying,
510 (void *)dtrace_systrace_syscall32);
511 #endif
512
513 lck_mtx_lock(&dtrace_systrace_lock);
514 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
515 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
516 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
517 }
518 lck_mtx_unlock(&dtrace_systrace_lock);
519 return (0);
520 }
521
522 /*ARGSUSED*/
523 static void
524 systrace_disable(void *arg, dtrace_id_t id, void *parg)
525 {
526 #pragma unused(arg,id) /* __APPLE__ */
527
528 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
529 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
530 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
531
532 if (disable) {
533 lck_mtx_lock(&dtrace_systrace_lock);
534 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
535 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
536 lck_mtx_unlock(&dtrace_systrace_lock);
537
538 #ifdef _SYSCALL32_IMPL
539 (void) casptr(&sysent32[sysnum].sy_callc,
540 (void *)dtrace_systrace_syscall32,
541 (void *)systrace_sysent32[sysnum].stsy_underlying);
542 #endif
543 }
544
545 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
546 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
547 #ifdef _SYSCALL32_IMPL
548 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
549 #endif
550 } else {
551 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
552 #ifdef _SYSCALL32_IMPL
553 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
554 #endif
555 }
556 }
557
/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.
 * NOTE(review): the five rows presumably follow dtrace_pattr_t field order
 * (provider, module, function, name, arguments), each giving name stability,
 * data stability and dependency class -- confirm against sys/dtrace.h.
 */
static dtrace_pattr_t systrace_attr = {
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
565
/*
 * Provider operations vector passed to dtrace_register() for the "syscall"
 * provider. Only provide, enable, disable, argument fetch and destroy are
 * supplied; the remaining slots are NULL.
 * NOTE(review): slot order is dictated by dtrace_pops_t in sys/dtrace.h --
 * confirm against that header before adding or moving entries.
 */
static dtrace_pops_t systrace_pops = {
	systrace_provide,
	NULL,
	systrace_enable,
	systrace_disable,
	NULL,
	NULL,
	NULL,
	systrace_getarg,
	NULL,
	systrace_destroy
};
578
579 static int
580 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
581 {
582 switch (cmd) {
583 case DDI_ATTACH:
584 break;
585 case DDI_RESUME:
586 return (DDI_SUCCESS);
587 default:
588 return (DDI_FAILURE);
589 }
590
591 #if !defined(__APPLE__)
592 systrace_probe = (void (*)())dtrace_probe;
593 membar_enter();
594
595 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
596 DDI_PSEUDO, NULL) == DDI_FAILURE ||
597 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
598 &systrace_pops, NULL, &systrace_id) != 0) {
599 systrace_probe = systrace_stub;
600 ddi_remove_minor_node(devi, NULL);
601 return (DDI_FAILURE);
602 }
603 #else
604 systrace_probe = (void(*))&dtrace_probe;
605 membar_enter();
606
607 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
608 DDI_PSEUDO, 0) == DDI_FAILURE ||
609 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
610 &systrace_pops, NULL, &systrace_id) != 0) {
611 systrace_probe = systrace_stub;
612 ddi_remove_minor_node(devi, NULL);
613 return (DDI_FAILURE);
614 }
615 #endif /* __APPLE__ */
616
617 ddi_report_dev(devi);
618 systrace_devi = devi;
619
620 return (DDI_SUCCESS);
621 }
622
623 #if !defined(__APPLE__)
624 static int
625 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
626 {
627 switch (cmd) {
628 case DDI_DETACH:
629 break;
630 case DDI_SUSPEND:
631 return (DDI_SUCCESS);
632 default:
633 return (DDI_FAILURE);
634 }
635
636 if (dtrace_unregister(systrace_id) != 0)
637 return (DDI_FAILURE);
638
639 ddi_remove_minor_node(devi, NULL);
640 systrace_probe = systrace_stub;
641 return (DDI_SUCCESS);
642 }
643
644 /*ARGSUSED*/
645 static int
646 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
647 {
648 int error;
649
650 switch (infocmd) {
651 case DDI_INFO_DEVT2DEVINFO:
652 *result = (void *)systrace_devi;
653 error = DDI_SUCCESS;
654 break;
655 case DDI_INFO_DEVT2INSTANCE:
656 *result = (void *)0;
657 error = DDI_SUCCESS;
658 break;
659 default:
660 error = DDI_FAILURE;
661 }
662 return (error);
663 }
664
665 /*ARGSUSED*/
666 static int
667 systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
668 {
669 return (0);
670 }
671
/*
 * Character/block entry points for the systrace pseudo device (Solaris
 * only). Only open is implemented by this file; the other slots are filled
 * with the generic nodev/nulldev/nochpoll/ddi_prop_op handlers.
 */
static struct cb_ops systrace_cb_ops = {
	systrace_open, /* open */
	nodev, /* close */
	nulldev, /* strategy */
	nulldev, /* print */
	nodev, /* dump */
	nodev, /* read */
	nodev, /* write */
	nodev, /* ioctl */
	nodev, /* devmap */
	nodev, /* mmap */
	nodev, /* segmap */
	nochpoll, /* poll */
	ddi_prop_op, /* cb_prop_op */
	0, /* streamtab */
	D_NEW | D_MP /* Driver compatibility flag */
};
689
/*
 * Device operations for the systrace pseudo driver (Solaris only): wires up
 * the info, attach and detach routines defined in this file and the cb_ops
 * table above.
 */
static struct dev_ops systrace_ops = {
	DEVO_REV, /* devo_rev, */
	0, /* refcnt */
	systrace_info, /* get_dev_info */
	nulldev, /* identify */
	nulldev, /* probe */
	systrace_attach, /* attach */
	systrace_detach, /* detach */
	nodev, /* reset */
	&systrace_cb_ops, /* driver operations */
	NULL, /* bus operations */
	nodev /* dev power */
};
703
704 /*
705 * Module linkage information for the kernel.
706 */
/* Driver description referenced by modlinkage below. */
static struct modldrv modldrv = {
	&mod_driverops, /* module type (this is a pseudo driver) */
	"System Call Tracing", /* name of module */
	&systrace_ops, /* driver ops */
};

/* Linkage record passed to mod_install(), mod_info() and mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
718
719 int
720 _init(void)
721 {
722 return (mod_install(&modlinkage));
723 }
724
725 int
726 _info(struct modinfo *modinfop)
727 {
728 return (mod_info(&modlinkage, modinfop));
729 }
730
731 int
732 _fini(void)
733 {
734 return (mod_remove(&modlinkage));
735 }
736 #else
/* Signature of a Mach trap handler as stored in the trap table. */
typedef kern_return_t (*mach_call_t)(void *);

/* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
typedef void mach_munge_t(const void *, void *);

/*
 * Local copy of the Mach trap table entry layout.
 * NOTE(review): this duplicates a kernel-private definition and must stay in
 * step with it -- confirm against kern/syscall_sw.h.
 */
typedef struct {
	int mach_trap_arg_count;
	kern_return_t (*mach_trap_function)(void *); /* slot patched by machtrace_enable/disable */
#if 0 /* no active architectures use mungers for mach traps */
	mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
	mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */
#endif
#if MACH_ASSERT
	const char* mach_trap_name;
#endif /* MACH_ASSERT */
} mach_trap_t;

/* The live Mach trap table and its entry count. */
extern mach_trap_t mach_trap_table[];
extern int mach_trap_count;

/* Probe function names, indexed by Mach trap number. */
extern const char *mach_syscall_name_table[];
758
759 /* XXX From osfmk/i386/bsd_i386.c */
/*
 * Argument block handed to a Mach trap handler: nine consecutive
 * syscall_arg_t slots. dtrace_machtrace_syscall() reads it as a flat
 * syscall_arg_t array.
 */
struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
771
/* From here on NSYSCALL counts Mach traps instead of BSD system calls. */
#undef NSYSCALL
#define NSYSCALL mach_trap_count

/*
 * NOTE(review): mach_trap_count is declared above as an extern int, so the
 * preprocessor evaluates it as 0 and this check can never fire -- confirm
 * whether a run-time check is wanted instead.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
#endif

/* Per-trap interposition record for the "mach_trap" provider. */
typedef struct machtrace_sysent {
	dtrace_id_t stsy_entry; /* entry probe id, DTRACE_IDNONE when disabled */
	dtrace_id_t stsy_return; /* return probe id, DTRACE_IDNONE when disabled */
	kern_return_t (*stsy_underlying)(void *); /* original trap handler */
	int32_t stsy_return_type; /* not assigned anywhere in this file */
} machtrace_sysent_t;

/* Interposition table; allocated on first use by machtrace_init(). */
static machtrace_sysent_t *machtrace_sysent = NULL;

/*
 * Function through which the Mach trap hook fires its probes; set by
 * machtrace_attach().
 */
void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
    uint64_t, uint64_t, uint64_t);

/* Defined at the end of the file; referenced by machtrace_pops. */
static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);

/* Device handle recorded by machtrace_attach(). */
static dev_info_t *machtrace_devi;
/* Provider id filled in by dtrace_register() for the "mach_trap" provider. */
static dtrace_provider_id_t machtrace_id;
795
796 static kern_return_t
797 dtrace_machtrace_syscall(struct mach_call_args *args)
798 {
799 boolean_t flavor;
800 unsigned short code;
801
802 machtrace_sysent_t *sy;
803 dtrace_id_t id;
804 kern_return_t rval;
805 #if 0 /* XXX */
806 proc_t *p;
807 #endif
808 syscall_arg_t *ip = (syscall_arg_t *)args;
809 mach_call_t mach_call;
810
811 #if defined(__i386__) || defined (__x86_64__)
812 #pragma unused(flavor)
813 {
814 pal_register_cache_state(current_thread(), VALID);
815 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
816
817 if (is_saved_state64(tagged_regs)) {
818 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
819 } else {
820 code = -saved_state32(tagged_regs)->eax;
821 }
822 }
823 #else
824 #error Unknown Architecture
825 #endif
826
827 sy = &machtrace_sysent[code];
828
829 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
830 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
831
832 if (uthread)
833 uthread->t_dtrace_syscall_args = (void *)ip;
834
835 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
836
837 if (uthread)
838 uthread->t_dtrace_syscall_args = (void *)0;
839 }
840
841 #if 0 /* XXX */
842 /*
843 * We want to explicitly allow DTrace consumers to stop a process
844 * before it actually executes the meat of the syscall.
845 */
846 p = ttoproc(curthread);
847 mutex_enter(&p->p_lock);
848 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
849 curthread->t_dtrace_stop = 0;
850 stop(PR_REQUESTED, 0);
851 }
852 mutex_exit(&p->p_lock);
853 #endif
854
855 mach_call = (mach_call_t)(*sy->stsy_underlying);
856 rval = mach_call(args);
857
858 if ((id = sy->stsy_return) != DTRACE_IDNONE)
859 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
860
861 return (rval);
862 }
863
864 static void
865 machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
866 {
867 machtrace_sysent_t *msysent = *interposed;
868 int i;
869
870 if (msysent == NULL) {
871 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
872 NSYSCALL, KM_SLEEP);
873 }
874
875 for (i = 0; i < NSYSCALL; i++) {
876 mach_trap_t *a = &actual[i];
877 machtrace_sysent_t *s = &msysent[i];
878
879 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
880 continue;
881
882 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
883 continue;
884
885 s->stsy_underlying = a->mach_trap_function;
886 }
887 }
888
889 /*ARGSUSED*/
890 static void
891 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
892 {
893 #pragma unused(arg) /* __APPLE__ */
894
895 int i;
896
897 if (desc != NULL)
898 return;
899
900 machtrace_init(mach_trap_table, &machtrace_sysent);
901
902 for (i = 0; i < NSYSCALL; i++) {
903
904 if (machtrace_sysent[i].stsy_underlying == NULL)
905 continue;
906
907 if (dtrace_probe_lookup(machtrace_id, NULL,
908 mach_syscall_name_table[i], "entry") != 0)
909 continue;
910
911 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
912 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
913 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
914 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
915 "return", MACHTRACE_ARTIFICIAL_FRAMES,
916 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
917
918 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
919 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
920 }
921 }
922
923 /*ARGSUSED*/
924 static void
925 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
926 {
927 #pragma unused(arg,id) /* __APPLE__ */
928 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
929
930 #pragma unused(sysnum) /* __APPLE__ */
931
932 /*
933 * There's nothing to do here but assert that we have actually been
934 * disabled.
935 */
936 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
937 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
938 } else {
939 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
940 }
941 }
942
943 /*ARGSUSED*/
944 static int
945 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
946 {
947 #pragma unused(arg) /* __APPLE__ */
948
949 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
950 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
951 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
952
953 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
954 machtrace_sysent[sysnum].stsy_entry = id;
955 } else {
956 machtrace_sysent[sysnum].stsy_return = id;
957 }
958
959 if (enabled) {
960 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
961 return(0);
962 }
963
964 lck_mtx_lock(&dtrace_systrace_lock);
965
966 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
967 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
968 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
969 }
970
971 lck_mtx_unlock(&dtrace_systrace_lock);
972
973 return(0);
974 }
975
976 /*ARGSUSED*/
977 static void
978 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
979 {
980 #pragma unused(arg,id) /* __APPLE__ */
981
982 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
983 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
984 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
985
986 if (disable) {
987
988 lck_mtx_lock(&dtrace_systrace_lock);
989
990 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
991 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
992 }
993 lck_mtx_unlock(&dtrace_systrace_lock);
994 }
995
996 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
997 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
998 } else {
999 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
1000 }
1001 }
1002
/*
 * Stability attributes passed to dtrace_register() for the "mach_trap"
 * provider; the values are the same as those of the "syscall" provider.
 * NOTE(review): the five rows presumably follow dtrace_pattr_t field order
 * (provider, module, function, name, arguments), each giving name stability,
 * data stability and dependency class -- confirm against sys/dtrace.h.
 */
static dtrace_pattr_t machtrace_attr = {
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
1010
/*
 * Provider operations vector passed to dtrace_register() for the
 * "mach_trap" provider. Only provide, enable, disable, argument fetch and
 * destroy are supplied; the remaining slots are NULL.
 * NOTE(review): slot order is dictated by dtrace_pops_t in sys/dtrace.h --
 * confirm against that header before adding or moving entries.
 */
static dtrace_pops_t machtrace_pops = {
	machtrace_provide,
	NULL,
	machtrace_enable,
	machtrace_disable,
	NULL,
	NULL,
	NULL,
	machtrace_getarg,
	NULL,
	machtrace_destroy
};
1023
1024 static int
1025 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1026 {
1027 switch (cmd) {
1028 case DDI_ATTACH:
1029 break;
1030 case DDI_RESUME:
1031 return (DDI_SUCCESS);
1032 default:
1033 return (DDI_FAILURE);
1034 }
1035
1036 #if !defined(__APPLE__)
1037 machtrace_probe = (void (*)())dtrace_probe;
1038 membar_enter();
1039
1040 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1041 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1042 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1043 &machtrace_pops, NULL, &machtrace_id) != 0) {
1044 machtrace_probe = systrace_stub;
1045 #else
1046 machtrace_probe = dtrace_probe;
1047 membar_enter();
1048
1049 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1050 DDI_PSEUDO, 0) == DDI_FAILURE ||
1051 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1052 &machtrace_pops, NULL, &machtrace_id) != 0) {
1053 machtrace_probe = (void (*))&systrace_stub;
1054 #endif /* __APPLE__ */
1055 ddi_remove_minor_node(devi, NULL);
1056 return (DDI_FAILURE);
1057 }
1058
1059 ddi_report_dev(devi);
1060 machtrace_devi = devi;
1061
1062 return (DDI_SUCCESS);
1063 }
1064
1065 d_open_t _systrace_open;
1066
1067 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1068 {
1069 #pragma unused(dev,flags,devtype,p)
1070 return 0;
1071 }
1072
/* Major number requested from cdevsw_add() in systrace_init(). */
#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions. Only open is implemented by this file; the other slots are
 * filled with the generic eno_* and nulldev handlers.
 */
static struct cdevsw systrace_cdevsw =
{
	_systrace_open, /* open */
	eno_opcl, /* close */
	eno_rdwrt, /* read */
	eno_rdwrt, /* write */
	eno_ioctl, /* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL, /* tty's */
	eno_select, /* select */
	eno_mmap, /* mmap */
	eno_strat, /* strategy */
	eno_getc, /* getc */
	eno_putc, /* putc */
	0 /* type */
};
1096
1097 static int gSysTraceInited = 0;
1098
1099 void systrace_init( void );
1100
1101 void systrace_init( void )
1102 {
1103 if (0 == gSysTraceInited) {
1104 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1105
1106 if (majdevno < 0) {
1107 printf("systrace_init: failed to allocate a major number!\n");
1108 gSysTraceInited = 0;
1109 return;
1110 }
1111
1112 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1113 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1114
1115 gSysTraceInited = 1;
1116 } else
1117 panic("systrace_init: called twice!\n");
1118 }
1119 #undef SYSTRACE_MAJOR
1120 #endif /* __APPLE__ */
1121
1122 static uint64_t
1123 systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1124 {
1125 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1126 uint64_t val = 0;
1127 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1128
1129 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1130
1131 if (uthread)
1132 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1133
1134 if (!stack)
1135 return(0);
1136
1137 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1138 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1139 val = (uint64_t)*(stack+argno);
1140 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1141 return (val);
1142 }
1143
1144
1145 static uint64_t
1146 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1147 {
1148 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1149 uint64_t val = 0;
1150 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1151
1152 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1153
1154 if (uthread)
1155 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1156
1157 if (!stack)
1158 return(0);
1159
1160 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1161 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1162 val = (uint64_t)*(stack+argno);
1163 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1164 return (val);
1165 }
1166