]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
27d199eebc7b98d375769ae0d3a2ee683b6edea3
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #ifdef KERNEL
29 #ifndef _KERNEL
30 #define _KERNEL /* Solaris vs. Darwin */
31 #endif
32 #endif
33
34 #include <kern/thread.h>
35 #include <mach/thread_status.h>
36
37 /* XXX All of these should really be derived from syscall_sw.h */
38 #if defined (__x86_64__)
39 #define SYSCALL_CLASS_SHIFT 24
40 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
41 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
42 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/syscall.h>
53 #include <miscfs/devfs/devfs.h>
54
55 #include <sys/dtrace.h>
56 #include <sys/dtrace_impl.h>
57 #include <sys/systrace_args.h>
58 #include "systrace.h"
59 #include <sys/stat.h>
60 #include <sys/systm.h>
61 #include <sys/conf.h>
62 #include <sys/user.h>
63
64 #include <machine/pal_routines.h>
65
66 #if defined (__x86_64__)
67 #define SYSTRACE_ARTIFICIAL_FRAMES 2
68 #define MACHTRACE_ARTIFICIAL_FRAMES 3
69 #elif defined(__arm__) || defined(__arm64__)
70 #define SYSTRACE_ARTIFICIAL_FRAMES 2
71 #define MACHTRACE_ARTIFICIAL_FRAMES 3
72 #else
73 #error Unknown Architecture
74 #endif
75
76 #define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
77
78 #include <sys/sysent.h>
79 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
80 #define NSYSCALL nsysent /* and is less than 500 or so */
81
82 extern const char *syscallnames[];
83
84 #include <sys/dtrace_glue.h>
85 #define casptr dtrace_casptr
86 #define membar_enter dtrace_membar_producer
87
88 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
89 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
90
91 extern lck_attr_t* dtrace_lck_attr;
92 extern lck_grp_t* dtrace_lck_grp;
93 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
94
95 systrace_sysent_t *systrace_sysent = NULL;
96 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
97
98 static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
99 static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
100
101 void
102 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
103 uint64_t arg2, uint64_t arg3, uint64_t arg4)
104 {
105 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
106 }
107
108 int32_t
109 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
110 {
111 unsigned short code; /* The system call number */
112
113 systrace_sysent_t *sy;
114 dtrace_id_t id;
115 int32_t rval;
116 syscall_arg_t *ip = (syscall_arg_t *)uap;
117 uint64_t uargs[SYSTRACE_NARGS] = {0};
118
119 #if defined (__x86_64__)
120 {
121 pal_register_cache_state(current_thread(), VALID);
122 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
123
124 if (is_saved_state64(tagged_regs)) {
125 x86_saved_state64_t *regs = saved_state64(tagged_regs);
126 code = regs->rax & SYSCALL_NUMBER_MASK;
127 /*
128 * Check for indirect system call... system call number
129 * passed as 'arg0'
130 */
131 if (code == 0) {
132 code = regs->rdi;
133 }
134 } else {
135 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
136
137 if (code == 0) {
138 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
139 code = fuword(params);
140 }
141 }
142 }
143 #elif defined(__arm__)
144 {
145 /*
146 * On arm, syscall numbers depend on a flavor (indirect or not)
147 * and can be in either r0 or r12 (always u32)
148 */
149
150 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
151 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
152
153 /* Check for indirect system call */
154 if (arm_regs->r[12] != 0) {
155 code = arm_regs->r[12];
156 } else {
157 code = arm_regs->r[0];
158 }
159 }
160 #elif defined(__arm64__)
161 {
162 /*
163 * On arm64, syscall numbers depend on a flavor (indirect or not)
164 * ... and for u32 can be in either r0 or r12
165 * ... and for u64 can be in either x0 or x16
166 */
167
168 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
169 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
170
171 if (is_saved_state32(arm_regs)) {
172 /* Check for indirect system call */
173 if (saved_state32(arm_regs)->r[12] != 0) {
174 code = saved_state32(arm_regs)->r[12];
175 } else {
176 code = saved_state32(arm_regs)->r[0];
177 }
178 } else {
179 /* Check for indirect system call */
180 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
181 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
182 } else {
183 code = saved_state64(arm_regs)->x[0];
184 }
185 }
186 }
187 #else
188 #error Unknown Architecture
189 #endif
190
191 // Bounds "check" the value of code a la unix_syscall
192 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
193
194 systrace_args(code, ip, uargs);
195
196 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
197 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
198 if (uthread) {
199 uthread->t_dtrace_syscall_args = uargs;
200 }
201
202 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
203 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
204
205 if (uthread) {
206 uthread->t_dtrace_syscall_args = NULL;
207 }
208 }
209
210
211
212 #if 0 /* XXX */
213 /*
214 * APPLE NOTE: Not implemented.
215 * We want to explicitly allow DTrace consumers to stop a process
216 * before it actually executes the meat of the syscall.
217 */
218 p = ttoproc(curthread);
219 mutex_enter(&p->p_lock);
220 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
221 curthread->t_dtrace_stop = 0;
222 stop(PR_REQUESTED, 0);
223 }
224 mutex_exit(&p->p_lock);
225 #endif
226
227 rval = (*sy->stsy_underlying)(pp, uap, rv);
228
229 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
230 uint64_t munged_rv0, munged_rv1;
231 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
232
233 if (uthread) {
234 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
235 }
236 /*
237 * "Decode" rv for use in the call to dtrace_probe()
238 */
239 if (rval == ERESTART) {
240 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
241 munged_rv1 = -1LL;
242 } else if (rval != EJUSTRETURN) {
243 if (rval) {
244 munged_rv0 = -1LL; /* Mimic what libc will do. */
245 munged_rv1 = -1LL;
246 } else {
247 switch (sy->stsy_return_type) {
248 case _SYSCALL_RET_INT_T:
249 munged_rv0 = rv[0];
250 munged_rv1 = rv[1];
251 break;
252 case _SYSCALL_RET_UINT_T:
253 munged_rv0 = ((u_int)rv[0]);
254 munged_rv1 = ((u_int)rv[1]);
255 break;
256 case _SYSCALL_RET_OFF_T:
257 case _SYSCALL_RET_UINT64_T:
258 munged_rv0 = *(u_int64_t *)rv;
259 munged_rv1 = 0LL;
260 break;
261 case _SYSCALL_RET_ADDR_T:
262 case _SYSCALL_RET_SIZE_T:
263 case _SYSCALL_RET_SSIZE_T:
264 munged_rv0 = *(user_addr_t *)rv;
265 munged_rv1 = 0LL;
266 break;
267 case _SYSCALL_RET_NONE:
268 munged_rv0 = 0LL;
269 munged_rv1 = 0LL;
270 break;
271 default:
272 munged_rv0 = 0LL;
273 munged_rv1 = 0LL;
274 break;
275 }
276 }
277 } else {
278 munged_rv0 = 0LL;
279 munged_rv1 = 0LL;
280 }
281
282 /*
283 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
284 *
285 * "This is a bit of an historical artifact. At first, the syscall provider just
286 * had its return value in arg0, and the fbt and pid providers had their return
287 * values in arg1 (so that we could use arg0 for the offset of the return site).
288 *
289 * We inevitably started writing scripts where we wanted to see the return
290 * values from probes in all three providers, and we made this script easier
291 * to write by replicating the syscall return values in arg1 to match fbt and
292 * pid. We debated briefly about removing the return value from arg0, but
293 * decided that it would be less confusing to have the same data in two places
294 * than to have some non-helpful, non-intuitive value in arg0.
295 *
296 * This change was made 4/23/2003 according to the DTrace project's putback log."
297 */
298 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
299 }
300
301 return rval;
302 }
303
304 void
305 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
306 {
307 systrace_sysent_t *sy;
308 dtrace_id_t id;
309
310 // Bounds "check" the value of code a la unix_syscall_return
311 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
312
313 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
314 uint64_t munged_rv0, munged_rv1;
315 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
316
317 if (uthread) {
318 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
319 }
320 /*
321 * "Decode" rv for use in the call to dtrace_probe()
322 */
323 if (rval == ERESTART) {
324 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
325 munged_rv1 = -1LL;
326 } else if (rval != EJUSTRETURN) {
327 if (rval) {
328 munged_rv0 = -1LL; /* Mimic what libc will do. */
329 munged_rv1 = -1LL;
330 } else {
331 switch (sy->stsy_return_type) {
332 case _SYSCALL_RET_INT_T:
333 munged_rv0 = rv[0];
334 munged_rv1 = rv[1];
335 break;
336 case _SYSCALL_RET_UINT_T:
337 munged_rv0 = ((u_int)rv[0]);
338 munged_rv1 = ((u_int)rv[1]);
339 break;
340 case _SYSCALL_RET_OFF_T:
341 case _SYSCALL_RET_UINT64_T:
342 munged_rv0 = *(u_int64_t *)rv;
343 munged_rv1 = 0LL;
344 break;
345 case _SYSCALL_RET_ADDR_T:
346 case _SYSCALL_RET_SIZE_T:
347 case _SYSCALL_RET_SSIZE_T:
348 munged_rv0 = *(user_addr_t *)rv;
349 munged_rv1 = 0LL;
350 break;
351 case _SYSCALL_RET_NONE:
352 munged_rv0 = 0LL;
353 munged_rv1 = 0LL;
354 break;
355 default:
356 munged_rv0 = 0LL;
357 munged_rv1 = 0LL;
358 break;
359 }
360 }
361 } else {
362 munged_rv0 = 0LL;
363 munged_rv1 = 0LL;
364 }
365
366 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
367 }
368 }
369
370 #define SYSTRACE_SHIFT 16
371 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
372 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
373 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
374 #define SYSTRACE_RETURN(id) (id)
375
376 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
377 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
378 #endif
379
380 static dtrace_provider_id_t systrace_id;
381
382 /*
383 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
384 * See balanced undef below.
385 */
386 #define systrace_init _systrace_init
387
388 static void
389 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
390 {
391 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
392 * from bsd/sys/sysent.h */
393 unsigned int i;
394
395 if (ssysent == NULL) {
396 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
397 NSYSCALL, KM_SLEEP);
398 }
399
400 for (i = 0; i < NSYSCALL; i++) {
401 struct sysent *a = &actual[i];
402 systrace_sysent_t *s = &ssysent[i];
403
404 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
405 continue;
406 }
407
408 if (a->sy_callc == dtrace_systrace_syscall) {
409 continue;
410 }
411
412 s->stsy_underlying = a->sy_callc;
413 s->stsy_return_type = a->sy_return_type;
414 }
415 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
416 }
417
418
419 /*ARGSUSED*/
420 static void
421 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
422 {
423 #pragma unused(arg) /* __APPLE__ */
424 unsigned int i;
425
426 if (desc != NULL) {
427 return;
428 }
429
430 systrace_init(sysent, &systrace_sysent);
431
432 for (i = 0; i < NSYSCALL; i++) {
433 if (systrace_sysent[i].stsy_underlying == NULL) {
434 continue;
435 }
436
437 if (dtrace_probe_lookup(systrace_id, NULL,
438 syscallnames[i], "entry") != 0) {
439 continue;
440 }
441
442 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
443 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
444 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
445 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
446 "return", SYSTRACE_ARTIFICIAL_FRAMES,
447 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
448
449 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
450 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
451 }
452 }
453 #undef systrace_init
454
455 /*ARGSUSED*/
456 static void
457 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
458 {
459 #pragma unused(arg,id) /* __APPLE__ */
460
461 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
462
463 #pragma unused(sysnum) /* __APPLE__ */
464 /*
465 * There's nothing to do here but assert that we have actually been
466 * disabled.
467 */
468 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
469 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
470 } else {
471 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
472 }
473 }
474
475 /*ARGSUSED*/
476 static int
477 systrace_enable(void *arg, dtrace_id_t id, void *parg)
478 {
479 #pragma unused(arg) /* __APPLE__ */
480
481 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
482 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
483 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
484
485 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
486 systrace_sysent[sysnum].stsy_entry = id;
487 } else {
488 systrace_sysent[sysnum].stsy_return = id;
489 }
490
491 if (enabled) {
492 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
493 return 0;
494 }
495
496 lck_mtx_lock(&dtrace_systrace_lock);
497 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
498 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
499 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
500 }
501 lck_mtx_unlock(&dtrace_systrace_lock);
502 return 0;
503 }
504
505 /*ARGSUSED*/
506 static void
507 systrace_disable(void *arg, dtrace_id_t id, void *parg)
508 {
509 #pragma unused(arg,id) /* __APPLE__ */
510
511 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
512 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
513 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
514
515 if (disable) {
516 lck_mtx_lock(&dtrace_systrace_lock);
517 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) {
518 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
519 }
520 lck_mtx_unlock(&dtrace_systrace_lock);
521 }
522
523 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
524 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
525 } else {
526 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
527 }
528 }
529
530 static dtrace_pattr_t systrace_attr = {
531 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
532 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
533 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
534 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
535 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
536 };
537
538 static dtrace_pops_t systrace_pops = {
539 .dtps_provide = systrace_provide,
540 .dtps_provide_module = NULL,
541 .dtps_enable = systrace_enable,
542 .dtps_disable = systrace_disable,
543 .dtps_suspend = NULL,
544 .dtps_resume = NULL,
545 .dtps_getargdesc = systrace_getargdesc,
546 .dtps_getargval = systrace_getargval,
547 .dtps_usermode = NULL,
548 .dtps_destroy = systrace_destroy
549 };
550
551 static int
552 systrace_attach(dev_info_t *devi)
553 {
554 systrace_probe = (void*)&dtrace_probe;
555 membar_enter();
556
557 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
558 DDI_PSEUDO, 0) == DDI_FAILURE ||
559 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
560 &systrace_pops, NULL, &systrace_id) != 0) {
561 systrace_probe = systrace_stub;
562 ddi_remove_minor_node(devi, NULL);
563 return DDI_FAILURE;
564 }
565
566 return DDI_SUCCESS;
567 }
568
569
570 /*
571 * APPLE NOTE: systrace_detach not implemented
572 */
573 #if !defined(__APPLE__)
574 static int
575 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
576 {
577 switch (cmd) {
578 case DDI_DETACH:
579 break;
580 case DDI_SUSPEND:
581 return DDI_SUCCESS;
582 default:
583 return DDI_FAILURE;
584 }
585
586 if (dtrace_unregister(systrace_id) != 0) {
587 return DDI_FAILURE;
588 }
589
590 ddi_remove_minor_node(devi, NULL);
591 systrace_probe = systrace_stub;
592 return DDI_SUCCESS;
593 }
594 #endif /* __APPLE__ */
595
596
597 typedef kern_return_t (*mach_call_t)(void *);
598
599 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
600 typedef void mach_munge_t(void *);
601
602 typedef struct {
603 int mach_trap_arg_count;
604 kern_return_t (*mach_trap_function)(void *);
605 #if defined(__arm64__) || defined(__x86_64__)
606 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
607 #endif
608 int mach_trap_u32_words;
609 #if MACH_ASSERT
610 const char* mach_trap_name;
611 #endif /* MACH_ASSERT */
612 } mach_trap_t;
613
614 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
615 extern int mach_trap_count;
616
617 extern const char *mach_syscall_name_table[];
618
619 /* XXX From osfmk/i386/bsd_i386.c */
620 struct mach_call_args {
621 syscall_arg_t arg1;
622 syscall_arg_t arg2;
623 syscall_arg_t arg3;
624 syscall_arg_t arg4;
625 syscall_arg_t arg5;
626 syscall_arg_t arg6;
627 syscall_arg_t arg7;
628 syscall_arg_t arg8;
629 syscall_arg_t arg9;
630 };
631
632 #undef NSYSCALL
633 #define NSYSCALL mach_trap_count
634
635 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
636 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
637 #endif
638
639 typedef struct machtrace_sysent {
640 dtrace_id_t stsy_entry;
641 dtrace_id_t stsy_return;
642 kern_return_t (*stsy_underlying)(void *);
643 int32_t stsy_return_type;
644 } machtrace_sysent_t;
645
646 static machtrace_sysent_t *machtrace_sysent = NULL;
647
648 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
649 uint64_t, uint64_t, uint64_t);
650
651 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
652
653 static dtrace_provider_id_t machtrace_id;
654
655 static kern_return_t
656 dtrace_machtrace_syscall(struct mach_call_args *args)
657 {
658 int code; /* The mach call number */
659
660 machtrace_sysent_t *sy;
661 dtrace_id_t id;
662 kern_return_t rval;
663 #if 0 /* XXX */
664 proc_t *p;
665 #endif
666 syscall_arg_t *ip = (syscall_arg_t *)args;
667 mach_call_t mach_call;
668
669 #if defined (__x86_64__)
670 {
671 pal_register_cache_state(current_thread(), VALID);
672 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
673
674 if (is_saved_state64(tagged_regs)) {
675 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
676 } else {
677 code = -saved_state32(tagged_regs)->eax;
678 }
679 }
680 #elif defined(__arm__)
681 {
682 /* r12 has the machcall number, but it is -ve */
683 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
684 code = (int)arm_regs->r[12];
685 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
686 code = -code;
687 }
688 #elif defined(__arm64__)
689 {
690 /* From arm/thread_status.h:get_saved_state_svc_number */
691 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
692 if (is_saved_state32(arm_regs)) {
693 code = (int)saved_state32(arm_regs)->r[12];
694 } else {
695 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
696 }
697
698 /* From bsd/arm64.c:mach_syscall */
699 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
700 code = -code;
701 }
702 #else
703 #error Unknown Architecture
704 #endif
705
706 sy = &machtrace_sysent[code];
707
708 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
709 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
710
711 if (uthread) {
712 uthread->t_dtrace_syscall_args = (void *)ip;
713 }
714
715 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
716
717 if (uthread) {
718 uthread->t_dtrace_syscall_args = (void *)0;
719 }
720 }
721
722 #if 0 /* XXX */
723 /*
724 * APPLE NOTE: Not implemented.
725 * We want to explicitly allow DTrace consumers to stop a process
726 * before it actually executes the meat of the syscall.
727 */
728 p = ttoproc(curthread);
729 mutex_enter(&p->p_lock);
730 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
731 curthread->t_dtrace_stop = 0;
732 stop(PR_REQUESTED, 0);
733 }
734 mutex_exit(&p->p_lock);
735 #endif
736
737 mach_call = (mach_call_t)(*sy->stsy_underlying);
738 rval = mach_call(args);
739
740 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
741 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
742 }
743
744 return rval;
745 }
746
747 static void
748 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
749 {
750 machtrace_sysent_t *msysent = *interposed;
751 int i;
752
753 if (msysent == NULL) {
754 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
755 NSYSCALL, KM_SLEEP);
756 }
757
758 for (i = 0; i < NSYSCALL; i++) {
759 const mach_trap_t *a = &actual[i];
760 machtrace_sysent_t *s = &msysent[i];
761
762 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
763 continue;
764 }
765
766 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
767 continue;
768 }
769
770 s->stsy_underlying = a->mach_trap_function;
771 }
772 }
773
774 /*ARGSUSED*/
775 static void
776 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
777 {
778 #pragma unused(arg) /* __APPLE__ */
779
780 int i;
781
782 if (desc != NULL) {
783 return;
784 }
785
786 machtrace_init(mach_trap_table, &machtrace_sysent);
787
788 for (i = 0; i < NSYSCALL; i++) {
789 if (machtrace_sysent[i].stsy_underlying == NULL) {
790 continue;
791 }
792
793 if (dtrace_probe_lookup(machtrace_id, NULL,
794 mach_syscall_name_table[i], "entry") != 0) {
795 continue;
796 }
797
798 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
799 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
800 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
801 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
802 "return", MACHTRACE_ARTIFICIAL_FRAMES,
803 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
804
805 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
806 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
807 }
808 }
809
810 /*ARGSUSED*/
811 static void
812 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
813 {
814 #pragma unused(arg,id) /* __APPLE__ */
815 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
816
817 #pragma unused(sysnum) /* __APPLE__ */
818
819 /*
820 * There's nothing to do here but assert that we have actually been
821 * disabled.
822 */
823 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
824 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
825 } else {
826 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
827 }
828 }
829
830 /*ARGSUSED*/
831 static int
832 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
833 {
834 #pragma unused(arg) /* __APPLE__ */
835
836 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
837 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
838 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
839
840 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
841 machtrace_sysent[sysnum].stsy_entry = id;
842 } else {
843 machtrace_sysent[sysnum].stsy_return = id;
844 }
845
846 if (enabled) {
847 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
848 return 0;
849 }
850
851 lck_mtx_lock(&dtrace_systrace_lock);
852
853 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
854 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
855 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
856 }
857
858 lck_mtx_unlock(&dtrace_systrace_lock);
859
860 return 0;
861 }
862
863 /*ARGSUSED*/
864 static void
865 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
866 {
867 #pragma unused(arg,id) /* __APPLE__ */
868
869 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
870 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
871 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
872
873 if (disable) {
874 lck_mtx_lock(&dtrace_systrace_lock);
875
876 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
877 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
878 }
879 lck_mtx_unlock(&dtrace_systrace_lock);
880 }
881
882 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
883 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
884 } else {
885 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
886 }
887 }
888
889 static dtrace_pattr_t machtrace_attr = {
890 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
891 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
892 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
893 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
894 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
895 };
896
897 static dtrace_pops_t machtrace_pops = {
898 .dtps_provide = machtrace_provide,
899 .dtps_provide_module = NULL,
900 .dtps_enable = machtrace_enable,
901 .dtps_disable = machtrace_disable,
902 .dtps_suspend = NULL,
903 .dtps_resume = NULL,
904 .dtps_getargdesc = NULL,
905 .dtps_getargval = machtrace_getarg,
906 .dtps_usermode = NULL,
907 .dtps_destroy = machtrace_destroy
908 };
909
910 static int
911 machtrace_attach(dev_info_t *devi)
912 {
913 machtrace_probe = dtrace_probe;
914 membar_enter();
915
916 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
917 DDI_PSEUDO, 0) == DDI_FAILURE ||
918 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
919 &machtrace_pops, NULL, &machtrace_id) != 0) {
920 machtrace_probe = (void*)&systrace_stub;
921 ddi_remove_minor_node(devi, NULL);
922 return DDI_FAILURE;
923 }
924
925 return DDI_SUCCESS;
926 }
927
928 d_open_t _systrace_open;
929
930 int
931 _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
932 {
933 #pragma unused(dev,flags,devtype,p)
934 return 0;
935 }
936
937 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
938
939 /*
940 * A struct describing which functions will get invoked for certain
941 * actions.
942 */
943 static struct cdevsw systrace_cdevsw =
944 {
945 _systrace_open, /* open */
946 eno_opcl, /* close */
947 eno_rdwrt, /* read */
948 eno_rdwrt, /* write */
949 eno_ioctl, /* ioctl */
950 (stop_fcn_t *)nulldev, /* stop */
951 (reset_fcn_t *)nulldev, /* reset */
952 NULL, /* tty's */
953 eno_select, /* select */
954 eno_mmap, /* mmap */
955 eno_strat, /* strategy */
956 eno_getc, /* getc */
957 eno_putc, /* putc */
958 0 /* type */
959 };
960
961 void systrace_init( void );
962
963 void
964 systrace_init( void )
965 {
966 if (dtrace_sdt_probes_restricted()) {
967 return;
968 }
969
970 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
971
972 if (majdevno < 0) {
973 printf("systrace_init: failed to allocate a major number!\n");
974 return;
975 }
976
977 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
978 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
979 }
980 #undef SYSTRACE_MAJOR
981
982 static uint64_t
983 systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
984 {
985 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
986 uint64_t val = 0;
987 uint64_t *uargs = NULL;
988
989 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
990
991 if (uthread) {
992 uargs = uthread->t_dtrace_syscall_args;
993 }
994 if (!uargs) {
995 return 0;
996 }
997 if (argno < 0 || argno >= SYSTRACE_NARGS) {
998 return 0;
999 }
1000
1001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1002 val = uargs[argno];
1003 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1004 return val;
1005 }
1006
1007 static void
1008 systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
1009 dtrace_argdesc_t *desc)
1010 {
1011 #pragma unused(arg, id)
1012 int sysnum = SYSTRACE_SYSNUM(parg);
1013 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1014 uint64_t *uargs = NULL;
1015
1016 if (!uthread) {
1017 desc->dtargd_ndx = DTRACE_ARGNONE;
1018 return;
1019 }
1020
1021 uargs = uthread->t_dtrace_syscall_args;
1022
1023 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1024 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1025 desc->dtargd_native, sizeof(desc->dtargd_native));
1026 } else {
1027 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1028 desc->dtargd_native, sizeof(desc->dtargd_native));
1029 }
1030
1031 if (desc->dtargd_native[0] == '\0') {
1032 desc->dtargd_ndx = DTRACE_ARGNONE;
1033 }
1034 }
1035
1036 static uint64_t
1037 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1038 {
1039 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1040 uint64_t val = 0;
1041 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1042
1043 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1044
1045 if (uthread) {
1046 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1047 }
1048
1049 if (!stack) {
1050 return 0;
1051 }
1052
1053 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1054 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1055 val = (uint64_t)*(stack + argno);
1056 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1057 return val;
1058 }