]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <ptrauth.h>
27
28 #include <kern/thread.h>
29 #include <mach/thread_status.h>
30
31 /* XXX All of these should really be derived from syscall_sw.h */
#ifdef __x86_64__
/* Bit position of the class field within the raw trap number. */
#define SYSCALL_CLASS_SHIFT     24
/* Selects the class bits; its complement selects the call number. */
#define SYSCALL_CLASS_MASK      (0xFF << SYSCALL_CLASS_SHIFT)
#define SYSCALL_NUMBER_MASK     (~SYSCALL_CLASS_MASK)
/* Applied to eax when decoding the call number of a 32-bit caller. */
#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
#endif /* __x86_64__ */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/syscall.h>
47 #include <miscfs/devfs/devfs.h>
48
49 #include <sys/dtrace.h>
50 #include <sys/dtrace_impl.h>
51 #include <sys/systrace_args.h>
52 #include "systrace.h"
53 #include <sys/stat.h>
54 #include <sys/systm.h>
55 #include <sys/conf.h>
56 #include <sys/user.h>
57
58 #include <machine/pal_routines.h>
59
/*
 * Artificial frame counts handed to dtrace_probe_create() for the syscall
 * and mach_trap probes. The values are the same on every supported
 * architecture; anything else is rejected at compile time.
 */
#if defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
#define SYSTRACE_ARTIFICIAL_FRAMES      2
#define MACHTRACE_ARTIFICIAL_FRAMES     3
#else
#error Unknown Architecture
#endif
69
70 #define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
71
72 #include <sys/sysent.h>
73 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
74 #define NSYSCALL nsysent /* and is less than 500 or so */
75
76 extern const char *syscallnames[];
77
78 #include <sys/dtrace_glue.h>
79 #define casptr dtrace_casptr
80 #define membar_enter dtrace_membar_producer
81
82 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
83 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
84
85 extern lck_attr_t* dtrace_lck_attr;
86 extern lck_grp_t* dtrace_lck_grp;
87 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
88
89 systrace_sysent_t *systrace_sysent = NULL;
90 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
91
92 static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
93 static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
94
95 void
96 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
97 uint64_t arg2, uint64_t arg3, uint64_t arg4)
98 {
99 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
100 }
101
102 int32_t
103 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
104 {
105 unsigned short code; /* The system call number */
106
107 systrace_sysent_t *sy;
108 dtrace_id_t id;
109 int32_t rval;
110 syscall_arg_t *ip = (syscall_arg_t *)uap;
111 uint64_t uargs[SYSTRACE_NARGS] = {0};
112
113 #if defined (__x86_64__)
114 {
115 pal_register_cache_state(current_thread(), VALID);
116 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
117
118 if (is_saved_state64(tagged_regs)) {
119 x86_saved_state64_t *regs = saved_state64(tagged_regs);
120 code = regs->rax & SYSCALL_NUMBER_MASK;
121 /*
122 * Check for indirect system call... system call number
123 * passed as 'arg0'
124 */
125 if (code == 0) {
126 code = regs->rdi;
127 }
128 } else {
129 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
130
131 if (code == 0) {
132 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
133 code = fuword(params);
134 }
135 }
136 }
137 #elif defined(__arm__)
138 {
139 /*
140 * On arm, syscall numbers depend on a flavor (indirect or not)
141 * and can be in either r0 or r12 (always u32)
142 */
143
144 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
145 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
146
147 /* Check for indirect system call */
148 if (arm_regs->r[12] != 0) {
149 code = arm_regs->r[12];
150 } else {
151 code = arm_regs->r[0];
152 }
153 }
154 #elif defined(__arm64__)
155 {
156 /*
157 * On arm64, syscall numbers depend on a flavor (indirect or not)
158 * ... and for u32 can be in either r0 or r12
159 * ... and for u64 can be in either x0 or x16
160 */
161
162 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
163 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
164
165 if (is_saved_state32(arm_regs)) {
166 /* Check for indirect system call */
167 if (saved_state32(arm_regs)->r[12] != 0) {
168 code = saved_state32(arm_regs)->r[12];
169 } else {
170 code = saved_state32(arm_regs)->r[0];
171 }
172 } else {
173 /* Check for indirect system call */
174 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
175 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
176 } else {
177 code = saved_state64(arm_regs)->x[0];
178 }
179 }
180 }
181 #else
182 #error Unknown Architecture
183 #endif
184
185 // Bounds "check" the value of code a la unix_syscall
186 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
187
188 systrace_args(code, ip, uargs);
189
190 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
191 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
192 if (uthread) {
193 uthread->t_dtrace_syscall_args = uargs;
194 }
195
196 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
197 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
198
199 if (uthread) {
200 uthread->t_dtrace_syscall_args = NULL;
201 }
202 }
203
204
205
206 #if 0 /* XXX */
207 /*
208 * APPLE NOTE: Not implemented.
209 * We want to explicitly allow DTrace consumers to stop a process
210 * before it actually executes the meat of the syscall.
211 */
212 p = ttoproc(curthread);
213 mutex_enter(&p->p_lock);
214 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
215 curthread->t_dtrace_stop = 0;
216 stop(PR_REQUESTED, 0);
217 }
218 mutex_exit(&p->p_lock);
219 #endif
220
221 rval = (*sy->stsy_underlying)(pp, uap, rv);
222
223 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
224 uint64_t munged_rv0, munged_rv1;
225 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
226
227 if (uthread) {
228 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
229 }
230 /*
231 * "Decode" rv for use in the call to dtrace_probe()
232 */
233 if (rval == ERESTART) {
234 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
235 munged_rv1 = -1LL;
236 } else if (rval != EJUSTRETURN) {
237 if (rval) {
238 munged_rv0 = -1LL; /* Mimic what libc will do. */
239 munged_rv1 = -1LL;
240 } else {
241 switch (sy->stsy_return_type) {
242 case _SYSCALL_RET_INT_T:
243 munged_rv0 = rv[0];
244 munged_rv1 = rv[1];
245 break;
246 case _SYSCALL_RET_UINT_T:
247 munged_rv0 = ((u_int)rv[0]);
248 munged_rv1 = ((u_int)rv[1]);
249 break;
250 case _SYSCALL_RET_OFF_T:
251 case _SYSCALL_RET_UINT64_T:
252 munged_rv0 = *(u_int64_t *)rv;
253 munged_rv1 = 0LL;
254 break;
255 case _SYSCALL_RET_ADDR_T:
256 case _SYSCALL_RET_SIZE_T:
257 case _SYSCALL_RET_SSIZE_T:
258 munged_rv0 = *(user_addr_t *)rv;
259 munged_rv1 = 0LL;
260 break;
261 case _SYSCALL_RET_NONE:
262 munged_rv0 = 0LL;
263 munged_rv1 = 0LL;
264 break;
265 default:
266 munged_rv0 = 0LL;
267 munged_rv1 = 0LL;
268 break;
269 }
270 }
271 } else {
272 munged_rv0 = 0LL;
273 munged_rv1 = 0LL;
274 }
275
276 /*
277 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
278 *
279 * "This is a bit of an historical artifact. At first, the syscall provider just
280 * had its return value in arg0, and the fbt and pid providers had their return
281 * values in arg1 (so that we could use arg0 for the offset of the return site).
282 *
283 * We inevitably started writing scripts where we wanted to see the return
284 * values from probes in all three providers, and we made this script easier
285 * to write by replicating the syscall return values in arg1 to match fbt and
286 * pid. We debated briefly about removing the return value from arg0, but
287 * decided that it would be less confusing to have the same data in two places
288 * than to have some non-helpful, non-intuitive value in arg0.
289 *
290 * This change was made 4/23/2003 according to the DTrace project's putback log."
291 */
292 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
293 }
294
295 return rval;
296 }
297
298 void
299 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
300 {
301 systrace_sysent_t *sy;
302 dtrace_id_t id;
303
304 // Bounds "check" the value of code a la unix_syscall_return
305 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
306
307 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
308 uint64_t munged_rv0, munged_rv1;
309 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
310
311 if (uthread) {
312 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
313 }
314 /*
315 * "Decode" rv for use in the call to dtrace_probe()
316 */
317 if (rval == ERESTART) {
318 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
319 munged_rv1 = -1LL;
320 } else if (rval != EJUSTRETURN) {
321 if (rval) {
322 munged_rv0 = -1LL; /* Mimic what libc will do. */
323 munged_rv1 = -1LL;
324 } else {
325 switch (sy->stsy_return_type) {
326 case _SYSCALL_RET_INT_T:
327 munged_rv0 = rv[0];
328 munged_rv1 = rv[1];
329 break;
330 case _SYSCALL_RET_UINT_T:
331 munged_rv0 = ((u_int)rv[0]);
332 munged_rv1 = ((u_int)rv[1]);
333 break;
334 case _SYSCALL_RET_OFF_T:
335 case _SYSCALL_RET_UINT64_T:
336 munged_rv0 = *(u_int64_t *)rv;
337 munged_rv1 = 0LL;
338 break;
339 case _SYSCALL_RET_ADDR_T:
340 case _SYSCALL_RET_SIZE_T:
341 case _SYSCALL_RET_SSIZE_T:
342 munged_rv0 = *(user_addr_t *)rv;
343 munged_rv1 = 0LL;
344 break;
345 case _SYSCALL_RET_NONE:
346 munged_rv0 = 0LL;
347 munged_rv1 = 0LL;
348 break;
349 default:
350 munged_rv0 = 0LL;
351 munged_rv1 = 0LL;
352 break;
353 }
354 }
355 } else {
356 munged_rv0 = 0LL;
357 munged_rv1 = 0LL;
358 }
359
360 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
361 }
362 }
363
/*
 * Encoding of the per-probe cookie (the 'parg' handed to the provider ops):
 * the low SYSTRACE_SHIFT bits hold the system call number, and the bit just
 * above them is set for "entry" probes and clear for "return" probes.
 */
#define SYSTRACE_SHIFT          16
#define SYSTRACE_ISENTRY(x)     ((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)      ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)      ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)     (id)

/*
 * NOTE(review): NSYSCALL is defined earlier in this file as nsysent. If
 * nsysent is a variable rather than a macro, the preprocessor evaluates it
 * as 0 and this guard can never fire -- confirm.
 */
#if (NSYSCALL >= (1 << SYSTRACE_SHIFT))
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
373
374 static dtrace_provider_id_t systrace_id;
375
376 /*
377 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
378 * See balanced undef below.
379 */
380 #define systrace_init _systrace_init
381
382 static void
383 systrace_init(const struct sysent *actual, systrace_sysent_t **interposed)
384 {
385 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
386 * from bsd/sys/sysent.h */
387 unsigned int i;
388
389 if (ssysent == NULL) {
390 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
391 NSYSCALL, KM_SLEEP);
392 }
393
394 for (i = 0; i < NSYSCALL; i++) {
395 const struct sysent *a = &actual[i];
396 systrace_sysent_t *s = &ssysent[i];
397
398 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
399 continue;
400 }
401
402 if (a->sy_callc == dtrace_systrace_syscall) {
403 continue;
404 }
405
406 s->stsy_underlying = a->sy_callc;
407 s->stsy_return_type = a->sy_return_type;
408 }
409 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
410 }
411
412
413 /*ARGSUSED*/
414 static void
415 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
416 {
417 #pragma unused(arg) /* __APPLE__ */
418 unsigned int i;
419
420 if (desc != NULL) {
421 return;
422 }
423
424 systrace_init(sysent, &systrace_sysent);
425
426 for (i = 0; i < NSYSCALL; i++) {
427 if (systrace_sysent[i].stsy_underlying == NULL) {
428 continue;
429 }
430
431 if (dtrace_probe_lookup(systrace_id, NULL,
432 syscallnames[i], "entry") != 0) {
433 continue;
434 }
435
436 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
437 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
438 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
440 "return", SYSTRACE_ARTIFICIAL_FRAMES,
441 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
442
443 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
444 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
445 }
446 }
447 #undef systrace_init
448
449 /*ARGSUSED*/
450 static void
451 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
452 {
453 #pragma unused(arg,id) /* __APPLE__ */
454
455 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
456
457 #pragma unused(sysnum) /* __APPLE__ */
458 /*
459 * There's nothing to do here but assert that we have actually been
460 * disabled.
461 */
462 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
463 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
464 } else {
465 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
466 }
467 }
468
469 /*ARGSUSED*/
470 static int
471 systrace_enable(void *arg, dtrace_id_t id, void *parg)
472 {
473 #pragma unused(arg) /* __APPLE__ */
474
475 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
476 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
477 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
478
479 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
480 systrace_sysent[sysnum].stsy_entry = id;
481 } else {
482 systrace_sysent[sysnum].stsy_return = id;
483 }
484
485 if (enabled) {
486 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
487 return 0;
488 }
489
490 lck_mtx_lock(&dtrace_systrace_lock);
491 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
492 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_systrace_syscall);
493 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
494 }
495 lck_mtx_unlock(&dtrace_systrace_lock);
496 return 0;
497 }
498
499 /*ARGSUSED*/
500 static void
501 systrace_disable(void *arg, dtrace_id_t id, void *parg)
502 {
503 #pragma unused(arg,id) /* __APPLE__ */
504
505 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
506 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
507 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
508
509 if (disable) {
510 lck_mtx_lock(&dtrace_systrace_lock);
511 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) {
512 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
513 }
514 lck_mtx_unlock(&dtrace_systrace_lock);
515 }
516
517 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
518 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
519 } else {
520 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
521 }
522 }
523
524 static dtrace_pattr_t systrace_attr = {
525 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
526 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
527 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
528 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
529 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
530 };
531
532 static dtrace_pops_t systrace_pops = {
533 .dtps_provide = systrace_provide,
534 .dtps_provide_module = NULL,
535 .dtps_enable = systrace_enable,
536 .dtps_disable = systrace_disable,
537 .dtps_suspend = NULL,
538 .dtps_resume = NULL,
539 .dtps_getargdesc = systrace_getargdesc,
540 .dtps_getargval = systrace_getargval,
541 .dtps_usermode = NULL,
542 .dtps_destroy = systrace_destroy
543 };
544
545 static int
546 systrace_attach(dev_info_t *devi)
547 {
548 systrace_probe = (void*)&dtrace_probe;
549 membar_enter();
550
551 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
552 DDI_PSEUDO, 0) == DDI_FAILURE ||
553 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
554 &systrace_pops, NULL, &systrace_id) != 0) {
555 systrace_probe = systrace_stub;
556 ddi_remove_minor_node(devi, NULL);
557 return DDI_FAILURE;
558 }
559
560 return DDI_SUCCESS;
561 }
562
563
564 /*
565 * APPLE NOTE: systrace_detach not implemented
566 */
567 #if !defined(__APPLE__)
568 static int
569 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
570 {
571 switch (cmd) {
572 case DDI_DETACH:
573 break;
574 case DDI_SUSPEND:
575 return DDI_SUCCESS;
576 default:
577 return DDI_FAILURE;
578 }
579
580 if (dtrace_unregister(systrace_id) != 0) {
581 return DDI_FAILURE;
582 }
583
584 ddi_remove_minor_node(devi, NULL);
585 systrace_probe = systrace_stub;
586 return DDI_SUCCESS;
587 }
588 #endif /* __APPLE__ */
589
590
591 typedef kern_return_t (*mach_call_t)(void *);
592
593 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
594 typedef void mach_munge_t(void *);
595
596 typedef struct {
597 int mach_trap_arg_count;
598 kern_return_t (*mach_trap_function)(void *);
599 #if defined(__arm64__) || defined(__x86_64__)
600 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
601 #endif
602 int mach_trap_u32_words;
603 #if MACH_ASSERT
604 const char* mach_trap_name;
605 #endif /* MACH_ASSERT */
606 } mach_trap_t;
607
608 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
609 extern int mach_trap_count;
610
611 extern const char *mach_syscall_name_table[];
612
613 /* XXX From osfmk/i386/bsd_i386.c */
614 struct mach_call_args {
615 syscall_arg_t arg1;
616 syscall_arg_t arg2;
617 syscall_arg_t arg3;
618 syscall_arg_t arg4;
619 syscall_arg_t arg5;
620 syscall_arg_t arg6;
621 syscall_arg_t arg7;
622 syscall_arg_t arg8;
623 syscall_arg_t arg9;
624 };
625
626 #undef NSYSCALL
627 #define NSYSCALL mach_trap_count
628
629 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
630 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
631 #endif
632
633 typedef struct machtrace_sysent {
634 dtrace_id_t stsy_entry;
635 dtrace_id_t stsy_return;
636 kern_return_t (*stsy_underlying)(void *);
637 int32_t stsy_return_type;
638 } machtrace_sysent_t;
639
640 static machtrace_sysent_t *machtrace_sysent = NULL;
641
642 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
643 uint64_t, uint64_t, uint64_t);
644
645 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
646
647 static dtrace_provider_id_t machtrace_id;
648
649 static kern_return_t
650 dtrace_machtrace_syscall(struct mach_call_args *args)
651 {
652 int code; /* The mach call number */
653
654 machtrace_sysent_t *sy;
655 dtrace_id_t id;
656 kern_return_t rval;
657 #if 0 /* XXX */
658 proc_t *p;
659 #endif
660 syscall_arg_t *ip = (syscall_arg_t *)args;
661 mach_call_t mach_call;
662
663 #if defined (__x86_64__)
664 {
665 pal_register_cache_state(current_thread(), VALID);
666 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
667
668 if (is_saved_state64(tagged_regs)) {
669 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
670 } else {
671 code = -saved_state32(tagged_regs)->eax;
672 }
673 }
674 #elif defined(__arm__)
675 {
676 /* r12 has the machcall number, but it is -ve */
677 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
678 code = (int)arm_regs->r[12];
679 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
680 code = -code;
681 }
682 #elif defined(__arm64__)
683 {
684 /* From arm/thread_status.h:get_saved_state_svc_number */
685 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
686 if (is_saved_state32(arm_regs)) {
687 code = (int)saved_state32(arm_regs)->r[12];
688 } else {
689 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
690 }
691
692 /* From bsd/arm64.c:mach_syscall */
693 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
694 code = -code;
695 }
696 #else
697 #error Unknown Architecture
698 #endif
699
700 sy = &machtrace_sysent[code];
701
702 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
703 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
704
705 if (uthread) {
706 uthread->t_dtrace_syscall_args = (void *)ip;
707 }
708
709 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
710
711 if (uthread) {
712 uthread->t_dtrace_syscall_args = (void *)0;
713 }
714 }
715
716 #if 0 /* XXX */
717 /*
718 * APPLE NOTE: Not implemented.
719 * We want to explicitly allow DTrace consumers to stop a process
720 * before it actually executes the meat of the syscall.
721 */
722 p = ttoproc(curthread);
723 mutex_enter(&p->p_lock);
724 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
725 curthread->t_dtrace_stop = 0;
726 stop(PR_REQUESTED, 0);
727 }
728 mutex_exit(&p->p_lock);
729 #endif
730
731 mach_call = (mach_call_t)(*sy->stsy_underlying);
732 rval = mach_call(args);
733
734 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
735 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
736 }
737
738 return rval;
739 }
740
741 static void
742 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
743 {
744 machtrace_sysent_t *msysent = *interposed;
745 int i;
746
747 if (msysent == NULL) {
748 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
749 NSYSCALL, KM_SLEEP);
750 }
751
752 for (i = 0; i < NSYSCALL; i++) {
753 const mach_trap_t *a = &actual[i];
754 machtrace_sysent_t *s = &msysent[i];
755
756 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
757 continue;
758 }
759
760 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
761 continue;
762 }
763
764 s->stsy_underlying = a->mach_trap_function;
765 }
766 }
767
768 /*ARGSUSED*/
769 static void
770 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
771 {
772 #pragma unused(arg) /* __APPLE__ */
773
774 int i;
775
776 if (desc != NULL) {
777 return;
778 }
779
780 machtrace_init(mach_trap_table, &machtrace_sysent);
781
782 for (i = 0; i < NSYSCALL; i++) {
783 if (machtrace_sysent[i].stsy_underlying == NULL) {
784 continue;
785 }
786
787 if (dtrace_probe_lookup(machtrace_id, NULL,
788 mach_syscall_name_table[i], "entry") != 0) {
789 continue;
790 }
791
792 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
793 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
794 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
795 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
796 "return", MACHTRACE_ARTIFICIAL_FRAMES,
797 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
798
799 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
800 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
801 }
802 }
803
804 /*ARGSUSED*/
805 static void
806 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
807 {
808 #pragma unused(arg,id) /* __APPLE__ */
809 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
810
811 #pragma unused(sysnum) /* __APPLE__ */
812
813 /*
814 * There's nothing to do here but assert that we have actually been
815 * disabled.
816 */
817 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
818 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
819 } else {
820 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
821 }
822 }
823
824 /*ARGSUSED*/
825 static int
826 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
827 {
828 #pragma unused(arg) /* __APPLE__ */
829
830 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
831 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
832 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
833
834 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
835 machtrace_sysent[sysnum].stsy_entry = id;
836 } else {
837 machtrace_sysent[sysnum].stsy_return = id;
838 }
839
840 if (enabled) {
841 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
842 return 0;
843 }
844
845 lck_mtx_lock(&dtrace_systrace_lock);
846
847 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
848 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_machtrace_syscall);
849 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
850 }
851
852 lck_mtx_unlock(&dtrace_systrace_lock);
853
854 return 0;
855 }
856
857 /*ARGSUSED*/
858 static void
859 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
860 {
861 #pragma unused(arg,id) /* __APPLE__ */
862
863 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
864 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
865 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
866
867 if (disable) {
868 lck_mtx_lock(&dtrace_systrace_lock);
869
870 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
871 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
872 }
873 lck_mtx_unlock(&dtrace_systrace_lock);
874 }
875
876 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
877 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
878 } else {
879 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
880 }
881 }
882
883 static dtrace_pattr_t machtrace_attr = {
884 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
885 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
886 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
887 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
888 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
889 };
890
891 static dtrace_pops_t machtrace_pops = {
892 .dtps_provide = machtrace_provide,
893 .dtps_provide_module = NULL,
894 .dtps_enable = machtrace_enable,
895 .dtps_disable = machtrace_disable,
896 .dtps_suspend = NULL,
897 .dtps_resume = NULL,
898 .dtps_getargdesc = NULL,
899 .dtps_getargval = machtrace_getarg,
900 .dtps_usermode = NULL,
901 .dtps_destroy = machtrace_destroy
902 };
903
904 static int
905 machtrace_attach(dev_info_t *devi)
906 {
907 machtrace_probe = dtrace_probe;
908 membar_enter();
909
910 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
911 DDI_PSEUDO, 0) == DDI_FAILURE ||
912 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
913 &machtrace_pops, NULL, &machtrace_id) != 0) {
914 machtrace_probe = (void*)&systrace_stub;
915 ddi_remove_minor_node(devi, NULL);
916 return DDI_FAILURE;
917 }
918
919 return DDI_SUCCESS;
920 }
921
922 d_open_t _systrace_open;
923
924 int
925 _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
926 {
927 #pragma unused(dev,flags,devtype,p)
928 return 0;
929 }
930
931 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
932
933 static struct cdevsw systrace_cdevsw =
934 {
935 .d_open = _systrace_open,
936 .d_close = eno_opcl,
937 .d_read = eno_rdwrt,
938 .d_write = eno_rdwrt,
939 .d_ioctl = eno_ioctl,
940 .d_stop = (stop_fcn_t *)nulldev,
941 .d_reset = (reset_fcn_t *)nulldev,
942 .d_select = eno_select,
943 .d_mmap = eno_mmap,
944 .d_strategy = eno_strat,
945 .d_reserved_1 = eno_getc,
946 .d_reserved_2 = eno_putc,
947 };
948
949 void systrace_init( void );
950
951 void
952 systrace_init( void )
953 {
954 if (dtrace_sdt_probes_restricted()) {
955 return;
956 }
957
958 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
959
960 if (majdevno < 0) {
961 printf("systrace_init: failed to allocate a major number!\n");
962 return;
963 }
964
965 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
966 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
967 }
968 #undef SYSTRACE_MAJOR
969
970 static uint64_t
971 systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
972 {
973 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
974 uint64_t val = 0;
975 uint64_t *uargs = NULL;
976
977 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
978
979 if (uthread) {
980 uargs = uthread->t_dtrace_syscall_args;
981 }
982 if (!uargs) {
983 return 0;
984 }
985 if (argno < 0 || argno >= SYSTRACE_NARGS) {
986 return 0;
987 }
988
989 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
990 val = uargs[argno];
991 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
992 return val;
993 }
994
995 static void
996 systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
997 dtrace_argdesc_t *desc)
998 {
999 #pragma unused(arg, id)
1000 int sysnum = SYSTRACE_SYSNUM(parg);
1001 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1002 uint64_t *uargs = NULL;
1003
1004 if (!uthread) {
1005 desc->dtargd_ndx = DTRACE_ARGNONE;
1006 return;
1007 }
1008
1009 uargs = uthread->t_dtrace_syscall_args;
1010
1011 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1012 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1013 desc->dtargd_native, sizeof(desc->dtargd_native));
1014 } else {
1015 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1016 desc->dtargd_native, sizeof(desc->dtargd_native));
1017 }
1018
1019 if (desc->dtargd_native[0] == '\0') {
1020 desc->dtargd_ndx = DTRACE_ARGNONE;
1021 }
1022 }
1023
1024 static uint64_t
1025 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1026 {
1027 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1028 uint64_t val = 0;
1029 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1030
1031 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1032
1033 if (uthread) {
1034 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1035 }
1036
1037 if (!stack) {
1038 return 0;
1039 }
1040
1041 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1042 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1043 val = (uint64_t)*(stack + argno);
1044 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1045 return val;
1046 }