]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <ptrauth.h>
27
28 #include <kern/thread.h>
29 #include <mach/thread_status.h>
30
/*
 * XXX All of these should really be derived from syscall_sw.h.
 *
 * On x86_64 the class lives in the byte selected by SYSCALL_CLASS_MASK and
 * the call number is whatever remains once that byte is masked off.
 * I386_SYSCALL_NUMBER_MASK is applied to eax for 32-bit saved state.
 */
#if defined (__x86_64__)
#define SYSCALL_CLASS_SHIFT     24
#define SYSCALL_CLASS_MASK      (0xFF << SYSCALL_CLASS_SHIFT)
#define SYSCALL_NUMBER_MASK     (~SYSCALL_CLASS_MASK)
#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
#endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/syscall.h>
47 #include <miscfs/devfs/devfs.h>
48
49 #include <sys/dtrace.h>
50 #include <sys/dtrace_impl.h>
51 #include <sys/systrace_args.h>
52 #include "systrace.h"
53 #include <sys/stat.h>
54 #include <sys/systm.h>
55 #include <sys/conf.h>
56 #include <sys/user.h>
57
58 #include <machine/pal_routines.h>
59
/*
 * Artificial-frame counts handed to dtrace_probe_create() when the syscall
 * and mach_trap probes are created.  Every supported architecture uses the
 * same values; anything else is a build error.
 */
#if defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
#define SYSTRACE_ARTIFICIAL_FRAMES      2
#define MACHTRACE_ARTIFICIAL_FRAMES     3
#else
#error Unknown Architecture
#endif
69
70 #define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
71
72 #include <sys/sysent.h>
73 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
74 #define NSYSCALL nsysent /* and is less than 500 or so */
75
76 extern const char *syscallnames[];
77
78 #include <sys/dtrace_glue.h>
79 #define casptr dtrace_casptr
80 #define membar_enter dtrace_membar_producer
81
82 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
83 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
84
85 static LCK_MTX_DECLARE_ATTR(dtrace_systrace_lock,
86 &dtrace_lck_grp, &dtrace_lck_attr); /* probe state lock */
87
88 systrace_sysent_t *systrace_sysent = NULL;
89 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
90
91 static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
92 static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
93
94 void
95 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
96 uint64_t arg2, uint64_t arg3, uint64_t arg4)
97 {
98 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
99 }
100
101 int32_t
102 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
103 {
104 unsigned short code; /* The system call number */
105
106 systrace_sysent_t *sy;
107 dtrace_id_t id;
108 int32_t rval;
109 syscall_arg_t *ip = (syscall_arg_t *)uap;
110 uint64_t uargs[SYSTRACE_NARGS] = {0};
111
112 #if defined (__x86_64__)
113 {
114 pal_register_cache_state(current_thread(), VALID);
115 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
116
117 if (is_saved_state64(tagged_regs)) {
118 x86_saved_state64_t *regs = saved_state64(tagged_regs);
119 code = regs->rax & SYSCALL_NUMBER_MASK;
120 /*
121 * Check for indirect system call... system call number
122 * passed as 'arg0'
123 */
124 if (code == 0) {
125 code = regs->rdi;
126 }
127 } else {
128 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
129
130 if (code == 0) {
131 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
132 code = fuword(params);
133 }
134 }
135 }
136 #elif defined(__arm__)
137 {
138 /*
139 * On arm, syscall numbers depend on a flavor (indirect or not)
140 * and can be in either r0 or r12 (always u32)
141 */
142
143 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
144 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
145
146 /* Check for indirect system call */
147 if (arm_regs->r[12] != 0) {
148 code = arm_regs->r[12];
149 } else {
150 code = arm_regs->r[0];
151 }
152 }
153 #elif defined(__arm64__)
154 {
155 /*
156 * On arm64, syscall numbers depend on a flavor (indirect or not)
157 * ... and for u32 can be in either r0 or r12
158 * ... and for u64 can be in either x0 or x16
159 */
160
161 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
162 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
163
164 if (is_saved_state32(arm_regs)) {
165 /* Check for indirect system call */
166 if (saved_state32(arm_regs)->r[12] != 0) {
167 code = saved_state32(arm_regs)->r[12];
168 } else {
169 code = saved_state32(arm_regs)->r[0];
170 }
171 } else {
172 /* Check for indirect system call */
173 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
174 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
175 } else {
176 code = saved_state64(arm_regs)->x[0];
177 }
178 }
179 }
180 #else
181 #error Unknown Architecture
182 #endif
183
184 // Bounds "check" the value of code a la unix_syscall
185 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
186
187 systrace_args(code, ip, uargs);
188
189 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
190 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
191 if (uthread) {
192 uthread->t_dtrace_syscall_args = uargs;
193 }
194
195 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
196 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
197
198 if (uthread) {
199 uthread->t_dtrace_syscall_args = NULL;
200 }
201 }
202
203
204
205 #if 0 /* XXX */
206 /*
207 * APPLE NOTE: Not implemented.
208 * We want to explicitly allow DTrace consumers to stop a process
209 * before it actually executes the meat of the syscall.
210 */
211 p = ttoproc(curthread);
212 mutex_enter(&p->p_lock);
213 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
214 curthread->t_dtrace_stop = 0;
215 stop(PR_REQUESTED, 0);
216 }
217 mutex_exit(&p->p_lock);
218 #endif
219
220 rval = (*sy->stsy_underlying)(pp, uap, rv);
221
222 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
223 uint64_t munged_rv0, munged_rv1;
224 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
225
226 if (uthread) {
227 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
228 }
229 /*
230 * "Decode" rv for use in the call to dtrace_probe()
231 */
232 if (rval == ERESTART) {
233 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
234 munged_rv1 = -1LL;
235 } else if (rval != EJUSTRETURN) {
236 if (rval) {
237 munged_rv0 = -1LL; /* Mimic what libc will do. */
238 munged_rv1 = -1LL;
239 } else {
240 switch (sy->stsy_return_type) {
241 case _SYSCALL_RET_INT_T:
242 munged_rv0 = rv[0];
243 munged_rv1 = rv[1];
244 break;
245 case _SYSCALL_RET_UINT_T:
246 munged_rv0 = ((u_int)rv[0]);
247 munged_rv1 = ((u_int)rv[1]);
248 break;
249 case _SYSCALL_RET_OFF_T:
250 case _SYSCALL_RET_UINT64_T:
251 munged_rv0 = *(u_int64_t *)rv;
252 munged_rv1 = 0LL;
253 break;
254 case _SYSCALL_RET_ADDR_T:
255 case _SYSCALL_RET_SIZE_T:
256 case _SYSCALL_RET_SSIZE_T:
257 munged_rv0 = *(user_addr_t *)rv;
258 munged_rv1 = 0LL;
259 break;
260 case _SYSCALL_RET_NONE:
261 munged_rv0 = 0LL;
262 munged_rv1 = 0LL;
263 break;
264 default:
265 munged_rv0 = 0LL;
266 munged_rv1 = 0LL;
267 break;
268 }
269 }
270 } else {
271 munged_rv0 = 0LL;
272 munged_rv1 = 0LL;
273 }
274
275 /*
276 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
277 *
278 * "This is a bit of an historical artifact. At first, the syscall provider just
279 * had its return value in arg0, and the fbt and pid providers had their return
280 * values in arg1 (so that we could use arg0 for the offset of the return site).
281 *
282 * We inevitably started writing scripts where we wanted to see the return
283 * values from probes in all three providers, and we made this script easier
284 * to write by replicating the syscall return values in arg1 to match fbt and
285 * pid. We debated briefly about removing the return value from arg0, but
286 * decided that it would be less confusing to have the same data in two places
287 * than to have some non-helpful, non-intuitive value in arg0.
288 *
289 * This change was made 4/23/2003 according to the DTrace project's putback log."
290 */
291 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
292 }
293
294 return rval;
295 }
296
297 void
298 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
299 {
300 systrace_sysent_t *sy;
301 dtrace_id_t id;
302
303 // Bounds "check" the value of code a la unix_syscall_return
304 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
305
306 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
307 uint64_t munged_rv0, munged_rv1;
308 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
309
310 if (uthread) {
311 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
312 }
313 /*
314 * "Decode" rv for use in the call to dtrace_probe()
315 */
316 if (rval == ERESTART) {
317 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
318 munged_rv1 = -1LL;
319 } else if (rval != EJUSTRETURN) {
320 if (rval) {
321 munged_rv0 = -1LL; /* Mimic what libc will do. */
322 munged_rv1 = -1LL;
323 } else {
324 switch (sy->stsy_return_type) {
325 case _SYSCALL_RET_INT_T:
326 munged_rv0 = rv[0];
327 munged_rv1 = rv[1];
328 break;
329 case _SYSCALL_RET_UINT_T:
330 munged_rv0 = ((u_int)rv[0]);
331 munged_rv1 = ((u_int)rv[1]);
332 break;
333 case _SYSCALL_RET_OFF_T:
334 case _SYSCALL_RET_UINT64_T:
335 munged_rv0 = *(u_int64_t *)rv;
336 munged_rv1 = 0LL;
337 break;
338 case _SYSCALL_RET_ADDR_T:
339 case _SYSCALL_RET_SIZE_T:
340 case _SYSCALL_RET_SSIZE_T:
341 munged_rv0 = *(user_addr_t *)rv;
342 munged_rv1 = 0LL;
343 break;
344 case _SYSCALL_RET_NONE:
345 munged_rv0 = 0LL;
346 munged_rv1 = 0LL;
347 break;
348 default:
349 munged_rv0 = 0LL;
350 munged_rv1 = 0LL;
351 break;
352 }
353 }
354 } else {
355 munged_rv0 = 0LL;
356 munged_rv1 = 0LL;
357 }
358
359 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
360 }
361 }
362
/*
 * Encoding of the per-probe cookie ("parg") handed to dtrace_probe_create():
 * the low SYSTRACE_SHIFT bits carry the call number, and the bit just above
 * them is set for an entry probe and clear for a return probe.
 */
#define SYSTRACE_SHIFT                  16
#define SYSTRACE_ISENTRY(x)             ((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)              ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)              ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)             (id)

/*
 * NOTE(review): NSYSCALL is #defined to nsysent in this file.  Unless that
 * name is itself a preprocessor constant, #if evaluates it as 0 and this
 * guard can never fire - confirm, and consider a run-time check instead.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
372
373 static dtrace_provider_id_t systrace_id;
374
375 /*
376 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
377 * See balanced undef below.
378 */
379 #define systrace_init _systrace_init
380
381 static void
382 systrace_init(const struct sysent *actual, systrace_sysent_t **interposed)
383 {
384 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
385 * from bsd/sys/sysent.h */
386 unsigned int i;
387
388 if (ssysent == NULL) {
389 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
390 NSYSCALL, KM_SLEEP);
391 }
392
393 for (i = 0; i < NSYSCALL; i++) {
394 const struct sysent *a = &actual[i];
395 systrace_sysent_t *s = &ssysent[i];
396
397 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
398 continue;
399 }
400
401 if (a->sy_callc == dtrace_systrace_syscall) {
402 continue;
403 }
404
405 s->stsy_underlying = a->sy_callc;
406 s->stsy_return_type = a->sy_return_type;
407 }
408 }
409
410
411 /*ARGSUSED*/
412 static void
413 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
414 {
415 #pragma unused(arg) /* __APPLE__ */
416 unsigned int i;
417
418 if (desc != NULL) {
419 return;
420 }
421
422 systrace_init(sysent, &systrace_sysent);
423
424 for (i = 0; i < NSYSCALL; i++) {
425 if (systrace_sysent[i].stsy_underlying == NULL) {
426 continue;
427 }
428
429 if (dtrace_probe_lookup(systrace_id, NULL,
430 syscallnames[i], "entry") != 0) {
431 continue;
432 }
433
434 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
435 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
436 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
437 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
438 "return", SYSTRACE_ARTIFICIAL_FRAMES,
439 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
440
441 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
442 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
443 }
444 }
445 #undef systrace_init
446
447 /*ARGSUSED*/
448 static void
449 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
450 {
451 #pragma unused(arg,id) /* __APPLE__ */
452
453 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
454
455 #pragma unused(sysnum) /* __APPLE__ */
456 /*
457 * There's nothing to do here but assert that we have actually been
458 * disabled.
459 */
460 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
461 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
462 } else {
463 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
464 }
465 }
466
467 /*ARGSUSED*/
468 static int
469 systrace_enable(void *arg, dtrace_id_t id, void *parg)
470 {
471 #pragma unused(arg) /* __APPLE__ */
472
473 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
474 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
475 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
476
477 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
478 systrace_sysent[sysnum].stsy_entry = id;
479 } else {
480 systrace_sysent[sysnum].stsy_return = id;
481 }
482
483 if (enabled) {
484 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
485 return 0;
486 }
487
488 lck_mtx_lock(&dtrace_systrace_lock);
489 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
490 /* It is not possible to write to sysent[] directly because it is const. */
491 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_systrace_syscall);
492 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
493 }
494 lck_mtx_unlock(&dtrace_systrace_lock);
495
496 return 0;
497 }
498
499 /*ARGSUSED*/
500 static void
501 systrace_disable(void *arg, dtrace_id_t id, void *parg)
502 {
503 #pragma unused(arg,id) /* __APPLE__ */
504
505 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
506 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
507 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
508
509 if (disable) {
510 /*
511 * Usage of volatile protects the if statement below from being optimized away.
512 *
513 * Compilers are clever and know that const array values can't change in time
514 * and the if below is always false. That is because it can't see that DTrace
515 * injects dtrace_systrace_syscall dynamically and violates constness of the
516 * array.
517 */
518 volatile const struct sysent *syscallent = &sysent[sysnum];
519
520 lck_mtx_lock(&dtrace_systrace_lock);
521 if (syscallent->sy_callc == dtrace_systrace_syscall) {
522 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying,
523 (vm_offset_t)&syscallent->sy_callc, sizeof(vm_offset_t));
524 }
525 lck_mtx_unlock(&dtrace_systrace_lock);
526 }
527
528 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
529 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
530 } else {
531 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
532 }
533 }
534
535 static dtrace_pattr_t systrace_attr = {
536 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
537 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
538 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
539 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
540 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
541 };
542
543 static dtrace_pops_t systrace_pops = {
544 .dtps_provide = systrace_provide,
545 .dtps_provide_module = NULL,
546 .dtps_enable = systrace_enable,
547 .dtps_disable = systrace_disable,
548 .dtps_suspend = NULL,
549 .dtps_resume = NULL,
550 .dtps_getargdesc = systrace_getargdesc,
551 .dtps_getargval = systrace_getargval,
552 .dtps_usermode = NULL,
553 .dtps_destroy = systrace_destroy
554 };
555
556 static int
557 systrace_attach(dev_info_t *devi)
558 {
559 systrace_probe = (void*)&dtrace_probe;
560 membar_enter();
561
562 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
563 DDI_PSEUDO, 0) == DDI_FAILURE ||
564 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
565 &systrace_pops, NULL, &systrace_id) != 0) {
566 systrace_probe = systrace_stub;
567 ddi_remove_minor_node(devi, NULL);
568 return DDI_FAILURE;
569 }
570
571 return DDI_SUCCESS;
572 }
573
574
575 /*
576 * APPLE NOTE: systrace_detach not implemented
577 */
578 #if !defined(__APPLE__)
579 static int
580 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
581 {
582 switch (cmd) {
583 case DDI_DETACH:
584 break;
585 case DDI_SUSPEND:
586 return DDI_SUCCESS;
587 default:
588 return DDI_FAILURE;
589 }
590
591 if (dtrace_unregister(systrace_id) != 0) {
592 return DDI_FAILURE;
593 }
594
595 ddi_remove_minor_node(devi, NULL);
596 systrace_probe = systrace_stub;
597 return DDI_SUCCESS;
598 }
599 #endif /* __APPLE__ */
600
601
602 typedef kern_return_t (*mach_call_t)(void *);
603
604 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
605 typedef void mach_munge_t(void *);
606
607 typedef struct {
608 int mach_trap_arg_count;
609 kern_return_t (*mach_trap_function)(void *);
610 #if defined(__arm64__) || defined(__x86_64__)
611 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
612 #endif
613 int mach_trap_u32_words;
614 #if MACH_ASSERT
615 const char* mach_trap_name;
616 #endif /* MACH_ASSERT */
617 } mach_trap_t;
618
619 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
620 extern const int mach_trap_count;
621
622 extern const char *const mach_syscall_name_table[];
623
624 /* XXX From osfmk/i386/bsd_i386.c */
625 struct mach_call_args {
626 syscall_arg_t arg1;
627 syscall_arg_t arg2;
628 syscall_arg_t arg3;
629 syscall_arg_t arg4;
630 syscall_arg_t arg5;
631 syscall_arg_t arg6;
632 syscall_arg_t arg7;
633 syscall_arg_t arg8;
634 syscall_arg_t arg9;
635 };
636
637 #undef NSYSCALL
638 #define NSYSCALL mach_trap_count
639
640 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
641 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
642 #endif
643
644 typedef struct machtrace_sysent {
645 dtrace_id_t stsy_entry;
646 dtrace_id_t stsy_return;
647 kern_return_t (*stsy_underlying)(void *);
648 int32_t stsy_return_type;
649 } machtrace_sysent_t;
650
651 static machtrace_sysent_t *machtrace_sysent = NULL;
652
653 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
654 uint64_t, uint64_t, uint64_t);
655
656 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
657
658 static dtrace_provider_id_t machtrace_id;
659
660 static kern_return_t
661 dtrace_machtrace_syscall(struct mach_call_args *args)
662 {
663 int code; /* The mach call number */
664
665 machtrace_sysent_t *sy;
666 dtrace_id_t id;
667 kern_return_t rval;
668 #if 0 /* XXX */
669 proc_t *p;
670 #endif
671 syscall_arg_t *ip = (syscall_arg_t *)args;
672 mach_call_t mach_call;
673
674 #if defined (__x86_64__)
675 {
676 pal_register_cache_state(current_thread(), VALID);
677 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
678
679 if (is_saved_state64(tagged_regs)) {
680 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
681 } else {
682 code = -saved_state32(tagged_regs)->eax;
683 }
684 }
685 #elif defined(__arm__)
686 {
687 /* r12 has the machcall number, but it is -ve */
688 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
689 code = (int)arm_regs->r[12];
690 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
691 code = -code;
692 }
693 #elif defined(__arm64__)
694 {
695 /* From arm/thread_status.h:get_saved_state_svc_number */
696 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
697 if (is_saved_state32(arm_regs)) {
698 code = (int)saved_state32(arm_regs)->r[12];
699 } else {
700 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
701 }
702
703 /* From bsd/arm64.c:mach_syscall */
704 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
705 code = -code;
706 }
707 #else
708 #error Unknown Architecture
709 #endif
710
711 sy = &machtrace_sysent[code];
712
713 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
714 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
715
716 if (uthread) {
717 uthread->t_dtrace_syscall_args = (void *)ip;
718 }
719
720 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
721
722 if (uthread) {
723 uthread->t_dtrace_syscall_args = (void *)0;
724 }
725 }
726
727 #if 0 /* XXX */
728 /*
729 * APPLE NOTE: Not implemented.
730 * We want to explicitly allow DTrace consumers to stop a process
731 * before it actually executes the meat of the syscall.
732 */
733 p = ttoproc(curthread);
734 mutex_enter(&p->p_lock);
735 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
736 curthread->t_dtrace_stop = 0;
737 stop(PR_REQUESTED, 0);
738 }
739 mutex_exit(&p->p_lock);
740 #endif
741
742 mach_call = (mach_call_t)(*sy->stsy_underlying);
743 rval = mach_call(args);
744
745 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
746 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
747 }
748
749 return rval;
750 }
751
752 static void
753 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
754 {
755 machtrace_sysent_t *msysent = *interposed;
756 int i;
757
758 if (msysent == NULL) {
759 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
760 NSYSCALL, KM_SLEEP);
761 }
762
763 for (i = 0; i < NSYSCALL; i++) {
764 const mach_trap_t *a = &actual[i];
765 machtrace_sysent_t *s = &msysent[i];
766
767 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
768 continue;
769 }
770
771 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
772 continue;
773 }
774
775 s->stsy_underlying = a->mach_trap_function;
776 }
777 }
778
779 /*ARGSUSED*/
780 static void
781 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
782 {
783 #pragma unused(arg) /* __APPLE__ */
784
785 int i;
786
787 if (desc != NULL) {
788 return;
789 }
790
791 machtrace_init(mach_trap_table, &machtrace_sysent);
792
793 for (i = 0; i < NSYSCALL; i++) {
794 if (machtrace_sysent[i].stsy_underlying == NULL) {
795 continue;
796 }
797
798 if (dtrace_probe_lookup(machtrace_id, NULL,
799 mach_syscall_name_table[i], "entry") != 0) {
800 continue;
801 }
802
803 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
804 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
805 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
806 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
807 "return", MACHTRACE_ARTIFICIAL_FRAMES,
808 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
809
810 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
811 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
812 }
813 }
814
815 /*ARGSUSED*/
816 static void
817 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
818 {
819 #pragma unused(arg,id) /* __APPLE__ */
820 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
821
822 #pragma unused(sysnum) /* __APPLE__ */
823
824 /*
825 * There's nothing to do here but assert that we have actually been
826 * disabled.
827 */
828 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
829 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
830 } else {
831 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
832 }
833 }
834
835 /*ARGSUSED*/
836 static int
837 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
838 {
839 #pragma unused(arg) /* __APPLE__ */
840
841 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
842 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
843 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
844
845 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
846 machtrace_sysent[sysnum].stsy_entry = id;
847 } else {
848 machtrace_sysent[sysnum].stsy_return = id;
849 }
850
851 if (enabled) {
852 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
853 return 0;
854 }
855
856 lck_mtx_lock(&dtrace_systrace_lock);
857
858 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
859 /* It is not possible to write to mach_trap_table[] directly because it is const. */
860 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_machtrace_syscall);
861 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
862 }
863
864 lck_mtx_unlock(&dtrace_systrace_lock);
865
866 return 0;
867 }
868
869 /*ARGSUSED*/
870 static void
871 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
872 {
873 #pragma unused(arg,id) /* __APPLE__ */
874
875 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
876 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
877 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
878
879 if (disable) {
880 /*
881 * Usage of volatile protects the if statement below from being optimized away.
882 *
883 * Compilers are clever and know that const array values can't change in time
884 * and the if below is always false. That is because it can't see that DTrace
885 * injects dtrace_machtrace_syscall dynamically and violates constness of the
886 * array.
887 */
888 volatile const mach_trap_t *machtrap = &mach_trap_table[sysnum];
889
890 lck_mtx_lock(&dtrace_systrace_lock);
891 if (machtrap->mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
892 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying,
893 (vm_offset_t)&machtrap->mach_trap_function, sizeof(vm_offset_t));
894 }
895 lck_mtx_unlock(&dtrace_systrace_lock);
896 }
897
898 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
899 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
900 } else {
901 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
902 }
903 }
904
905 static dtrace_pattr_t machtrace_attr = {
906 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
907 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
908 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
909 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
910 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
911 };
912
913 static dtrace_pops_t machtrace_pops = {
914 .dtps_provide = machtrace_provide,
915 .dtps_provide_module = NULL,
916 .dtps_enable = machtrace_enable,
917 .dtps_disable = machtrace_disable,
918 .dtps_suspend = NULL,
919 .dtps_resume = NULL,
920 .dtps_getargdesc = NULL,
921 .dtps_getargval = machtrace_getarg,
922 .dtps_usermode = NULL,
923 .dtps_destroy = machtrace_destroy
924 };
925
926 static int
927 machtrace_attach(dev_info_t *devi)
928 {
929 machtrace_probe = dtrace_probe;
930 membar_enter();
931
932 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
933 DDI_PSEUDO, 0) == DDI_FAILURE ||
934 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
935 &machtrace_pops, NULL, &machtrace_id) != 0) {
936 machtrace_probe = (void*)&systrace_stub;
937 ddi_remove_minor_node(devi, NULL);
938 return DDI_FAILURE;
939 }
940
941 return DDI_SUCCESS;
942 }
943
944 d_open_t _systrace_open;
945
946 int
947 _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
948 {
949 #pragma unused(dev,flags,devtype,p)
950 return 0;
951 }
952
953 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
954
955 static struct cdevsw systrace_cdevsw =
956 {
957 .d_open = _systrace_open,
958 .d_close = eno_opcl,
959 .d_read = eno_rdwrt,
960 .d_write = eno_rdwrt,
961 .d_ioctl = eno_ioctl,
962 .d_stop = (stop_fcn_t *)nulldev,
963 .d_reset = (reset_fcn_t *)nulldev,
964 .d_select = eno_select,
965 .d_mmap = eno_mmap,
966 .d_strategy = eno_strat,
967 .d_reserved_1 = eno_getc,
968 .d_reserved_2 = eno_putc,
969 };
970
971 void systrace_init( void );
972
973 void
974 systrace_init( void )
975 {
976 if (dtrace_sdt_probes_restricted()) {
977 return;
978 }
979
980 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
981
982 if (majdevno < 0) {
983 printf("systrace_init: failed to allocate a major number!\n");
984 return;
985 }
986
987 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
988 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
989 }
990 #undef SYSTRACE_MAJOR
991
992 static uint64_t
993 systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
994 {
995 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
996 uint64_t val = 0;
997 uint64_t *uargs = NULL;
998
999 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1000
1001 if (uthread) {
1002 uargs = uthread->t_dtrace_syscall_args;
1003 }
1004 if (!uargs) {
1005 return 0;
1006 }
1007 if (argno < 0 || argno >= SYSTRACE_NARGS) {
1008 return 0;
1009 }
1010
1011 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1012 val = uargs[argno];
1013 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1014 return val;
1015 }
1016
1017 static void
1018 systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
1019 dtrace_argdesc_t *desc)
1020 {
1021 #pragma unused(arg, id)
1022 int sysnum = SYSTRACE_SYSNUM(parg);
1023 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1024 uint64_t *uargs = NULL;
1025
1026 if (!uthread) {
1027 desc->dtargd_ndx = DTRACE_ARGNONE;
1028 return;
1029 }
1030
1031 uargs = uthread->t_dtrace_syscall_args;
1032
1033 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1034 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1035 desc->dtargd_native, sizeof(desc->dtargd_native));
1036 } else {
1037 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1038 desc->dtargd_native, sizeof(desc->dtargd_native));
1039 }
1040
1041 if (desc->dtargd_native[0] == '\0') {
1042 desc->dtargd_ndx = DTRACE_ARGNONE;
1043 }
1044 }
1045
1046 static uint64_t
1047 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1048 {
1049 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1050 uint64_t val = 0;
1051 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1052
1053 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1054
1055 if (uthread) {
1056 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1057 }
1058
1059 if (!stack) {
1060 return 0;
1061 }
1062
1063 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1064 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1065 val = (uint64_t)*(stack + argno);
1066 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1067 return val;
1068 }