]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-6153.101.6.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <kern/thread.h>
27 #include <mach/thread_status.h>
28
29 /* XXX All of these should really be derived from syscall_sw.h */
30 #if defined (__x86_64__)
31 #define SYSCALL_CLASS_SHIFT 24
32 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
33 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
34 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
35 #endif
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/errno.h>
41 #include <sys/ioctl.h>
42 #include <sys/conf.h>
43 #include <sys/fcntl.h>
44 #include <sys/syscall.h>
45 #include <miscfs/devfs/devfs.h>
46
47 #include <sys/dtrace.h>
48 #include <sys/dtrace_impl.h>
49 #include <sys/systrace_args.h>
50 #include "systrace.h"
51 #include <sys/stat.h>
52 #include <sys/systm.h>
53 #include <sys/conf.h>
54 #include <sys/user.h>
55
56 #include <machine/pal_routines.h>
57
58 #if defined (__x86_64__)
59 #define SYSTRACE_ARTIFICIAL_FRAMES 2
60 #define MACHTRACE_ARTIFICIAL_FRAMES 3
61 #elif defined(__arm__) || defined(__arm64__)
62 #define SYSTRACE_ARTIFICIAL_FRAMES 2
63 #define MACHTRACE_ARTIFICIAL_FRAMES 3
64 #else
65 #error Unknown Architecture
66 #endif
67
68 #define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
69
70 #include <sys/sysent.h>
71 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
72 #define NSYSCALL nsysent /* and is less than 500 or so */
73
74 extern const char *syscallnames[];
75
76 #include <sys/dtrace_glue.h>
77 #define casptr dtrace_casptr
78 #define membar_enter dtrace_membar_producer
79
80 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
81 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
82
83 extern lck_attr_t* dtrace_lck_attr;
84 extern lck_grp_t* dtrace_lck_grp;
85 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
86
87 systrace_sysent_t *systrace_sysent = NULL;
88 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
89
90 static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
91 static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
92
93 void
94 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
95 uint64_t arg2, uint64_t arg3, uint64_t arg4)
96 {
97 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
98 }
99
100 int32_t
101 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
102 {
103 unsigned short code; /* The system call number */
104
105 systrace_sysent_t *sy;
106 dtrace_id_t id;
107 int32_t rval;
108 syscall_arg_t *ip = (syscall_arg_t *)uap;
109 uint64_t uargs[SYSTRACE_NARGS] = {0};
110
111 #if defined (__x86_64__)
112 {
113 pal_register_cache_state(current_thread(), VALID);
114 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
115
116 if (is_saved_state64(tagged_regs)) {
117 x86_saved_state64_t *regs = saved_state64(tagged_regs);
118 code = regs->rax & SYSCALL_NUMBER_MASK;
119 /*
120 * Check for indirect system call... system call number
121 * passed as 'arg0'
122 */
123 if (code == 0) {
124 code = regs->rdi;
125 }
126 } else {
127 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
128
129 if (code == 0) {
130 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
131 code = fuword(params);
132 }
133 }
134 }
135 #elif defined(__arm__)
136 {
137 /*
138 * On arm, syscall numbers depend on a flavor (indirect or not)
139 * and can be in either r0 or r12 (always u32)
140 */
141
142 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
143 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
144
145 /* Check for indirect system call */
146 if (arm_regs->r[12] != 0) {
147 code = arm_regs->r[12];
148 } else {
149 code = arm_regs->r[0];
150 }
151 }
152 #elif defined(__arm64__)
153 {
154 /*
155 * On arm64, syscall numbers depend on a flavor (indirect or not)
156 * ... and for u32 can be in either r0 or r12
157 * ... and for u64 can be in either x0 or x16
158 */
159
160 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
161 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
162
163 if (is_saved_state32(arm_regs)) {
164 /* Check for indirect system call */
165 if (saved_state32(arm_regs)->r[12] != 0) {
166 code = saved_state32(arm_regs)->r[12];
167 } else {
168 code = saved_state32(arm_regs)->r[0];
169 }
170 } else {
171 /* Check for indirect system call */
172 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
173 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
174 } else {
175 code = saved_state64(arm_regs)->x[0];
176 }
177 }
178 }
179 #else
180 #error Unknown Architecture
181 #endif
182
183 // Bounds "check" the value of code a la unix_syscall
184 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
185
186 systrace_args(code, ip, uargs);
187
188 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
189 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
190 if (uthread) {
191 uthread->t_dtrace_syscall_args = uargs;
192 }
193
194 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
195 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
196
197 if (uthread) {
198 uthread->t_dtrace_syscall_args = NULL;
199 }
200 }
201
202
203
204 #if 0 /* XXX */
205 /*
206 * APPLE NOTE: Not implemented.
207 * We want to explicitly allow DTrace consumers to stop a process
208 * before it actually executes the meat of the syscall.
209 */
210 p = ttoproc(curthread);
211 mutex_enter(&p->p_lock);
212 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
213 curthread->t_dtrace_stop = 0;
214 stop(PR_REQUESTED, 0);
215 }
216 mutex_exit(&p->p_lock);
217 #endif
218
219 rval = (*sy->stsy_underlying)(pp, uap, rv);
220
221 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
222 uint64_t munged_rv0, munged_rv1;
223 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
224
225 if (uthread) {
226 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
227 }
228 /*
229 * "Decode" rv for use in the call to dtrace_probe()
230 */
231 if (rval == ERESTART) {
232 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
233 munged_rv1 = -1LL;
234 } else if (rval != EJUSTRETURN) {
235 if (rval) {
236 munged_rv0 = -1LL; /* Mimic what libc will do. */
237 munged_rv1 = -1LL;
238 } else {
239 switch (sy->stsy_return_type) {
240 case _SYSCALL_RET_INT_T:
241 munged_rv0 = rv[0];
242 munged_rv1 = rv[1];
243 break;
244 case _SYSCALL_RET_UINT_T:
245 munged_rv0 = ((u_int)rv[0]);
246 munged_rv1 = ((u_int)rv[1]);
247 break;
248 case _SYSCALL_RET_OFF_T:
249 case _SYSCALL_RET_UINT64_T:
250 munged_rv0 = *(u_int64_t *)rv;
251 munged_rv1 = 0LL;
252 break;
253 case _SYSCALL_RET_ADDR_T:
254 case _SYSCALL_RET_SIZE_T:
255 case _SYSCALL_RET_SSIZE_T:
256 munged_rv0 = *(user_addr_t *)rv;
257 munged_rv1 = 0LL;
258 break;
259 case _SYSCALL_RET_NONE:
260 munged_rv0 = 0LL;
261 munged_rv1 = 0LL;
262 break;
263 default:
264 munged_rv0 = 0LL;
265 munged_rv1 = 0LL;
266 break;
267 }
268 }
269 } else {
270 munged_rv0 = 0LL;
271 munged_rv1 = 0LL;
272 }
273
274 /*
275 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
276 *
277 * "This is a bit of an historical artifact. At first, the syscall provider just
278 * had its return value in arg0, and the fbt and pid providers had their return
279 * values in arg1 (so that we could use arg0 for the offset of the return site).
280 *
281 * We inevitably started writing scripts where we wanted to see the return
282 * values from probes in all three providers, and we made this script easier
283 * to write by replicating the syscall return values in arg1 to match fbt and
284 * pid. We debated briefly about removing the return value from arg0, but
285 * decided that it would be less confusing to have the same data in two places
286 * than to have some non-helpful, non-intuitive value in arg0.
287 *
288 * This change was made 4/23/2003 according to the DTrace project's putback log."
289 */
290 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
291 }
292
293 return rval;
294 }
295
296 void
297 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
298 {
299 systrace_sysent_t *sy;
300 dtrace_id_t id;
301
302 // Bounds "check" the value of code a la unix_syscall_return
303 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
304
305 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
306 uint64_t munged_rv0, munged_rv1;
307 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
308
309 if (uthread) {
310 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
311 }
312 /*
313 * "Decode" rv for use in the call to dtrace_probe()
314 */
315 if (rval == ERESTART) {
316 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
317 munged_rv1 = -1LL;
318 } else if (rval != EJUSTRETURN) {
319 if (rval) {
320 munged_rv0 = -1LL; /* Mimic what libc will do. */
321 munged_rv1 = -1LL;
322 } else {
323 switch (sy->stsy_return_type) {
324 case _SYSCALL_RET_INT_T:
325 munged_rv0 = rv[0];
326 munged_rv1 = rv[1];
327 break;
328 case _SYSCALL_RET_UINT_T:
329 munged_rv0 = ((u_int)rv[0]);
330 munged_rv1 = ((u_int)rv[1]);
331 break;
332 case _SYSCALL_RET_OFF_T:
333 case _SYSCALL_RET_UINT64_T:
334 munged_rv0 = *(u_int64_t *)rv;
335 munged_rv1 = 0LL;
336 break;
337 case _SYSCALL_RET_ADDR_T:
338 case _SYSCALL_RET_SIZE_T:
339 case _SYSCALL_RET_SSIZE_T:
340 munged_rv0 = *(user_addr_t *)rv;
341 munged_rv1 = 0LL;
342 break;
343 case _SYSCALL_RET_NONE:
344 munged_rv0 = 0LL;
345 munged_rv1 = 0LL;
346 break;
347 default:
348 munged_rv0 = 0LL;
349 munged_rv1 = 0LL;
350 break;
351 }
352 }
353 } else {
354 munged_rv0 = 0LL;
355 munged_rv1 = 0LL;
356 }
357
358 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
359 }
360 }
361
362 #define SYSTRACE_SHIFT 16
363 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
364 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
365 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
366 #define SYSTRACE_RETURN(id) (id)
367
368 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
369 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
370 #endif
371
372 static dtrace_provider_id_t systrace_id;
373
374 /*
375 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
376 * See balanced undef below.
377 */
378 #define systrace_init _systrace_init
379
380 static void
381 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
382 {
383 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
384 * from bsd/sys/sysent.h */
385 unsigned int i;
386
387 if (ssysent == NULL) {
388 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
389 NSYSCALL, KM_SLEEP);
390 }
391
392 for (i = 0; i < NSYSCALL; i++) {
393 struct sysent *a = &actual[i];
394 systrace_sysent_t *s = &ssysent[i];
395
396 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
397 continue;
398 }
399
400 if (a->sy_callc == dtrace_systrace_syscall) {
401 continue;
402 }
403
404 s->stsy_underlying = a->sy_callc;
405 s->stsy_return_type = a->sy_return_type;
406 }
407 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
408 }
409
410
411 /*ARGSUSED*/
412 static void
413 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
414 {
415 #pragma unused(arg) /* __APPLE__ */
416 unsigned int i;
417
418 if (desc != NULL) {
419 return;
420 }
421
422 systrace_init(sysent, &systrace_sysent);
423
424 for (i = 0; i < NSYSCALL; i++) {
425 if (systrace_sysent[i].stsy_underlying == NULL) {
426 continue;
427 }
428
429 if (dtrace_probe_lookup(systrace_id, NULL,
430 syscallnames[i], "entry") != 0) {
431 continue;
432 }
433
434 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
435 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
436 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
437 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
438 "return", SYSTRACE_ARTIFICIAL_FRAMES,
439 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
440
441 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
442 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
443 }
444 }
445 #undef systrace_init
446
447 /*ARGSUSED*/
448 static void
449 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
450 {
451 #pragma unused(arg,id) /* __APPLE__ */
452
453 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
454
455 #pragma unused(sysnum) /* __APPLE__ */
456 /*
457 * There's nothing to do here but assert that we have actually been
458 * disabled.
459 */
460 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
461 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
462 } else {
463 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
464 }
465 }
466
467 /*ARGSUSED*/
468 static int
469 systrace_enable(void *arg, dtrace_id_t id, void *parg)
470 {
471 #pragma unused(arg) /* __APPLE__ */
472
473 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
474 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
475 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
476
477 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
478 systrace_sysent[sysnum].stsy_entry = id;
479 } else {
480 systrace_sysent[sysnum].stsy_return = id;
481 }
482
483 if (enabled) {
484 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
485 return 0;
486 }
487
488 lck_mtx_lock(&dtrace_systrace_lock);
489 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
490 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
491 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
492 }
493 lck_mtx_unlock(&dtrace_systrace_lock);
494 return 0;
495 }
496
497 /*ARGSUSED*/
498 static void
499 systrace_disable(void *arg, dtrace_id_t id, void *parg)
500 {
501 #pragma unused(arg,id) /* __APPLE__ */
502
503 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
504 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
505 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
506
507 if (disable) {
508 lck_mtx_lock(&dtrace_systrace_lock);
509 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) {
510 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
511 }
512 lck_mtx_unlock(&dtrace_systrace_lock);
513 }
514
515 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
516 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
517 } else {
518 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
519 }
520 }
521
522 static dtrace_pattr_t systrace_attr = {
523 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
524 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
525 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
526 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
527 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
528 };
529
530 static dtrace_pops_t systrace_pops = {
531 .dtps_provide = systrace_provide,
532 .dtps_provide_module = NULL,
533 .dtps_enable = systrace_enable,
534 .dtps_disable = systrace_disable,
535 .dtps_suspend = NULL,
536 .dtps_resume = NULL,
537 .dtps_getargdesc = systrace_getargdesc,
538 .dtps_getargval = systrace_getargval,
539 .dtps_usermode = NULL,
540 .dtps_destroy = systrace_destroy
541 };
542
543 static int
544 systrace_attach(dev_info_t *devi)
545 {
546 systrace_probe = (void*)&dtrace_probe;
547 membar_enter();
548
549 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
550 DDI_PSEUDO, 0) == DDI_FAILURE ||
551 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
552 &systrace_pops, NULL, &systrace_id) != 0) {
553 systrace_probe = systrace_stub;
554 ddi_remove_minor_node(devi, NULL);
555 return DDI_FAILURE;
556 }
557
558 return DDI_SUCCESS;
559 }
560
561
562 /*
563 * APPLE NOTE: systrace_detach not implemented
564 */
565 #if !defined(__APPLE__)
566 static int
567 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
568 {
569 switch (cmd) {
570 case DDI_DETACH:
571 break;
572 case DDI_SUSPEND:
573 return DDI_SUCCESS;
574 default:
575 return DDI_FAILURE;
576 }
577
578 if (dtrace_unregister(systrace_id) != 0) {
579 return DDI_FAILURE;
580 }
581
582 ddi_remove_minor_node(devi, NULL);
583 systrace_probe = systrace_stub;
584 return DDI_SUCCESS;
585 }
586 #endif /* __APPLE__ */
587
588
589 typedef kern_return_t (*mach_call_t)(void *);
590
591 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
592 typedef void mach_munge_t(void *);
593
594 typedef struct {
595 int mach_trap_arg_count;
596 kern_return_t (*mach_trap_function)(void *);
597 #if defined(__arm64__) || defined(__x86_64__)
598 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
599 #endif
600 int mach_trap_u32_words;
601 #if MACH_ASSERT
602 const char* mach_trap_name;
603 #endif /* MACH_ASSERT */
604 } mach_trap_t;
605
606 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
607 extern int mach_trap_count;
608
609 extern const char *mach_syscall_name_table[];
610
611 /* XXX From osfmk/i386/bsd_i386.c */
612 struct mach_call_args {
613 syscall_arg_t arg1;
614 syscall_arg_t arg2;
615 syscall_arg_t arg3;
616 syscall_arg_t arg4;
617 syscall_arg_t arg5;
618 syscall_arg_t arg6;
619 syscall_arg_t arg7;
620 syscall_arg_t arg8;
621 syscall_arg_t arg9;
622 };
623
624 #undef NSYSCALL
625 #define NSYSCALL mach_trap_count
626
627 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
628 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
629 #endif
630
631 typedef struct machtrace_sysent {
632 dtrace_id_t stsy_entry;
633 dtrace_id_t stsy_return;
634 kern_return_t (*stsy_underlying)(void *);
635 int32_t stsy_return_type;
636 } machtrace_sysent_t;
637
638 static machtrace_sysent_t *machtrace_sysent = NULL;
639
640 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
641 uint64_t, uint64_t, uint64_t);
642
643 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
644
645 static dtrace_provider_id_t machtrace_id;
646
647 static kern_return_t
648 dtrace_machtrace_syscall(struct mach_call_args *args)
649 {
650 int code; /* The mach call number */
651
652 machtrace_sysent_t *sy;
653 dtrace_id_t id;
654 kern_return_t rval;
655 #if 0 /* XXX */
656 proc_t *p;
657 #endif
658 syscall_arg_t *ip = (syscall_arg_t *)args;
659 mach_call_t mach_call;
660
661 #if defined (__x86_64__)
662 {
663 pal_register_cache_state(current_thread(), VALID);
664 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
665
666 if (is_saved_state64(tagged_regs)) {
667 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
668 } else {
669 code = -saved_state32(tagged_regs)->eax;
670 }
671 }
672 #elif defined(__arm__)
673 {
674 /* r12 has the machcall number, but it is -ve */
675 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
676 code = (int)arm_regs->r[12];
677 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
678 code = -code;
679 }
680 #elif defined(__arm64__)
681 {
682 /* From arm/thread_status.h:get_saved_state_svc_number */
683 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
684 if (is_saved_state32(arm_regs)) {
685 code = (int)saved_state32(arm_regs)->r[12];
686 } else {
687 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
688 }
689
690 /* From bsd/arm64.c:mach_syscall */
691 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
692 code = -code;
693 }
694 #else
695 #error Unknown Architecture
696 #endif
697
698 sy = &machtrace_sysent[code];
699
700 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
701 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
702
703 if (uthread) {
704 uthread->t_dtrace_syscall_args = (void *)ip;
705 }
706
707 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
708
709 if (uthread) {
710 uthread->t_dtrace_syscall_args = (void *)0;
711 }
712 }
713
714 #if 0 /* XXX */
715 /*
716 * APPLE NOTE: Not implemented.
717 * We want to explicitly allow DTrace consumers to stop a process
718 * before it actually executes the meat of the syscall.
719 */
720 p = ttoproc(curthread);
721 mutex_enter(&p->p_lock);
722 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
723 curthread->t_dtrace_stop = 0;
724 stop(PR_REQUESTED, 0);
725 }
726 mutex_exit(&p->p_lock);
727 #endif
728
729 mach_call = (mach_call_t)(*sy->stsy_underlying);
730 rval = mach_call(args);
731
732 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
733 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
734 }
735
736 return rval;
737 }
738
739 static void
740 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
741 {
742 machtrace_sysent_t *msysent = *interposed;
743 int i;
744
745 if (msysent == NULL) {
746 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
747 NSYSCALL, KM_SLEEP);
748 }
749
750 for (i = 0; i < NSYSCALL; i++) {
751 const mach_trap_t *a = &actual[i];
752 machtrace_sysent_t *s = &msysent[i];
753
754 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
755 continue;
756 }
757
758 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
759 continue;
760 }
761
762 s->stsy_underlying = a->mach_trap_function;
763 }
764 }
765
766 /*ARGSUSED*/
767 static void
768 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
769 {
770 #pragma unused(arg) /* __APPLE__ */
771
772 int i;
773
774 if (desc != NULL) {
775 return;
776 }
777
778 machtrace_init(mach_trap_table, &machtrace_sysent);
779
780 for (i = 0; i < NSYSCALL; i++) {
781 if (machtrace_sysent[i].stsy_underlying == NULL) {
782 continue;
783 }
784
785 if (dtrace_probe_lookup(machtrace_id, NULL,
786 mach_syscall_name_table[i], "entry") != 0) {
787 continue;
788 }
789
790 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
791 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
792 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
793 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
794 "return", MACHTRACE_ARTIFICIAL_FRAMES,
795 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
796
797 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
798 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
799 }
800 }
801
802 /*ARGSUSED*/
803 static void
804 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
805 {
806 #pragma unused(arg,id) /* __APPLE__ */
807 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
808
809 #pragma unused(sysnum) /* __APPLE__ */
810
811 /*
812 * There's nothing to do here but assert that we have actually been
813 * disabled.
814 */
815 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
816 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
817 } else {
818 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
819 }
820 }
821
822 /*ARGSUSED*/
823 static int
824 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
825 {
826 #pragma unused(arg) /* __APPLE__ */
827
828 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
829 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
830 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
831
832 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
833 machtrace_sysent[sysnum].stsy_entry = id;
834 } else {
835 machtrace_sysent[sysnum].stsy_return = id;
836 }
837
838 if (enabled) {
839 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
840 return 0;
841 }
842
843 lck_mtx_lock(&dtrace_systrace_lock);
844
845 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
846 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
847 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
848 }
849
850 lck_mtx_unlock(&dtrace_systrace_lock);
851
852 return 0;
853 }
854
855 /*ARGSUSED*/
856 static void
857 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
858 {
859 #pragma unused(arg,id) /* __APPLE__ */
860
861 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
862 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
863 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
864
865 if (disable) {
866 lck_mtx_lock(&dtrace_systrace_lock);
867
868 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
869 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
870 }
871 lck_mtx_unlock(&dtrace_systrace_lock);
872 }
873
874 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
875 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
876 } else {
877 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
878 }
879 }
880
881 static dtrace_pattr_t machtrace_attr = {
882 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
883 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
884 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
885 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
886 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
887 };
888
889 static dtrace_pops_t machtrace_pops = {
890 .dtps_provide = machtrace_provide,
891 .dtps_provide_module = NULL,
892 .dtps_enable = machtrace_enable,
893 .dtps_disable = machtrace_disable,
894 .dtps_suspend = NULL,
895 .dtps_resume = NULL,
896 .dtps_getargdesc = NULL,
897 .dtps_getargval = machtrace_getarg,
898 .dtps_usermode = NULL,
899 .dtps_destroy = machtrace_destroy
900 };
901
902 static int
903 machtrace_attach(dev_info_t *devi)
904 {
905 machtrace_probe = dtrace_probe;
906 membar_enter();
907
908 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
909 DDI_PSEUDO, 0) == DDI_FAILURE ||
910 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
911 &machtrace_pops, NULL, &machtrace_id) != 0) {
912 machtrace_probe = (void*)&systrace_stub;
913 ddi_remove_minor_node(devi, NULL);
914 return DDI_FAILURE;
915 }
916
917 return DDI_SUCCESS;
918 }
919
920 d_open_t _systrace_open;
921
922 int
923 _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
924 {
925 #pragma unused(dev,flags,devtype,p)
926 return 0;
927 }
928
929 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
930
931 /*
932 * A struct describing which functions will get invoked for certain
933 * actions.
934 */
935 static struct cdevsw systrace_cdevsw =
936 {
937 _systrace_open, /* open */
938 eno_opcl, /* close */
939 eno_rdwrt, /* read */
940 eno_rdwrt, /* write */
941 eno_ioctl, /* ioctl */
942 (stop_fcn_t *)nulldev, /* stop */
943 (reset_fcn_t *)nulldev, /* reset */
944 NULL, /* tty's */
945 eno_select, /* select */
946 eno_mmap, /* mmap */
947 eno_strat, /* strategy */
948 eno_getc, /* getc */
949 eno_putc, /* putc */
950 0 /* type */
951 };
952
953 void systrace_init( void );
954
955 void
956 systrace_init( void )
957 {
958 if (dtrace_sdt_probes_restricted()) {
959 return;
960 }
961
962 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
963
964 if (majdevno < 0) {
965 printf("systrace_init: failed to allocate a major number!\n");
966 return;
967 }
968
969 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
970 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
971 }
972 #undef SYSTRACE_MAJOR
973
974 static uint64_t
975 systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
976 {
977 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
978 uint64_t val = 0;
979 uint64_t *uargs = NULL;
980
981 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
982
983 if (uthread) {
984 uargs = uthread->t_dtrace_syscall_args;
985 }
986 if (!uargs) {
987 return 0;
988 }
989 if (argno < 0 || argno >= SYSTRACE_NARGS) {
990 return 0;
991 }
992
993 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
994 val = uargs[argno];
995 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
996 return val;
997 }
998
999 static void
1000 systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
1001 dtrace_argdesc_t *desc)
1002 {
1003 #pragma unused(arg, id)
1004 int sysnum = SYSTRACE_SYSNUM(parg);
1005 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1006 uint64_t *uargs = NULL;
1007
1008 if (!uthread) {
1009 desc->dtargd_ndx = DTRACE_ARGNONE;
1010 return;
1011 }
1012
1013 uargs = uthread->t_dtrace_syscall_args;
1014
1015 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1016 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1017 desc->dtargd_native, sizeof(desc->dtargd_native));
1018 } else {
1019 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1020 desc->dtargd_native, sizeof(desc->dtargd_native));
1021 }
1022
1023 if (desc->dtargd_native[0] == '\0') {
1024 desc->dtargd_ndx = DTRACE_ARGNONE;
1025 }
1026 }
1027
1028 static uint64_t
1029 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1030 {
1031 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1032 uint64_t val = 0;
1033 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1034
1035 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1036
1037 if (uthread) {
1038 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1039 }
1040
1041 if (!stack) {
1042 return 0;
1043 }
1044
1045 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1046 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1047 val = (uint64_t)*(stack + argno);
1048 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1049 return val;
1050 }