]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/systrace.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
6d2010ae 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
f427ee49
A
26#include <ptrauth.h>
27
2d21ac55
A
28#include <kern/thread.h>
29#include <mach/thread_status.h>
39236c6e 30
2d21ac55 31/* XXX All of these should really be derived from syscall_sw.h */
39236c6e 32#if defined (__x86_64__)
2d21ac55
A
33#define SYSCALL_CLASS_SHIFT 24
34#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
35#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
36#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
2d21ac55
A
37#endif
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/proc.h>
42#include <sys/errno.h>
43#include <sys/ioctl.h>
44#include <sys/conf.h>
45#include <sys/fcntl.h>
39037602 46#include <sys/syscall.h>
2d21ac55
A
47#include <miscfs/devfs/devfs.h>
48
49#include <sys/dtrace.h>
50#include <sys/dtrace_impl.h>
5ba3f43e 51#include <sys/systrace_args.h>
2d21ac55
A
52#include "systrace.h"
53#include <sys/stat.h>
54#include <sys/systm.h>
55#include <sys/conf.h>
56#include <sys/user.h>
57
6d2010ae
A
58#include <machine/pal_routines.h>
59
39236c6e 60#if defined (__x86_64__)
0a7de745 61#define SYSTRACE_ARTIFICIAL_FRAMES 2
2d21ac55 62#define MACHTRACE_ARTIFICIAL_FRAMES 3
5ba3f43e
A
63#elif defined(__arm__) || defined(__arm64__)
64#define SYSTRACE_ARTIFICIAL_FRAMES 2
65#define MACHTRACE_ARTIFICIAL_FRAMES 3
2d21ac55
A
66#else
67#error Unknown Architecture
68#endif
69
5ba3f43e
A
70#define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
71
2d21ac55
A
72#include <sys/sysent.h>
73#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
74#define NSYSCALL nsysent /* and is less than 500 or so */
75
76extern const char *syscallnames[];
77
78#include <sys/dtrace_glue.h>
79#define casptr dtrace_casptr
80#define membar_enter dtrace_membar_producer
81
82#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
83#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
84
316670eb
A
85extern lck_attr_t* dtrace_lck_attr;
86extern lck_grp_t* dtrace_lck_grp;
0a7de745 87static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
316670eb 88
2d21ac55 89systrace_sysent_t *systrace_sysent = NULL;
316670eb
A
90void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
91
5ba3f43e
A
92static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
93static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
2d21ac55
A
94
95void
96systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
316670eb 97 uint64_t arg2, uint64_t arg3, uint64_t arg4)
2d21ac55 98{
316670eb 99#pragma unused(id,arg0,arg1,arg2,arg3,arg4)
2d21ac55
A
100}
101
102int32_t
103dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
104{
0a7de745 105 unsigned short code; /* The system call number */
2d21ac55
A
106
107 systrace_sysent_t *sy;
108 dtrace_id_t id;
109 int32_t rval;
2d21ac55 110 syscall_arg_t *ip = (syscall_arg_t *)uap;
5ba3f43e 111 uint64_t uargs[SYSTRACE_NARGS] = {0};
2d21ac55 112
39236c6e 113#if defined (__x86_64__)
2d21ac55 114 {
6d2010ae 115 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
116 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
117
118 if (is_saved_state64(tagged_regs)) {
119 x86_saved_state64_t *regs = saved_state64(tagged_regs);
120 code = regs->rax & SYSCALL_NUMBER_MASK;
121 /*
122 * Check for indirect system call... system call number
123 * passed as 'arg0'
124 */
125 if (code == 0) {
126 code = regs->rdi;
127 }
128 } else {
129 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
b0d623f7
A
130
131 if (code == 0) {
0a7de745 132 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
b0d623f7
A
133 code = fuword(params);
134 }
2d21ac55
A
135 }
136 }
5ba3f43e
A
137#elif defined(__arm__)
138 {
139 /*
140 * On arm, syscall numbers depend on a flavor (indirect or not)
141 * and can be in either r0 or r12 (always u32)
142 */
143
144 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
145 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
146
147 /* Check for indirect system call */
0a7de745 148 if (arm_regs->r[12] != 0) {
5ba3f43e 149 code = arm_regs->r[12];
0a7de745 150 } else {
5ba3f43e 151 code = arm_regs->r[0];
0a7de745 152 }
5ba3f43e
A
153 }
154#elif defined(__arm64__)
155 {
156 /*
157 * On arm64, syscall numbers depend on a flavor (indirect or not)
158 * ... and for u32 can be in either r0 or r12
159 * ... and for u64 can be in either x0 or x16
160 */
161
0a7de745 162 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
5ba3f43e
A
163 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
164
165 if (is_saved_state32(arm_regs)) {
0a7de745 166 /* Check for indirect system call */
5ba3f43e
A
167 if (saved_state32(arm_regs)->r[12] != 0) {
168 code = saved_state32(arm_regs)->r[12];
0a7de745 169 } else {
5ba3f43e
A
170 code = saved_state32(arm_regs)->r[0];
171 }
172 } else {
173 /* Check for indirect system call */
0a7de745 174 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
5ba3f43e 175 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
0a7de745 176 } else {
5ba3f43e
A
177 code = saved_state64(arm_regs)->x[0];
178 }
179 }
180 }
2d21ac55
A
181#else
182#error Unknown Architecture
183#endif
184
185 // Bounds "check" the value of code a la unix_syscall
39037602 186 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
2d21ac55 187
5ba3f43e
A
188 systrace_args(code, ip, uargs);
189
c910b4d9 190 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
0a7de745
A
191 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
192 if (uthread) {
5ba3f43e 193 uthread->t_dtrace_syscall_args = uargs;
0a7de745
A
194 }
195
5ba3f43e
A
196 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
197 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
0a7de745
A
198
199 if (uthread) {
5ba3f43e 200 uthread->t_dtrace_syscall_args = NULL;
0a7de745 201 }
c910b4d9 202 }
2d21ac55 203
5ba3f43e
A
204
205
2d21ac55
A
206#if 0 /* XXX */
207 /*
fe8ab488 208 * APPLE NOTE: Not implemented.
2d21ac55
A
209 * We want to explicitly allow DTrace consumers to stop a process
210 * before it actually executes the meat of the syscall.
211 */
212 p = ttoproc(curthread);
213 mutex_enter(&p->p_lock);
214 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
215 curthread->t_dtrace_stop = 0;
216 stop(PR_REQUESTED, 0);
217 }
218 mutex_exit(&p->p_lock);
219#endif
220
221 rval = (*sy->stsy_underlying)(pp, uap, rv);
222
223 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 224 uint64_t munged_rv0, munged_rv1;
0a7de745 225 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
2d21ac55 226
0a7de745 227 if (uthread) {
2d21ac55 228 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
0a7de745 229 }
2d21ac55 230 /*
0a7de745
A
231 * "Decode" rv for use in the call to dtrace_probe()
232 */
2d21ac55 233 if (rval == ERESTART) {
b0d623f7
A
234 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
235 munged_rv1 = -1LL;
2d21ac55
A
236 } else if (rval != EJUSTRETURN) {
237 if (rval) {
b0d623f7
A
238 munged_rv0 = -1LL; /* Mimic what libc will do. */
239 munged_rv1 = -1LL;
2d21ac55
A
240 } else {
241 switch (sy->stsy_return_type) {
242 case _SYSCALL_RET_INT_T:
b0d623f7
A
243 munged_rv0 = rv[0];
244 munged_rv1 = rv[1];
2d21ac55
A
245 break;
246 case _SYSCALL_RET_UINT_T:
b0d623f7
A
247 munged_rv0 = ((u_int)rv[0]);
248 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
249 break;
250 case _SYSCALL_RET_OFF_T:
d1ecb069 251 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
252 munged_rv0 = *(u_int64_t *)rv;
253 munged_rv1 = 0LL;
2d21ac55
A
254 break;
255 case _SYSCALL_RET_ADDR_T:
256 case _SYSCALL_RET_SIZE_T:
257 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
258 munged_rv0 = *(user_addr_t *)rv;
259 munged_rv1 = 0LL;
2d21ac55
A
260 break;
261 case _SYSCALL_RET_NONE:
b0d623f7
A
262 munged_rv0 = 0LL;
263 munged_rv1 = 0LL;
2d21ac55
A
264 break;
265 default:
b0d623f7
A
266 munged_rv0 = 0LL;
267 munged_rv1 = 0LL;
2d21ac55
A
268 break;
269 }
270 }
b0d623f7
A
271 } else {
272 munged_rv0 = 0LL;
273 munged_rv1 = 0LL;
274 }
2d21ac55 275
b0d623f7
A
276 /*
277 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
278 *
279 * "This is a bit of an historical artifact. At first, the syscall provider just
280 * had its return value in arg0, and the fbt and pid providers had their return
281 * values in arg1 (so that we could use arg0 for the offset of the return site).
0a7de745 282 *
b0d623f7
A
283 * We inevitably started writing scripts where we wanted to see the return
284 * values from probes in all three providers, and we made this script easier
285 * to write by replicating the syscall return values in arg1 to match fbt and
286 * pid. We debated briefly about removing the return value from arg0, but
287 * decided that it would be less confusing to have the same data in two places
288 * than to have some non-helpful, non-intuitive value in arg0.
0a7de745 289 *
b0d623f7 290 * This change was made 4/23/2003 according to the DTrace project's putback log."
0a7de745 291 */
316670eb 292 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
293 }
294
0a7de745 295 return rval;
2d21ac55
A
296}
297
298void
299dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
300{
301 systrace_sysent_t *sy;
302 dtrace_id_t id;
303
304 // Bounds "check" the value of code a la unix_syscall_return
39037602 305 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
2d21ac55
A
306
307 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 308 uint64_t munged_rv0, munged_rv1;
0a7de745 309 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
2d21ac55 310
0a7de745 311 if (uthread) {
2d21ac55 312 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
0a7de745 313 }
2d21ac55 314 /*
0a7de745
A
315 * "Decode" rv for use in the call to dtrace_probe()
316 */
2d21ac55 317 if (rval == ERESTART) {
b0d623f7
A
318 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
319 munged_rv1 = -1LL;
2d21ac55
A
320 } else if (rval != EJUSTRETURN) {
321 if (rval) {
b0d623f7
A
322 munged_rv0 = -1LL; /* Mimic what libc will do. */
323 munged_rv1 = -1LL;
2d21ac55
A
324 } else {
325 switch (sy->stsy_return_type) {
326 case _SYSCALL_RET_INT_T:
b0d623f7
A
327 munged_rv0 = rv[0];
328 munged_rv1 = rv[1];
2d21ac55
A
329 break;
330 case _SYSCALL_RET_UINT_T:
b0d623f7
A
331 munged_rv0 = ((u_int)rv[0]);
332 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
333 break;
334 case _SYSCALL_RET_OFF_T:
d1ecb069 335 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
336 munged_rv0 = *(u_int64_t *)rv;
337 munged_rv1 = 0LL;
2d21ac55
A
338 break;
339 case _SYSCALL_RET_ADDR_T:
340 case _SYSCALL_RET_SIZE_T:
341 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
342 munged_rv0 = *(user_addr_t *)rv;
343 munged_rv1 = 0LL;
2d21ac55
A
344 break;
345 case _SYSCALL_RET_NONE:
b0d623f7
A
346 munged_rv0 = 0LL;
347 munged_rv1 = 0LL;
2d21ac55
A
348 break;
349 default:
b0d623f7
A
350 munged_rv0 = 0LL;
351 munged_rv1 = 0LL;
2d21ac55
A
352 break;
353 }
354 }
b0d623f7
A
355 } else {
356 munged_rv0 = 0LL;
357 munged_rv1 = 0LL;
358 }
2d21ac55 359
316670eb 360 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
361 }
362}
2d21ac55 363
0a7de745
A
364#define SYSTRACE_SHIFT 16
365#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
366#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
367#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
368#define SYSTRACE_RETURN(id) (id)
2d21ac55
A
369
370#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
371#error 1 << SYSTRACE_SHIFT must exceed number of system calls
372#endif
373
2d21ac55
A
374static dtrace_provider_id_t systrace_id;
375
fe8ab488
A
376/*
377 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
378 * See balanced undef below.
379 */
380#define systrace_init _systrace_init
2d21ac55 381
b0d623f7 382static void
f427ee49 383systrace_init(const struct sysent *actual, systrace_sysent_t **interposed)
b0d623f7 384{
b0d623f7 385 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
0a7de745 386 * from bsd/sys/sysent.h */
39037602 387 unsigned int i;
b0d623f7
A
388
389 if (ssysent == NULL) {
0a7de745 390 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
b0d623f7
A
391 NSYSCALL, KM_SLEEP);
392 }
393
394 for (i = 0; i < NSYSCALL; i++) {
f427ee49 395 const struct sysent *a = &actual[i];
b0d623f7
A
396 systrace_sysent_t *s = &ssysent[i];
397
0a7de745 398 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
b0d623f7 399 continue;
0a7de745 400 }
b0d623f7 401
0a7de745 402 if (a->sy_callc == dtrace_systrace_syscall) {
b0d623f7 403 continue;
0a7de745 404 }
b0d623f7 405
b0d623f7
A
406 s->stsy_underlying = a->sy_callc;
407 s->stsy_return_type = a->sy_return_type;
2d21ac55 408 }
316670eb 409 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
2d21ac55
A
410}
411
b0d623f7 412
2d21ac55
A
413/*ARGSUSED*/
414static void
415systrace_provide(void *arg, const dtrace_probedesc_t *desc)
416{
b0d623f7 417#pragma unused(arg) /* __APPLE__ */
39037602 418 unsigned int i;
2d21ac55 419
0a7de745 420 if (desc != NULL) {
2d21ac55 421 return;
0a7de745 422 }
2d21ac55
A
423
424 systrace_init(sysent, &systrace_sysent);
2d21ac55
A
425
426 for (i = 0; i < NSYSCALL; i++) {
0a7de745 427 if (systrace_sysent[i].stsy_underlying == NULL) {
2d21ac55 428 continue;
0a7de745 429 }
2d21ac55
A
430
431 if (dtrace_probe_lookup(systrace_id, NULL,
0a7de745 432 syscallnames[i], "entry") != 0) {
2d21ac55 433 continue;
0a7de745 434 }
2d21ac55
A
435
436 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
437 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
438 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
440 "return", SYSTRACE_ARTIFICIAL_FRAMES,
441 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
442
443 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
444 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
2d21ac55
A
445 }
446}
2d21ac55 447#undef systrace_init
2d21ac55
A
448
449/*ARGSUSED*/
450static void
451systrace_destroy(void *arg, dtrace_id_t id, void *parg)
452{
b0d623f7
A
453#pragma unused(arg,id) /* __APPLE__ */
454
2d21ac55
A
455 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
456
b0d623f7 457#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
458 /*
459 * There's nothing to do here but assert that we have actually been
460 * disabled.
461 */
462 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
463 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
2d21ac55
A
464 } else {
465 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
2d21ac55
A
466 }
467}
468
469/*ARGSUSED*/
6d2010ae 470static int
2d21ac55
A
471systrace_enable(void *arg, dtrace_id_t id, void *parg)
472{
b0d623f7 473#pragma unused(arg) /* __APPLE__ */
0a7de745 474
2d21ac55
A
475 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
476 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
477 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
478
479 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
480 systrace_sysent[sysnum].stsy_entry = id;
2d21ac55
A
481 } else {
482 systrace_sysent[sysnum].stsy_return = id;
2d21ac55
A
483 }
484
485 if (enabled) {
486 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
0a7de745 487 return 0;
2d21ac55 488 }
316670eb
A
489
490 lck_mtx_lock(&dtrace_systrace_lock);
491 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
f427ee49 492 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_systrace_syscall);
316670eb
A
493 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
494 }
495 lck_mtx_unlock(&dtrace_systrace_lock);
0a7de745 496 return 0;
2d21ac55
A
497}
498
499/*ARGSUSED*/
500static void
501systrace_disable(void *arg, dtrace_id_t id, void *parg)
502{
b0d623f7 503#pragma unused(arg,id) /* __APPLE__ */
0a7de745 504
2d21ac55
A
505 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
506 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
507 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
508
509 if (disable) {
316670eb 510 lck_mtx_lock(&dtrace_systrace_lock);
0a7de745 511 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) {
316670eb 512 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
0a7de745 513 }
316670eb 514 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55
A
515 }
516
517 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
518 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
2d21ac55
A
519 } else {
520 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
2d21ac55
A
521 }
522}
523
524static dtrace_pattr_t systrace_attr = {
0a7de745
A
525 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
526 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
527 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
528 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
529 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
2d21ac55
A
530};
531
532static dtrace_pops_t systrace_pops = {
0a7de745
A
533 .dtps_provide = systrace_provide,
534 .dtps_provide_module = NULL,
535 .dtps_enable = systrace_enable,
536 .dtps_disable = systrace_disable,
537 .dtps_suspend = NULL,
538 .dtps_resume = NULL,
539 .dtps_getargdesc = systrace_getargdesc,
540 .dtps_getargval = systrace_getargval,
541 .dtps_usermode = NULL,
542 .dtps_destroy = systrace_destroy
2d21ac55
A
543};
544
545static int
d9a64523 546systrace_attach(dev_info_t *devi)
2d21ac55 547{
d9a64523 548 systrace_probe = (void*)&dtrace_probe;
b0d623f7
A
549 membar_enter();
550
551 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
552 DDI_PSEUDO, 0) == DDI_FAILURE ||
553 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
554 &systrace_pops, NULL, &systrace_id) != 0) {
555 systrace_probe = systrace_stub;
556 ddi_remove_minor_node(devi, NULL);
0a7de745 557 return DDI_FAILURE;
b0d623f7 558 }
2d21ac55 559
0a7de745 560 return DDI_SUCCESS;
2d21ac55
A
561}
562
fe8ab488
A
563
564/*
565 * APPLE NOTE: systrace_detach not implemented
566 */
2d21ac55
A
567#if !defined(__APPLE__)
568static int
569systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
570{
571 switch (cmd) {
572 case DDI_DETACH:
573 break;
574 case DDI_SUSPEND:
0a7de745 575 return DDI_SUCCESS;
2d21ac55 576 default:
0a7de745 577 return DDI_FAILURE;
2d21ac55
A
578 }
579
0a7de745
A
580 if (dtrace_unregister(systrace_id) != 0) {
581 return DDI_FAILURE;
582 }
2d21ac55
A
583
584 ddi_remove_minor_node(devi, NULL);
585 systrace_probe = systrace_stub;
0a7de745 586 return DDI_SUCCESS;
2d21ac55 587}
fe8ab488 588#endif /* __APPLE__ */
2d21ac55 589
2d21ac55 590
2d21ac55
A
591typedef kern_return_t (*mach_call_t)(void *);
592
fe8ab488
A
593/* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
594typedef void mach_munge_t(void *);
2d21ac55
A
595
596typedef struct {
0a7de745
A
597 int mach_trap_arg_count;
598 kern_return_t (*mach_trap_function)(void *);
fe8ab488 599#if defined(__arm64__) || defined(__x86_64__)
0a7de745 600 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
2d21ac55 601#endif
0a7de745
A
602 int mach_trap_u32_words;
603#if MACH_ASSERT
604 const char* mach_trap_name;
6d2010ae 605#endif /* MACH_ASSERT */
2d21ac55
A
606} mach_trap_t;
607
39236c6e 608extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
2d21ac55
A
609extern int mach_trap_count;
610
b0d623f7 611extern const char *mach_syscall_name_table[];
2d21ac55
A
612
613/* XXX From osfmk/i386/bsd_i386.c */
614struct mach_call_args {
0a7de745
A
615 syscall_arg_t arg1;
616 syscall_arg_t arg2;
617 syscall_arg_t arg3;
618 syscall_arg_t arg4;
619 syscall_arg_t arg5;
620 syscall_arg_t arg6;
621 syscall_arg_t arg7;
622 syscall_arg_t arg8;
623 syscall_arg_t arg9;
2d21ac55
A
624};
625
626#undef NSYSCALL
627#define NSYSCALL mach_trap_count
628
629#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
630#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
631#endif
632
316670eb 633typedef struct machtrace_sysent {
0a7de745
A
634 dtrace_id_t stsy_entry;
635 dtrace_id_t stsy_return;
636 kern_return_t (*stsy_underlying)(void *);
637 int32_t stsy_return_type;
316670eb 638} machtrace_sysent_t;
2d21ac55
A
639
640static machtrace_sysent_t *machtrace_sysent = NULL;
641
642void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
643 uint64_t, uint64_t, uint64_t);
644
0a7de745 645static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
316670eb 646
2d21ac55
A
647static dtrace_provider_id_t machtrace_id;
648
649static kern_return_t
650dtrace_machtrace_syscall(struct mach_call_args *args)
651{
0a7de745 652 int code; /* The mach call number */
2d21ac55
A
653
654 machtrace_sysent_t *sy;
655 dtrace_id_t id;
656 kern_return_t rval;
657#if 0 /* XXX */
658 proc_t *p;
659#endif
660 syscall_arg_t *ip = (syscall_arg_t *)args;
661 mach_call_t mach_call;
662
39236c6e 663#if defined (__x86_64__)
2d21ac55 664 {
6d2010ae 665 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
666 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
667
668 if (is_saved_state64(tagged_regs)) {
cf7d32b8 669 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
2d21ac55
A
670 } else {
671 code = -saved_state32(tagged_regs)->eax;
672 }
673 }
5ba3f43e
A
674#elif defined(__arm__)
675 {
676 /* r12 has the machcall number, but it is -ve */
677 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
678 code = (int)arm_regs->r[12];
679 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
680 code = -code;
681 }
682#elif defined(__arm64__)
683 {
684 /* From arm/thread_status.h:get_saved_state_svc_number */
685 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
686 if (is_saved_state32(arm_regs)) {
687 code = (int)saved_state32(arm_regs)->r[12];
688 } else {
689 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
690 }
691
0a7de745 692 /* From bsd/arm64.c:mach_syscall */
5ba3f43e 693 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
0a7de745 694 code = -code;
5ba3f43e 695 }
2d21ac55
A
696#else
697#error Unknown Architecture
698#endif
699
700 sy = &machtrace_sysent[code];
701
316670eb 702 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
0a7de745 703 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
316670eb 704
0a7de745 705 if (uthread) {
316670eb 706 uthread->t_dtrace_syscall_args = (void *)ip;
0a7de745
A
707 }
708
709 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
710
711 if (uthread) {
712 uthread->t_dtrace_syscall_args = (void *)0;
713 }
316670eb 714 }
2d21ac55
A
715
716#if 0 /* XXX */
717 /*
fe8ab488 718 * APPLE NOTE: Not implemented.
2d21ac55
A
719 * We want to explicitly allow DTrace consumers to stop a process
720 * before it actually executes the meat of the syscall.
721 */
722 p = ttoproc(curthread);
723 mutex_enter(&p->p_lock);
724 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
725 curthread->t_dtrace_stop = 0;
726 stop(PR_REQUESTED, 0);
727 }
728 mutex_exit(&p->p_lock);
729#endif
730
731 mach_call = (mach_call_t)(*sy->stsy_underlying);
732 rval = mach_call(args);
733
0a7de745 734 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
2d21ac55 735 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
0a7de745 736 }
2d21ac55 737
0a7de745 738 return rval;
2d21ac55
A
739}
740
741static void
39236c6e 742machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
2d21ac55
A
743{
744 machtrace_sysent_t *msysent = *interposed;
745 int i;
746
747 if (msysent == NULL) {
0a7de745
A
748 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
749 NSYSCALL, KM_SLEEP);
2d21ac55
A
750 }
751
752 for (i = 0; i < NSYSCALL; i++) {
39236c6e 753 const mach_trap_t *a = &actual[i];
2d21ac55
A
754 machtrace_sysent_t *s = &msysent[i];
755
0a7de745 756 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
2d21ac55 757 continue;
0a7de745 758 }
2d21ac55 759
0a7de745 760 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
2d21ac55 761 continue;
0a7de745 762 }
2d21ac55 763
316670eb 764 s->stsy_underlying = a->mach_trap_function;
2d21ac55
A
765 }
766}
767
768/*ARGSUSED*/
769static void
770machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
771{
b0d623f7 772#pragma unused(arg) /* __APPLE__ */
0a7de745 773
2d21ac55
A
774 int i;
775
0a7de745 776 if (desc != NULL) {
2d21ac55 777 return;
0a7de745 778 }
2d21ac55
A
779
780 machtrace_init(mach_trap_table, &machtrace_sysent);
781
782 for (i = 0; i < NSYSCALL; i++) {
0a7de745 783 if (machtrace_sysent[i].stsy_underlying == NULL) {
2d21ac55 784 continue;
0a7de745 785 }
2d21ac55
A
786
787 if (dtrace_probe_lookup(machtrace_id, NULL,
0a7de745 788 mach_syscall_name_table[i], "entry") != 0) {
2d21ac55 789 continue;
0a7de745 790 }
2d21ac55 791
b0d623f7 792 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
0a7de745
A
793 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
794 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
b0d623f7 795 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
0a7de745
A
796 "return", MACHTRACE_ARTIFICIAL_FRAMES,
797 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
2d21ac55
A
798
799 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
800 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
801 }
802}
803
804/*ARGSUSED*/
805static void
806machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
807{
b0d623f7 808#pragma unused(arg,id) /* __APPLE__ */
2d21ac55 809 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
0a7de745 810
b0d623f7 811#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
812
813 /*
814 * There's nothing to do here but assert that we have actually been
815 * disabled.
816 */
817 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
818 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
819 } else {
820 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
821 }
822}
823
824/*ARGSUSED*/
6d2010ae 825static int
2d21ac55
A
826machtrace_enable(void *arg, dtrace_id_t id, void *parg)
827{
b0d623f7 828#pragma unused(arg) /* __APPLE__ */
0a7de745 829
2d21ac55
A
830 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
831 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
0a7de745 832 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
2d21ac55
A
833
834 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
835 machtrace_sysent[sysnum].stsy_entry = id;
836 } else {
837 machtrace_sysent[sysnum].stsy_return = id;
838 }
839
840 if (enabled) {
0a7de745
A
841 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
842 return 0;
2d21ac55
A
843 }
844
316670eb
A
845 lck_mtx_lock(&dtrace_systrace_lock);
846
847 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
f427ee49 848 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_machtrace_syscall);
316670eb
A
849 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
850 }
851
852 lck_mtx_unlock(&dtrace_systrace_lock);
853
0a7de745 854 return 0;
2d21ac55
A
855}
856
857/*ARGSUSED*/
858static void
859machtrace_disable(void *arg, dtrace_id_t id, void *parg)
860{
b0d623f7 861#pragma unused(arg,id) /* __APPLE__ */
0a7de745 862
2d21ac55
A
863 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
864 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
0a7de745 865 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
2d21ac55
A
866
867 if (disable) {
316670eb
A
868 lck_mtx_lock(&dtrace_systrace_lock);
869
870 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
871 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
872 }
873 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55
A
874 }
875
876 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
877 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
878 } else {
879 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
880 }
881}
882
883static dtrace_pattr_t machtrace_attr = {
0a7de745
A
884 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
885 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
886 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
887 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
888 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
2d21ac55
A
889};
890
891static dtrace_pops_t machtrace_pops = {
0a7de745
A
892 .dtps_provide = machtrace_provide,
893 .dtps_provide_module = NULL,
894 .dtps_enable = machtrace_enable,
895 .dtps_disable = machtrace_disable,
896 .dtps_suspend = NULL,
897 .dtps_resume = NULL,
898 .dtps_getargdesc = NULL,
899 .dtps_getargval = machtrace_getarg,
900 .dtps_usermode = NULL,
901 .dtps_destroy = machtrace_destroy
2d21ac55
A
902};
903
904static int
d9a64523 905machtrace_attach(dev_info_t *devi)
2d21ac55 906{
b0d623f7
A
907 machtrace_probe = dtrace_probe;
908 membar_enter();
0a7de745 909
b0d623f7 910 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
0a7de745
A
911 DDI_PSEUDO, 0) == DDI_FAILURE ||
912 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
913 &machtrace_pops, NULL, &machtrace_id) != 0) {
914 machtrace_probe = (void*)&systrace_stub;
2d21ac55 915 ddi_remove_minor_node(devi, NULL);
0a7de745 916 return DDI_FAILURE;
2d21ac55
A
917 }
918
0a7de745 919 return DDI_SUCCESS;
2d21ac55
A
920}
921
922d_open_t _systrace_open;
923
0a7de745
A
/*
 * open() handler for the systrace character device.
 * Ignores every argument and always reports success (0).
 */
int
_systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
	(void)dev;
	(void)flags;
	(void)devtype;
	(void)p;

	return 0;
}
930
931#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
932
2d21ac55
A
933static struct cdevsw systrace_cdevsw =
934{
f427ee49
A
935 .d_open = _systrace_open,
936 .d_close = eno_opcl,
937 .d_read = eno_rdwrt,
938 .d_write = eno_rdwrt,
939 .d_ioctl = eno_ioctl,
940 .d_stop = (stop_fcn_t *)nulldev,
941 .d_reset = (reset_fcn_t *)nulldev,
942 .d_select = eno_select,
943 .d_mmap = eno_mmap,
944 .d_strategy = eno_strat,
945 .d_reserved_1 = eno_getc,
946 .d_reserved_2 = eno_putc,
2d21ac55
A
947};
948
2d21ac55
A
949void systrace_init( void );
950
0a7de745
A
951void
952systrace_init( void )
2d21ac55 953{
d9a64523
A
954 if (dtrace_sdt_probes_restricted()) {
955 return;
956 }
2d21ac55 957
d9a64523 958 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
2d21ac55 959
d9a64523
A
960 if (majdevno < 0) {
961 printf("systrace_init: failed to allocate a major number!\n");
962 return;
963 }
2d21ac55 964
d9a64523
A
965 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
966 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
2d21ac55
A
967}
968#undef SYSTRACE_MAJOR
316670eb
A
969
970static uint64_t
5ba3f43e 971systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
316670eb
A
972{
973#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
974 uint64_t val = 0;
5ba3f43e 975 uint64_t *uargs = NULL;
316670eb 976
0a7de745 977 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
316670eb 978
0a7de745 979 if (uthread) {
5ba3f43e 980 uargs = uthread->t_dtrace_syscall_args;
0a7de745
A
981 }
982 if (!uargs) {
983 return 0;
984 }
985 if (argno < 0 || argno >= SYSTRACE_NARGS) {
986 return 0;
987 }
316670eb
A
988
989 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5ba3f43e 990 val = uargs[argno];
316670eb 991 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
0a7de745 992 return val;
316670eb
A
993}
994
5ba3f43e
A
995static void
996systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
0a7de745 997 dtrace_argdesc_t *desc)
5ba3f43e
A
998{
999#pragma unused(arg, id)
1000 int sysnum = SYSTRACE_SYSNUM(parg);
1001 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1002 uint64_t *uargs = NULL;
1003
1004 if (!uthread) {
1005 desc->dtargd_ndx = DTRACE_ARGNONE;
1006 return;
1007 }
1008
1009 uargs = uthread->t_dtrace_syscall_args;
1010
1011 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1012 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
0a7de745
A
1013 desc->dtargd_native, sizeof(desc->dtargd_native));
1014 } else {
5ba3f43e 1015 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
0a7de745 1016 desc->dtargd_native, sizeof(desc->dtargd_native));
5ba3f43e
A
1017 }
1018
0a7de745 1019 if (desc->dtargd_native[0] == '\0') {
5ba3f43e 1020 desc->dtargd_ndx = DTRACE_ARGNONE;
0a7de745 1021 }
5ba3f43e 1022}
316670eb
A
1023
1024static uint64_t
1025machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1026{
1027#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1028 uint64_t val = 0;
1029 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1030
1031 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
0a7de745
A
1032
1033 if (uthread) {
316670eb 1034 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
0a7de745
A
1035 }
1036
1037 if (!stack) {
1038 return 0;
1039 }
316670eb
A
1040
1041 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1042 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
0a7de745 1043 val = (uint64_t)*(stack + argno);
316670eb 1044 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
0a7de745 1045 return val;
316670eb 1046}