]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/systrace.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
6d2010ae 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
b0d623f7 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
2d21ac55 27
2d21ac55
A
28#ifdef KERNEL
29#ifndef _KERNEL
30#define _KERNEL /* Solaris vs. Darwin */
31#endif
32#endif
33
2d21ac55
A
34#include <kern/thread.h>
35#include <mach/thread_status.h>
39236c6e 36
2d21ac55 37/* XXX All of these should really be derived from syscall_sw.h */
39236c6e 38#if defined (__x86_64__)
2d21ac55
A
39#define SYSCALL_CLASS_SHIFT 24
40#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
41#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
42#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
2d21ac55
A
43#endif
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/proc.h>
48#include <sys/errno.h>
49#include <sys/ioctl.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
39037602 52#include <sys/syscall.h>
2d21ac55
A
53#include <miscfs/devfs/devfs.h>
54
55#include <sys/dtrace.h>
56#include <sys/dtrace_impl.h>
5ba3f43e 57#include <sys/systrace_args.h>
2d21ac55
A
58#include "systrace.h"
59#include <sys/stat.h>
60#include <sys/systm.h>
61#include <sys/conf.h>
62#include <sys/user.h>
63
6d2010ae
A
64#include <machine/pal_routines.h>
65
39236c6e 66#if defined (__x86_64__)
2d21ac55
A
67#define SYSTRACE_ARTIFICIAL_FRAMES 2
68#define MACHTRACE_ARTIFICIAL_FRAMES 3
5ba3f43e
A
69#elif defined(__arm__) || defined(__arm64__)
70#define SYSTRACE_ARTIFICIAL_FRAMES 2
71#define MACHTRACE_ARTIFICIAL_FRAMES 3
2d21ac55
A
72#else
73#error Unknown Architecture
74#endif
75
5ba3f43e
A
76#define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
77
2d21ac55
A
78#include <sys/sysent.h>
79#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
80#define NSYSCALL nsysent /* and is less than 500 or so */
81
82extern const char *syscallnames[];
83
84#include <sys/dtrace_glue.h>
85#define casptr dtrace_casptr
86#define membar_enter dtrace_membar_producer
87
88#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
89#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
90
316670eb
A
91extern lck_attr_t* dtrace_lck_attr;
92extern lck_grp_t* dtrace_lck_grp;
93static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
94
2d21ac55 95systrace_sysent_t *systrace_sysent = NULL;
316670eb
A
96void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
97
5ba3f43e
A
98static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
99static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
2d21ac55
A
100
101void
102systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
316670eb 103 uint64_t arg2, uint64_t arg3, uint64_t arg4)
2d21ac55 104{
316670eb 105#pragma unused(id,arg0,arg1,arg2,arg3,arg4)
2d21ac55
A
106}
107
108int32_t
109dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
110{
39236c6e 111 unsigned short code; /* The system call number */
2d21ac55
A
112
113 systrace_sysent_t *sy;
114 dtrace_id_t id;
115 int32_t rval;
2d21ac55 116 syscall_arg_t *ip = (syscall_arg_t *)uap;
5ba3f43e 117 uint64_t uargs[SYSTRACE_NARGS] = {0};
2d21ac55 118
39236c6e 119#if defined (__x86_64__)
2d21ac55 120 {
6d2010ae 121 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
122 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
123
124 if (is_saved_state64(tagged_regs)) {
125 x86_saved_state64_t *regs = saved_state64(tagged_regs);
126 code = regs->rax & SYSCALL_NUMBER_MASK;
127 /*
128 * Check for indirect system call... system call number
129 * passed as 'arg0'
130 */
131 if (code == 0) {
132 code = regs->rdi;
133 }
134 } else {
135 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
b0d623f7
A
136
137 if (code == 0) {
138 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
139 code = fuword(params);
140 }
2d21ac55
A
141 }
142 }
5ba3f43e
A
143#elif defined(__arm__)
144 {
145 /*
146 * On arm, syscall numbers depend on a flavor (indirect or not)
147 * and can be in either r0 or r12 (always u32)
148 */
149
150 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
151 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
152
153 /* Check for indirect system call */
154 if (arm_regs->r[12] != 0)
155 code = arm_regs->r[12];
156 else
157 code = arm_regs->r[0];
158 }
159#elif defined(__arm64__)
160 {
161 /*
162 * On arm64, syscall numbers depend on a flavor (indirect or not)
163 * ... and for u32 can be in either r0 or r12
164 * ... and for u64 can be in either x0 or x16
165 */
166
167 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
168 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
169
170 if (is_saved_state32(arm_regs)) {
171 /* Check for indirect system call */
172 if (saved_state32(arm_regs)->r[12] != 0) {
173 code = saved_state32(arm_regs)->r[12];
174 }
175 else {
176 code = saved_state32(arm_regs)->r[0];
177 }
178 } else {
179 /* Check for indirect system call */
180 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0 ) {
181 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
182 }
183 else {
184 code = saved_state64(arm_regs)->x[0];
185 }
186 }
187 }
2d21ac55
A
188#else
189#error Unknown Architecture
190#endif
191
192 // Bounds "check" the value of code a la unix_syscall
39037602 193 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
2d21ac55 194
5ba3f43e
A
195 systrace_args(code, ip, uargs);
196
c910b4d9 197 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
316670eb
A
198 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
199 if (uthread)
5ba3f43e 200 uthread->t_dtrace_syscall_args = uargs;
316670eb 201
5ba3f43e
A
202 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
203 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
316670eb
A
204
205 if (uthread)
5ba3f43e 206 uthread->t_dtrace_syscall_args = NULL;
c910b4d9 207 }
2d21ac55 208
5ba3f43e
A
209
210
2d21ac55
A
211#if 0 /* XXX */
212 /*
fe8ab488 213 * APPLE NOTE: Not implemented.
2d21ac55
A
214 * We want to explicitly allow DTrace consumers to stop a process
215 * before it actually executes the meat of the syscall.
216 */
217 p = ttoproc(curthread);
218 mutex_enter(&p->p_lock);
219 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
220 curthread->t_dtrace_stop = 0;
221 stop(PR_REQUESTED, 0);
222 }
223 mutex_exit(&p->p_lock);
224#endif
225
226 rval = (*sy->stsy_underlying)(pp, uap, rv);
227
228 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 229 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
230 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
231
232 if (uthread)
233 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
234
235 /*
236 * "Decode" rv for use in the call to dtrace_probe()
237 */
238 if (rval == ERESTART) {
b0d623f7
A
239 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
240 munged_rv1 = -1LL;
2d21ac55
A
241 } else if (rval != EJUSTRETURN) {
242 if (rval) {
b0d623f7
A
243 munged_rv0 = -1LL; /* Mimic what libc will do. */
244 munged_rv1 = -1LL;
2d21ac55
A
245 } else {
246 switch (sy->stsy_return_type) {
247 case _SYSCALL_RET_INT_T:
b0d623f7
A
248 munged_rv0 = rv[0];
249 munged_rv1 = rv[1];
2d21ac55
A
250 break;
251 case _SYSCALL_RET_UINT_T:
b0d623f7
A
252 munged_rv0 = ((u_int)rv[0]);
253 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
254 break;
255 case _SYSCALL_RET_OFF_T:
d1ecb069 256 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
257 munged_rv0 = *(u_int64_t *)rv;
258 munged_rv1 = 0LL;
2d21ac55
A
259 break;
260 case _SYSCALL_RET_ADDR_T:
261 case _SYSCALL_RET_SIZE_T:
262 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
263 munged_rv0 = *(user_addr_t *)rv;
264 munged_rv1 = 0LL;
2d21ac55
A
265 break;
266 case _SYSCALL_RET_NONE:
b0d623f7
A
267 munged_rv0 = 0LL;
268 munged_rv1 = 0LL;
2d21ac55
A
269 break;
270 default:
b0d623f7
A
271 munged_rv0 = 0LL;
272 munged_rv1 = 0LL;
2d21ac55
A
273 break;
274 }
275 }
b0d623f7
A
276 } else {
277 munged_rv0 = 0LL;
278 munged_rv1 = 0LL;
279 }
2d21ac55 280
b0d623f7
A
281 /*
282 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
283 *
284 * "This is a bit of an historical artifact. At first, the syscall provider just
285 * had its return value in arg0, and the fbt and pid providers had their return
286 * values in arg1 (so that we could use arg0 for the offset of the return site).
287 *
288 * We inevitably started writing scripts where we wanted to see the return
289 * values from probes in all three providers, and we made this script easier
290 * to write by replicating the syscall return values in arg1 to match fbt and
291 * pid. We debated briefly about removing the return value from arg0, but
292 * decided that it would be less confusing to have the same data in two places
293 * than to have some non-helpful, non-intuitive value in arg0.
294 *
295 * This change was made 4/23/2003 according to the DTrace project's putback log."
296 */
316670eb 297 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
298 }
299
300 return (rval);
301}
302
303void
304dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
305{
306 systrace_sysent_t *sy;
307 dtrace_id_t id;
308
309 // Bounds "check" the value of code a la unix_syscall_return
39037602 310 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
2d21ac55
A
311
312 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
b0d623f7 313 uint64_t munged_rv0, munged_rv1;
2d21ac55
A
314 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
315
316 if (uthread)
317 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
318
319 /*
320 * "Decode" rv for use in the call to dtrace_probe()
321 */
322 if (rval == ERESTART) {
b0d623f7
A
323 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
324 munged_rv1 = -1LL;
2d21ac55
A
325 } else if (rval != EJUSTRETURN) {
326 if (rval) {
b0d623f7
A
327 munged_rv0 = -1LL; /* Mimic what libc will do. */
328 munged_rv1 = -1LL;
2d21ac55
A
329 } else {
330 switch (sy->stsy_return_type) {
331 case _SYSCALL_RET_INT_T:
b0d623f7
A
332 munged_rv0 = rv[0];
333 munged_rv1 = rv[1];
2d21ac55
A
334 break;
335 case _SYSCALL_RET_UINT_T:
b0d623f7
A
336 munged_rv0 = ((u_int)rv[0]);
337 munged_rv1 = ((u_int)rv[1]);
2d21ac55
A
338 break;
339 case _SYSCALL_RET_OFF_T:
d1ecb069 340 case _SYSCALL_RET_UINT64_T:
b0d623f7
A
341 munged_rv0 = *(u_int64_t *)rv;
342 munged_rv1 = 0LL;
2d21ac55
A
343 break;
344 case _SYSCALL_RET_ADDR_T:
345 case _SYSCALL_RET_SIZE_T:
346 case _SYSCALL_RET_SSIZE_T:
b0d623f7
A
347 munged_rv0 = *(user_addr_t *)rv;
348 munged_rv1 = 0LL;
2d21ac55
A
349 break;
350 case _SYSCALL_RET_NONE:
b0d623f7
A
351 munged_rv0 = 0LL;
352 munged_rv1 = 0LL;
2d21ac55
A
353 break;
354 default:
b0d623f7
A
355 munged_rv0 = 0LL;
356 munged_rv1 = 0LL;
2d21ac55
A
357 break;
358 }
359 }
b0d623f7
A
360 } else {
361 munged_rv0 = 0LL;
362 munged_rv1 = 0LL;
363 }
2d21ac55 364
316670eb 365 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
2d21ac55
A
366 }
367}
2d21ac55
A
368
369#define SYSTRACE_SHIFT 16
370#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
371#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
372#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
373#define SYSTRACE_RETURN(id) (id)
374
375#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
376#error 1 << SYSTRACE_SHIFT must exceed number of system calls
377#endif
378
379static dev_info_t *systrace_devi;
380static dtrace_provider_id_t systrace_id;
381
fe8ab488
A
382/*
383 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
384 * See balanced undef below.
385 */
386#define systrace_init _systrace_init
2d21ac55 387
b0d623f7
A
388static void
389systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
390{
391
392 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
393 from bsd/sys/sysent.h */
39037602 394 unsigned int i;
b0d623f7
A
395
396 if (ssysent == NULL) {
397 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
398 NSYSCALL, KM_SLEEP);
399 }
400
401 for (i = 0; i < NSYSCALL; i++) {
402 struct sysent *a = &actual[i];
403 systrace_sysent_t *s = &ssysent[i];
404
405 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
406 continue;
407
408 if (a->sy_callc == dtrace_systrace_syscall)
409 continue;
410
b0d623f7
A
411 s->stsy_underlying = a->sy_callc;
412 s->stsy_return_type = a->sy_return_type;
2d21ac55 413 }
316670eb 414 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
2d21ac55
A
415}
416
b0d623f7 417
2d21ac55
A
418/*ARGSUSED*/
419static void
420systrace_provide(void *arg, const dtrace_probedesc_t *desc)
421{
b0d623f7 422#pragma unused(arg) /* __APPLE__ */
39037602 423 unsigned int i;
2d21ac55
A
424
425 if (desc != NULL)
426 return;
427
428 systrace_init(sysent, &systrace_sysent);
2d21ac55
A
429
430 for (i = 0; i < NSYSCALL; i++) {
431 if (systrace_sysent[i].stsy_underlying == NULL)
432 continue;
433
434 if (dtrace_probe_lookup(systrace_id, NULL,
435 syscallnames[i], "entry") != 0)
436 continue;
437
438 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
439 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
440 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
441 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
442 "return", SYSTRACE_ARTIFICIAL_FRAMES,
443 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
444
445 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
446 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
2d21ac55
A
447 }
448}
2d21ac55 449#undef systrace_init
2d21ac55
A
450
451/*ARGSUSED*/
452static void
453systrace_destroy(void *arg, dtrace_id_t id, void *parg)
454{
b0d623f7
A
455#pragma unused(arg,id) /* __APPLE__ */
456
2d21ac55
A
457 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
458
b0d623f7 459#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
460 /*
461 * There's nothing to do here but assert that we have actually been
462 * disabled.
463 */
464 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
465 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
2d21ac55
A
466 } else {
467 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
2d21ac55
A
468 }
469}
470
471/*ARGSUSED*/
6d2010ae 472static int
2d21ac55
A
473systrace_enable(void *arg, dtrace_id_t id, void *parg)
474{
b0d623f7
A
475#pragma unused(arg) /* __APPLE__ */
476
2d21ac55
A
477 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
478 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
479 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
480
481 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
482 systrace_sysent[sysnum].stsy_entry = id;
2d21ac55
A
483 } else {
484 systrace_sysent[sysnum].stsy_return = id;
2d21ac55
A
485 }
486
487 if (enabled) {
488 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
6d2010ae 489 return(0);
2d21ac55 490 }
316670eb
A
491
492 lck_mtx_lock(&dtrace_systrace_lock);
493 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
494 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
495 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
496 }
497 lck_mtx_unlock(&dtrace_systrace_lock);
6d2010ae 498 return (0);
2d21ac55
A
499}
500
501/*ARGSUSED*/
502static void
503systrace_disable(void *arg, dtrace_id_t id, void *parg)
504{
b0d623f7
A
505#pragma unused(arg,id) /* __APPLE__ */
506
2d21ac55
A
507 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
508 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
509 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
510
511 if (disable) {
316670eb
A
512 lck_mtx_lock(&dtrace_systrace_lock);
513 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
514 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
515 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55 516
2d21ac55
A
517 }
518
519 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
520 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
2d21ac55
A
521 } else {
522 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
2d21ac55
A
523 }
524}
525
526static dtrace_pattr_t systrace_attr = {
527{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
528{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
529{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
530{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
531{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
532};
533
534static dtrace_pops_t systrace_pops = {
535 systrace_provide,
536 NULL,
537 systrace_enable,
538 systrace_disable,
539 NULL,
540 NULL,
5ba3f43e
A
541 systrace_getargdesc,
542 systrace_getargval,
2d21ac55
A
543 NULL,
544 systrace_destroy
545};
546
547static int
548systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
549{
550 switch (cmd) {
551 case DDI_ATTACH:
552 break;
553 case DDI_RESUME:
554 return (DDI_SUCCESS);
555 default:
556 return (DDI_FAILURE);
557 }
558
b0d623f7
A
559 systrace_probe = (void(*))&dtrace_probe;
560 membar_enter();
561
562 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
563 DDI_PSEUDO, 0) == DDI_FAILURE ||
564 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
565 &systrace_pops, NULL, &systrace_id) != 0) {
566 systrace_probe = systrace_stub;
567 ddi_remove_minor_node(devi, NULL);
568 return (DDI_FAILURE);
569 }
2d21ac55
A
570
571 ddi_report_dev(devi);
572 systrace_devi = devi;
573
574 return (DDI_SUCCESS);
575}
576
fe8ab488
A
577
578/*
579 * APPLE NOTE: systrace_detach not implemented
580 */
2d21ac55
A
581#if !defined(__APPLE__)
582static int
583systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
584{
585 switch (cmd) {
586 case DDI_DETACH:
587 break;
588 case DDI_SUSPEND:
589 return (DDI_SUCCESS);
590 default:
591 return (DDI_FAILURE);
592 }
593
594 if (dtrace_unregister(systrace_id) != 0)
595 return (DDI_FAILURE);
596
597 ddi_remove_minor_node(devi, NULL);
598 systrace_probe = systrace_stub;
599 return (DDI_SUCCESS);
600}
fe8ab488 601#endif /* __APPLE__ */
2d21ac55 602
2d21ac55 603
2d21ac55
A
604typedef kern_return_t (*mach_call_t)(void *);
605
fe8ab488
A
606/* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
607typedef void mach_munge_t(void *);
2d21ac55
A
608
609typedef struct {
316670eb
A
610 int mach_trap_arg_count;
611 kern_return_t (*mach_trap_function)(void *);
fe8ab488 612#if defined(__arm64__) || defined(__x86_64__)
316670eb 613 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
2d21ac55 614#endif
39236c6e 615 int mach_trap_u32_words;
316670eb
A
616#if MACH_ASSERT
617 const char* mach_trap_name;
6d2010ae 618#endif /* MACH_ASSERT */
2d21ac55
A
619} mach_trap_t;
620
39236c6e 621extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
2d21ac55
A
622extern int mach_trap_count;
623
b0d623f7 624extern const char *mach_syscall_name_table[];
2d21ac55
A
625
626/* XXX From osfmk/i386/bsd_i386.c */
627struct mach_call_args {
628 syscall_arg_t arg1;
629 syscall_arg_t arg2;
630 syscall_arg_t arg3;
631 syscall_arg_t arg4;
632 syscall_arg_t arg5;
633 syscall_arg_t arg6;
634 syscall_arg_t arg7;
635 syscall_arg_t arg8;
636 syscall_arg_t arg9;
637};
638
639#undef NSYSCALL
640#define NSYSCALL mach_trap_count
641
642#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
643#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
644#endif
645
316670eb
A
646typedef struct machtrace_sysent {
647 dtrace_id_t stsy_entry;
648 dtrace_id_t stsy_return;
649 kern_return_t (*stsy_underlying)(void *);
650 int32_t stsy_return_type;
651} machtrace_sysent_t;
2d21ac55
A
652
653static machtrace_sysent_t *machtrace_sysent = NULL;
654
655void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
656 uint64_t, uint64_t, uint64_t);
657
316670eb
A
658static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
659
2d21ac55
A
660static dev_info_t *machtrace_devi;
661static dtrace_provider_id_t machtrace_id;
662
663static kern_return_t
664dtrace_machtrace_syscall(struct mach_call_args *args)
665{
39236c6e 666 int code; /* The mach call number */
2d21ac55
A
667
668 machtrace_sysent_t *sy;
669 dtrace_id_t id;
670 kern_return_t rval;
671#if 0 /* XXX */
672 proc_t *p;
673#endif
674 syscall_arg_t *ip = (syscall_arg_t *)args;
675 mach_call_t mach_call;
676
39236c6e 677#if defined (__x86_64__)
2d21ac55 678 {
6d2010ae 679 pal_register_cache_state(current_thread(), VALID);
2d21ac55
A
680 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
681
682 if (is_saved_state64(tagged_regs)) {
cf7d32b8 683 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
2d21ac55
A
684 } else {
685 code = -saved_state32(tagged_regs)->eax;
686 }
687 }
5ba3f43e
A
688#elif defined(__arm__)
689 {
690 /* r12 has the machcall number, but it is -ve */
691 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
692 code = (int)arm_regs->r[12];
693 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
694 code = -code;
695 }
696#elif defined(__arm64__)
697 {
698 /* From arm/thread_status.h:get_saved_state_svc_number */
699 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
700 if (is_saved_state32(arm_regs)) {
701 code = (int)saved_state32(arm_regs)->r[12];
702 } else {
703 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
704 }
705
706 /* From bsd/arm64.c:mach_syscall */
707 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
708 code = -code;
709 }
2d21ac55
A
710#else
711#error Unknown Architecture
712#endif
713
714 sy = &machtrace_sysent[code];
715
316670eb
A
716 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
717 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
718
719 if (uthread)
720 uthread->t_dtrace_syscall_args = (void *)ip;
721
2d21ac55 722 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
316670eb
A
723
724 if (uthread)
725 uthread->t_dtrace_syscall_args = (void *)0;
726 }
2d21ac55
A
727
728#if 0 /* XXX */
729 /*
fe8ab488 730 * APPLE NOTE: Not implemented.
2d21ac55
A
731 * We want to explicitly allow DTrace consumers to stop a process
732 * before it actually executes the meat of the syscall.
733 */
734 p = ttoproc(curthread);
735 mutex_enter(&p->p_lock);
736 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
737 curthread->t_dtrace_stop = 0;
738 stop(PR_REQUESTED, 0);
739 }
740 mutex_exit(&p->p_lock);
741#endif
742
743 mach_call = (mach_call_t)(*sy->stsy_underlying);
744 rval = mach_call(args);
745
746 if ((id = sy->stsy_return) != DTRACE_IDNONE)
747 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
748
749 return (rval);
750}
751
752static void
39236c6e 753machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
2d21ac55
A
754{
755 machtrace_sysent_t *msysent = *interposed;
756 int i;
757
758 if (msysent == NULL) {
759 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
760 NSYSCALL, KM_SLEEP);
761 }
762
763 for (i = 0; i < NSYSCALL; i++) {
39236c6e 764 const mach_trap_t *a = &actual[i];
2d21ac55
A
765 machtrace_sysent_t *s = &msysent[i];
766
767 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
768 continue;
769
316670eb 770 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
2d21ac55
A
771 continue;
772
316670eb 773 s->stsy_underlying = a->mach_trap_function;
2d21ac55
A
774 }
775}
776
777/*ARGSUSED*/
778static void
779machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
780{
b0d623f7
A
781#pragma unused(arg) /* __APPLE__ */
782
2d21ac55
A
783 int i;
784
785 if (desc != NULL)
786 return;
787
788 machtrace_init(mach_trap_table, &machtrace_sysent);
789
790 for (i = 0; i < NSYSCALL; i++) {
791
792 if (machtrace_sysent[i].stsy_underlying == NULL)
793 continue;
794
795 if (dtrace_probe_lookup(machtrace_id, NULL,
b0d623f7 796 mach_syscall_name_table[i], "entry") != 0)
2d21ac55
A
797 continue;
798
b0d623f7 799 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
800 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
801 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
b0d623f7 802 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
2d21ac55
A
803 "return", MACHTRACE_ARTIFICIAL_FRAMES,
804 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
805
806 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
807 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
808 }
809}
810
811/*ARGSUSED*/
812static void
813machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
814{
b0d623f7 815#pragma unused(arg,id) /* __APPLE__ */
2d21ac55 816 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
b0d623f7
A
817
818#pragma unused(sysnum) /* __APPLE__ */
2d21ac55
A
819
820 /*
821 * There's nothing to do here but assert that we have actually been
822 * disabled.
823 */
824 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
825 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
826 } else {
827 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
828 }
829}
830
831/*ARGSUSED*/
6d2010ae 832static int
2d21ac55
A
833machtrace_enable(void *arg, dtrace_id_t id, void *parg)
834{
b0d623f7
A
835#pragma unused(arg) /* __APPLE__ */
836
2d21ac55
A
837 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
838 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
839 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
840
841 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
842 machtrace_sysent[sysnum].stsy_entry = id;
843 } else {
844 machtrace_sysent[sysnum].stsy_return = id;
845 }
846
847 if (enabled) {
316670eb 848 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
6d2010ae 849 return(0);
2d21ac55
A
850 }
851
316670eb
A
852 lck_mtx_lock(&dtrace_systrace_lock);
853
854 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
855 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
856 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
857 }
858
859 lck_mtx_unlock(&dtrace_systrace_lock);
860
6d2010ae 861 return(0);
2d21ac55
A
862}
863
864/*ARGSUSED*/
865static void
866machtrace_disable(void *arg, dtrace_id_t id, void *parg)
867{
b0d623f7
A
868#pragma unused(arg,id) /* __APPLE__ */
869
2d21ac55
A
870 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
871 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
872 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
873
874 if (disable) {
2d21ac55 875
316670eb
A
876 lck_mtx_lock(&dtrace_systrace_lock);
877
878 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
879 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
880 }
881 lck_mtx_unlock(&dtrace_systrace_lock);
2d21ac55
A
882 }
883
884 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
885 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
886 } else {
887 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
888 }
889}
890
891static dtrace_pattr_t machtrace_attr = {
892{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
893{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
894{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
895{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
896{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
897};
898
899static dtrace_pops_t machtrace_pops = {
900 machtrace_provide,
901 NULL,
902 machtrace_enable,
903 machtrace_disable,
904 NULL,
905 NULL,
906 NULL,
316670eb 907 machtrace_getarg,
2d21ac55
A
908 NULL,
909 machtrace_destroy
910};
911
912static int
913machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
914{
915 switch (cmd) {
916 case DDI_ATTACH:
917 break;
918 case DDI_RESUME:
919 return (DDI_SUCCESS);
920 default:
921 return (DDI_FAILURE);
922 }
923
b0d623f7
A
924 machtrace_probe = dtrace_probe;
925 membar_enter();
926
927 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
928 DDI_PSEUDO, 0) == DDI_FAILURE ||
929 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
930 &machtrace_pops, NULL, &machtrace_id) != 0) {
931 machtrace_probe = (void (*))&systrace_stub;
2d21ac55
A
932 ddi_remove_minor_node(devi, NULL);
933 return (DDI_FAILURE);
934 }
935
936 ddi_report_dev(devi);
937 machtrace_devi = devi;
938
939 return (DDI_SUCCESS);
940}
941
942d_open_t _systrace_open;
943
944int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
945{
946#pragma unused(dev,flags,devtype,p)
947 return 0;
948}
949
950#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
951
952/*
953 * A struct describing which functions will get invoked for certain
954 * actions.
955 */
956static struct cdevsw systrace_cdevsw =
957{
958 _systrace_open, /* open */
959 eno_opcl, /* close */
960 eno_rdwrt, /* read */
961 eno_rdwrt, /* write */
962 eno_ioctl, /* ioctl */
963 (stop_fcn_t *)nulldev, /* stop */
964 (reset_fcn_t *)nulldev, /* reset */
965 NULL, /* tty's */
966 eno_select, /* select */
967 eno_mmap, /* mmap */
968 eno_strat, /* strategy */
969 eno_getc, /* getc */
970 eno_putc, /* putc */
971 0 /* type */
972};
973
974static int gSysTraceInited = 0;
975
976void systrace_init( void );
977
978void systrace_init( void )
979{
980 if (0 == gSysTraceInited) {
39037602
A
981 if (dtrace_sdt_probes_restricted()) {
982 return;
983 }
984
2d21ac55
A
985 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
986
987 if (majdevno < 0) {
988 printf("systrace_init: failed to allocate a major number!\n");
989 gSysTraceInited = 0;
990 return;
991 }
992
b0d623f7
A
993 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
994 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
2d21ac55
A
995
996 gSysTraceInited = 1;
997 } else
998 panic("systrace_init: called twice!\n");
999}
1000#undef SYSTRACE_MAJOR
316670eb
A
1001
1002static uint64_t
5ba3f43e 1003systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
316670eb
A
1004{
1005#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1006 uint64_t val = 0;
5ba3f43e 1007 uint64_t *uargs = NULL;
316670eb
A
1008
1009 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1010
1011 if (uthread)
5ba3f43e
A
1012 uargs = uthread->t_dtrace_syscall_args;
1013 if (!uargs)
1014 return(0);
1015 if (argno < 0 || argno > SYSTRACE_NARGS)
316670eb
A
1016 return(0);
1017
1018 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
5ba3f43e 1019 val = uargs[argno];
316670eb
A
1020 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1021 return (val);
1022}
1023
5ba3f43e
A
1024static void
1025systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
1026 dtrace_argdesc_t *desc)
1027{
1028#pragma unused(arg, id)
1029 int sysnum = SYSTRACE_SYSNUM(parg);
1030 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1031 uint64_t *uargs = NULL;
1032
1033 if (!uthread) {
1034 desc->dtargd_ndx = DTRACE_ARGNONE;
1035 return;
1036 }
1037
1038 uargs = uthread->t_dtrace_syscall_args;
1039
1040 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1041 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1042 desc->dtargd_native, sizeof(desc->dtargd_native));
1043 }
1044 else {
1045 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1046 desc->dtargd_native, sizeof(desc->dtargd_native));
1047 }
1048
1049 if (desc->dtargd_native[0] == '\0')
1050 desc->dtargd_ndx = DTRACE_ARGNONE;
1051}
316670eb
A
1052
1053static uint64_t
1054machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1055{
1056#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1057 uint64_t val = 0;
1058 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1059
1060 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1061
1062 if (uthread)
1063 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1064
1065 if (!stack)
1066 return(0);
1067
1068 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1069 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1070 val = (uint64_t)*(stack+argno);
1071 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1072 return (val);
1073}
1074