]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
00ee62d29bfe25acf41bf52d6279b13db7bc55d3
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #ifdef KERNEL
29 #ifndef _KERNEL
30 #define _KERNEL /* Solaris vs. Darwin */
31 #endif
32 #endif
33
34 #include <kern/thread.h>
35 #include <mach/thread_status.h>
36
37 /* XXX All of these should really be derived from syscall_sw.h */
38 #if defined (__x86_64__)
39 #define SYSCALL_CLASS_SHIFT 24
40 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
41 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
42 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/syscall.h>
53 #include <miscfs/devfs/devfs.h>
54
55 #include <sys/dtrace.h>
56 #include <sys/dtrace_impl.h>
57 #include "systrace.h"
58 #include <sys/stat.h>
59 #include <sys/systm.h>
60 #include <sys/conf.h>
61 #include <sys/user.h>
62
63 #include <machine/pal_routines.h>
64
65 #if defined (__x86_64__)
66 #define SYSTRACE_ARTIFICIAL_FRAMES 2
67 #define MACHTRACE_ARTIFICIAL_FRAMES 3
68 #else
69 #error Unknown Architecture
70 #endif
71
72 #include <sys/sysent.h>
73 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
74 #define NSYSCALL nsysent /* and is less than 500 or so */
75
76 extern const char *syscallnames[];
77
78 #include <sys/dtrace_glue.h>
79 #define casptr dtrace_casptr
80 #define membar_enter dtrace_membar_producer
81
82 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
83 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
84
85 extern lck_attr_t* dtrace_lck_attr;
86 extern lck_grp_t* dtrace_lck_grp;
87 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
88
89 systrace_sysent_t *systrace_sysent = NULL;
90 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
91
92 static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);
93
94 void
95 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
96 uint64_t arg2, uint64_t arg3, uint64_t arg4)
97 {
98 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
99 }
100
101 int32_t
102 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
103 {
104 unsigned short code; /* The system call number */
105
106 systrace_sysent_t *sy;
107 dtrace_id_t id;
108 int32_t rval;
109 #if 0 /* XXX */
110 proc_t *p;
111 #endif
112 syscall_arg_t *ip = (syscall_arg_t *)uap;
113
114 #if defined (__x86_64__)
115 {
116 pal_register_cache_state(current_thread(), VALID);
117 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
118
119 if (is_saved_state64(tagged_regs)) {
120 x86_saved_state64_t *regs = saved_state64(tagged_regs);
121 code = regs->rax & SYSCALL_NUMBER_MASK;
122 /*
123 * Check for indirect system call... system call number
124 * passed as 'arg0'
125 */
126 if (code == 0) {
127 code = regs->rdi;
128 }
129 } else {
130 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
131
132 if (code == 0) {
133 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
134 code = fuword(params);
135 }
136 }
137 }
138 #else
139 #error Unknown Architecture
140 #endif
141
142 // Bounds "check" the value of code a la unix_syscall
143 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
144
145 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
146 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
147 if (uthread)
148 uthread->t_dtrace_syscall_args = (void *)ip;
149
150 if (ip)
151 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
152 else
153 (*systrace_probe)(id, 0, 0, 0, 0, 0);
154
155 if (uthread)
156 uthread->t_dtrace_syscall_args = (void *)0;
157 }
158
159 #if 0 /* XXX */
160 /*
161 * APPLE NOTE: Not implemented.
162 * We want to explicitly allow DTrace consumers to stop a process
163 * before it actually executes the meat of the syscall.
164 */
165 p = ttoproc(curthread);
166 mutex_enter(&p->p_lock);
167 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
168 curthread->t_dtrace_stop = 0;
169 stop(PR_REQUESTED, 0);
170 }
171 mutex_exit(&p->p_lock);
172 #endif
173
174 rval = (*sy->stsy_underlying)(pp, uap, rv);
175
176 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
177 uint64_t munged_rv0, munged_rv1;
178 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
179
180 if (uthread)
181 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
182
183 /*
184 * "Decode" rv for use in the call to dtrace_probe()
185 */
186 if (rval == ERESTART) {
187 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
188 munged_rv1 = -1LL;
189 } else if (rval != EJUSTRETURN) {
190 if (rval) {
191 munged_rv0 = -1LL; /* Mimic what libc will do. */
192 munged_rv1 = -1LL;
193 } else {
194 switch (sy->stsy_return_type) {
195 case _SYSCALL_RET_INT_T:
196 munged_rv0 = rv[0];
197 munged_rv1 = rv[1];
198 break;
199 case _SYSCALL_RET_UINT_T:
200 munged_rv0 = ((u_int)rv[0]);
201 munged_rv1 = ((u_int)rv[1]);
202 break;
203 case _SYSCALL_RET_OFF_T:
204 case _SYSCALL_RET_UINT64_T:
205 munged_rv0 = *(u_int64_t *)rv;
206 munged_rv1 = 0LL;
207 break;
208 case _SYSCALL_RET_ADDR_T:
209 case _SYSCALL_RET_SIZE_T:
210 case _SYSCALL_RET_SSIZE_T:
211 munged_rv0 = *(user_addr_t *)rv;
212 munged_rv1 = 0LL;
213 break;
214 case _SYSCALL_RET_NONE:
215 munged_rv0 = 0LL;
216 munged_rv1 = 0LL;
217 break;
218 default:
219 munged_rv0 = 0LL;
220 munged_rv1 = 0LL;
221 break;
222 }
223 }
224 } else {
225 munged_rv0 = 0LL;
226 munged_rv1 = 0LL;
227 }
228
229 /*
230 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
231 *
232 * "This is a bit of an historical artifact. At first, the syscall provider just
233 * had its return value in arg0, and the fbt and pid providers had their return
234 * values in arg1 (so that we could use arg0 for the offset of the return site).
235 *
236 * We inevitably started writing scripts where we wanted to see the return
237 * values from probes in all three providers, and we made this script easier
238 * to write by replicating the syscall return values in arg1 to match fbt and
239 * pid. We debated briefly about removing the return value from arg0, but
240 * decided that it would be less confusing to have the same data in two places
241 * than to have some non-helpful, non-intuitive value in arg0.
242 *
243 * This change was made 4/23/2003 according to the DTrace project's putback log."
244 */
245 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
246 }
247
248 return (rval);
249 }
250
251 void
252 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
253 {
254 systrace_sysent_t *sy;
255 dtrace_id_t id;
256
257 // Bounds "check" the value of code a la unix_syscall_return
258 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
259
260 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
261 uint64_t munged_rv0, munged_rv1;
262 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
263
264 if (uthread)
265 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
266
267 /*
268 * "Decode" rv for use in the call to dtrace_probe()
269 */
270 if (rval == ERESTART) {
271 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
272 munged_rv1 = -1LL;
273 } else if (rval != EJUSTRETURN) {
274 if (rval) {
275 munged_rv0 = -1LL; /* Mimic what libc will do. */
276 munged_rv1 = -1LL;
277 } else {
278 switch (sy->stsy_return_type) {
279 case _SYSCALL_RET_INT_T:
280 munged_rv0 = rv[0];
281 munged_rv1 = rv[1];
282 break;
283 case _SYSCALL_RET_UINT_T:
284 munged_rv0 = ((u_int)rv[0]);
285 munged_rv1 = ((u_int)rv[1]);
286 break;
287 case _SYSCALL_RET_OFF_T:
288 case _SYSCALL_RET_UINT64_T:
289 munged_rv0 = *(u_int64_t *)rv;
290 munged_rv1 = 0LL;
291 break;
292 case _SYSCALL_RET_ADDR_T:
293 case _SYSCALL_RET_SIZE_T:
294 case _SYSCALL_RET_SSIZE_T:
295 munged_rv0 = *(user_addr_t *)rv;
296 munged_rv1 = 0LL;
297 break;
298 case _SYSCALL_RET_NONE:
299 munged_rv0 = 0LL;
300 munged_rv1 = 0LL;
301 break;
302 default:
303 munged_rv0 = 0LL;
304 munged_rv1 = 0LL;
305 break;
306 }
307 }
308 } else {
309 munged_rv0 = 0LL;
310 munged_rv1 = 0LL;
311 }
312
313 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
314 }
315 }
316
317 #define SYSTRACE_SHIFT 16
318 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
319 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
320 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
321 #define SYSTRACE_RETURN(id) (id)
322
323 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
324 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
325 #endif
326
327 static dev_info_t *systrace_devi;
328 static dtrace_provider_id_t systrace_id;
329
330 /*
331 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
332 * See balanced undef below.
333 */
334 #define systrace_init _systrace_init
335
336 static void
337 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
338 {
339
340 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
341 from bsd/sys/sysent.h */
342 unsigned int i;
343
344 if (ssysent == NULL) {
345 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
346 NSYSCALL, KM_SLEEP);
347 }
348
349 for (i = 0; i < NSYSCALL; i++) {
350 struct sysent *a = &actual[i];
351 systrace_sysent_t *s = &ssysent[i];
352
353 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
354 continue;
355
356 if (a->sy_callc == dtrace_systrace_syscall)
357 continue;
358
359 #ifdef _SYSCALL32_IMPL
360 if (a->sy_callc == dtrace_systrace_syscall32)
361 continue;
362 #endif
363
364 s->stsy_underlying = a->sy_callc;
365 s->stsy_return_type = a->sy_return_type;
366 }
367 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
368 }
369
370
371 /*ARGSUSED*/
372 static void
373 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
374 {
375 #pragma unused(arg) /* __APPLE__ */
376 unsigned int i;
377
378 if (desc != NULL)
379 return;
380
381 systrace_init(sysent, &systrace_sysent);
382 #ifdef _SYSCALL32_IMPL
383 systrace_init(sysent32, &systrace_sysent32);
384 #endif
385
386 for (i = 0; i < NSYSCALL; i++) {
387 if (systrace_sysent[i].stsy_underlying == NULL)
388 continue;
389
390 if (dtrace_probe_lookup(systrace_id, NULL,
391 syscallnames[i], "entry") != 0)
392 continue;
393
394 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
395 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
396 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
397 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
398 "return", SYSTRACE_ARTIFICIAL_FRAMES,
399 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
400
401 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
402 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
403 #ifdef _SYSCALL32_IMPL
404 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
405 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
406 #endif
407 }
408 }
409 #undef systrace_init
410
411 /*ARGSUSED*/
412 static void
413 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
414 {
415 #pragma unused(arg,id) /* __APPLE__ */
416
417 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
418
419 #pragma unused(sysnum) /* __APPLE__ */
420 /*
421 * There's nothing to do here but assert that we have actually been
422 * disabled.
423 */
424 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
425 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
426 #ifdef _SYSCALL32_IMPL
427 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
428 #endif
429 } else {
430 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
431 #ifdef _SYSCALL32_IMPL
432 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
433 #endif
434 }
435 }
436
437 /*ARGSUSED*/
438 static int
439 systrace_enable(void *arg, dtrace_id_t id, void *parg)
440 {
441 #pragma unused(arg) /* __APPLE__ */
442
443 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
444 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
445 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
446
447 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
448 systrace_sysent[sysnum].stsy_entry = id;
449 #ifdef _SYSCALL32_IMPL
450 systrace_sysent32[sysnum].stsy_entry = id;
451 #endif
452 } else {
453 systrace_sysent[sysnum].stsy_return = id;
454 #ifdef _SYSCALL32_IMPL
455 systrace_sysent32[sysnum].stsy_return = id;
456 #endif
457 }
458
459 if (enabled) {
460 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
461 return(0);
462 }
463 #ifdef _SYSCALL32_IMPL
464 (void) casptr(&sysent32[sysnum].sy_callc,
465 (void *)systrace_sysent32[sysnum].stsy_underlying,
466 (void *)dtrace_systrace_syscall32);
467 #endif
468
469 lck_mtx_lock(&dtrace_systrace_lock);
470 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
471 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
472 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
473 }
474 lck_mtx_unlock(&dtrace_systrace_lock);
475 return (0);
476 }
477
478 /*ARGSUSED*/
479 static void
480 systrace_disable(void *arg, dtrace_id_t id, void *parg)
481 {
482 #pragma unused(arg,id) /* __APPLE__ */
483
484 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
485 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
486 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
487
488 if (disable) {
489 lck_mtx_lock(&dtrace_systrace_lock);
490 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
491 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
492 lck_mtx_unlock(&dtrace_systrace_lock);
493
494 #ifdef _SYSCALL32_IMPL
495 (void) casptr(&sysent32[sysnum].sy_callc,
496 (void *)dtrace_systrace_syscall32,
497 (void *)systrace_sysent32[sysnum].stsy_underlying);
498 #endif
499 }
500
501 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
502 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
503 #ifdef _SYSCALL32_IMPL
504 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
505 #endif
506 } else {
507 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
508 #ifdef _SYSCALL32_IMPL
509 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
510 #endif
511 }
512 }
513
514 static dtrace_pattr_t systrace_attr = {
515 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
516 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
517 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
518 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
519 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
520 };
521
522 static dtrace_pops_t systrace_pops = {
523 systrace_provide,
524 NULL,
525 systrace_enable,
526 systrace_disable,
527 NULL,
528 NULL,
529 NULL,
530 systrace_getarg,
531 NULL,
532 systrace_destroy
533 };
534
535 static int
536 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
537 {
538 switch (cmd) {
539 case DDI_ATTACH:
540 break;
541 case DDI_RESUME:
542 return (DDI_SUCCESS);
543 default:
544 return (DDI_FAILURE);
545 }
546
547 systrace_probe = (void(*))&dtrace_probe;
548 membar_enter();
549
550 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
551 DDI_PSEUDO, 0) == DDI_FAILURE ||
552 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
553 &systrace_pops, NULL, &systrace_id) != 0) {
554 systrace_probe = systrace_stub;
555 ddi_remove_minor_node(devi, NULL);
556 return (DDI_FAILURE);
557 }
558
559 ddi_report_dev(devi);
560 systrace_devi = devi;
561
562 return (DDI_SUCCESS);
563 }
564
565
566 /*
567 * APPLE NOTE: systrace_detach not implemented
568 */
569 #if !defined(__APPLE__)
570 static int
571 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
572 {
573 switch (cmd) {
574 case DDI_DETACH:
575 break;
576 case DDI_SUSPEND:
577 return (DDI_SUCCESS);
578 default:
579 return (DDI_FAILURE);
580 }
581
582 if (dtrace_unregister(systrace_id) != 0)
583 return (DDI_FAILURE);
584
585 ddi_remove_minor_node(devi, NULL);
586 systrace_probe = systrace_stub;
587 return (DDI_SUCCESS);
588 }
589 #endif /* __APPLE__ */
590
591
592 typedef kern_return_t (*mach_call_t)(void *);
593
594 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
595 typedef void mach_munge_t(void *);
596
597 typedef struct {
598 int mach_trap_arg_count;
599 kern_return_t (*mach_trap_function)(void *);
600 #if defined(__arm64__) || defined(__x86_64__)
601 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
602 #endif
603 int mach_trap_u32_words;
604 #if MACH_ASSERT
605 const char* mach_trap_name;
606 #endif /* MACH_ASSERT */
607 } mach_trap_t;
608
609 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
610 extern int mach_trap_count;
611
612 extern const char *mach_syscall_name_table[];
613
614 /* XXX From osfmk/i386/bsd_i386.c */
615 struct mach_call_args {
616 syscall_arg_t arg1;
617 syscall_arg_t arg2;
618 syscall_arg_t arg3;
619 syscall_arg_t arg4;
620 syscall_arg_t arg5;
621 syscall_arg_t arg6;
622 syscall_arg_t arg7;
623 syscall_arg_t arg8;
624 syscall_arg_t arg9;
625 };
626
627 #undef NSYSCALL
628 #define NSYSCALL mach_trap_count
629
630 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
631 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
632 #endif
633
634 typedef struct machtrace_sysent {
635 dtrace_id_t stsy_entry;
636 dtrace_id_t stsy_return;
637 kern_return_t (*stsy_underlying)(void *);
638 int32_t stsy_return_type;
639 } machtrace_sysent_t;
640
641 static machtrace_sysent_t *machtrace_sysent = NULL;
642
643 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
644 uint64_t, uint64_t, uint64_t);
645
646 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
647
648 static dev_info_t *machtrace_devi;
649 static dtrace_provider_id_t machtrace_id;
650
651 static kern_return_t
652 dtrace_machtrace_syscall(struct mach_call_args *args)
653 {
654 int code; /* The mach call number */
655
656 machtrace_sysent_t *sy;
657 dtrace_id_t id;
658 kern_return_t rval;
659 #if 0 /* XXX */
660 proc_t *p;
661 #endif
662 syscall_arg_t *ip = (syscall_arg_t *)args;
663 mach_call_t mach_call;
664
665 #if defined (__x86_64__)
666 {
667 pal_register_cache_state(current_thread(), VALID);
668 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
669
670 if (is_saved_state64(tagged_regs)) {
671 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
672 } else {
673 code = -saved_state32(tagged_regs)->eax;
674 }
675 }
676 #else
677 #error Unknown Architecture
678 #endif
679
680 sy = &machtrace_sysent[code];
681
682 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
683 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
684
685 if (uthread)
686 uthread->t_dtrace_syscall_args = (void *)ip;
687
688 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
689
690 if (uthread)
691 uthread->t_dtrace_syscall_args = (void *)0;
692 }
693
694 #if 0 /* XXX */
695 /*
696 * APPLE NOTE: Not implemented.
697 * We want to explicitly allow DTrace consumers to stop a process
698 * before it actually executes the meat of the syscall.
699 */
700 p = ttoproc(curthread);
701 mutex_enter(&p->p_lock);
702 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
703 curthread->t_dtrace_stop = 0;
704 stop(PR_REQUESTED, 0);
705 }
706 mutex_exit(&p->p_lock);
707 #endif
708
709 mach_call = (mach_call_t)(*sy->stsy_underlying);
710 rval = mach_call(args);
711
712 if ((id = sy->stsy_return) != DTRACE_IDNONE)
713 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
714
715 return (rval);
716 }
717
718 static void
719 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
720 {
721 machtrace_sysent_t *msysent = *interposed;
722 int i;
723
724 if (msysent == NULL) {
725 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
726 NSYSCALL, KM_SLEEP);
727 }
728
729 for (i = 0; i < NSYSCALL; i++) {
730 const mach_trap_t *a = &actual[i];
731 machtrace_sysent_t *s = &msysent[i];
732
733 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
734 continue;
735
736 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
737 continue;
738
739 s->stsy_underlying = a->mach_trap_function;
740 }
741 }
742
743 /*ARGSUSED*/
744 static void
745 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
746 {
747 #pragma unused(arg) /* __APPLE__ */
748
749 int i;
750
751 if (desc != NULL)
752 return;
753
754 machtrace_init(mach_trap_table, &machtrace_sysent);
755
756 for (i = 0; i < NSYSCALL; i++) {
757
758 if (machtrace_sysent[i].stsy_underlying == NULL)
759 continue;
760
761 if (dtrace_probe_lookup(machtrace_id, NULL,
762 mach_syscall_name_table[i], "entry") != 0)
763 continue;
764
765 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
766 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
767 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
768 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
769 "return", MACHTRACE_ARTIFICIAL_FRAMES,
770 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
771
772 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
773 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
774 }
775 }
776
777 /*ARGSUSED*/
778 static void
779 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
780 {
781 #pragma unused(arg,id) /* __APPLE__ */
782 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
783
784 #pragma unused(sysnum) /* __APPLE__ */
785
786 /*
787 * There's nothing to do here but assert that we have actually been
788 * disabled.
789 */
790 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
791 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
792 } else {
793 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
794 }
795 }
796
797 /*ARGSUSED*/
798 static int
799 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
800 {
801 #pragma unused(arg) /* __APPLE__ */
802
803 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
804 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
805 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
806
807 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
808 machtrace_sysent[sysnum].stsy_entry = id;
809 } else {
810 machtrace_sysent[sysnum].stsy_return = id;
811 }
812
813 if (enabled) {
814 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
815 return(0);
816 }
817
818 lck_mtx_lock(&dtrace_systrace_lock);
819
820 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
821 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
822 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
823 }
824
825 lck_mtx_unlock(&dtrace_systrace_lock);
826
827 return(0);
828 }
829
830 /*ARGSUSED*/
831 static void
832 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
833 {
834 #pragma unused(arg,id) /* __APPLE__ */
835
836 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
837 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
838 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
839
840 if (disable) {
841
842 lck_mtx_lock(&dtrace_systrace_lock);
843
844 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
845 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
846 }
847 lck_mtx_unlock(&dtrace_systrace_lock);
848 }
849
850 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
851 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
852 } else {
853 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
854 }
855 }
856
857 static dtrace_pattr_t machtrace_attr = {
858 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
859 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
860 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
861 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
862 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
863 };
864
865 static dtrace_pops_t machtrace_pops = {
866 machtrace_provide,
867 NULL,
868 machtrace_enable,
869 machtrace_disable,
870 NULL,
871 NULL,
872 NULL,
873 machtrace_getarg,
874 NULL,
875 machtrace_destroy
876 };
877
878 static int
879 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
880 {
881 switch (cmd) {
882 case DDI_ATTACH:
883 break;
884 case DDI_RESUME:
885 return (DDI_SUCCESS);
886 default:
887 return (DDI_FAILURE);
888 }
889
890 machtrace_probe = dtrace_probe;
891 membar_enter();
892
893 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
894 DDI_PSEUDO, 0) == DDI_FAILURE ||
895 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
896 &machtrace_pops, NULL, &machtrace_id) != 0) {
897 machtrace_probe = (void (*))&systrace_stub;
898 ddi_remove_minor_node(devi, NULL);
899 return (DDI_FAILURE);
900 }
901
902 ddi_report_dev(devi);
903 machtrace_devi = devi;
904
905 return (DDI_SUCCESS);
906 }
907
908 d_open_t _systrace_open;
909
910 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
911 {
912 #pragma unused(dev,flags,devtype,p)
913 return 0;
914 }
915
916 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
917
918 /*
919 * A struct describing which functions will get invoked for certain
920 * actions.
921 */
922 static struct cdevsw systrace_cdevsw =
923 {
924 _systrace_open, /* open */
925 eno_opcl, /* close */
926 eno_rdwrt, /* read */
927 eno_rdwrt, /* write */
928 eno_ioctl, /* ioctl */
929 (stop_fcn_t *)nulldev, /* stop */
930 (reset_fcn_t *)nulldev, /* reset */
931 NULL, /* tty's */
932 eno_select, /* select */
933 eno_mmap, /* mmap */
934 eno_strat, /* strategy */
935 eno_getc, /* getc */
936 eno_putc, /* putc */
937 0 /* type */
938 };
939
940 static int gSysTraceInited = 0;
941
942 void systrace_init( void );
943
944 void systrace_init( void )
945 {
946 if (0 == gSysTraceInited) {
947 if (dtrace_sdt_probes_restricted()) {
948 return;
949 }
950
951 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
952
953 if (majdevno < 0) {
954 printf("systrace_init: failed to allocate a major number!\n");
955 gSysTraceInited = 0;
956 return;
957 }
958
959 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
960 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
961
962 gSysTraceInited = 1;
963 } else
964 panic("systrace_init: called twice!\n");
965 }
966 #undef SYSTRACE_MAJOR
967
968 static uint64_t
969 systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
970 {
971 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
972 uint64_t val = 0;
973 syscall_arg_t *stack = (syscall_arg_t *)NULL;
974
975 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
976
977 if (uthread)
978 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
979
980 if (!stack)
981 return(0);
982
983 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
984 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
985 val = (uint64_t)*(stack+argno);
986 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
987 return (val);
988 }
989
990
991 static uint64_t
992 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
993 {
994 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
995 uint64_t val = 0;
996 syscall_arg_t *stack = (syscall_arg_t *)NULL;
997
998 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
999
1000 if (uthread)
1001 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1002
1003 if (!stack)
1004 return(0);
1005
1006 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1007 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1008 val = (uint64_t)*(stack+argno);
1009 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1010 return (val);
1011 }
1012