]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined(__i386__) || defined (__x86_64__)
51 #define SYSCALL_CLASS_SHIFT 24
52 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56 typedef x86_saved_state_t savearea_t;
57 #endif
58
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/proc.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
64 #include <sys/conf.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
67
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
70 #include "systrace.h"
71 #include <sys/stat.h>
72 #include <sys/systm.h>
73 #include <sys/conf.h>
74 #include <sys/user.h>
75
76 #if defined (__ppc__) || defined (__ppc64__)
77 #define SYSTRACE_ARTIFICIAL_FRAMES 3
78 #define MACHTRACE_ARTIFICIAL_FRAMES 4
79 #elif defined(__i386__) || defined (__x86_64__)
80 #define SYSTRACE_ARTIFICIAL_FRAMES 2
81 #define MACHTRACE_ARTIFICIAL_FRAMES 3
82 #else
83 #error Unknown Architecture
84 #endif
85
86 #include <sys/sysent.h>
87 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
88 #define NSYSCALL nsysent /* and is less than 500 or so */
89
90 extern const char *syscallnames[];
91
92 #include <sys/dtrace_glue.h>
93 #define casptr dtrace_casptr
94 #define membar_enter dtrace_membar_producer
95
96 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
97 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
98
99 systrace_sysent_t *systrace_sysent = NULL;
100 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
101 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
102
103 void
104 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
105 uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7)
106 {
107 #pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
108 }
109
110
111 int32_t
112 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
113 {
114 boolean_t flavor;
115 unsigned short code;
116
117 systrace_sysent_t *sy;
118 dtrace_id_t id;
119 int32_t rval;
120 #if 0 /* XXX */
121 proc_t *p;
122 #endif
123 syscall_arg_t *ip = (syscall_arg_t *)uap;
124
125 #if defined (__ppc__) || defined (__ppc64__)
126 {
127 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
128
129 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
130
131 if (flavor)
132 code = regs->save_r3;
133 else
134 code = regs->save_r0;
135
136 /*
137 * FIXME: unix_syscall screens for "unsafe calls" and instead calls nosys(), *not* sysent[code] !
138 */
139 }
140 #elif defined(__i386__) || defined (__x86_64__)
141 #pragma unused(flavor)
142 {
143 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
144
145 if (is_saved_state64(tagged_regs)) {
146 x86_saved_state64_t *regs = saved_state64(tagged_regs);
147 code = regs->rax & SYSCALL_NUMBER_MASK;
148 /*
149 * Check for indirect system call... system call number
150 * passed as 'arg0'
151 */
152 if (code == 0) {
153 code = regs->rdi;
154 }
155 } else {
156 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
157
158 if (code == 0) {
159 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
160 code = fuword(params);
161 }
162 }
163 }
164 #else
165 #error Unknown Architecture
166 #endif
167
168 // Bounds "check" the value of code a la unix_syscall
169 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
170
171 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
172 if (ip)
173 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7));
174 else
175 (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
176 }
177
178 #if 0 /* XXX */
179 /*
180 * We want to explicitly allow DTrace consumers to stop a process
181 * before it actually executes the meat of the syscall.
182 */
183 p = ttoproc(curthread);
184 mutex_enter(&p->p_lock);
185 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
186 curthread->t_dtrace_stop = 0;
187 stop(PR_REQUESTED, 0);
188 }
189 mutex_exit(&p->p_lock);
190 #endif
191
192 rval = (*sy->stsy_underlying)(pp, uap, rv);
193
194 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
195 uint64_t munged_rv0, munged_rv1;
196 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
197
198 if (uthread)
199 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
200
201 /*
202 * "Decode" rv for use in the call to dtrace_probe()
203 */
204 if (rval == ERESTART) {
205 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
206 munged_rv1 = -1LL;
207 } else if (rval != EJUSTRETURN) {
208 if (rval) {
209 munged_rv0 = -1LL; /* Mimic what libc will do. */
210 munged_rv1 = -1LL;
211 } else {
212 switch (sy->stsy_return_type) {
213 case _SYSCALL_RET_INT_T:
214 munged_rv0 = rv[0];
215 munged_rv1 = rv[1];
216 break;
217 case _SYSCALL_RET_UINT_T:
218 munged_rv0 = ((u_int)rv[0]);
219 munged_rv1 = ((u_int)rv[1]);
220 break;
221 case _SYSCALL_RET_OFF_T:
222 munged_rv0 = *(u_int64_t *)rv;
223 munged_rv1 = 0LL;
224 break;
225 case _SYSCALL_RET_ADDR_T:
226 case _SYSCALL_RET_SIZE_T:
227 case _SYSCALL_RET_SSIZE_T:
228 munged_rv0 = *(user_addr_t *)rv;
229 munged_rv1 = 0LL;
230 break;
231 case _SYSCALL_RET_NONE:
232 munged_rv0 = 0LL;
233 munged_rv1 = 0LL;
234 break;
235 default:
236 munged_rv0 = 0LL;
237 munged_rv1 = 0LL;
238 break;
239 }
240 }
241 } else {
242 munged_rv0 = 0LL;
243 munged_rv1 = 0LL;
244 }
245
246 /*
247 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
248 *
249 * "This is a bit of an historical artifact. At first, the syscall provider just
250 * had its return value in arg0, and the fbt and pid providers had their return
251 * values in arg1 (so that we could use arg0 for the offset of the return site).
252 *
253 * We inevitably started writing scripts where we wanted to see the return
254 * values from probes in all three providers, and we made this script easier
255 * to write by replicating the syscall return values in arg1 to match fbt and
256 * pid. We debated briefly about removing the return value from arg0, but
257 * decided that it would be less confusing to have the same data in two places
258 * than to have some non-helpful, non-intuitive value in arg0.
259 *
260 * This change was made 4/23/2003 according to the DTrace project's putback log."
261 */
262 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
263 }
264
265 return (rval);
266 }
267
268 void
269 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
270 {
271 systrace_sysent_t *sy;
272 dtrace_id_t id;
273
274 // Bounds "check" the value of code a la unix_syscall_return
275 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
276
277 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
278 uint64_t munged_rv0, munged_rv1;
279 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
280
281 if (uthread)
282 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
283
284 /*
285 * "Decode" rv for use in the call to dtrace_probe()
286 */
287 if (rval == ERESTART) {
288 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
289 munged_rv1 = -1LL;
290 } else if (rval != EJUSTRETURN) {
291 if (rval) {
292 munged_rv0 = -1LL; /* Mimic what libc will do. */
293 munged_rv1 = -1LL;
294 } else {
295 switch (sy->stsy_return_type) {
296 case _SYSCALL_RET_INT_T:
297 munged_rv0 = rv[0];
298 munged_rv1 = rv[1];
299 break;
300 case _SYSCALL_RET_UINT_T:
301 munged_rv0 = ((u_int)rv[0]);
302 munged_rv1 = ((u_int)rv[1]);
303 break;
304 case _SYSCALL_RET_OFF_T:
305 munged_rv0 = *(u_int64_t *)rv;
306 munged_rv1 = 0LL;
307 break;
308 case _SYSCALL_RET_ADDR_T:
309 case _SYSCALL_RET_SIZE_T:
310 case _SYSCALL_RET_SSIZE_T:
311 munged_rv0 = *(user_addr_t *)rv;
312 munged_rv1 = 0LL;
313 break;
314 case _SYSCALL_RET_NONE:
315 munged_rv0 = 0LL;
316 munged_rv1 = 0LL;
317 break;
318 default:
319 munged_rv0 = 0LL;
320 munged_rv1 = 0LL;
321 break;
322 }
323 }
324 } else {
325 munged_rv0 = 0LL;
326 munged_rv1 = 0LL;
327 }
328
329 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
330 }
331 }
332 #endif /* __APPLE__ */
333
334 #define SYSTRACE_SHIFT 16
335 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
336 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
337 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
338 #define SYSTRACE_RETURN(id) (id)
339
340 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
341 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
342 #endif
343
344 static dev_info_t *systrace_devi;
345 static dtrace_provider_id_t systrace_id;
346
347 #if !defined (__APPLE__)
348 static void
349 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
350 {
351 systrace_sysent_t *sysent = *interposed;
352 int i;
353
354 if (sysent == NULL) {
355 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
356 NSYSCALL, KM_SLEEP);
357 }
358
359 for (i = 0; i < NSYSCALL; i++) {
360 struct sysent *a = &actual[i];
361 systrace_sysent_t *s = &sysent[i];
362
363 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
364 continue;
365
366 if (a->sy_callc == dtrace_systrace_syscall)
367 continue;
368
369 #ifdef _SYSCALL32_IMPL
370 if (a->sy_callc == dtrace_systrace_syscall32)
371 continue;
372 #endif
373
374 s->stsy_underlying = a->sy_callc;
375 }
376 }
377 #else
378 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
379 static void
380 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
381 {
382
383 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
384 from bsd/sys/sysent.h */
385 int i;
386
387 if (ssysent == NULL) {
388 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
389 NSYSCALL, KM_SLEEP);
390 }
391
392 for (i = 0; i < NSYSCALL; i++) {
393 struct sysent *a = &actual[i];
394 systrace_sysent_t *s = &ssysent[i];
395
396 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
397 continue;
398
399 if (a->sy_callc == dtrace_systrace_syscall)
400 continue;
401
402 #ifdef _SYSCALL32_IMPL
403 if (a->sy_callc == dtrace_systrace_syscall32)
404 continue;
405 #endif
406
407 s->stsy_underlying = a->sy_callc;
408 s->stsy_return_type = a->sy_return_type;
409 }
410 }
411
412 #endif /* __APPLE__ */
413
414 /*ARGSUSED*/
415 static void
416 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
417 {
418 #pragma unused(arg) /* __APPLE__ */
419 int i;
420
421 if (desc != NULL)
422 return;
423
424 systrace_init(sysent, &systrace_sysent);
425 #ifdef _SYSCALL32_IMPL
426 systrace_init(sysent32, &systrace_sysent32);
427 #endif
428
429 for (i = 0; i < NSYSCALL; i++) {
430 if (systrace_sysent[i].stsy_underlying == NULL)
431 continue;
432
433 if (dtrace_probe_lookup(systrace_id, NULL,
434 syscallnames[i], "entry") != 0)
435 continue;
436
437 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
438 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
439 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
440 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
441 "return", SYSTRACE_ARTIFICIAL_FRAMES,
442 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
443
444 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
445 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
446 #ifdef _SYSCALL32_IMPL
447 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
448 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
449 #endif
450 }
451 }
452 #if defined(__APPLE__)
453 #undef systrace_init
454 #endif
455
456 /*ARGSUSED*/
457 static void
458 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
459 {
460 #pragma unused(arg,id) /* __APPLE__ */
461
462 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
463
464 #pragma unused(sysnum) /* __APPLE__ */
465 /*
466 * There's nothing to do here but assert that we have actually been
467 * disabled.
468 */
469 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
470 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
471 #ifdef _SYSCALL32_IMPL
472 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
473 #endif
474 } else {
475 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
476 #ifdef _SYSCALL32_IMPL
477 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
478 #endif
479 }
480 }
481
482 /*ARGSUSED*/
483 static void
484 systrace_enable(void *arg, dtrace_id_t id, void *parg)
485 {
486 #pragma unused(arg) /* __APPLE__ */
487
488 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
489 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
490 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
491
492 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
493 systrace_sysent[sysnum].stsy_entry = id;
494 #ifdef _SYSCALL32_IMPL
495 systrace_sysent32[sysnum].stsy_entry = id;
496 #endif
497 } else {
498 systrace_sysent[sysnum].stsy_return = id;
499 #ifdef _SYSCALL32_IMPL
500 systrace_sysent32[sysnum].stsy_return = id;
501 #endif
502 }
503
504 if (enabled) {
505 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
506 return;
507 }
508
509 (void) casptr(&sysent[sysnum].sy_callc,
510 (void *)systrace_sysent[sysnum].stsy_underlying,
511 (void *)dtrace_systrace_syscall);
512 #ifdef _SYSCALL32_IMPL
513 (void) casptr(&sysent32[sysnum].sy_callc,
514 (void *)systrace_sysent32[sysnum].stsy_underlying,
515 (void *)dtrace_systrace_syscall32);
516 #endif
517 }
518
519 /*ARGSUSED*/
520 static void
521 systrace_disable(void *arg, dtrace_id_t id, void *parg)
522 {
523 #pragma unused(arg,id) /* __APPLE__ */
524
525 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
526 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
527 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
528
529 if (disable) {
530 (void) casptr(&sysent[sysnum].sy_callc,
531 (void *)dtrace_systrace_syscall,
532 (void *)systrace_sysent[sysnum].stsy_underlying);
533
534 #ifdef _SYSCALL32_IMPL
535 (void) casptr(&sysent32[sysnum].sy_callc,
536 (void *)dtrace_systrace_syscall32,
537 (void *)systrace_sysent32[sysnum].stsy_underlying);
538 #endif
539 }
540
541 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
542 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
543 #ifdef _SYSCALL32_IMPL
544 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
545 #endif
546 } else {
547 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
548 #ifdef _SYSCALL32_IMPL
549 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
550 #endif
551 }
552 }
553
554 static dtrace_pattr_t systrace_attr = {
555 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
556 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
557 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
558 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
559 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
560 };
561
562 static dtrace_pops_t systrace_pops = {
563 systrace_provide,
564 NULL,
565 systrace_enable,
566 systrace_disable,
567 NULL,
568 NULL,
569 NULL,
570 NULL,
571 NULL,
572 systrace_destroy
573 };
574
575 static int
576 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
577 {
578 switch (cmd) {
579 case DDI_ATTACH:
580 break;
581 case DDI_RESUME:
582 return (DDI_SUCCESS);
583 default:
584 return (DDI_FAILURE);
585 }
586
587 #if !defined(__APPLE__)
588 systrace_probe = (void (*)())dtrace_probe;
589 membar_enter();
590
591 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
592 DDI_PSEUDO, NULL) == DDI_FAILURE ||
593 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
594 &systrace_pops, NULL, &systrace_id) != 0) {
595 systrace_probe = systrace_stub;
596 ddi_remove_minor_node(devi, NULL);
597 return (DDI_FAILURE);
598 }
599 #else
600 systrace_probe = (void(*))&dtrace_probe;
601 membar_enter();
602
603 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
604 DDI_PSEUDO, 0) == DDI_FAILURE ||
605 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
606 &systrace_pops, NULL, &systrace_id) != 0) {
607 systrace_probe = systrace_stub;
608 ddi_remove_minor_node(devi, NULL);
609 return (DDI_FAILURE);
610 }
611 #endif /* __APPLE__ */
612
613 ddi_report_dev(devi);
614 systrace_devi = devi;
615
616 return (DDI_SUCCESS);
617 }
618
619 #if !defined(__APPLE__)
620 static int
621 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
622 {
623 switch (cmd) {
624 case DDI_DETACH:
625 break;
626 case DDI_SUSPEND:
627 return (DDI_SUCCESS);
628 default:
629 return (DDI_FAILURE);
630 }
631
632 if (dtrace_unregister(systrace_id) != 0)
633 return (DDI_FAILURE);
634
635 ddi_remove_minor_node(devi, NULL);
636 systrace_probe = systrace_stub;
637 return (DDI_SUCCESS);
638 }
639
640 /*ARGSUSED*/
641 static int
642 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
643 {
644 int error;
645
646 switch (infocmd) {
647 case DDI_INFO_DEVT2DEVINFO:
648 *result = (void *)systrace_devi;
649 error = DDI_SUCCESS;
650 break;
651 case DDI_INFO_DEVT2INSTANCE:
652 *result = (void *)0;
653 error = DDI_SUCCESS;
654 break;
655 default:
656 error = DDI_FAILURE;
657 }
658 return (error);
659 }
660
661 /*ARGSUSED*/
662 static int
663 systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
664 {
665 return (0);
666 }
667
668 static struct cb_ops systrace_cb_ops = {
669 systrace_open, /* open */
670 nodev, /* close */
671 nulldev, /* strategy */
672 nulldev, /* print */
673 nodev, /* dump */
674 nodev, /* read */
675 nodev, /* write */
676 nodev, /* ioctl */
677 nodev, /* devmap */
678 nodev, /* mmap */
679 nodev, /* segmap */
680 nochpoll, /* poll */
681 ddi_prop_op, /* cb_prop_op */
682 0, /* streamtab */
683 D_NEW | D_MP /* Driver compatibility flag */
684 };
685
686 static struct dev_ops systrace_ops = {
687 DEVO_REV, /* devo_rev, */
688 0, /* refcnt */
689 systrace_info, /* get_dev_info */
690 nulldev, /* identify */
691 nulldev, /* probe */
692 systrace_attach, /* attach */
693 systrace_detach, /* detach */
694 nodev, /* reset */
695 &systrace_cb_ops, /* driver operations */
696 NULL, /* bus operations */
697 nodev /* dev power */
698 };
699
700 /*
701 * Module linkage information for the kernel.
702 */
703 static struct modldrv modldrv = {
704 &mod_driverops, /* module type (this is a pseudo driver) */
705 "System Call Tracing", /* name of module */
706 &systrace_ops, /* driver ops */
707 };
708
709 static struct modlinkage modlinkage = {
710 MODREV_1,
711 (void *)&modldrv,
712 NULL
713 };
714
715 int
716 _init(void)
717 {
718 return (mod_install(&modlinkage));
719 }
720
721 int
722 _info(struct modinfo *modinfop)
723 {
724 return (mod_info(&modlinkage, modinfop));
725 }
726
727 int
728 _fini(void)
729 {
730 return (mod_remove(&modlinkage));
731 }
732 #else
733 typedef kern_return_t (*mach_call_t)(void *);
734
735 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
736 typedef void mach_munge_t(const void *, void *);
737
738 typedef struct {
739 int mach_trap_arg_count;
740 int (*mach_trap_function)(void);
741 #if defined(__i386__)
742 boolean_t mach_trap_stack;
743 #else
744 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
745 mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */
746 #endif
747 #if !MACH_ASSERT
748 int mach_trap_unused;
749 #else
750 const char* mach_trap_name;
751 #endif /* !MACH_ASSERT */
752 } mach_trap_t;
753
754 extern mach_trap_t mach_trap_table[];
755 extern int mach_trap_count;
756
757 extern const char *mach_syscall_name_table[];
758
759 /* XXX From osfmk/i386/bsd_i386.c */
760 struct mach_call_args {
761 syscall_arg_t arg1;
762 syscall_arg_t arg2;
763 syscall_arg_t arg3;
764 syscall_arg_t arg4;
765 syscall_arg_t arg5;
766 syscall_arg_t arg6;
767 syscall_arg_t arg7;
768 syscall_arg_t arg8;
769 syscall_arg_t arg9;
770 };
771
772 #undef NSYSCALL
773 #define NSYSCALL mach_trap_count
774
775 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
776 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
777 #endif
778
779 typedef systrace_sysent_t machtrace_sysent_t;
780
781 static machtrace_sysent_t *machtrace_sysent = NULL;
782
783 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
784 uint64_t, uint64_t, uint64_t);
785
786 static dev_info_t *machtrace_devi;
787 static dtrace_provider_id_t machtrace_id;
788
789 static kern_return_t
790 dtrace_machtrace_syscall(struct mach_call_args *args)
791 {
792 boolean_t flavor;
793 unsigned short code;
794
795 machtrace_sysent_t *sy;
796 dtrace_id_t id;
797 kern_return_t rval;
798 #if 0 /* XXX */
799 proc_t *p;
800 #endif
801 syscall_arg_t *ip = (syscall_arg_t *)args;
802 mach_call_t mach_call;
803
804 #if defined (__ppc__) || defined (__ppc64__)
805 {
806 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
807
808 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
809
810 if (flavor)
811 code = -regs->save_r3;
812 else
813 code = -regs->save_r0;
814 }
815 #elif defined(__i386__) || defined (__x86_64__)
816 #pragma unused(flavor)
817 {
818 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
819
820 if (is_saved_state64(tagged_regs)) {
821 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
822 } else {
823 code = -saved_state32(tagged_regs)->eax;
824 }
825 }
826 #else
827 #error Unknown Architecture
828 #endif
829
830 sy = &machtrace_sysent[code];
831
832 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
833 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
834
835 #if 0 /* XXX */
836 /*
837 * We want to explicitly allow DTrace consumers to stop a process
838 * before it actually executes the meat of the syscall.
839 */
840 p = ttoproc(curthread);
841 mutex_enter(&p->p_lock);
842 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
843 curthread->t_dtrace_stop = 0;
844 stop(PR_REQUESTED, 0);
845 }
846 mutex_exit(&p->p_lock);
847 #endif
848
849 mach_call = (mach_call_t)(*sy->stsy_underlying);
850 rval = mach_call(args);
851
852 if ((id = sy->stsy_return) != DTRACE_IDNONE)
853 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
854
855 return (rval);
856 }
857
858 static void
859 machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
860 {
861 machtrace_sysent_t *msysent = *interposed;
862 int i;
863
864 if (msysent == NULL) {
865 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
866 NSYSCALL, KM_SLEEP);
867 }
868
869 for (i = 0; i < NSYSCALL; i++) {
870 mach_trap_t *a = &actual[i];
871 machtrace_sysent_t *s = &msysent[i];
872
873 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
874 continue;
875
876 if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
877 continue;
878
879 s->stsy_underlying = (sy_call_t *)a->mach_trap_function;
880 }
881 }
882
883 /*ARGSUSED*/
884 static void
885 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
886 {
887 #pragma unused(arg) /* __APPLE__ */
888
889 int i;
890
891 if (desc != NULL)
892 return;
893
894 machtrace_init(mach_trap_table, &machtrace_sysent);
895
896 for (i = 0; i < NSYSCALL; i++) {
897
898 if (machtrace_sysent[i].stsy_underlying == NULL)
899 continue;
900
901 if (dtrace_probe_lookup(machtrace_id, NULL,
902 mach_syscall_name_table[i], "entry") != 0)
903 continue;
904
905 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
906 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
907 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
908 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
909 "return", MACHTRACE_ARTIFICIAL_FRAMES,
910 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
911
912 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
913 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
914 }
915 }
916
917 /*ARGSUSED*/
918 static void
919 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
920 {
921 #pragma unused(arg,id) /* __APPLE__ */
922 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
923
924 #pragma unused(sysnum) /* __APPLE__ */
925
926 /*
927 * There's nothing to do here but assert that we have actually been
928 * disabled.
929 */
930 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
931 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
932 } else {
933 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
934 }
935 }
936
937 /*ARGSUSED*/
938 static void
939 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
940 {
941 #pragma unused(arg) /* __APPLE__ */
942
943 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
944 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
945 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
946
947 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
948 machtrace_sysent[sysnum].stsy_entry = id;
949 } else {
950 machtrace_sysent[sysnum].stsy_return = id;
951 }
952
953 if (enabled) {
954 ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
955 return;
956 }
957
958 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
959 (void *)machtrace_sysent[sysnum].stsy_underlying,
960 (void *)dtrace_machtrace_syscall);
961 }
962
963 /*ARGSUSED*/
964 static void
965 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
966 {
967 #pragma unused(arg,id) /* __APPLE__ */
968
969 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
970 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
971 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
972
973 if (disable) {
974 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
975 (void *)dtrace_machtrace_syscall,
976 (void *)machtrace_sysent[sysnum].stsy_underlying);
977
978 }
979
980 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
981 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
982 } else {
983 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
984 }
985 }
986
987 static dtrace_pattr_t machtrace_attr = {
988 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
989 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
990 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
991 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
992 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
993 };
994
995 static dtrace_pops_t machtrace_pops = {
996 machtrace_provide,
997 NULL,
998 machtrace_enable,
999 machtrace_disable,
1000 NULL,
1001 NULL,
1002 NULL,
1003 NULL,
1004 NULL,
1005 machtrace_destroy
1006 };
1007
1008 static int
1009 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1010 {
1011 switch (cmd) {
1012 case DDI_ATTACH:
1013 break;
1014 case DDI_RESUME:
1015 return (DDI_SUCCESS);
1016 default:
1017 return (DDI_FAILURE);
1018 }
1019
1020 #if !defined(__APPLE__)
1021 machtrace_probe = (void (*)())dtrace_probe;
1022 membar_enter();
1023
1024 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1025 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1026 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1027 &machtrace_pops, NULL, &machtrace_id) != 0) {
1028 machtrace_probe = systrace_stub;
1029 #else
1030 machtrace_probe = dtrace_probe;
1031 membar_enter();
1032
1033 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1034 DDI_PSEUDO, 0) == DDI_FAILURE ||
1035 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1036 &machtrace_pops, NULL, &machtrace_id) != 0) {
1037 machtrace_probe = (void (*))&systrace_stub;
1038 #endif /* __APPLE__ */
1039 ddi_remove_minor_node(devi, NULL);
1040 return (DDI_FAILURE);
1041 }
1042
1043 ddi_report_dev(devi);
1044 machtrace_devi = devi;
1045
1046 return (DDI_SUCCESS);
1047 }
1048
1049 d_open_t _systrace_open;
1050
1051 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1052 {
1053 #pragma unused(dev,flags,devtype,p)
1054 return 0;
1055 }
1056
1057 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1058
1059 /*
1060 * A struct describing which functions will get invoked for certain
1061 * actions.
1062 */
1063 static struct cdevsw systrace_cdevsw =
1064 {
1065 _systrace_open, /* open */
1066 eno_opcl, /* close */
1067 eno_rdwrt, /* read */
1068 eno_rdwrt, /* write */
1069 eno_ioctl, /* ioctl */
1070 (stop_fcn_t *)nulldev, /* stop */
1071 (reset_fcn_t *)nulldev, /* reset */
1072 NULL, /* tty's */
1073 eno_select, /* select */
1074 eno_mmap, /* mmap */
1075 eno_strat, /* strategy */
1076 eno_getc, /* getc */
1077 eno_putc, /* putc */
1078 0 /* type */
1079 };
1080
1081 static int gSysTraceInited = 0;
1082
1083 void systrace_init( void );
1084
1085 void systrace_init( void )
1086 {
1087 if (0 == gSysTraceInited) {
1088 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1089
1090 if (majdevno < 0) {
1091 printf("systrace_init: failed to allocate a major number!\n");
1092 gSysTraceInited = 0;
1093 return;
1094 }
1095
1096 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1097 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1098
1099 gSysTraceInited = 1;
1100 } else
1101 panic("systrace_init: called twice!\n");
1102 }
1103 #undef SYSTRACE_MAJOR
1104 #endif /* __APPLE__ */