]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. | |
23 | * Use is subject to license terms. | |
24 | */ | |
25 | ||
26 | /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ | |
27 | ||
28 | #if !defined(__APPLE__) | |
29 | #include <sys/dtrace.h> | |
30 | #include <sys/systrace.h> | |
31 | #include <sys/stat.h> | |
32 | #include <sys/systm.h> | |
33 | #include <sys/conf.h> | |
34 | #include <sys/ddi.h> | |
35 | #include <sys/sunddi.h> | |
36 | #include <sys/atomic.h> | |
37 | #define SYSTRACE_ARTIFICIAL_FRAMES 1 | |
38 | #else | |
39 | ||
40 | #ifdef KERNEL | |
41 | #ifndef _KERNEL | |
42 | #define _KERNEL /* Solaris vs. Darwin */ | |
43 | #endif | |
44 | #endif | |
45 | ||
46 | #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ | |
47 | #include <kern/thread.h> | |
48 | #include <mach/thread_status.h> | |
49 | /* XXX All of these should really be derived from syscall_sw.h */ | |
50 | #if defined(__i386__) || defined (__x86_64__) | |
51 | #define SYSCALL_CLASS_SHIFT 24 | |
52 | #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) | |
53 | #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) | |
54 | #define I386_SYSCALL_NUMBER_MASK (0xFFFF) | |
55 | ||
56 | typedef x86_saved_state_t savearea_t; | |
57 | #endif | |
58 | ||
59 | #include <sys/param.h> | |
60 | #include <sys/systm.h> | |
61 | #include <sys/proc.h> | |
62 | #include <sys/errno.h> | |
63 | #include <sys/ioctl.h> | |
64 | #include <sys/conf.h> | |
65 | #include <sys/fcntl.h> | |
66 | #include <miscfs/devfs/devfs.h> | |
67 | ||
68 | #include <sys/dtrace.h> | |
69 | #include <sys/dtrace_impl.h> | |
70 | #include "systrace.h" | |
71 | #include <sys/stat.h> | |
72 | #include <sys/systm.h> | |
73 | #include <sys/conf.h> | |
74 | #include <sys/user.h> | |
75 | ||
76 | #include <machine/pal_routines.h> | |
77 | ||
78 | #if defined(__i386__) || defined (__x86_64__) | |
79 | #define SYSTRACE_ARTIFICIAL_FRAMES 2 | |
80 | #define MACHTRACE_ARTIFICIAL_FRAMES 3 | |
81 | #else | |
82 | #error Unknown Architecture | |
83 | #endif | |
84 | ||
85 | #include <sys/sysent.h> | |
86 | #define sy_callc sy_call /* Map Solaris slot name to Darwin's */ | |
87 | #define NSYSCALL nsysent /* and is less than 500 or so */ | |
88 | ||
89 | extern const char *syscallnames[]; | |
90 | ||
91 | #include <sys/dtrace_glue.h> | |
92 | #define casptr dtrace_casptr | |
93 | #define membar_enter dtrace_membar_producer | |
94 | ||
95 | #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */ | |
96 | #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */ | |
97 | ||
98 | systrace_sysent_t *systrace_sysent = NULL; | |
99 | void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, | |
100 | uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); | |
101 | ||
102 | void | |
103 | systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, | |
104 | uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7) | |
105 | { | |
106 | #pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7) | |
107 | } | |
108 | ||
109 | int32_t | |
110 | dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) | |
111 | { | |
112 | boolean_t flavor; | |
113 | unsigned short code; | |
114 | ||
115 | systrace_sysent_t *sy; | |
116 | dtrace_id_t id; | |
117 | int32_t rval; | |
118 | #if 0 /* XXX */ | |
119 | proc_t *p; | |
120 | #endif | |
121 | syscall_arg_t *ip = (syscall_arg_t *)uap; | |
122 | ||
123 | #if defined(__i386__) || defined (__x86_64__) | |
124 | #pragma unused(flavor) | |
125 | { | |
126 | pal_register_cache_state(current_thread(), VALID); | |
127 | x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); | |
128 | ||
129 | if (is_saved_state64(tagged_regs)) { | |
130 | x86_saved_state64_t *regs = saved_state64(tagged_regs); | |
131 | code = regs->rax & SYSCALL_NUMBER_MASK; | |
132 | /* | |
133 | * Check for indirect system call... system call number | |
134 | * passed as 'arg0' | |
135 | */ | |
136 | if (code == 0) { | |
137 | code = regs->rdi; | |
138 | } | |
139 | } else { | |
140 | code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; | |
141 | ||
142 | if (code == 0) { | |
143 | vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); | |
144 | code = fuword(params); | |
145 | } | |
146 | } | |
147 | } | |
148 | #else | |
149 | #error Unknown Architecture | |
150 | #endif | |
151 | ||
152 | // Bounds "check" the value of code a la unix_syscall | |
153 | sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; | |
154 | ||
155 | if ((id = sy->stsy_entry) != DTRACE_IDNONE) { | |
156 | if (ip) | |
157 | (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7)); | |
158 | else | |
159 | (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0); | |
160 | } | |
161 | ||
162 | #if 0 /* XXX */ | |
163 | /* | |
164 | * We want to explicitly allow DTrace consumers to stop a process | |
165 | * before it actually executes the meat of the syscall. | |
166 | */ | |
167 | p = ttoproc(curthread); | |
168 | mutex_enter(&p->p_lock); | |
169 | if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { | |
170 | curthread->t_dtrace_stop = 0; | |
171 | stop(PR_REQUESTED, 0); | |
172 | } | |
173 | mutex_exit(&p->p_lock); | |
174 | #endif | |
175 | ||
176 | rval = (*sy->stsy_underlying)(pp, uap, rv); | |
177 | ||
178 | if ((id = sy->stsy_return) != DTRACE_IDNONE) { | |
179 | uint64_t munged_rv0, munged_rv1; | |
180 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); | |
181 | ||
182 | if (uthread) | |
183 | uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ | |
184 | ||
185 | /* | |
186 | * "Decode" rv for use in the call to dtrace_probe() | |
187 | */ | |
188 | if (rval == ERESTART) { | |
189 | munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ | |
190 | munged_rv1 = -1LL; | |
191 | } else if (rval != EJUSTRETURN) { | |
192 | if (rval) { | |
193 | munged_rv0 = -1LL; /* Mimic what libc will do. */ | |
194 | munged_rv1 = -1LL; | |
195 | } else { | |
196 | switch (sy->stsy_return_type) { | |
197 | case _SYSCALL_RET_INT_T: | |
198 | munged_rv0 = rv[0]; | |
199 | munged_rv1 = rv[1]; | |
200 | break; | |
201 | case _SYSCALL_RET_UINT_T: | |
202 | munged_rv0 = ((u_int)rv[0]); | |
203 | munged_rv1 = ((u_int)rv[1]); | |
204 | break; | |
205 | case _SYSCALL_RET_OFF_T: | |
206 | case _SYSCALL_RET_UINT64_T: | |
207 | munged_rv0 = *(u_int64_t *)rv; | |
208 | munged_rv1 = 0LL; | |
209 | break; | |
210 | case _SYSCALL_RET_ADDR_T: | |
211 | case _SYSCALL_RET_SIZE_T: | |
212 | case _SYSCALL_RET_SSIZE_T: | |
213 | munged_rv0 = *(user_addr_t *)rv; | |
214 | munged_rv1 = 0LL; | |
215 | break; | |
216 | case _SYSCALL_RET_NONE: | |
217 | munged_rv0 = 0LL; | |
218 | munged_rv1 = 0LL; | |
219 | break; | |
220 | default: | |
221 | munged_rv0 = 0LL; | |
222 | munged_rv1 = 0LL; | |
223 | break; | |
224 | } | |
225 | } | |
226 | } else { | |
227 | munged_rv0 = 0LL; | |
228 | munged_rv1 = 0LL; | |
229 | } | |
230 | ||
231 | /* | |
232 | * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says: | |
233 | * | |
234 | * "This is a bit of an historical artifact. At first, the syscall provider just | |
235 | * had its return value in arg0, and the fbt and pid providers had their return | |
236 | * values in arg1 (so that we could use arg0 for the offset of the return site). | |
237 | * | |
238 | * We inevitably started writing scripts where we wanted to see the return | |
239 | * values from probes in all three providers, and we made this script easier | |
240 | * to write by replicating the syscall return values in arg1 to match fbt and | |
241 | * pid. We debated briefly about removing the return value from arg0, but | |
242 | * decided that it would be less confusing to have the same data in two places | |
243 | * than to have some non-helpful, non-intuitive value in arg0. | |
244 | * | |
245 | * This change was made 4/23/2003 according to the DTrace project's putback log." | |
246 | */ | |
247 | (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0); | |
248 | } | |
249 | ||
250 | return (rval); | |
251 | } | |
252 | ||
253 | void | |
254 | dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) | |
255 | { | |
256 | systrace_sysent_t *sy; | |
257 | dtrace_id_t id; | |
258 | ||
259 | // Bounds "check" the value of code a la unix_syscall_return | |
260 | sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; | |
261 | ||
262 | if ((id = sy->stsy_return) != DTRACE_IDNONE) { | |
263 | uint64_t munged_rv0, munged_rv1; | |
264 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); | |
265 | ||
266 | if (uthread) | |
267 | uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ | |
268 | ||
269 | /* | |
270 | * "Decode" rv for use in the call to dtrace_probe() | |
271 | */ | |
272 | if (rval == ERESTART) { | |
273 | munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ | |
274 | munged_rv1 = -1LL; | |
275 | } else if (rval != EJUSTRETURN) { | |
276 | if (rval) { | |
277 | munged_rv0 = -1LL; /* Mimic what libc will do. */ | |
278 | munged_rv1 = -1LL; | |
279 | } else { | |
280 | switch (sy->stsy_return_type) { | |
281 | case _SYSCALL_RET_INT_T: | |
282 | munged_rv0 = rv[0]; | |
283 | munged_rv1 = rv[1]; | |
284 | break; | |
285 | case _SYSCALL_RET_UINT_T: | |
286 | munged_rv0 = ((u_int)rv[0]); | |
287 | munged_rv1 = ((u_int)rv[1]); | |
288 | break; | |
289 | case _SYSCALL_RET_OFF_T: | |
290 | case _SYSCALL_RET_UINT64_T: | |
291 | munged_rv0 = *(u_int64_t *)rv; | |
292 | munged_rv1 = 0LL; | |
293 | break; | |
294 | case _SYSCALL_RET_ADDR_T: | |
295 | case _SYSCALL_RET_SIZE_T: | |
296 | case _SYSCALL_RET_SSIZE_T: | |
297 | munged_rv0 = *(user_addr_t *)rv; | |
298 | munged_rv1 = 0LL; | |
299 | break; | |
300 | case _SYSCALL_RET_NONE: | |
301 | munged_rv0 = 0LL; | |
302 | munged_rv1 = 0LL; | |
303 | break; | |
304 | default: | |
305 | munged_rv0 = 0LL; | |
306 | munged_rv1 = 0LL; | |
307 | break; | |
308 | } | |
309 | } | |
310 | } else { | |
311 | munged_rv0 = 0LL; | |
312 | munged_rv1 = 0LL; | |
313 | } | |
314 | ||
315 | (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0); | |
316 | } | |
317 | } | |
318 | #endif /* __APPLE__ */ | |
319 | ||
320 | #define SYSTRACE_SHIFT 16 | |
321 | #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) | |
322 | #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) | |
323 | #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) | |
324 | #define SYSTRACE_RETURN(id) (id) | |
325 | ||
326 | #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) | |
327 | #error 1 << SYSTRACE_SHIFT must exceed number of system calls | |
328 | #endif | |
329 | ||
330 | static dev_info_t *systrace_devi; | |
331 | static dtrace_provider_id_t systrace_id; | |
332 | ||
333 | #if !defined (__APPLE__) | |
334 | static void | |
335 | systrace_init(struct sysent *actual, systrace_sysent_t **interposed) | |
336 | { | |
337 | systrace_sysent_t *sysent = *interposed; | |
338 | int i; | |
339 | ||
340 | if (sysent == NULL) { | |
341 | *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) * | |
342 | NSYSCALL, KM_SLEEP); | |
343 | } | |
344 | ||
345 | for (i = 0; i < NSYSCALL; i++) { | |
346 | struct sysent *a = &actual[i]; | |
347 | systrace_sysent_t *s = &sysent[i]; | |
348 | ||
349 | if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) | |
350 | continue; | |
351 | ||
352 | if (a->sy_callc == dtrace_systrace_syscall) | |
353 | continue; | |
354 | ||
355 | #ifdef _SYSCALL32_IMPL | |
356 | if (a->sy_callc == dtrace_systrace_syscall32) | |
357 | continue; | |
358 | #endif | |
359 | ||
360 | s->stsy_underlying = a->sy_callc; | |
361 | } | |
362 | } | |
363 | #else | |
364 | #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ | |
365 | static void | |
366 | systrace_init(struct sysent *actual, systrace_sysent_t **interposed) | |
367 | { | |
368 | ||
369 | systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning | |
370 | from bsd/sys/sysent.h */ | |
371 | int i; | |
372 | ||
373 | if (ssysent == NULL) { | |
374 | *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * | |
375 | NSYSCALL, KM_SLEEP); | |
376 | } | |
377 | ||
378 | for (i = 0; i < NSYSCALL; i++) { | |
379 | struct sysent *a = &actual[i]; | |
380 | systrace_sysent_t *s = &ssysent[i]; | |
381 | ||
382 | if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) | |
383 | continue; | |
384 | ||
385 | if (a->sy_callc == dtrace_systrace_syscall) | |
386 | continue; | |
387 | ||
388 | #ifdef _SYSCALL32_IMPL | |
389 | if (a->sy_callc == dtrace_systrace_syscall32) | |
390 | continue; | |
391 | #endif | |
392 | ||
393 | s->stsy_underlying = a->sy_callc; | |
394 | s->stsy_return_type = a->sy_return_type; | |
395 | } | |
396 | } | |
397 | ||
398 | #endif /* __APPLE__ */ | |
399 | ||
400 | /*ARGSUSED*/ | |
401 | static void | |
402 | systrace_provide(void *arg, const dtrace_probedesc_t *desc) | |
403 | { | |
404 | #pragma unused(arg) /* __APPLE__ */ | |
405 | int i; | |
406 | ||
407 | if (desc != NULL) | |
408 | return; | |
409 | ||
410 | systrace_init(sysent, &systrace_sysent); | |
411 | #ifdef _SYSCALL32_IMPL | |
412 | systrace_init(sysent32, &systrace_sysent32); | |
413 | #endif | |
414 | ||
415 | for (i = 0; i < NSYSCALL; i++) { | |
416 | if (systrace_sysent[i].stsy_underlying == NULL) | |
417 | continue; | |
418 | ||
419 | if (dtrace_probe_lookup(systrace_id, NULL, | |
420 | syscallnames[i], "entry") != 0) | |
421 | continue; | |
422 | ||
423 | (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], | |
424 | "entry", SYSTRACE_ARTIFICIAL_FRAMES, | |
425 | (void *)((uintptr_t)SYSTRACE_ENTRY(i))); | |
426 | (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], | |
427 | "return", SYSTRACE_ARTIFICIAL_FRAMES, | |
428 | (void *)((uintptr_t)SYSTRACE_RETURN(i))); | |
429 | ||
430 | systrace_sysent[i].stsy_entry = DTRACE_IDNONE; | |
431 | systrace_sysent[i].stsy_return = DTRACE_IDNONE; | |
432 | #ifdef _SYSCALL32_IMPL | |
433 | systrace_sysent32[i].stsy_entry = DTRACE_IDNONE; | |
434 | systrace_sysent32[i].stsy_return = DTRACE_IDNONE; | |
435 | #endif | |
436 | } | |
437 | } | |
438 | #if defined(__APPLE__) | |
439 | #undef systrace_init | |
440 | #endif | |
441 | ||
442 | /*ARGSUSED*/ | |
443 | static void | |
444 | systrace_destroy(void *arg, dtrace_id_t id, void *parg) | |
445 | { | |
446 | #pragma unused(arg,id) /* __APPLE__ */ | |
447 | ||
448 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
449 | ||
450 | #pragma unused(sysnum) /* __APPLE__ */ | |
451 | /* | |
452 | * There's nothing to do here but assert that we have actually been | |
453 | * disabled. | |
454 | */ | |
455 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
456 | ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); | |
457 | #ifdef _SYSCALL32_IMPL | |
458 | ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE); | |
459 | #endif | |
460 | } else { | |
461 | ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); | |
462 | #ifdef _SYSCALL32_IMPL | |
463 | ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE); | |
464 | #endif | |
465 | } | |
466 | } | |
467 | ||
468 | /*ARGSUSED*/ | |
469 | static int | |
470 | systrace_enable(void *arg, dtrace_id_t id, void *parg) | |
471 | { | |
472 | #pragma unused(arg) /* __APPLE__ */ | |
473 | ||
474 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
475 | int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || | |
476 | systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); | |
477 | ||
478 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
479 | systrace_sysent[sysnum].stsy_entry = id; | |
480 | #ifdef _SYSCALL32_IMPL | |
481 | systrace_sysent32[sysnum].stsy_entry = id; | |
482 | #endif | |
483 | } else { | |
484 | systrace_sysent[sysnum].stsy_return = id; | |
485 | #ifdef _SYSCALL32_IMPL | |
486 | systrace_sysent32[sysnum].stsy_return = id; | |
487 | #endif | |
488 | } | |
489 | ||
490 | if (enabled) { | |
491 | ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); | |
492 | return(0); | |
493 | } | |
494 | ||
495 | (void) casptr(&sysent[sysnum].sy_callc, | |
496 | (void *)systrace_sysent[sysnum].stsy_underlying, | |
497 | (void *)dtrace_systrace_syscall); | |
498 | #ifdef _SYSCALL32_IMPL | |
499 | (void) casptr(&sysent32[sysnum].sy_callc, | |
500 | (void *)systrace_sysent32[sysnum].stsy_underlying, | |
501 | (void *)dtrace_systrace_syscall32); | |
502 | #endif | |
503 | return (0); | |
504 | } | |
505 | ||
506 | /*ARGSUSED*/ | |
507 | static void | |
508 | systrace_disable(void *arg, dtrace_id_t id, void *parg) | |
509 | { | |
510 | #pragma unused(arg,id) /* __APPLE__ */ | |
511 | ||
512 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
513 | int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || | |
514 | systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); | |
515 | ||
516 | if (disable) { | |
517 | (void) casptr(&sysent[sysnum].sy_callc, | |
518 | (void *)dtrace_systrace_syscall, | |
519 | (void *)systrace_sysent[sysnum].stsy_underlying); | |
520 | ||
521 | #ifdef _SYSCALL32_IMPL | |
522 | (void) casptr(&sysent32[sysnum].sy_callc, | |
523 | (void *)dtrace_systrace_syscall32, | |
524 | (void *)systrace_sysent32[sysnum].stsy_underlying); | |
525 | #endif | |
526 | } | |
527 | ||
528 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
529 | systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; | |
530 | #ifdef _SYSCALL32_IMPL | |
531 | systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE; | |
532 | #endif | |
533 | } else { | |
534 | systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; | |
535 | #ifdef _SYSCALL32_IMPL | |
536 | systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE; | |
537 | #endif | |
538 | } | |
539 | } | |
540 | ||
541 | static dtrace_pattr_t systrace_attr = { | |
542 | { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, | |
543 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, | |
544 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, | |
545 | { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, | |
546 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, | |
547 | }; | |
548 | ||
549 | static dtrace_pops_t systrace_pops = { | |
550 | systrace_provide, | |
551 | NULL, | |
552 | systrace_enable, | |
553 | systrace_disable, | |
554 | NULL, | |
555 | NULL, | |
556 | NULL, | |
557 | NULL, | |
558 | NULL, | |
559 | systrace_destroy | |
560 | }; | |
561 | ||
562 | static int | |
563 | systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) | |
564 | { | |
565 | switch (cmd) { | |
566 | case DDI_ATTACH: | |
567 | break; | |
568 | case DDI_RESUME: | |
569 | return (DDI_SUCCESS); | |
570 | default: | |
571 | return (DDI_FAILURE); | |
572 | } | |
573 | ||
574 | #if !defined(__APPLE__) | |
575 | systrace_probe = (void (*)())dtrace_probe; | |
576 | membar_enter(); | |
577 | ||
578 | if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, | |
579 | DDI_PSEUDO, NULL) == DDI_FAILURE || | |
580 | dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, | |
581 | &systrace_pops, NULL, &systrace_id) != 0) { | |
582 | systrace_probe = systrace_stub; | |
583 | ddi_remove_minor_node(devi, NULL); | |
584 | return (DDI_FAILURE); | |
585 | } | |
586 | #else | |
587 | systrace_probe = (void(*))&dtrace_probe; | |
588 | membar_enter(); | |
589 | ||
590 | if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, | |
591 | DDI_PSEUDO, 0) == DDI_FAILURE || | |
592 | dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, | |
593 | &systrace_pops, NULL, &systrace_id) != 0) { | |
594 | systrace_probe = systrace_stub; | |
595 | ddi_remove_minor_node(devi, NULL); | |
596 | return (DDI_FAILURE); | |
597 | } | |
598 | #endif /* __APPLE__ */ | |
599 | ||
600 | ddi_report_dev(devi); | |
601 | systrace_devi = devi; | |
602 | ||
603 | return (DDI_SUCCESS); | |
604 | } | |
605 | ||
606 | #if !defined(__APPLE__) | |
607 | static int | |
608 | systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) | |
609 | { | |
610 | switch (cmd) { | |
611 | case DDI_DETACH: | |
612 | break; | |
613 | case DDI_SUSPEND: | |
614 | return (DDI_SUCCESS); | |
615 | default: | |
616 | return (DDI_FAILURE); | |
617 | } | |
618 | ||
619 | if (dtrace_unregister(systrace_id) != 0) | |
620 | return (DDI_FAILURE); | |
621 | ||
622 | ddi_remove_minor_node(devi, NULL); | |
623 | systrace_probe = systrace_stub; | |
624 | return (DDI_SUCCESS); | |
625 | } | |
626 | ||
627 | /*ARGSUSED*/ | |
628 | static int | |
629 | systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) | |
630 | { | |
631 | int error; | |
632 | ||
633 | switch (infocmd) { | |
634 | case DDI_INFO_DEVT2DEVINFO: | |
635 | *result = (void *)systrace_devi; | |
636 | error = DDI_SUCCESS; | |
637 | break; | |
638 | case DDI_INFO_DEVT2INSTANCE: | |
639 | *result = (void *)0; | |
640 | error = DDI_SUCCESS; | |
641 | break; | |
642 | default: | |
643 | error = DDI_FAILURE; | |
644 | } | |
645 | return (error); | |
646 | } | |
647 | ||
648 | /*ARGSUSED*/ | |
649 | static int | |
650 | systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) | |
651 | { | |
652 | return (0); | |
653 | } | |
654 | ||
655 | static struct cb_ops systrace_cb_ops = { | |
656 | systrace_open, /* open */ | |
657 | nodev, /* close */ | |
658 | nulldev, /* strategy */ | |
659 | nulldev, /* print */ | |
660 | nodev, /* dump */ | |
661 | nodev, /* read */ | |
662 | nodev, /* write */ | |
663 | nodev, /* ioctl */ | |
664 | nodev, /* devmap */ | |
665 | nodev, /* mmap */ | |
666 | nodev, /* segmap */ | |
667 | nochpoll, /* poll */ | |
668 | ddi_prop_op, /* cb_prop_op */ | |
669 | 0, /* streamtab */ | |
670 | D_NEW | D_MP /* Driver compatibility flag */ | |
671 | }; | |
672 | ||
673 | static struct dev_ops systrace_ops = { | |
674 | DEVO_REV, /* devo_rev, */ | |
675 | 0, /* refcnt */ | |
676 | systrace_info, /* get_dev_info */ | |
677 | nulldev, /* identify */ | |
678 | nulldev, /* probe */ | |
679 | systrace_attach, /* attach */ | |
680 | systrace_detach, /* detach */ | |
681 | nodev, /* reset */ | |
682 | &systrace_cb_ops, /* driver operations */ | |
683 | NULL, /* bus operations */ | |
684 | nodev /* dev power */ | |
685 | }; | |
686 | ||
687 | /* | |
688 | * Module linkage information for the kernel. | |
689 | */ | |
690 | static struct modldrv modldrv = { | |
691 | &mod_driverops, /* module type (this is a pseudo driver) */ | |
692 | "System Call Tracing", /* name of module */ | |
693 | &systrace_ops, /* driver ops */ | |
694 | }; | |
695 | ||
696 | static struct modlinkage modlinkage = { | |
697 | MODREV_1, | |
698 | (void *)&modldrv, | |
699 | NULL | |
700 | }; | |
701 | ||
702 | int | |
703 | _init(void) | |
704 | { | |
705 | return (mod_install(&modlinkage)); | |
706 | } | |
707 | ||
708 | int | |
709 | _info(struct modinfo *modinfop) | |
710 | { | |
711 | return (mod_info(&modlinkage, modinfop)); | |
712 | } | |
713 | ||
714 | int | |
715 | _fini(void) | |
716 | { | |
717 | return (mod_remove(&modlinkage)); | |
718 | } | |
719 | #else | |
720 | typedef kern_return_t (*mach_call_t)(void *); | |
721 | ||
722 | /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */ | |
723 | typedef void mach_munge_t(const void *, void *); | |
724 | ||
/*
 * Local declaration of a mach_trap_table[] entry.
 * NOTE(review): this must stay in step with the kernel's own definition
 * (see the XXX note about <kern/syscall_sw.h> above) -- confirm on update.
 */
typedef struct {
	int		mach_trap_arg_count;		/* argument count for the trap */
	int		(*mach_trap_function)(void);	/* handler; patched by machtrace_enable/disable */
#if 0 /* no active architectures use mungers for mach traps */
	mach_munge_t	*mach_trap_arg_munge32;	/* system call arguments for 32-bit */
	mach_munge_t	*mach_trap_arg_munge64;	/* system call arguments for 64-bit */
#endif
#if	MACH_ASSERT
	const char	*mach_trap_name;		/* present only in MACH_ASSERT builds */
#endif /* MACH_ASSERT */
} mach_trap_t;
736 | ||
737 | extern mach_trap_t mach_trap_table[]; | |
738 | extern int mach_trap_count; | |
739 | ||
740 | extern const char *mach_syscall_name_table[]; | |
741 | ||
742 | /* XXX From osfmk/i386/bsd_i386.c */ | |
743 | struct mach_call_args { | |
744 | syscall_arg_t arg1; | |
745 | syscall_arg_t arg2; | |
746 | syscall_arg_t arg3; | |
747 | syscall_arg_t arg4; | |
748 | syscall_arg_t arg5; | |
749 | syscall_arg_t arg6; | |
750 | syscall_arg_t arg7; | |
751 | syscall_arg_t arg8; | |
752 | syscall_arg_t arg9; | |
753 | }; | |
754 | ||
755 | #undef NSYSCALL | |
756 | #define NSYSCALL mach_trap_count | |
757 | ||
758 | #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) | |
759 | #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps | |
760 | #endif | |
761 | ||
762 | typedef systrace_sysent_t machtrace_sysent_t; | |
763 | ||
764 | static machtrace_sysent_t *machtrace_sysent = NULL; | |
765 | ||
766 | void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t, | |
767 | uint64_t, uint64_t, uint64_t); | |
768 | ||
769 | static dev_info_t *machtrace_devi; | |
770 | static dtrace_provider_id_t machtrace_id; | |
771 | ||
772 | static kern_return_t | |
773 | dtrace_machtrace_syscall(struct mach_call_args *args) | |
774 | { | |
775 | boolean_t flavor; | |
776 | unsigned short code; | |
777 | ||
778 | machtrace_sysent_t *sy; | |
779 | dtrace_id_t id; | |
780 | kern_return_t rval; | |
781 | #if 0 /* XXX */ | |
782 | proc_t *p; | |
783 | #endif | |
784 | syscall_arg_t *ip = (syscall_arg_t *)args; | |
785 | mach_call_t mach_call; | |
786 | ||
787 | #if defined(__i386__) || defined (__x86_64__) | |
788 | #pragma unused(flavor) | |
789 | { | |
790 | pal_register_cache_state(current_thread(), VALID); | |
791 | x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); | |
792 | ||
793 | if (is_saved_state64(tagged_regs)) { | |
794 | code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; | |
795 | } else { | |
796 | code = -saved_state32(tagged_regs)->eax; | |
797 | } | |
798 | } | |
799 | #else | |
800 | #error Unknown Architecture | |
801 | #endif | |
802 | ||
803 | sy = &machtrace_sysent[code]; | |
804 | ||
805 | if ((id = sy->stsy_entry) != DTRACE_IDNONE) | |
806 | (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); | |
807 | ||
808 | #if 0 /* XXX */ | |
809 | /* | |
810 | * We want to explicitly allow DTrace consumers to stop a process | |
811 | * before it actually executes the meat of the syscall. | |
812 | */ | |
813 | p = ttoproc(curthread); | |
814 | mutex_enter(&p->p_lock); | |
815 | if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { | |
816 | curthread->t_dtrace_stop = 0; | |
817 | stop(PR_REQUESTED, 0); | |
818 | } | |
819 | mutex_exit(&p->p_lock); | |
820 | #endif | |
821 | ||
822 | mach_call = (mach_call_t)(*sy->stsy_underlying); | |
823 | rval = mach_call(args); | |
824 | ||
825 | if ((id = sy->stsy_return) != DTRACE_IDNONE) | |
826 | (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); | |
827 | ||
828 | return (rval); | |
829 | } | |
830 | ||
831 | static void | |
832 | machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) | |
833 | { | |
834 | machtrace_sysent_t *msysent = *interposed; | |
835 | int i; | |
836 | ||
837 | if (msysent == NULL) { | |
838 | *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) * | |
839 | NSYSCALL, KM_SLEEP); | |
840 | } | |
841 | ||
842 | for (i = 0; i < NSYSCALL; i++) { | |
843 | mach_trap_t *a = &actual[i]; | |
844 | machtrace_sysent_t *s = &msysent[i]; | |
845 | ||
846 | if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) | |
847 | continue; | |
848 | ||
849 | if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall)) | |
850 | continue; | |
851 | ||
852 | s->stsy_underlying = (sy_call_t *)a->mach_trap_function; | |
853 | } | |
854 | } | |
855 | ||
856 | /*ARGSUSED*/ | |
857 | static void | |
858 | machtrace_provide(void *arg, const dtrace_probedesc_t *desc) | |
859 | { | |
860 | #pragma unused(arg) /* __APPLE__ */ | |
861 | ||
862 | int i; | |
863 | ||
864 | if (desc != NULL) | |
865 | return; | |
866 | ||
867 | machtrace_init(mach_trap_table, &machtrace_sysent); | |
868 | ||
869 | for (i = 0; i < NSYSCALL; i++) { | |
870 | ||
871 | if (machtrace_sysent[i].stsy_underlying == NULL) | |
872 | continue; | |
873 | ||
874 | if (dtrace_probe_lookup(machtrace_id, NULL, | |
875 | mach_syscall_name_table[i], "entry") != 0) | |
876 | continue; | |
877 | ||
878 | (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], | |
879 | "entry", MACHTRACE_ARTIFICIAL_FRAMES, | |
880 | (void *)((uintptr_t)SYSTRACE_ENTRY(i))); | |
881 | (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], | |
882 | "return", MACHTRACE_ARTIFICIAL_FRAMES, | |
883 | (void *)((uintptr_t)SYSTRACE_RETURN(i))); | |
884 | ||
885 | machtrace_sysent[i].stsy_entry = DTRACE_IDNONE; | |
886 | machtrace_sysent[i].stsy_return = DTRACE_IDNONE; | |
887 | } | |
888 | } | |
889 | ||
890 | /*ARGSUSED*/ | |
891 | static void | |
892 | machtrace_destroy(void *arg, dtrace_id_t id, void *parg) | |
893 | { | |
894 | #pragma unused(arg,id) /* __APPLE__ */ | |
895 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
896 | ||
897 | #pragma unused(sysnum) /* __APPLE__ */ | |
898 | ||
899 | /* | |
900 | * There's nothing to do here but assert that we have actually been | |
901 | * disabled. | |
902 | */ | |
903 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
904 | ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); | |
905 | } else { | |
906 | ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); | |
907 | } | |
908 | } | |
909 | ||
910 | /*ARGSUSED*/ | |
911 | static int | |
912 | machtrace_enable(void *arg, dtrace_id_t id, void *parg) | |
913 | { | |
914 | #pragma unused(arg) /* __APPLE__ */ | |
915 | ||
916 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
917 | int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || | |
918 | machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); | |
919 | ||
920 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
921 | machtrace_sysent[sysnum].stsy_entry = id; | |
922 | } else { | |
923 | machtrace_sysent[sysnum].stsy_return = id; | |
924 | } | |
925 | ||
926 | if (enabled) { | |
927 | ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall); | |
928 | return(0); | |
929 | } | |
930 | ||
931 | (void) casptr(&mach_trap_table[sysnum].mach_trap_function, | |
932 | (void *)machtrace_sysent[sysnum].stsy_underlying, | |
933 | (void *)dtrace_machtrace_syscall); | |
934 | return(0); | |
935 | } | |
936 | ||
937 | /*ARGSUSED*/ | |
938 | static void | |
939 | machtrace_disable(void *arg, dtrace_id_t id, void *parg) | |
940 | { | |
941 | #pragma unused(arg,id) /* __APPLE__ */ | |
942 | ||
943 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); | |
944 | int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || | |
945 | machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); | |
946 | ||
947 | if (disable) { | |
948 | (void) casptr(&mach_trap_table[sysnum].mach_trap_function, | |
949 | (void *)dtrace_machtrace_syscall, | |
950 | (void *)machtrace_sysent[sysnum].stsy_underlying); | |
951 | ||
952 | } | |
953 | ||
954 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { | |
955 | machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; | |
956 | } else { | |
957 | machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; | |
958 | } | |
959 | } | |
960 | ||
961 | static dtrace_pattr_t machtrace_attr = { | |
962 | { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, | |
963 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, | |
964 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, | |
965 | { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, | |
966 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, | |
967 | }; | |
968 | ||
969 | static dtrace_pops_t machtrace_pops = { | |
970 | machtrace_provide, | |
971 | NULL, | |
972 | machtrace_enable, | |
973 | machtrace_disable, | |
974 | NULL, | |
975 | NULL, | |
976 | NULL, | |
977 | NULL, | |
978 | NULL, | |
979 | machtrace_destroy | |
980 | }; | |
981 | ||
982 | static int | |
983 | machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) | |
984 | { | |
985 | switch (cmd) { | |
986 | case DDI_ATTACH: | |
987 | break; | |
988 | case DDI_RESUME: | |
989 | return (DDI_SUCCESS); | |
990 | default: | |
991 | return (DDI_FAILURE); | |
992 | } | |
993 | ||
994 | #if !defined(__APPLE__) | |
995 | machtrace_probe = (void (*)())dtrace_probe; | |
996 | membar_enter(); | |
997 | ||
998 | if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, | |
999 | DDI_PSEUDO, NULL) == DDI_FAILURE || | |
1000 | dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, | |
1001 | &machtrace_pops, NULL, &machtrace_id) != 0) { | |
1002 | machtrace_probe = systrace_stub; | |
1003 | #else | |
1004 | machtrace_probe = dtrace_probe; | |
1005 | membar_enter(); | |
1006 | ||
1007 | if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, | |
1008 | DDI_PSEUDO, 0) == DDI_FAILURE || | |
1009 | dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, | |
1010 | &machtrace_pops, NULL, &machtrace_id) != 0) { | |
1011 | machtrace_probe = (void (*))&systrace_stub; | |
1012 | #endif /* __APPLE__ */ | |
1013 | ddi_remove_minor_node(devi, NULL); | |
1014 | return (DDI_FAILURE); | |
1015 | } | |
1016 | ||
1017 | ddi_report_dev(devi); | |
1018 | machtrace_devi = devi; | |
1019 | ||
1020 | return (DDI_SUCCESS); | |
1021 | } | |
1022 | ||
1023 | d_open_t _systrace_open; | |
1024 | ||
1025 | int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p) | |
1026 | { | |
1027 | #pragma unused(dev,flags,devtype,p) | |
1028 | return 0; | |
1029 | } | |
1030 | ||
/* Negative on purpose: let the kernel pick the device number. */
#define SYSTRACE_MAJOR	(-24)
1032 | ||
1033 | /* | |
1034 | * A struct describing which functions will get invoked for certain | |
1035 | * actions. | |
1036 | */ | |
1037 | static struct cdevsw systrace_cdevsw = | |
1038 | { | |
1039 | _systrace_open, /* open */ | |
1040 | eno_opcl, /* close */ | |
1041 | eno_rdwrt, /* read */ | |
1042 | eno_rdwrt, /* write */ | |
1043 | eno_ioctl, /* ioctl */ | |
1044 | (stop_fcn_t *)nulldev, /* stop */ | |
1045 | (reset_fcn_t *)nulldev, /* reset */ | |
1046 | NULL, /* tty's */ | |
1047 | eno_select, /* select */ | |
1048 | eno_mmap, /* mmap */ | |
1049 | eno_strat, /* strategy */ | |
1050 | eno_getc, /* getc */ | |
1051 | eno_putc, /* putc */ | |
1052 | 0 /* type */ | |
1053 | }; | |
1054 | ||
1055 | static int gSysTraceInited = 0; | |
1056 | ||
1057 | void systrace_init( void ); | |
1058 | ||
1059 | void systrace_init( void ) | |
1060 | { | |
1061 | if (0 == gSysTraceInited) { | |
1062 | int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); | |
1063 | ||
1064 | if (majdevno < 0) { | |
1065 | printf("systrace_init: failed to allocate a major number!\n"); | |
1066 | gSysTraceInited = 0; | |
1067 | return; | |
1068 | } | |
1069 | ||
1070 | systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); | |
1071 | machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); | |
1072 | ||
1073 | gSysTraceInited = 1; | |
1074 | } else | |
1075 | panic("systrace_init: called twice!\n"); | |
1076 | } | |
1077 | #undef SYSTRACE_MAJOR | |
1078 | #endif /* __APPLE__ */ |