2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <mach/machine/thread_status.h>
35 #include <mach/thread_act.h>
37 #include <sys/kernel.h>
39 #include <sys/proc_internal.h>
40 #include <sys/syscall.h>
41 #include <sys/systm.h>
43 #include <sys/errno.h>
44 #include <sys/kdebug.h>
45 #include <sys/sysent.h>
46 #include <sys/sysproto.h>
47 #include <sys/kauth.h>
48 #include <sys/systm.h>
50 #include <bsm/audit_kernel.h>
53 #include <i386/machine_routines.h>
54 #include <mach/i386/syscall_sw.h>
57 extern int32_t dtrace_systrace_syscall(struct proc
*, void *, int *);
58 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
61 extern void unix_syscall(x86_saved_state_t
*);
62 extern void unix_syscall64(x86_saved_state_t
*);
63 extern void *find_user_regs(thread_t
);
64 extern void throttle_lowpri_io(int *lowpri_window
, mount_t v_mount
);
66 extern void x86_toggle_sysenter_arg_store(thread_t thread
, boolean_t valid
);
67 extern boolean_t
x86_sysenter_arg_store_isvalid(thread_t thread
);
69 * Function: unix_syscall
71 * Inputs: regs - pointer to i386 save area
/*
 * unix_syscall: handle one system call arriving with a 32-bit saved state.
 *
 * The call number is taken from eax, the arguments are read from the user
 * stack just above uesp (or are already in uthread->uu_arg), the handler is
 * looked up in sysent[] and invoked, and the outcome is written back into
 * the saved registers: eax/edx receive uu_rval[0]/uu_rval[1] in the
 * non-error branch and EFL_CF is set in the error branch.
 *
 * state: saved register state; asserted to be the 32-bit flavour.
 *
 * NOTE(review): this text is a cross-reference listing extract. Every line
 * still carries its original line number and lines without identifiers
 * (braces, return type, some declarations and conditions) are absent, so
 * restore the pristine file before building.
 */
76 unix_syscall(x86_saved_state_t
*state
)
86 struct uthread
*uthread
;
87 x86_saved_state32_t
*regs
;
88 boolean_t args_in_uthread
;
90 assert(is_saved_state32(state
));
91 regs
= saved_state32(state
);
/* eax == 0x800 leaves at once through thread_exception_return(), no dispatch. */
93 if (regs
->eax
== 0x800)
94 thread_exception_return();
96 thread
= current_thread();
97 uthread
= get_bsdthread_info(thread
);
99 /* Get the appropriate proc; may be different from task's for vfork() */
100 if (!(uthread
->uu_flag
& UT_VFORK
))
101 p
= (struct proc
*)get_bsdtask_info(current_task());
105 /* Verify that we are not being called from a task without a proc */
109 task_terminate_internal(current_task());
110 thread_exception_return();
/*
 * Split eax: the masked low part is the call number; args_in_uthread is
 * set only when the arg-bytes field of eax is non-zero and the sysenter
 * argument store is reported valid for this thread.
 */
114 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
115 args_in_uthread
= ((regs
->eax
& I386_SYSCALL_ARG_BYTES_MASK
) != 0) && x86_sysenter_arg_store_isvalid(thread
);
/* User arguments start one int above the saved user stack pointer. */
116 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
118 regs
->efl
&= ~(EFL_CF
);
/* Call numbers at or beyond NUM_SYSENT are routed to sysent[63]. */
120 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
/*
 * callp == sysent means entry 0 was selected: the real call number is the
 * first user word, so fetch it, step params past it and look up again.
 */
122 if (callp
== sysent
) {
123 code
= fuword(params
);
124 params
+= sizeof(int);
125 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
128 vt
= (void *)uthread
->uu_arg
;
130 if (callp
->sy_arg_bytes
!= 0) {
133 assert((unsigned) callp
->sy_arg_bytes
<= sizeof (uthread
->uu_arg
));
/* Copy the arguments in from user space unless they are already in the uthread. */
134 if (!args_in_uthread
)
137 nargs
= callp
->sy_arg_bytes
;
138 error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
);
142 thread_exception_return();
150 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
151 *ip
, *(ip
+1), *(ip
+2), *(ip
+3), 0);
153 mungerp
= callp
->sy_arg_munge32
;
156 * If non-NULL, then call the syscall argument munger to
157 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
158 * first argument is NULL because we are munging in place
159 * after a copyin because the ABI currently doesn't use
160 * registers to pass system call arguments.
163 (*mungerp
)(NULL
, vt
);
165 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
169 * Delayed binding of thread credential to process credential, if we
170 * are not running with an explicitly set thread credential.
172 kauth_cred_uthread_update(uthread
, p
);
174 uthread
->uu_rval
[0] = 0;
175 uthread
->uu_rval
[1] = regs
->edx
;
176 uthread
->uu_flag
|= UT_NOTCANCELPT
;
180 uthread
->uu_iocount
= 0;
181 uthread
->uu_vpindex
= 0;
184 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
185 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &(uthread
->uu_rval
[0]));
186 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
189 if (uthread
->uu_iocount
)
190 joe_debug("system call returned with uu_iocount != 0");
193 uthread
->t_dtrace_errno
= error
;
194 #endif /* CONFIG_DTRACE */
196 if (error
== ERESTART
) {
198 * Move the user's pc back to repeat the syscall:
199 * 5 bytes for a sysenter, or 2 for an int 8x.
200 * The SYSENTER_TF_CS covers single-stepping over a sysenter
201 * - see debug trap handler in idt.s/idt64.s
203 if (regs
->cs
== SYSENTER_CS
|| regs
->cs
== SYSENTER_TF_CS
) {
209 else if (error
!= EJUSTRETURN
) {
212 regs
->efl
|= EFL_CF
; /* carry bit */
213 } else { /* (not error) */
214 regs
->eax
= uthread
->uu_rval
[0];
215 regs
->edx
= uthread
->uu_rval
[1];
219 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
222 * if we're holding the funnel panic
224 syscall_exit_funnelcheck();
226 if (uthread
->uu_lowpri_window
&& uthread
->v_mount
) {
228 * task is marked as a low priority I/O type
229 * and the I/O we issued while in this system call
230 * collided with normal I/O operations... we'll
231 * delay in order to mitigate the impact of this
232 * task on the normal operation of the system
234 throttle_lowpri_io(&uthread
->uu_lowpri_window
,uthread
->v_mount
);
237 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
238 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], 0, 0);
240 thread_exception_return();
/*
 * unix_syscall64: handle one system call arriving with a 64-bit saved state.
 *
 * The call number is rax masked with SYSCALL_NUMBER_MASK. Arguments are used
 * in place from the saved registers starting at rdi (rsi for the indirect
 * case, where callp == sysent); when sy_narg exceeds args_in_regs the
 * remainder is copied from the user stack into the save area at v_arg6.
 * After the handler runs, the non-error branch stores the result into
 * rax/rdx according to callp->sy_return_type and clears EFL_CF in rflags;
 * the error branch sets EFL_CF.
 *
 * state: saved register state; asserted to be the 64-bit flavour.
 *
 * NOTE(review): this text is a cross-reference listing extract. Every line
 * still carries its original line number and lines without identifiers
 * (braces, return type, some declarations and conditions) are absent, so
 * restore the pristine file before building.
 */
246 unix_syscall64(x86_saved_state_t
*state
)
250 struct sysent
*callp
;
255 struct uthread
*uthread
;
256 x86_saved_state64_t
*regs
;
258 assert(is_saved_state64(state
));
259 regs
= saved_state64(state
);
/* rax == 0x2000800 leaves at once through thread_exception_return(), no dispatch. */
261 if (regs
->rax
== 0x2000800)
262 thread_exception_return();
264 thread
= current_thread();
265 uthread
= get_bsdthread_info(thread
);
267 /* Get the appropriate proc; may be different from task's for vfork() */
268 if (!(uthread
->uu_flag
& UT_VFORK
))
269 p
= (struct proc
*)get_bsdtask_info(current_task());
273 /* Verify that we are not being called from a task without a proc */
276 regs
->isf
.rflags
|= EFL_CF
;
277 task_terminate_internal(current_task());
278 thread_exception_return();
/* Call numbers at or beyond NUM_SYSENT are routed to sysent[63]. */
283 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
284 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
/*
 * The argument block is the saved register area itself, starting at rdi.
 * NOTE(review): the '(R)egs' glyph sequence below looks like an HTML-entity
 * corruption of an address-of applied to regs - confirm against the
 * pristine file.
 */
285 uargp
= (void *)(®s
->rdi
);
287 if (callp
== sysent
) {
289 * indirect system call... system call number
293 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
/* For the indirect case the argument block starts one register later, at rsi. */
294 uargp
= (void *)(®s
->rsi
);
298 if (callp
->sy_narg
!= 0) {
300 uint64_t *ip
= (uint64_t *)uargp
;
302 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
303 (int)(*ip
), (int)(*(ip
+1)), (int)(*(ip
+2)), (int)(*(ip
+3)), 0);
305 assert(callp
->sy_narg
<= 8);
/*
 * Arguments beyond args_in_regs are fetched from the user stack, starting
 * sizeof(user_addr_t) above rsp, into the save area at v_arg6.
 */
307 if (callp
->sy_narg
> args_in_regs
) {
310 copyin_count
= (callp
->sy_narg
- args_in_regs
) * sizeof(uint64_t);
312 error
= copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)®s
->v_arg6
, copyin_count
);
315 regs
->isf
.rflags
|= EFL_CF
;
316 thread_exception_return();
321 * XXX Turn 64 bit unsafe calls into nosys()
323 if (callp
->sy_flags
& UNSAFE_64BIT
) {
328 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
333 * Delayed binding of thread credential to process credential, if we
334 * are not running with an explicitly set thread credential.
336 kauth_cred_uthread_update(uthread
, p
);
/* Both return-value slots start at zero before the handler runs. */
338 uthread
->uu_rval
[0] = 0;
339 uthread
->uu_rval
[1] = 0;
342 uthread
->uu_flag
|= UT_NOTCANCELPT
;
345 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
346 error
= (*(callp
->sy_call
))((void *) p
, uargp
, &(uthread
->uu_rval
[0]));
347 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
350 uthread
->t_dtrace_errno
= error
;
351 #endif /* CONFIG_DTRACE */
353 if (error
== ERESTART
) {
355 * all system calls come through via the syscall instruction
356 * in 64 bit mode... its 2 bytes in length
357 * move the user's pc back to repeat the syscall:
361 else if (error
!= EJUSTRETURN
) {
364 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
365 } else { /* (not error) */
367 switch (callp
->sy_return_type
) {
368 case _SYSCALL_RET_INT_T
:
369 regs
->rax
= uthread
->uu_rval
[0];
370 regs
->rdx
= uthread
->uu_rval
[1];
372 case _SYSCALL_RET_UINT_T
:
373 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
374 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
376 case _SYSCALL_RET_OFF_T
:
377 case _SYSCALL_RET_ADDR_T
:
378 case _SYSCALL_RET_SIZE_T
:
379 case _SYSCALL_RET_SSIZE_T
:
380 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
383 case _SYSCALL_RET_NONE
:
386 panic("unix_syscall: unknown return type");
389 regs
->isf
.rflags
&= ~EFL_CF
;
394 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
397 * if we're holding the funnel panic
399 syscall_exit_funnelcheck();
401 if (uthread
->uu_lowpri_window
&& uthread
->v_mount
) {
403 * task is marked as a low priority I/O type
404 * and the I/O we issued while in this system call
405 * collided with normal I/O operations... we'll
406 * delay in order to mitigate the impact of this
407 * task on the normal operation of the system
409 throttle_lowpri_io(&uthread
->uu_lowpri_window
,uthread
->v_mount
);
412 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
413 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], 0, 0);
415 thread_exception_return();
/*
 * unix_syscall_return: write the outcome of a system call into the saved
 * user registers of the current thread and leave through
 * thread_exception_return().
 *
 * error: result code of the call. ERESTART and EJUSTRETURN are tested
 *        first; the branch marked "carry bit" sets EFL_CF and the branch
 *        marked "(not error)" copies uu_rval into the result registers.
 *
 * The register state comes from find_user_regs(thread). When
 * proc_is64bit(p) holds it is treated as 64-bit state (rax/rdx, rflags),
 * otherwise as 32-bit state (eax/edx, efl). In both paths the call number
 * is recomputed from the saved rax/eax for tracing before that register is
 * overwritten.
 *
 * NOTE(review): on ERESTART the 32-bit path here rewinds eip by 5 only when
 * (cs & 0xffff) == SYSENTER_CS, whereas unix_syscall also accepts
 * SYSENTER_TF_CS for the sysenter case - confirm whether the single-step
 * selector should be handled here as well.
 *
 * NOTE(review): this text is a cross-reference listing extract. Every line
 * still carries its original line number and lines without identifiers
 * (braces, return type, some declarations and conditions) are absent, so
 * restore the pristine file before building.
 */
421 unix_syscall_return(int error
)
424 struct uthread
*uthread
;
428 struct sysent
*callp
;
430 thread
= current_thread();
431 uthread
= get_bsdthread_info(thread
);
/* Pick the register layout that matches the process: 64-bit first, 32-bit further down. */
435 if (proc_is64bit(p
)) {
436 x86_saved_state64_t
*regs
;
438 regs
= saved_state64(find_user_regs(thread
));
440 /* reconstruct code for tracing before blasting rax */
441 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
442 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
446 * indirect system call... system call number
452 if (callp
->sy_call
== dtrace_systrace_syscall
)
453 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
454 #endif /* CONFIG_DTRACE */
456 if (error
== ERESTART
) {
458 * all system calls come through via the syscall instruction
459 * in 64 bit mode... its 2 bytes in length
460 * move the user's pc back to repeat the syscall:
464 else if (error
!= EJUSTRETURN
) {
467 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
468 } else { /* (not error) */
470 switch (callp
->sy_return_type
) {
471 case _SYSCALL_RET_INT_T
:
472 regs
->rax
= uthread
->uu_rval
[0];
473 regs
->rdx
= uthread
->uu_rval
[1];
475 case _SYSCALL_RET_UINT_T
:
476 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
477 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
479 case _SYSCALL_RET_OFF_T
:
480 case _SYSCALL_RET_ADDR_T
:
481 case _SYSCALL_RET_SIZE_T
:
482 case _SYSCALL_RET_SSIZE_T
:
483 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
486 case _SYSCALL_RET_NONE
:
489 panic("unix_syscall: unknown return type");
492 regs
->isf
.rflags
&= ~EFL_CF
;
496 x86_saved_state32_t
*regs
;
498 regs
= saved_state32(find_user_regs(thread
));
500 regs
->efl
&= ~(EFL_CF
);
501 /* reconstruct code for tracing before blasting eax */
502 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
503 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
506 if (callp
->sy_call
== dtrace_systrace_syscall
)
507 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
508 #endif /* CONFIG_DTRACE */
510 if (callp
== sysent
) {
511 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
512 code
= fuword(params
);
514 if (error
== ERESTART
) {
515 regs
->eip
-= ((regs
->cs
& 0xffff) == SYSENTER_CS
) ? 5 : 2;
517 else if (error
!= EJUSTRETURN
) {
520 regs
->efl
|= EFL_CF
; /* carry bit */
521 } else { /* (not error) */
522 regs
->eax
= uthread
->uu_rval
[0];
523 regs
->edx
= uthread
->uu_rval
[1];
529 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
532 * if we're holding the funnel panic
534 syscall_exit_funnelcheck();
536 if (uthread
->uu_lowpri_window
&& uthread
->v_mount
) {
538 * task is marked as a low priority I/O type
539 * and the I/O we issued while in this system call
540 * collided with normal I/O operations... we'll
541 * delay in order to mitigate the impact of this
542 * task on the normal operation of the system
544 throttle_lowpri_io(&uthread
->uu_lowpri_window
,uthread
->v_mount
);
547 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
548 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], 0, 0);
550 thread_exception_return();
556 __unused
const void *in32
,
562 /*
 * We convert in place in out64: arg32 and arg64 both alias that buffer.
 * Six 64-bit slots are produced from seven packed 32-bit words. Slots are
 * filled from the highest index down, so every 32-bit source word is read
 * before a wider store lands on it. Slot 3 is the one 64-bit input; its
 * two halves are moved as a pair of 32-bit words (lo to arg32[6], hi to
 * arg32[7]) rather than widened. In the pattern comments the capital
 * letter marks the slot being written on that line.
 * NOTE(review): the line naming this routine is absent from this extract;
 * the per-line patterns suggest the wwwlww layout - confirm.
 */
563 arg32
= (uint32_t *) out64
;
564 arg64
= (uint64_t *) out64
;
566 arg64
[5] = arg32
[6]; /* wwwlwW */
567 arg64
[4] = arg32
[5]; /* wwwlWw */
568 arg32
[7] = arg32
[4]; /* wwwLww (hi) */
569 arg32
[6] = arg32
[3]; /* wwwLww (lo) */
570 arg64
[2] = arg32
[2]; /* wwWlww */
571 arg64
[1] = arg32
[1]; /* wWwlww */
572 arg64
[0] = arg32
[0]; /* Wwwlww */
578 __unused
const void *in32
,
584 /*
 * We convert in place in out64: arg32 and arg64 both alias that buffer.
 * Six 64-bit slots are produced from seven packed 32-bit words. Slots are
 * filled from the highest index down, so every 32-bit source word is read
 * before a wider store lands on it. Slot 2 is the one 64-bit input; its
 * two halves are moved as a pair of 32-bit words (lo to arg32[4], hi to
 * arg32[5]) rather than widened. In the pattern comments the capital
 * letter marks the slot being written on that line.
 * NOTE(review): the line naming this routine is absent from this extract;
 * the per-line patterns suggest the wwlwww layout - confirm.
 */
585 arg32
= (uint32_t *) out64
;
586 arg64
= (uint64_t *) out64
;
588 arg64
[5] = arg32
[6]; /* wwlwwW */
589 arg64
[4] = arg32
[5]; /* wwlwWw */
590 arg64
[3] = arg32
[4]; /* wwlWww */
591 arg32
[5] = arg32
[3]; /* wwLwww (hi) */
592 arg32
[4] = arg32
[2]; /* wwLwww (lo) */
593 arg64
[1] = arg32
[1]; /* wWlwww */
594 arg64
[0] = arg32
[0]; /* Wwlwww */