2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
38 #include <sys/kernel.h>
40 #include <sys/proc_internal.h>
41 #include <sys/syscall.h>
42 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/kdebug.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/kauth.h>
49 #include <sys/systm.h>
51 #include <security/audit/audit.h>
54 #include <i386/machine_routines.h>
55 #include <mach/i386/syscall_sw.h>
58 extern int32_t dtrace_systrace_syscall(struct proc
*, void *, int *);
59 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
62 extern void unix_syscall(x86_saved_state_t
*);
63 extern void unix_syscall64(x86_saved_state_t
*);
64 extern void *find_user_regs(thread_t
);
66 extern void x86_toggle_sysenter_arg_store(thread_t thread
, boolean_t valid
);
67 extern boolean_t
x86_sysenter_arg_store_isvalid(thread_t thread
);
69 /* dynamically generated at build time based on syscalls.master */
70 extern const char *syscallnames
[];
73 * Function: unix_syscall
75 * Inputs: state - pointer to the x86 saved state (must be the 32-bit flavor)
80 unix_syscall(x86_saved_state_t
*state
)
90 struct uthread
*uthread
;
91 x86_saved_state32_t
*regs
;
92 boolean_t args_in_uthread
;
94 assert(is_saved_state32(state
));
95 regs
= saved_state32(state
);
97 if (regs
->eax
== 0x800)
98 thread_exception_return();
100 thread
= current_thread();
101 uthread
= get_bsdthread_info(thread
);
104 /* Get the appropriate proc; may be different from task's for vfork() */
105 if (!(uthread
->uu_flag
& UT_VFORK
))
106 p
= (struct proc
*)get_bsdtask_info(current_task());
110 /* Verify that we are not being called from a task without a proc */
114 task_terminate_internal(current_task());
115 thread_exception_return();
119 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
120 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
121 code
, syscallnames
[code
>= NUM_SYSENT
? 63 : code
], (uint32_t)regs
->eip
);
122 args_in_uthread
= ((regs
->eax
& I386_SYSCALL_ARG_BYTES_MASK
) != 0) && x86_sysenter_arg_store_isvalid(thread
);
123 params
= (vm_offset_t
) (regs
->uesp
+ sizeof (int));
125 regs
->efl
&= ~(EFL_CF
);
127 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
129 if (callp
== sysent
) {
130 code
= fuword(params
);
131 params
+= sizeof(int);
132 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
135 vt
= (void *)uthread
->uu_arg
;
137 if (callp
->sy_arg_bytes
!= 0) {
140 assert((unsigned) callp
->sy_arg_bytes
<= sizeof (uthread
->uu_arg
));
141 if (!args_in_uthread
)
144 nargs
= callp
->sy_arg_bytes
;
145 error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
);
149 thread_exception_return();
157 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
158 *ip
, *(ip
+1), *(ip
+2), *(ip
+3), 0);
160 mungerp
= callp
->sy_arg_munge32
;
163 * If non-NULL, then call the syscall argument munger to
164 * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the
165 * first argument is NULL because we are munging in place
166 * after a copyin because the ABI currently doesn't use
167 * registers to pass system call arguments.
170 (*mungerp
)(NULL
, vt
);
172 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
176 * Delayed binding of thread credential to process credential, if we
177 * are not running with an explicitly set thread credential.
179 kauth_cred_uthread_update(uthread
, p
);
181 uthread
->uu_rval
[0] = 0;
182 uthread
->uu_rval
[1] = regs
->edx
;
183 uthread
->uu_flag
|= UT_NOTCANCELPT
;
187 uthread
->uu_iocount
= 0;
188 uthread
->uu_vpindex
= 0;
191 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
192 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &(uthread
->uu_rval
[0]));
193 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
195 mac_thread_userret(code
, error
, thread
);
199 if (uthread
->uu_iocount
)
200 printf("system call returned with uu_iocount != 0\n");
203 uthread
->t_dtrace_errno
= error
;
204 #endif /* CONFIG_DTRACE */
206 if (error
== ERESTART
) {
208 * Move the user's pc back to repeat the syscall:
209 * 5 bytes for a sysenter, or 2 for an int 8x.
210 * The SYSENTER_TF_CS covers single-stepping over a sysenter
211 * - see debug trap handler in idt.s/idt64.s
214 if (regs
->cs
== SYSENTER_CS
|| regs
->cs
== SYSENTER_TF_CS
) {
220 else if (error
!= EJUSTRETURN
) {
223 regs
->efl
|= EFL_CF
; /* carry bit */
224 } else { /* (not error) */
225 regs
->eax
= uthread
->uu_rval
[0];
226 regs
->edx
= uthread
->uu_rval
[1];
230 DEBUG_KPRINT_SYSCALL_UNIX(
231 "unix_syscall: error=%d retval=(%u,%u)\n",
232 error
, regs
->eax
, regs
->edx
);
234 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
237 * panic if we're holding the funnel
239 syscall_exit_funnelcheck();
241 if (uthread
->uu_lowpri_window
) {
243 * task is marked as a low priority I/O type
244 * and the I/O we issued while in this system call
245 * collided with normal I/O operations... we'll
246 * delay in order to mitigate the impact of this
247 * task on the normal operation of the system
249 throttle_lowpri_io(TRUE
);
252 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
253 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], p
->p_pid
, 0);
256 thread_exception_return();
262 unix_syscall64(x86_saved_state_t
*state
)
266 struct sysent
*callp
;
271 struct uthread
*uthread
;
272 x86_saved_state64_t
*regs
;
274 assert(is_saved_state64(state
));
275 regs
= saved_state64(state
);
277 if (regs
->rax
== 0x2000800)
278 thread_exception_return();
280 thread
= current_thread();
281 uthread
= get_bsdthread_info(thread
);
283 /* Get the appropriate proc; may be different from task's for vfork() */
284 if (!(uthread
->uu_flag
& UT_VFORK
))
285 p
= (struct proc
*)get_bsdtask_info(current_task());
289 /* Verify that we are not being called from a task without a proc */
292 regs
->isf
.rflags
|= EFL_CF
;
293 task_terminate_internal(current_task());
294 thread_exception_return();
299 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
300 DEBUG_KPRINT_SYSCALL_UNIX(
301 "unix_syscall64: code=%d(%s) rip=%llx\n",
302 code
, syscallnames
[code
>= NUM_SYSENT
? 63 : code
], regs
->isf
.rip
);
303 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
304 uargp
= (void *)(®s
->rdi
);
306 if (callp
== sysent
) {
308 * indirect system call... system call number
312 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
313 uargp
= (void *)(®s
->rsi
);
317 if (callp
->sy_narg
!= 0) {
319 uint64_t *ip
= (uint64_t *)uargp
;
321 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
322 (int)(*ip
), (int)(*(ip
+1)), (int)(*(ip
+2)), (int)(*(ip
+3)), 0);
324 assert(callp
->sy_narg
<= 8);
326 if (callp
->sy_narg
> args_in_regs
) {
329 copyin_count
= (callp
->sy_narg
- args_in_regs
) * sizeof(uint64_t);
331 error
= copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)®s
->v_arg6
, copyin_count
);
334 regs
->isf
.rflags
|= EFL_CF
;
335 thread_exception_return();
340 * XXX Turn 64 bit unsafe calls into nosys()
342 if (callp
->sy_flags
& UNSAFE_64BIT
) {
347 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
352 * Delayed binding of thread credential to process credential, if we
353 * are not running with an explicitly set thread credential.
355 kauth_cred_uthread_update(uthread
, p
);
357 uthread
->uu_rval
[0] = 0;
358 uthread
->uu_rval
[1] = 0;
361 uthread
->uu_flag
|= UT_NOTCANCELPT
;
364 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
365 error
= (*(callp
->sy_call
))((void *) p
, uargp
, &(uthread
->uu_rval
[0]));
366 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
369 uthread
->t_dtrace_errno
= error
;
370 #endif /* CONFIG_DTRACE */
372 if (error
== ERESTART
) {
374 * all system calls come through via the syscall instruction
375 * in 64 bit mode... it's 2 bytes in length
376 * move the user's pc back to repeat the syscall:
380 else if (error
!= EJUSTRETURN
) {
383 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
384 } else { /* (not error) */
386 switch (callp
->sy_return_type
) {
387 case _SYSCALL_RET_INT_T
:
388 regs
->rax
= uthread
->uu_rval
[0];
389 regs
->rdx
= uthread
->uu_rval
[1];
391 case _SYSCALL_RET_UINT_T
:
392 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
393 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
395 case _SYSCALL_RET_OFF_T
:
396 case _SYSCALL_RET_ADDR_T
:
397 case _SYSCALL_RET_SIZE_T
:
398 case _SYSCALL_RET_SSIZE_T
:
399 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
402 case _SYSCALL_RET_NONE
:
405 panic("unix_syscall: unknown return type");
408 regs
->isf
.rflags
&= ~EFL_CF
;
412 DEBUG_KPRINT_SYSCALL_UNIX(
413 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
414 error
, regs
->rax
, regs
->rdx
);
416 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
419 * panic if we're holding the funnel
421 syscall_exit_funnelcheck();
423 if (uthread
->uu_lowpri_window
) {
425 * task is marked as a low priority I/O type
426 * and the I/O we issued while in this system call
427 * collided with normal I/O operations... we'll
428 * delay in order to mitigate the impact of this
429 * task on the normal operation of the system
431 throttle_lowpri_io(TRUE
);
434 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
435 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], p
->p_pid
, 0);
437 thread_exception_return();
443 unix_syscall_return(int error
)
446 struct uthread
*uthread
;
450 struct sysent
*callp
;
452 thread
= current_thread();
453 uthread
= get_bsdthread_info(thread
);
458 if (proc_is64bit(p
)) {
459 x86_saved_state64_t
*regs
;
461 regs
= saved_state64(find_user_regs(thread
));
463 /* reconstruct code for tracing before blasting rax */
464 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
465 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
469 * indirect system call... system call number
475 if (callp
->sy_call
== dtrace_systrace_syscall
)
476 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
477 #endif /* CONFIG_DTRACE */
478 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
480 if (error
== ERESTART
) {
482 * all system calls come through via the syscall instruction
483 * in 64 bit mode... it's 2 bytes in length
484 * move the user's pc back to repeat the syscall:
488 else if (error
!= EJUSTRETURN
) {
491 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
492 } else { /* (not error) */
494 switch (callp
->sy_return_type
) {
495 case _SYSCALL_RET_INT_T
:
496 regs
->rax
= uthread
->uu_rval
[0];
497 regs
->rdx
= uthread
->uu_rval
[1];
499 case _SYSCALL_RET_UINT_T
:
500 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
501 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
503 case _SYSCALL_RET_OFF_T
:
504 case _SYSCALL_RET_ADDR_T
:
505 case _SYSCALL_RET_SIZE_T
:
506 case _SYSCALL_RET_SSIZE_T
:
507 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
510 case _SYSCALL_RET_NONE
:
513 panic("unix_syscall: unknown return type");
516 regs
->isf
.rflags
&= ~EFL_CF
;
519 DEBUG_KPRINT_SYSCALL_UNIX(
520 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
521 error
, regs
->rax
, regs
->rdx
);
523 x86_saved_state32_t
*regs
;
525 regs
= saved_state32(find_user_regs(thread
));
527 regs
->efl
&= ~(EFL_CF
);
528 /* reconstruct code for tracing before blasting eax */
529 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
530 callp
= (code
>= NUM_SYSENT
) ? &sysent
[63] : &sysent
[code
];
533 if (callp
->sy_call
== dtrace_systrace_syscall
)
534 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
535 #endif /* CONFIG_DTRACE */
536 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
538 if (callp
== sysent
) {
539 params
= (vm_offset_t
) (regs
->uesp
+ sizeof (int));
540 code
= fuword(params
);
542 if (error
== ERESTART
) {
543 regs
->eip
-= ((regs
->cs
& 0xffff) == SYSENTER_CS
) ? 5 : 2;
545 else if (error
!= EJUSTRETURN
) {
548 regs
->efl
|= EFL_CF
; /* carry bit */
549 } else { /* (not error) */
550 regs
->eax
= uthread
->uu_rval
[0];
551 regs
->edx
= uthread
->uu_rval
[1];
554 DEBUG_KPRINT_SYSCALL_UNIX(
555 "unix_syscall_return: error=%d retval=(%u,%u)\n",
556 error
, regs
->eax
, regs
->edx
);
560 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
563 * panic if we're holding the funnel
565 syscall_exit_funnelcheck();
567 if (uthread
->uu_lowpri_window
) {
569 * task is marked as a low priority I/O type
570 * and the I/O we issued while in this system call
571 * collided with normal I/O operations... we'll
572 * delay in order to mitigate the impact of this
573 * task on the normal operation of the system
575 throttle_lowpri_io(TRUE
);
578 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
579 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], p
->p_pid
, 0);
581 thread_exception_return();
587 __unused
const void *in32
,
593 /* we convert in place in out64 */
594 arg32
= (uint32_t *) out64
;
595 arg64
= (uint64_t *) out64
;
597 arg64
[5] = arg32
[6]; /* wwwlwW */
598 arg64
[4] = arg32
[5]; /* wwwlWw */
599 arg32
[7] = arg32
[4]; /* wwwLww (hi) */
600 arg32
[6] = arg32
[3]; /* wwwLww (lo) */
601 arg64
[2] = arg32
[2]; /* wwWlww */
602 arg64
[1] = arg32
[1]; /* wWwlww */
603 arg64
[0] = arg32
[0]; /* Wwwlww */
609 __unused
const void *in32
,
615 /* we convert in place in out64 */
616 arg32
= (uint32_t *) out64
;
617 arg64
= (uint64_t *) out64
;
619 arg64
[5] = arg32
[6]; /* wwlwwW */
620 arg64
[4] = arg32
[5]; /* wwlwWw */
621 arg64
[3] = arg32
[4]; /* wwlWww */
622 arg32
[5] = arg32
[3]; /* wwLwww (hi) */
623 arg32
[4] = arg32
[2]; /* wwLwww (lo) */
624 arg64
[1] = arg32
[1]; /* wWlwww */
625 arg64
[0] = arg32
[0]; /* Wwlwww */