2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37 #include <mach/branch_predicates.h>
39 #include <sys/kernel.h>
41 #include <sys/proc_internal.h>
42 #include <sys/syscall.h>
43 #include <sys/systm.h>
45 #include <sys/errno.h>
46 #include <sys/kdebug.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/kauth.h>
50 #include <sys/systm.h>
52 #include <security/audit/audit.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
58 #include <machine/pal_routines.h>
/*
 * Forward declarations for routines and data defined outside this file.
 * NOTE(review): the original line numbers embedded at the start of these
 * lines skip (62 -> 65, 67 -> 69), so any preprocessor guards that may
 * have surrounded the declarations are not visible in this listing.
 */
/* DTrace systrace hooks: unix_syscall_return() compares sy_call against the first and calls the second. */
61 extern int32_t dtrace_systrace_syscall(struct proc
*, void *, int *);
62 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
/* The two system-call entry points defined further down, plus the saved-register lookup used by unix_syscall_return(). */
65 extern void unix_syscall(x86_saved_state_t
*);
66 extern void unix_syscall64(x86_saved_state_t
*);
67 extern void *find_user_regs(thread_t
);
/* Indexed by system-call number in the debug prints below. */
69 /* dynamically generated at build time based on syscalls.master */
70 extern const char *syscallnames
[];
/*
 * code_is_kdebug_trace(code): evaluates to non-zero when `code` is one of
 * the three kdebug trace system calls (SYS_kdebug_trace,
 * SYS_kdebug_trace64, SYS_kdebug_trace_string).  The callers in this file
 * use it to skip the syscall entry/exit tracepoints for those calls.
 *
 * The argument is expanded up to three times, so pass an expression
 * without side effects.
 */
#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
	((code) == SYS_kdebug_trace64) || \
	((code) == SYS_kdebug_trace_string))
/*
 * unix_syscall: system-call entry for a thread whose saved state is 32-bit
 * (asserted via is_saved_state32).  It picks the sysent entry from eax,
 * copies the arguments in from the user stack, calls the handler and
 * stores the result in eax/edx, with the carry flag set on error.
 * Declared noreturn; the visible exits all go through
 * thread_exception_return().
 *
 * NOTE(review): the original line numbers embedded at the start of lines
 * skip in places (e.g. 85 -> 95), so local declarations, braces,
 * "else" lines and some conditions are absent from this listing.
 */
77 * Function: unix_syscall
79 * Inputs: regs - pointer to i386 save area
83 __attribute__((noreturn
))
85 unix_syscall(x86_saved_state_t
*state
)
95 struct uthread
*uthread
;
96 x86_saved_state32_t
*regs
;
100 assert(is_saved_state32(state
));
101 regs
= saved_state32(state
);
/* eax == 0x800 returns to user space at once, before any dispatch work. NOTE(review): the meaning of 0x800 is not shown here. */
103 if (regs
->eax
== 0x800)
104 thread_exception_return();
106 thread
= current_thread();
107 uthread
= get_bsdthread_info(thread
);
/* Reset the per-thread proc refcount; it is checked again just before returning. */
109 uthread_reset_proc_refcount(uthread
);
111 /* Get the appropriate proc; may be different from task's for vfork() */
112 is_vfork
= uthread
->uu_flag
& UT_VFORK
;
/* NOTE(review): the statement for the vfork case and the "else" are on lines missing from this listing. */
113 if (__improbable(is_vfork
!= 0))
116 p
= (struct proc
*)get_bsdtask_info(current_task());
/* The system-call number is eax masked with I386_SYSCALL_NUMBER_MASK. */
118 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
/* Out-of-range codes are printed under the SYS_invalid name. */
119 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
120 code
, syscallnames
[code
>= nsysent
? SYS_invalid
: code
], (uint32_t)regs
->eip
);
/* User-space address of the arguments: one int above the user stack pointer. */
121 params
= (vm_offset_t
) (regs
->uesp
+ sizeof (int));
/* Clear the carry flag up front; it is set again below if the call fails. */
123 regs
->efl
&= ~(EFL_CF
);
/* Out-of-range codes dispatch to the SYS_invalid entry. */
125 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
/*
 * callp == sysent means entry 0 was selected: the real system-call number
 * is then read from the user stack with fuword() and the argument pointer
 * moves up by one int before the table lookup is repeated.
 */
127 if (__improbable(callp
== sysent
)) {
128 code
= fuword(params
);
129 params
+= sizeof(int);
130 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
/* vt is the kernel-side argument buffer, uthread->uu_arg. */
133 vt
= (void *)uthread
->uu_arg
;
/* Copy sy_arg_bytes of arguments from user space into uu_arg; the assert checks they fit. */
135 if (callp
->sy_arg_bytes
!= 0) {
136 #if CONFIG_REQUIRES_U32_MUNGING
139 #error U32 syscalls on x86_64 kernel requires munging
143 assert((unsigned) callp
->sy_arg_bytes
<= sizeof (uthread
->uu_arg
));
144 nargs
= callp
->sy_arg_bytes
;
145 error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
);
/* NOTE(review): the test guarding this early return is missing from the listing; presumably it is the copyin failure path -- confirm. */
149 thread_exception_return();
/* Emit the syscall-entry tracepoint with the first four argument words, except for the kdebug trace calls themselves. */
153 if (__probable(!code_is_kdebug_trace(code
))) {
156 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
157 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
158 *ip
, *(ip
+1), *(ip
+2), *(ip
+3), 0);
161 #if CONFIG_REQUIRES_U32_MUNGING
162 mungerp
= callp
->sy_arg_munge32
;
/* NOTE(review): the tracepoint call below is cut short in this listing; its remaining arguments and closing parenthesis are missing. */
168 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
169 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
173 * Delayed binding of thread credential to process credential, if we
174 * are not running with an explicitly set thread credential.
176 kauth_cred_uthread_update(uthread
, p
);
/* Zero both return-value words before the handler runs. */
178 uthread
->uu_rval
[0] = 0;
179 uthread
->uu_rval
[1] = 0;
/* UT_NOTCANCELPT stays set for the duration of the call and is cleared again afterwards. */
180 uthread
->uu_flag
|= UT_NOTCANCELPT
;
/* Record the code on the uthread; unix_syscall_return() reads it back from here. */
181 uthread
->syscall_code
= code
;
185 uthread
->uu_iocount
= 0;
186 uthread
->uu_vpindex
= 0;
/* Call the handler with (proc, argument buffer, return-value array), bracketed by the audit enter/exit hooks. */
189 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
190 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &(uthread
->uu_rval
[0]));
191 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
/* A non-zero uu_iocount after the call is only reported, not treated as fatal. */
194 if (uthread
->uu_iocount
)
195 printf("system call returned with uu_iocount != 0\n");
198 uthread
->t_dtrace_errno
= error
;
199 #endif /* CONFIG_DTRACE */
/*
 * Result handling: ERESTART goes through pal_syscall_restart();
 * EJUSTRETURN leaves the saved registers untouched; any other error sets
 * the carry flag; success loads uu_rval[0]/uu_rval[1] into eax/edx.
 */
201 if (__improbable(error
== ERESTART
)) {
203 * Move the user's pc back to repeat the syscall:
204 * 5 bytes for a sysenter, or 2 for an int 8x.
205 * The SYSENTER_TF_CS covers single-stepping over a sysenter
206 * - see debug trap handler in idt.s/idt64.s
209 pal_syscall_restart(thread
, state
);
211 else if (error
!= EJUSTRETURN
) {
212 if (__improbable(error
)) {
214 regs
->efl
|= EFL_CF
; /* carry bit */
215 } else { /* (not error) */
217 * We split retval across two registers, in case the
218 * syscall had a 64-bit return value, in which case
219 * eax/edx matches the function call ABI.
221 regs
->eax
= uthread
->uu_rval
[0];
222 regs
->edx
= uthread
->uu_rval
[1];
226 DEBUG_KPRINT_SYSCALL_UNIX(
227 "unix_syscall: error=%d retval=(%u,%u)\n",
228 error
, regs
->eax
, regs
->edx
);
230 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
/* Debug/development builds assert that no allocation name was left set on the thread. */
232 #if DEBUG || DEVELOPMENT
233 kern_allocation_name_t
234 prior __assert_only
= thread_set_allocation_name(NULL
);
235 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
236 #endif /* DEBUG || DEVELOPMENT */
238 if (__improbable(uthread
->uu_lowpri_window
)) {
240 * task is marked as a low priority I/O type
241 * and the I/O we issued while in this system call
242 * collided with normal I/O operations... we'll
243 * delay in order to mitigate the impact of this
244 * task on the normal operation of the system
246 throttle_lowpri_io(1);
/* Syscall-exit tracepoint carrying error, both return words and pid. NOTE(review): the assignment of pid is not visible in this listing. */
248 if (__probable(!code_is_kdebug_trace(code
)))
249 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
250 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
251 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], pid
, 0);
/* After a successful execve on a non-vfork thread, call pal_execve_return(). */
253 if (__improbable(!is_vfork
&& callp
->sy_call
== (sy_call_t
*)execve
&& !error
)) {
254 pal_execve_return(thread
);
/* The proc refcount reset at entry must be back to zero here, otherwise panic. */
258 if (__improbable(uthread_get_proc_refcount(uthread
) != 0)) {
259 panic("system call returned with uu_proc_refcount != 0");
263 thread_exception_return();
/*
 * unix_syscall64: system-call entry for a thread whose saved state is
 * 64-bit (asserted via is_saved_state64).  It picks the sysent entry from
 * rax, gathers the arguments from the saved registers (and from the user
 * stack when sy_narg exceeds args_in_regs), calls the handler and stores
 * the result in rax/rdx, with the carry flag set on error and cleared on
 * success.  Declared noreturn; the visible exits all go through
 * thread_exception_return().
 *
 * NOTE(review): the original line numbers embedded at the start of lines
 * skip in places (e.g. 269 -> 274), so local declarations, braces,
 * "else"/"break" lines and some conditions are absent from this listing.
 */
267 __attribute__((noreturn
))
269 unix_syscall64(x86_saved_state_t
*state
)
274 struct sysent
*callp
;
276 boolean_t args_start_at_rdi
;
279 struct uthread
*uthread
;
280 x86_saved_state64_t
*regs
;
283 assert(is_saved_state64(state
));
284 regs
= saved_state64(state
);
/* rax == 0x2000800 returns to user space at once, before any dispatch work. NOTE(review): the meaning of this value is not shown here. */
286 if (regs
->rax
== 0x2000800)
287 thread_exception_return();
289 thread
= current_thread();
290 uthread
= get_bsdthread_info(thread
);
/* Reset the per-thread proc refcount; it is checked again just before returning. */
292 uthread_reset_proc_refcount(uthread
);
294 /* Get the appropriate proc; may be different from task's for vfork() */
/* NOTE(review): the branch taken when UT_VFORK is set is on lines missing from this listing. */
295 if (__probable(!(uthread
->uu_flag
& UT_VFORK
)))
296 p
= (struct proc
*)get_bsdtask_info(current_task());
300 /* Verify that we are not being called from a task without a proc */
/* No proc: flag an error via the carry bit, terminate the task and return. */
301 if (__improbable(p
== NULL
)) {
303 regs
->isf
.rflags
|= EFL_CF
;
304 task_terminate_internal(current_task());
305 thread_exception_return();
/* The system-call number is rax masked with SYSCALL_NUMBER_MASK. */
309 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
/* Out-of-range codes are printed under the SYS_invalid name. */
310 DEBUG_KPRINT_SYSCALL_UNIX(
311 "unix_syscall64: code=%d(%s) rip=%llx\n",
312 code
, syscallnames
[code
>= nsysent
? SYS_invalid
: code
], regs
->isf
.rip
);
/* Out-of-range codes dispatch to the SYS_invalid entry. */
313 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
/* vt is the kernel-side argument buffer, uthread->uu_arg. */
315 vt
= (void *)uthread
->uu_arg
;
/*
 * callp == sysent means entry 0 was selected (the indirect call).  The
 * table lookup is repeated and args_start_at_rdi becomes FALSE, so the
 * register arguments are later copied starting at rsi instead of rdi.
 * NOTE(review): the line that reloads `code` is missing from this listing.
 */
317 if (__improbable(callp
== sysent
)) {
319 * indirect system call... system call number
323 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
324 args_start_at_rdi
= FALSE
;
327 args_start_at_rdi
= TRUE
;
/* Copy as many arguments as are held in registers (at most sy_narg) into the argument buffer. */
331 if (callp
->sy_narg
!= 0) {
332 assert(callp
->sy_narg
<= 8); /* size of uu_arg */
334 args_in_regs
= MIN(args_in_regs
, callp
->sy_narg
);
335 memcpy(vt
, args_start_at_rdi
? &regs
->rdi
: &regs
->rsi
, args_in_regs
* sizeof(syscall_arg_t
));
/* Emit the syscall-entry tracepoint with the first four arguments, except for the kdebug trace calls themselves. */
338 if (!code_is_kdebug_trace(code
)) {
339 uint64_t *ip
= (uint64_t *)vt
;
341 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
342 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
343 (int)(*ip
), (int)(*(ip
+1)), (int)(*(ip
+2)), (int)(*(ip
+3)), 0);
/*
 * Arguments beyond those held in registers are copied in from the user
 * stack, starting one user_addr_t above rsp, into the tail of uu_arg.
 * NOTE(review): the test of `error` that guards the carry-flag/return
 * lines below is missing from this listing -- presumably the copyin
 * failure path; confirm.
 */
346 if (__improbable(callp
->sy_narg
> args_in_regs
)) {
349 copyin_count
= (callp
->sy_narg
- args_in_regs
) * sizeof(syscall_arg_t
);
351 error
= copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)&uthread
->uu_arg
[args_in_regs
], copyin_count
);
354 regs
->isf
.rflags
|= EFL_CF
;
355 thread_exception_return();
/* NOTE(review): the tracepoint call below is cut short in this listing; its remaining arguments and closing parenthesis are missing. */
360 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
361 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
365 * Delayed binding of thread credential to process credential, if we
366 * are not running with an explicitly set thread credential.
368 kauth_cred_uthread_update(uthread
, p
);
/* Zero both return-value words before the handler runs. */
370 uthread
->uu_rval
[0] = 0;
371 uthread
->uu_rval
[1] = 0;
/* UT_NOTCANCELPT stays set for the duration of the call and is cleared again afterwards. */
372 uthread
->uu_flag
|= UT_NOTCANCELPT
;
/* Record the code on the uthread; unix_syscall_return() reads it back from here. */
373 uthread
->syscall_code
= code
;
377 uthread
->uu_iocount
= 0;
378 uthread
->uu_vpindex
= 0;
/* Call the handler with (proc, argument buffer, return-value array), bracketed by the audit enter/exit hooks. */
381 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
382 error
= (*(callp
->sy_call
))((void *) p
, vt
, &(uthread
->uu_rval
[0]));
383 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
/* A non-zero uu_iocount after the call is only reported, not treated as fatal. */
386 if (uthread
->uu_iocount
)
387 printf("system call returned with uu_iocount != 0\n");
391 uthread
->t_dtrace_errno
= error
;
392 #endif /* CONFIG_DTRACE */
/*
 * Result handling: ERESTART goes through pal_syscall_restart();
 * EJUSTRETURN leaves the saved registers untouched; any other error sets
 * the carry flag; success stores the value according to sy_return_type
 * and clears the carry flag.
 */
394 if (__improbable(error
== ERESTART
)) {
396 * all system calls come through via the syscall instruction
397 * in 64 bit mode... its 2 bytes in length
398 * move the user's pc back to repeat the syscall:
400 pal_syscall_restart( thread
, state
);
402 else if (error
!= EJUSTRETURN
) {
403 if (__improbable(error
)) {
405 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
406 } else { /* (not error) */
/*
 * int results go to rax/rdx as stored; unsigned results are cast through
 * u_int first; the 64-bit kinds read uu_rval[0..1] as a single uint64_t
 * into rax.  NOTE(review): the "break" statements and the label in front
 * of the panic are on lines missing from this listing.
 */
408 switch (callp
->sy_return_type
) {
409 case _SYSCALL_RET_INT_T
:
410 regs
->rax
= uthread
->uu_rval
[0];
411 regs
->rdx
= uthread
->uu_rval
[1];
413 case _SYSCALL_RET_UINT_T
:
414 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
415 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
417 case _SYSCALL_RET_OFF_T
:
418 case _SYSCALL_RET_ADDR_T
:
419 case _SYSCALL_RET_SIZE_T
:
420 case _SYSCALL_RET_SSIZE_T
:
421 case _SYSCALL_RET_UINT64_T
:
422 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
425 case _SYSCALL_RET_NONE
:
428 panic("unix_syscall: unknown return type");
431 regs
->isf
.rflags
&= ~EFL_CF
;
435 DEBUG_KPRINT_SYSCALL_UNIX(
436 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
437 error
, regs
->rax
, regs
->rdx
);
439 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
/* Debug/development builds assert that no allocation name was left set on the thread. */
441 #if DEBUG || DEVELOPMENT
442 kern_allocation_name_t
443 prior __assert_only
= thread_set_allocation_name(NULL
);
444 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
445 #endif /* DEBUG || DEVELOPMENT */
447 if (__improbable(uthread
->uu_lowpri_window
)) {
449 * task is marked as a low priority I/O type
450 * and the I/O we issued while in this system call
451 * collided with normal I/O operations... we'll
452 * delay in order to mitigate the impact of this
453 * task on the normal operation of the system
455 throttle_lowpri_io(1);
/* Syscall-exit tracepoint carrying error, both return words and pid. NOTE(review): the assignment of pid is not visible in this listing. */
457 if (__probable(!code_is_kdebug_trace(code
)))
458 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
459 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
460 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], pid
, 0);
/* The proc refcount reset at entry must be back to zero here, otherwise panic. */
463 if (__improbable(uthread_get_proc_refcount(uthread
))) {
464 panic("system call returned with uu_proc_refcount != 0");
468 thread_exception_return();
/*
 * unix_syscall_return: finishes a system call with the given error code.
 * It reads the call number back from uthread->syscall_code, runs the
 * DTrace and audit exit hooks, stores the result in the saved user
 * registers (64-bit or 32-bit layout, chosen by proc_is64bit(p)) and
 * ends in thread_exception_return().
 *
 * error: the result of the system call; ERESTART and EJUSTRETURN are
 *        handled specially, any other non-zero value sets the carry flag.
 *
 * NOTE(review): the original line numbers embedded at the start of lines
 * skip in places (e.g. 474 -> 477), so the return type, several local
 * declarations, the assignment of p, braces, "else"/"break" lines and
 * some conditions are absent from this listing.
 */
474 unix_syscall_return(int error
)
477 struct uthread
*uthread
;
480 struct sysent
*callp
;
482 thread
= current_thread();
483 uthread
= get_bsdthread_info(thread
);
485 pal_register_cache_state(thread
, DIRTY
);
/* 64-bit process: work on the 64-bit saved state. */
489 if (proc_is64bit(p
)) {
490 x86_saved_state64_t
*regs
;
492 regs
= saved_state64(find_user_regs(thread
));
/* Recover the call number recorded at syscall entry; out-of-range values map to SYS_invalid. */
494 code
= uthread
->syscall_code
;
495 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
/* If the table entry points at the DTrace systrace wrapper, give DTrace the return event. NOTE(review): the opening "#if" for this section is not visible. */
498 if (callp
->sy_call
== dtrace_systrace_syscall
)
499 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
500 #endif /* CONFIG_DTRACE */
501 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
/* ERESTART goes through pal_syscall_restart(); EJUSTRETURN leaves the saved registers untouched. */
503 if (error
== ERESTART
) {
507 pal_syscall_restart( thread
, find_user_regs(thread
) );
509 else if (error
!= EJUSTRETURN
) {
/* NOTE(review): the test of `error` that selects between the two arms below is on a line missing from this listing. */
512 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
513 } else { /* (not error) */
/*
 * int results go to rax/rdx as stored; unsigned results are cast through
 * u_int first; the 64-bit kinds read uu_rval[0..1] as a single uint64_t
 * into rax.  NOTE(review): the "break" statements and the label in front
 * of the panic are on lines missing from this listing.
 */
515 switch (callp
->sy_return_type
) {
516 case _SYSCALL_RET_INT_T
:
517 regs
->rax
= uthread
->uu_rval
[0];
518 regs
->rdx
= uthread
->uu_rval
[1];
520 case _SYSCALL_RET_UINT_T
:
521 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
522 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
524 case _SYSCALL_RET_OFF_T
:
525 case _SYSCALL_RET_ADDR_T
:
526 case _SYSCALL_RET_SIZE_T
:
527 case _SYSCALL_RET_SSIZE_T
:
528 case _SYSCALL_RET_UINT64_T
:
529 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
532 case _SYSCALL_RET_NONE
:
535 panic("unix_syscall: unknown return type");
/* Success: make sure the carry flag is clear. */
538 regs
->isf
.rflags
&= ~EFL_CF
;
541 DEBUG_KPRINT_SYSCALL_UNIX(
542 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
543 error
, regs
->rax
, regs
->rdx
);
/* 32-bit layout from here on. NOTE(review): the "else" introducing this part is on a line missing from this listing. */
545 x86_saved_state32_t
*regs
;
547 regs
= saved_state32(find_user_regs(thread
));
/* Clear the carry flag first; it is set again below on error. */
549 regs
->efl
&= ~(EFL_CF
);
/* Recover the call number recorded at syscall entry; out-of-range values map to SYS_invalid. */
551 code
= uthread
->syscall_code
;
552 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
555 if (callp
->sy_call
== dtrace_systrace_syscall
)
556 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
557 #endif /* CONFIG_DTRACE */
558 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
560 if (error
== ERESTART
) {
561 pal_syscall_restart( thread
, find_user_regs(thread
) );
563 else if (error
!= EJUSTRETURN
) {
/* NOTE(review): the test of `error` that selects between the two arms below is on a line missing from this listing. */
566 regs
->efl
|= EFL_CF
; /* carry bit */
567 } else { /* (not error) */
/* Success: the two return words go to eax and edx. */
568 regs
->eax
= uthread
->uu_rval
[0];
569 regs
->edx
= uthread
->uu_rval
[1];
572 DEBUG_KPRINT_SYSCALL_UNIX(
573 "unix_syscall_return: error=%d retval=(%u,%u)\n",
574 error
, regs
->eax
, regs
->edx
);
/* Common tail for both layouts: clear UT_NOTCANCELPT, which the entry paths set before calling the handler. */
578 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
/* Debug/development builds assert that no allocation name was left set on the thread. */
580 #if DEBUG || DEVELOPMENT
581 kern_allocation_name_t
582 prior __assert_only
= thread_set_allocation_name(NULL
);
583 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
584 #endif /* DEBUG || DEVELOPMENT */
586 if (uthread
->uu_lowpri_window
) {
588 * task is marked as a low priority I/O type
589 * and the I/O we issued while in this system call
590 * collided with normal I/O operations... we'll
591 * delay in order to mitigate the impact of this
592 * task on the normal operation of the system
594 throttle_lowpri_io(1);
/* Syscall-exit tracepoint carrying error, both return words and the pid, except for the kdebug trace calls themselves. */
596 if (!code_is_kdebug_trace(code
))
597 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
598 BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
599 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], p
->p_pid
, 0);
601 thread_exception_return();