2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
38 #include <sys/kernel.h>
40 #include <sys/proc_internal.h>
41 #include <sys/syscall.h>
42 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/kdebug.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/kauth.h>
49 #include <sys/systm.h>
50 #include <sys/bitstring.h>
52 #include <security/audit/audit.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
58 #include <machine/pal_routines.h>
61 #include <security/mac_framework.h>
/*
 * DTrace systrace hooks. unix_syscall_return() compares a sysent entry's
 * sy_call against dtrace_systrace_syscall and, on a match, reports the
 * result through dtrace_systrace_syscall_return().
 */
65 extern int32_t dtrace_systrace_syscall(struct proc
*, void *, int *);
66 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
/*
 * Forward declarations of the two entry points defined in this file, and of
 * find_user_regs(), whose result unix_syscall_return() hands to
 * saved_state32()/saved_state64() and to pal_syscall_restart().
 */
69 extern void unix_syscall(x86_saved_state_t
*);
70 extern void unix_syscall64(x86_saved_state_t
*);
71 extern void *find_user_regs(thread_t
);
73 /* dynamically generated at build time based on syscalls.master */
/* Indexed by the clamped call number (syscode) in the debug trace output. */
74 extern const char *syscallnames
[];
/*
 * Evaluates to non-zero when `code` is one of the three kdebug trace system
 * calls (SYS_kdebug_trace, SYS_kdebug_trace64, SYS_kdebug_trace_string).
 * The dispatch and return paths in this file test it before emitting their
 * own KDBG_RELEASE start/end events, so those calls are not themselves traced
 * here. The argument is parenthesized at every use but is evaluated up to
 * three times, so it should not have side effects.
 */
76 #define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
77 ((code) == SYS_kdebug_trace64) || \
78 ((code) == SYS_kdebug_trace_string))
81 * Function: unix_syscall
83 * Inputs: regs - pointer to i386 save area
/*
 * unix_syscall: BSD system call dispatch for a thread whose saved user state
 * is 32-bit.
 *
 * NOTE(review): this copy of the function is missing interior lines (braces,
 * several local declarations, preprocessor guards), so the summary below
 * covers only the statements that are actually present.
 *
 * Visible flow:
 *  - asserts that `state` is a 32-bit saved state and takes its
 *    x86_saved_state32_t view;
 *  - resets the per-uthread proc reference count and selects the proc;
 *  - derives the call number from regs->eax masked with
 *    I386_SYSCALL_NUMBER_MASK; `syscode` is the table index, equal to `code`
 *    when below nsysent and SYS_invalid otherwise;
 *  - when the selected entry is sysent[0], re-reads the call number from
 *    `params` with fuword() and advances `params` by sizeof(int);
 *  - copies callp->sy_arg_bytes bytes of arguments from `params` into
 *    uthread->uu_arg with copyin();
 *  - zeroes uu_rval[], marks the thread UT_NOTCANCELPT, records the call
 *    number in uthread->syscall_code and calls callp->sy_call;
 *  - ERESTART is handed to pal_syscall_restart(); any other non-zero error
 *    except EJUSTRETURN sets EFL_CF; success stores uu_rval[0]/uu_rval[1] in
 *    eax/edx;
 *  - clears UT_NOTCANCELPT and syscall_code, applies low-priority I/O
 *    throttling when uu_lowpri_window is set, and panics if the proc
 *    reference count is non-zero;
 *  - leaves through thread_exception_return(), consistent with the noreturn
 *    attribute.
 */
87 __attribute__((noreturn
))
89 unix_syscall(x86_saved_state_t
*state
)
93 unsigned int code
, syscode
;
94 const struct sysent
*callp
;
99 struct uthread
*uthread
;
100 x86_saved_state32_t
*regs
;
104 assert(is_saved_state32(state
));
105 regs
= saved_state32(state
);
/* An eax value of 0x800 returns to user mode before any per-thread setup. */
107 if (regs
->eax
== 0x800) {
108 thread_exception_return();
111 thread
= current_thread();
112 uthread
= get_bsdthread_info(thread
);
114 uthread_reset_proc_refcount(uthread
);
116 /* Get the appropriate proc; may be different from task's for vfork() */
117 is_vfork
= uthread
->uu_flag
& UT_VFORK
;
118 if (__improbable(is_vfork
!= 0)) {
121 p
= (struct proc
*)get_bsdtask_info(current_task());
/* code keeps the masked call number; syscode is the bounds-checked sysent[] index. */
124 code
= regs
->eax
& I386_SYSCALL_NUMBER_MASK
;
125 syscode
= (code
< nsysent
) ? code
: SYS_invalid
;
126 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
127 code
, syscallnames
[syscode
], (uint32_t)regs
->eip
);
/* Argument fetch starts at regs->uesp + sizeof(int). */
128 params
= (vm_offset_t
) (regs
->uesp
+ sizeof(int));
/* Clear the carry flag up front; the error path further down sets it again. */
130 regs
->efl
&= ~(EFL_CF
);
132 callp
= &sysent
[syscode
];
/* Entry 0 of sysent[]: the real call number is the next word at params. */
134 if (__improbable(callp
== sysent
)) {
135 code
= fuword(params
);
136 params
+= sizeof(int);
137 syscode
= (code
< nsysent
) ? code
: SYS_invalid
;
138 callp
= &sysent
[syscode
];
141 vt
= (void *)uthread
->uu_arg
;
/* Copy sy_arg_bytes of arguments from params into uthread->uu_arg (aliased by vt). */
143 if (callp
->sy_arg_bytes
!= 0) {
144 #if CONFIG_REQUIRES_U32_MUNGING
147 #error U32 syscalls on x86_64 kernel requires munging
151 assert((unsigned) callp
->sy_arg_bytes
<= sizeof(uthread
->uu_arg
));
152 nargs
= callp
->sy_arg_bytes
;
153 error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
);
157 thread_exception_return();
161 if (__probable(!code_is_kdebug_trace(code
))) {
163 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
164 uip
[0], uip
[1], uip
[2], uip
[3]);
167 #if CONFIG_REQUIRES_U32_MUNGING
168 mungerp
= callp
->sy_arg_munge32
;
170 if (mungerp
!= NULL
) {
175 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
);
179 * Delayed binding of thread credential to process credential, if we
180 * are not running with an explicitly set thread credential.
182 kauth_cred_uthread_update(uthread
, p
);
/* Reset the per-call state the handler and the return path rely on. */
184 uthread
->uu_rval
[0] = 0;
185 uthread
->uu_rval
[1] = 0;
186 uthread
->uu_flag
|= UT_NOTCANCELPT
;
187 uthread
->syscall_code
= code
;
191 uthread
->uu_iocount
= 0;
192 uthread
->uu_vpindex
= 0;
/* With a filter mask installed and the bit for this call clear, ask mac_proc_check_syscall_unix(). */
196 if (__improbable(p
->syscall_filter_mask
!= NULL
&& !bitstr_test(p
->syscall_filter_mask
, syscode
))) {
197 error
= mac_proc_check_syscall_unix(p
, syscode
);
202 #endif /* CONFIG_MACF */
/* Dispatch to the handler; its results come back through uthread->uu_rval. */
204 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
205 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &(uthread
->uu_rval
[0]));
206 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
210 #endif /* CONFIG_MACF */
213 if (uthread
->uu_iocount
) {
214 printf("system call returned with uu_iocount != 0\n");
218 uthread
->t_dtrace_errno
= error
;
219 #endif /* CONFIG_DTRACE */
221 if (__improbable(error
== ERESTART
)) {
223 * Move the user's pc back to repeat the syscall:
224 * 5 bytes for a sysenter, or 2 for an int 8x.
225 * The SYSENTER_TF_CS covers single-stepping over a sysenter
226 * - see debug trap handler in idt.s/idt64.s
229 pal_syscall_restart(thread
, state
);
230 } else if (error
!= EJUSTRETURN
) {
231 if (__improbable(error
)) {
233 regs
->efl
|= EFL_CF
; /* carry bit */
234 } else { /* (not error) */
236 * We split retval across two registers, in case the
237 * syscall had a 64-bit return value, in which case
238 * eax/edx matches the function call ABI.
240 regs
->eax
= uthread
->uu_rval
[0];
241 regs
->edx
= uthread
->uu_rval
[1];
245 DEBUG_KPRINT_SYSCALL_UNIX(
246 "unix_syscall: error=%d retval=(%u,%u)\n",
247 error
, regs
->eax
, regs
->edx
);
249 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
250 uthread
->syscall_code
= 0;
252 #if DEBUG || DEVELOPMENT
253 kern_allocation_name_t
254 prior __assert_only
= thread_set_allocation_name(NULL
);
255 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
256 #endif /* DEBUG || DEVELOPMENT */
258 if (__improbable(uthread
->uu_lowpri_window
)) {
260 * task is marked as a low priority I/O type
261 * and the I/O we issued while in this system call
262 * collided with normal I/O operations... we'll
263 * delay in order to mitigate the impact of this
264 * task on the normal operation of the system
266 throttle_lowpri_io(1);
268 if (__probable(!code_is_kdebug_trace(code
))) {
269 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
270 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], pid
);
/* A successful execve outside vfork gets an extra pal_execve_return() step. */
273 if (__improbable(!is_vfork
&& callp
->sy_call
== (sy_call_t
*)execve
&& !error
)) {
274 pal_execve_return(thread
);
/* A proc reference count left non-zero by the handler is treated as fatal. */
278 if (__improbable(uthread_get_proc_refcount(uthread
) != 0)) {
279 panic("system call returned with uu_proc_refcount != 0");
283 thread_exception_return();
/*
 * unix_syscall64: BSD system call dispatch for a thread whose saved user
 * state is 64-bit.
 *
 * NOTE(review): this copy of the function is missing interior lines (braces,
 * several local declarations, preprocessor guards, the break statements of
 * the return-type switch), so the summary below covers only the statements
 * that are actually present.
 *
 * Visible flow:
 *  - asserts that `state` is a 64-bit saved state and takes its
 *    x86_saved_state64_t view;
 *  - resets the per-uthread proc reference count and selects the proc; a NULL
 *    proc sets EFL_CF, terminates the current task and returns to user mode;
 *  - derives the call number from regs->rax masked with SYSCALL_NUMBER_MASK;
 *    `syscode` is the table index, equal to `code` when below nsysent and
 *    SYS_invalid otherwise;
 *  - when the selected entry is sysent[0] (indirect call) the register
 *    arguments are taken starting at rsi instead of rdi;
 *  - copies up to args_in_regs argument slots from the saved registers into
 *    uthread->uu_arg, then fetches any remaining arguments with copyin() from
 *    regs->isf.rsp + sizeof(user_addr_t);
 *  - zeroes uu_rval[], marks the thread UT_NOTCANCELPT, records the call
 *    number in uthread->syscall_code and calls callp->sy_call;
 *  - ERESTART is handed to pal_syscall_restart(); any other non-zero error
 *    except EJUSTRETURN sets EFL_CF; success stores the result in rax/rdx
 *    according to callp->sy_return_type and clears EFL_CF;
 *  - clears UT_NOTCANCELPT and syscall_code, applies low-priority I/O
 *    throttling when uu_lowpri_window is set, and panics if the proc
 *    reference count is non-zero;
 *  - leaves through thread_exception_return(), consistent with the noreturn
 *    attribute.
 */
287 __attribute__((noreturn
))
289 unix_syscall64(x86_saved_state_t
*state
)
293 unsigned int code
, syscode
;
294 const struct sysent
*callp
;
296 boolean_t args_start_at_rdi
;
299 struct uthread
*uthread
;
300 x86_saved_state64_t
*regs
;
303 assert(is_saved_state64(state
));
304 regs
= saved_state64(state
);
/* A rax value of 0x2000800 returns to user mode before any per-thread setup. */
306 if (regs
->rax
== 0x2000800) {
307 thread_exception_return();
310 thread
= current_thread();
311 uthread
= get_bsdthread_info(thread
);
313 uthread_reset_proc_refcount(uthread
);
315 /* Get the appropriate proc; may be different from task's for vfork() */
316 if (__probable(!(uthread
->uu_flag
& UT_VFORK
))) {
317 p
= (struct proc
*)get_bsdtask_info(current_task());
322 /* Verify that we are not being called from a task without a proc */
323 if (__improbable(p
== NULL
)) {
325 regs
->isf
.rflags
|= EFL_CF
;
326 task_terminate_internal(current_task());
327 thread_exception_return();
/* code keeps the masked call number; syscode is the bounds-checked sysent[] index. */
331 code
= regs
->rax
& SYSCALL_NUMBER_MASK
;
332 syscode
= (code
< nsysent
) ? code
: SYS_invalid
;
333 DEBUG_KPRINT_SYSCALL_UNIX(
334 "unix_syscall64: code=%d(%s) rip=%llx\n",
335 code
, syscallnames
[syscode
], regs
->isf
.rip
);
336 callp
= &sysent
[syscode
];
338 vt
= (void *)uthread
->uu_arg
;
340 if (__improbable(callp
== sysent
)) {
342 * indirect system call... system call number
346 syscode
= (code
< nsysent
) ? code
: SYS_invalid
;
347 callp
= &sysent
[syscode
];
348 args_start_at_rdi
= FALSE
;
351 args_start_at_rdi
= TRUE
;
/*
 * Register-passed arguments: copy up to args_in_regs slots into uu_arg,
 * reading from the saved rdi onwards for a direct call or from the saved rsi
 * onwards for an indirect one.
 */
355 if (callp
->sy_narg
!= 0) {
356 assert(callp
->sy_narg
<= 8); /* size of uu_arg */
358 args_in_regs
= MIN(args_in_regs
, callp
->sy_narg
);
359 memcpy(vt
, args_start_at_rdi
? &regs
->rdi
: &regs
->rsi
, args_in_regs
* sizeof(syscall_arg_t
));
361 if (!code_is_kdebug_trace(code
)) {
364 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
365 uip
[0], uip
[1], uip
[2], uip
[3]);
/* Arguments beyond those taken from registers are fetched from regs->isf.rsp + sizeof(user_addr_t). */
368 if (__improbable(callp
->sy_narg
> args_in_regs
)) {
371 copyin_count
= (callp
->sy_narg
- args_in_regs
) * sizeof(syscall_arg_t
);
373 error
= copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)&uthread
->uu_arg
[args_in_regs
], copyin_count
);
376 regs
->isf
.rflags
|= EFL_CF
;
377 thread_exception_return();
382 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
);
386 * Delayed binding of thread credential to process credential, if we
387 * are not running with an explicitly set thread credential.
389 kauth_cred_uthread_update(uthread
, p
);
/* Reset the per-call state the handler and the return path rely on. */
391 uthread
->uu_rval
[0] = 0;
392 uthread
->uu_rval
[1] = 0;
393 uthread
->uu_flag
|= UT_NOTCANCELPT
;
394 uthread
->syscall_code
= code
;
398 uthread
->uu_iocount
= 0;
399 uthread
->uu_vpindex
= 0;
/* With a filter mask installed and the bit for this call clear, ask mac_proc_check_syscall_unix(). */
403 if (__improbable(p
->syscall_filter_mask
!= NULL
&& !bitstr_test(p
->syscall_filter_mask
, syscode
))) {
404 error
= mac_proc_check_syscall_unix(p
, syscode
);
409 #endif /* CONFIG_MACF */
/* Dispatch to the handler; its results come back through uthread->uu_rval. */
411 AUDIT_SYSCALL_ENTER(code
, p
, uthread
);
412 error
= (*(callp
->sy_call
))((void *) p
, vt
, &(uthread
->uu_rval
[0]));
413 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
417 #endif /* CONFIG_MACF */
420 if (uthread
->uu_iocount
) {
421 printf("system call returned with uu_iocount != 0\n");
426 uthread
->t_dtrace_errno
= error
;
427 #endif /* CONFIG_DTRACE */
429 if (__improbable(error
== ERESTART
)) {
431 * all system calls come through via the syscall instruction
432 * in 64 bit mode... its 2 bytes in length
433 * move the user's pc back to repeat the syscall:
435 pal_syscall_restart( thread
, state
);
436 } else if (error
!= EJUSTRETURN
) {
437 if (__improbable(error
)) {
439 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
440 } else { /* (not error) */
441 switch (callp
->sy_return_type
) {
442 case _SYSCALL_RET_INT_T
:
443 regs
->rax
= uthread
->uu_rval
[0];
444 regs
->rdx
= uthread
->uu_rval
[1];
446 case _SYSCALL_RET_UINT_T
:
447 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
448 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
450 case _SYSCALL_RET_OFF_T
:
451 case _SYSCALL_RET_ADDR_T
:
452 case _SYSCALL_RET_SIZE_T
:
453 case _SYSCALL_RET_SSIZE_T
:
454 case _SYSCALL_RET_UINT64_T
:
455 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
458 case _SYSCALL_RET_NONE
:
461 panic("unix_syscall: unknown return type");
464 regs
->isf
.rflags
&= ~EFL_CF
;
468 DEBUG_KPRINT_SYSCALL_UNIX(
469 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
470 error
, regs
->rax
, regs
->rdx
);
472 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
473 uthread
->syscall_code
= 0;
475 #if DEBUG || DEVELOPMENT
476 kern_allocation_name_t
477 prior __assert_only
= thread_set_allocation_name(NULL
);
478 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
479 #endif /* DEBUG || DEVELOPMENT */
481 if (__improbable(uthread
->uu_lowpri_window
)) {
483 * task is marked as a low priority I/O type
484 * and the I/O we issued while in this system call
485 * collided with normal I/O operations... we'll
486 * delay in order to mitigate the impact of this
487 * task on the normal operation of the system
489 throttle_lowpri_io(1);
491 if (__probable(!code_is_kdebug_trace(code
))) {
492 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
493 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], pid
);
/* A proc reference count left non-zero by the handler is treated as fatal. */
497 if (__improbable(uthread_get_proc_refcount(uthread
))) {
498 panic("system call returned with uu_proc_refcount != 0");
502 thread_exception_return();
/*
 * unix_syscall_return: finish the system call recorded in
 * uthread->syscall_code with the result `error`, then return to user mode.
 *
 * NOTE(review): this copy of the function is missing interior lines (braces,
 * several local declarations, preprocessor guards, the break statements of
 * the return-type switch), so the summary below covers only the statements
 * that are actually present.
 *
 * Visible flow:
 *  - looks up the current thread and its uthread and calls
 *    pal_register_cache_state(thread, DIRTY);
 *  - picks the 64-bit or 32-bit register view via proc_is64bit(p), using
 *    find_user_regs(thread) to locate the saved state;
 *  - reloads the call number from uthread->syscall_code and selects
 *    sysent[SYS_invalid] when it is not below nsysent;
 *  - if the entry's handler is dtrace_systrace_syscall, reports the result
 *    through dtrace_systrace_syscall_return(); then AUDIT_SYSCALL_EXIT;
 *  - ERESTART is handed to pal_syscall_restart(); an error other than
 *    EJUSTRETURN sets EFL_CF; success stores uu_rval[] in rax/rdx (by
 *    sy_return_type) or eax/edx;
 *  - clears UT_NOTCANCELPT, applies low-priority I/O throttling when
 *    uu_lowpri_window is set, emits the kdebug end event unless the call is a
 *    kdebug trace call, and leaves through thread_exception_return().
 *
 * error: value produced by the system call handler (compared against
 *        ERESTART and EJUSTRETURN below).
 */
508 unix_syscall_return(int error
)
511 struct uthread
*uthread
;
514 const struct sysent
*callp
;
516 thread
= current_thread();
517 uthread
= get_bsdthread_info(thread
);
519 pal_register_cache_state(thread
, DIRTY
);
/* 64-bit processes: results go to rax/rdx according to sy_return_type. */
523 if (proc_is64bit(p
)) {
524 x86_saved_state64_t
*regs
;
526 regs
= saved_state64(find_user_regs(thread
));
/* Recover the call number stored in the uthread; out-of-range values use the SYS_invalid entry. */
528 code
= uthread
->syscall_code
;
529 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
532 if (callp
->sy_call
== dtrace_systrace_syscall
) {
533 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
535 #endif /* CONFIG_DTRACE */
536 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
538 if (error
== ERESTART
) {
542 pal_syscall_restart( thread
, find_user_regs(thread
));
543 } else if (error
!= EJUSTRETURN
) {
546 regs
->isf
.rflags
|= EFL_CF
; /* carry bit */
547 } else { /* (not error) */
548 switch (callp
->sy_return_type
) {
549 case _SYSCALL_RET_INT_T
:
550 regs
->rax
= uthread
->uu_rval
[0];
551 regs
->rdx
= uthread
->uu_rval
[1];
553 case _SYSCALL_RET_UINT_T
:
554 regs
->rax
= ((u_int
)uthread
->uu_rval
[0]);
555 regs
->rdx
= ((u_int
)uthread
->uu_rval
[1]);
557 case _SYSCALL_RET_OFF_T
:
558 case _SYSCALL_RET_ADDR_T
:
559 case _SYSCALL_RET_SIZE_T
:
560 case _SYSCALL_RET_SSIZE_T
:
561 case _SYSCALL_RET_UINT64_T
:
562 regs
->rax
= *((uint64_t *)(&uthread
->uu_rval
[0]));
565 case _SYSCALL_RET_NONE
:
568 panic("unix_syscall: unknown return type");
571 regs
->isf
.rflags
&= ~EFL_CF
;
574 DEBUG_KPRINT_SYSCALL_UNIX(
575 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
576 error
, regs
->rax
, regs
->rdx
);
/* 32-bit processes: results go to eax/edx; the carry flag is cleared first and set again on error. */
578 x86_saved_state32_t
*regs
;
580 regs
= saved_state32(find_user_regs(thread
));
582 regs
->efl
&= ~(EFL_CF
);
584 code
= uthread
->syscall_code
;
585 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
588 if (callp
->sy_call
== dtrace_systrace_syscall
) {
589 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
591 #endif /* CONFIG_DTRACE */
592 AUDIT_SYSCALL_EXIT(code
, p
, uthread
, error
);
594 if (error
== ERESTART
) {
595 pal_syscall_restart( thread
, find_user_regs(thread
));
596 } else if (error
!= EJUSTRETURN
) {
599 regs
->efl
|= EFL_CF
; /* carry bit */
600 } else { /* (not error) */
601 regs
->eax
= uthread
->uu_rval
[0];
602 regs
->edx
= uthread
->uu_rval
[1];
605 DEBUG_KPRINT_SYSCALL_UNIX(
606 "unix_syscall_return: error=%d retval=(%u,%u)\n",
607 error
, regs
->eax
, regs
->edx
);
/* NOTE(review): presumably shared by both register widths; the closing braces are not visible in this copy. */
611 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
613 #if DEBUG || DEVELOPMENT
614 kern_allocation_name_t
615 prior __assert_only
= thread_set_allocation_name(NULL
);
616 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
617 #endif /* DEBUG || DEVELOPMENT */
619 if (uthread
->uu_lowpri_window
) {
621 * task is marked as a low priority I/O type
622 * and the I/O we issued while in this system call
623 * collided with normal I/O operations... we'll
624 * delay in order to mitigate the impact of this
625 * task on the normal operation of the system
627 throttle_lowpri_io(1);
629 if (!code_is_kdebug_trace(code
)) {
630 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
631 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], p
->p_pid
);
634 thread_exception_return();