2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
6 #include <kern/thread.h>
7 #include <kern/assert.h>
8 #include <kern/clock.h>
9 #include <kern/locks.h>
10 #include <kern/sched_prim.h>
11 #include <mach/machine/thread_status.h>
12 #include <mach/thread_act.h>
13 #include <machine/machine_routines.h>
14 #include <arm/thread.h>
15 #include <arm/proc_reg.h>
16 #include <pexpert/pexpert.h>
18 #include <sys/kernel.h>
20 #include <sys/proc_internal.h>
21 #include <sys/syscall.h>
22 #include <sys/systm.h>
24 #include <sys/errno.h>
25 #include <sys/kdebug.h>
26 #include <sys/sysent.h>
27 #include <sys/sysproto.h>
28 #include <sys/kauth.h>
29 #include <sys/bitstring.h>
31 #include <security/audit/audit.h>
34 #include <security/mac_framework.h>
/*
 * DTrace systrace hooks, declared locally. The trailing #endif shows they
 * are guarded by CONFIG_DTRACE (the opening directive is not visible in this
 * listing). unix_syscall_return() compares a sysent entry sy_call against
 * dtrace_systrace_syscall and, on a match, reports the result through
 * dtrace_systrace_syscall_return().
 */
38 extern int32_t dtrace_systrace_syscall(struct proc
*, void *, int *);
39 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
40 #endif /* CONFIG_DTRACE */
/*
 * Forward declarations: the unix_syscall() entry point followed by the
 * file-local helpers it relies on. Each arm_* helper operates on the saved
 * user register state: fetching the syscall number and arguments, tracing
 * the entry, clearing the error indication, and preparing the return
 * registers. The return type line of the unix_syscall prototype is not
 * visible in this listing.
 */
43 unix_syscall(struct arm_saved_state
* regs
, thread_t thread_act
,
44 struct uthread
* uthread
, struct proc
* proc
);
46 static int arm_get_syscall_args(uthread_t
, struct arm_saved_state
*, const struct sysent
*);
47 static int arm_get_u32_syscall_args(uthread_t
, arm_saved_state32_t
*, const struct sysent
*);
48 static void arm_prepare_u32_syscall_return(const struct sysent
*, arm_saved_state_t
*, uthread_t
, int);
49 static void arm_prepare_syscall_return(const struct sysent
*, struct arm_saved_state
*, uthread_t
, int);
50 static unsigned short arm_get_syscall_number(struct arm_saved_state
*);
51 static void arm_trace_unix_syscall(int, struct arm_saved_state
*);
52 static void arm_clear_syscall_error(struct arm_saved_state
*);
/*
 * Aliases that map save_rN names onto elements of an r[] register array, so
 * code can write regs->save_r12 instead of regs->r[12]. Only r10 through r13
 * are visible in this listing; the code below also uses save_r0 .. save_r4,
 * so the lower-numbered aliases are presumably defined on lines not shown.
 */
63 #define save_r10 r[10]
64 #define save_r11 r[11]
65 #define save_r12 r[12]
66 #define save_r13 r[13]
/*
 * Per-syscall invocation counters. unix_syscall() increments the slot for
 * the current syscall number while do_count_syscalls is greater than zero.
 * syscalls_log has SYS_MAXSYSCALL entries. Any preprocessor guard around
 * these definitions is not visible in this listing.
 */
69 __XNU_PRIVATE_EXTERN
int do_count_syscalls
= 1;
70 __XNU_PRIVATE_EXTERN
int syscalls_log
[SYS_MAXSYSCALL
];
/*
 * True when code is one of the three kdebug trace syscalls
 * (SYS_kdebug_trace, SYS_kdebug_trace64, SYS_kdebug_trace_string).
 * unix_syscall() and unix_syscall_return() use it to skip emitting their own
 * entry and exit trace events for those syscalls.
 */
73 #define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
74 ((code) == SYS_kdebug_trace64) || \
75 ((code) == SYS_kdebug_trace_string))
78 * Function: unix_syscall
80 * Inputs: regs - pointer to Process Control Block
/*
 * unix_syscall: BSD system call dispatch, as far as this listing shows it.
 *
 * Parameters (the line declaring proc is not visible here; see the
 * prototype):
 *   state      - saved user register state; source of the syscall number and
 *                arguments, and destination of the return registers.
 *   thread_act - marked __unused.
 *   uthread    - per-thread BSD state; receives the marshalled arguments
 *                (uu_arg), the return values (uu_rval), syscall_code and the
 *                UT_NOTCANCELPT flag for the duration of the call.
 *   proc       - calling process; replaced by current_proc() when the
 *                uthread has UT_VFORK set.
 *
 * Visible flow:
 *   1. Reset the uthread proc refcount and read the syscall number.
 *   2. Emit a kdebug entry event unless the call is itself a kdebug trace
 *      syscall.
 *   3. Clamp the number to a sysent index (SYS_invalid when it is not below
 *      nsysent) and marshal the arguments; on marshalling failure the
 *      SYS_invalid entry is used instead.
 *   4. Zero both return values and clear the carry-based error indication.
 *   5. If the process has a syscall filter mask that does not have the bit
 *      for this syscall set, consult mac_proc_check_syscall_unix().
 *   6. Call the handler through sy_call, bracketed by AUDIT_SYSCALL_ENTER
 *      and AUDIT_SYSCALL_EXIT.
 *   7. Write the result into the saved registers, clear UT_NOTCANCELPT and
 *      syscall_code, throttle if uu_lowpri_window is set, emit the kdebug
 *      end event, panic if the proc refcount is non-zero, and leave through
 *      thread_exception_return().
 *
 * The function carries __attribute__((noreturn)); whether that attribute is
 * conditional is not visible in this listing.
 */
85 __attribute__((noreturn
))
89 struct arm_saved_state
* state
,
90 __unused thread_t thread_act
,
91 struct uthread
* uthread
,
94 const struct sysent
*callp
;
96 unsigned short code
, syscode
;
100 assert(is_saved_state32(state
));
103 uthread_reset_proc_refcount(uthread
);
105 code
= arm_get_syscall_number(state
);
107 #define unix_syscall_kprintf(x...) /* kprintf("unix_syscall: " x) */
109 if (kdebug_enable
&& !code_is_kdebug_trace(code
)) {
110 arm_trace_unix_syscall(code
, state
);
113 if ((uthread
->uu_flag
& UT_VFORK
)) {
114 proc
= current_proc();
117 syscode
= (code
< nsysent
) ? code
: SYS_invalid
;
118 callp
= &sysent
[syscode
];
121 * sy_narg is inaccurate on ARM if a 64 bit parameter is specified. Since user_addr_t
122 * is currently a 32 bit type, this is really a long word count. See rdar://problem/6104668.
124 if (callp
->sy_narg
!= 0) {
125 if (arm_get_syscall_args(uthread
, state
, callp
) != 0) {
126 /* Too many arguments, or something failed */
127 unix_syscall_kprintf("arm_get_syscall_args failed.\n");
128 callp
= &sysent
[SYS_invalid
];
132 uthread
->uu_flag
|= UT_NOTCANCELPT
;
133 uthread
->syscall_code
= code
;
135 uthread
->uu_rval
[0] = 0;
138 * r4 is volatile, if we set it to regs->save_r4 here the child
139 * will have parents r4 after execve
141 uthread
->uu_rval
[1] = 0;
146 * ARM runtime will call cerror if the carry bit is set after a
147 * system call, so clear it here for the common case of success.
149 arm_clear_syscall_error(state
);
/*
 * NOTE(review): the index below is the raw syscall number (code), not the
 * clamped syscode computed above, while syscalls_log is declared with
 * SYS_MAXSYSCALL entries. Confirm that an out-of-range number cannot reach
 * this increment.
 */
152 if (do_count_syscalls
> 0) {
153 syscalls_log
[code
]++;
156 pid
= proc_pid(proc
);
159 uthread
->uu_iocount
= 0;
160 uthread
->uu_vpindex
= 0;
162 unix_syscall_kprintf("code %d (pid %d - %s, tid %lld)\n", code
,
163 pid
, proc
->p_comm
, thread_tid(current_thread()));
166 if (__improbable(proc
->syscall_filter_mask
!= NULL
&& !bitstr_test(proc
->syscall_filter_mask
, syscode
))) {
167 error
= mac_proc_check_syscall_unix(proc
, syscode
);
172 #endif /* CONFIG_MACF */
174 AUDIT_SYSCALL_ENTER(code
, proc
, uthread
);
175 error
= (*(callp
->sy_call
))(proc
, &uthread
->uu_arg
[0], &(uthread
->uu_rval
[0]));
176 AUDIT_SYSCALL_EXIT(code
, proc
, uthread
, error
);
180 #endif /* CONFIG_MACF */
182 unix_syscall_kprintf("code %d, error %d, results %x, %x (pid %d - %s, tid %lld)\n", code
, error
,
183 uthread
->uu_rval
[0], uthread
->uu_rval
[1],
184 pid
, get_bsdtask_info(current_task()) ? proc
->p_comm
: "unknown", thread_tid(current_thread()));
187 if (uthread
->uu_iocount
) {
188 printf("system call returned with uu_iocount != 0");
192 uthread
->t_dtrace_errno
= error
;
193 #endif /* CONFIG_DTRACE */
194 #if DEBUG || DEVELOPMENT
195 kern_allocation_name_t
196 prior __assert_only
= thread_set_allocation_name(NULL
);
197 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
198 #endif /* DEBUG || DEVELOPMENT */
200 arm_prepare_syscall_return(callp
, state
, uthread
, error
);
202 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
203 uthread
->syscall_code
= 0;
205 if (uthread
->uu_lowpri_window
) {
207 * task is marked as a low priority I/O type
208 * and the I/O we issued while in this system call
209 * collided with normal I/O operations... we'll
210 * delay in order to mitigate the impact of this
211 * task on the normal operation of the system
213 throttle_lowpri_io(1);
215 if (kdebug_enable
&& !code_is_kdebug_trace(code
)) {
216 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
217 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], pid
);
221 if (__improbable(uthread_get_proc_refcount(uthread
) != 0)) {
222 panic("system call returned with uu_proc_refcount != 0");
227 thread_exception_return();
/*
 * unix_syscall_return: complete a system call on the current thread with the
 * supplied error value.
 *
 * Unlike unix_syscall(), nothing is passed in besides error: the thread,
 * process, uthread and user register state are all looked up from
 * current_thread() and current_proc(), and the syscall number is recovered
 * from uthread->syscall_code. A number that is not below nsysent selects the
 * SYS_invalid sysent entry.
 *
 * Visible flow: if the sysent handler is dtrace_systrace_syscall, report the
 * result via dtrace_systrace_syscall_return(); on DEBUG or DEVELOPMENT
 * builds assert that no allocation name was left set on the thread; run
 * AUDIT_SYSCALL_EXIT; write the result into the user registers with
 * arm_prepare_syscall_return(); clear UT_NOTCANCELPT and syscall_code;
 * throttle if uu_lowpri_window is set; emit the kdebug end event unless this
 * is a kdebug trace syscall; leave through thread_exception_return().
 *
 * NOTE(review): the unix_syscall_return_kprintf definition below is cut off
 * in this listing, leaving its comment unterminated; restore the full line
 * from the upstream file.
 */
232 unix_syscall_return(int error
)
235 struct uthread
*uthread
;
237 struct arm_saved_state
*regs
;
239 const struct sysent
*callp
;
241 #define unix_syscall_return_kprintf(x...) /* kprintf("unix_syscall_retur
244 thread_act
= current_thread();
245 proc
= current_proc();
246 uthread
= get_bsdthread_info(thread_act
);
248 regs
= find_user_regs(thread_act
);
249 code
= uthread
->syscall_code
;
250 callp
= (code
>= nsysent
) ? &sysent
[SYS_invalid
] : &sysent
[code
];
253 if (callp
->sy_call
== dtrace_systrace_syscall
) {
254 dtrace_systrace_syscall_return( code
, error
, uthread
->uu_rval
);
256 #endif /* CONFIG_DTRACE */
257 #if DEBUG || DEVELOPMENT
258 kern_allocation_name_t
259 prior __assert_only
= thread_set_allocation_name(NULL
);
260 assertf(prior
== NULL
, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior
));
261 #endif /* DEBUG || DEVELOPMENT */
263 AUDIT_SYSCALL_EXIT(code
, proc
, uthread
, error
);
266 * Get index into sysent table
268 arm_prepare_syscall_return(callp
, regs
, uthread
, error
);
270 uthread
->uu_flag
&= ~UT_NOTCANCELPT
;
271 uthread
->syscall_code
= 0;
273 if (uthread
->uu_lowpri_window
) {
275 * task is marked as a low priority I/O type
276 * and the I/O we issued while in this system call
277 * collided with normal I/O operations... we'll
278 * delay in order to mitigate the impact of this
279 * task on the normal operation of the system
281 throttle_lowpri_io(1);
283 if (kdebug_enable
&& !code_is_kdebug_trace(code
)) {
284 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
285 error
, uthread
->uu_rval
[0], uthread
->uu_rval
[1], proc
->p_pid
);
288 thread_exception_return();
/*
 * arm_prepare_u32_syscall_return: store a syscall outcome into 32-bit saved
 * user state.
 *
 *   callp   - sysent entry; its sy_return_type selects how results are laid
 *             out on success.
 *   regs    - saved state, asserted to be the 32-bit flavour.
 *   uthread - supplies uu_rval[0] and uu_rval[1].
 *   error   - result of the handler.
 *
 * Visible behaviour:
 *   - ERESTART: handled by a branch whose body is not visible in this
 *     listing.
 *   - Error path (not EJUSTRETURN): r0 receives the error value and the
 *     carry flag (PSR_CF) is set in cpsr so the user runtime runs its cerror
 *     routine.
 *   - Success: for the integer, offset, address, size and 64-bit return
 *     types, r0 and r1 receive uu_rval[0] and uu_rval[1]. There is a
 *     separate _SYSCALL_RET_NONE case, and an unrecognised return type
 *     panics.
 *   - EJUSTRETURN: registers are left untouched.
 *
 * NOTE(review): several lines of this function (including the test that
 * separates the error and success branches) are missing from this listing;
 * check against the upstream file before relying on the exact nesting.
 */
293 arm_prepare_u32_syscall_return(const struct sysent
*callp
, arm_saved_state_t
*regs
, uthread_t uthread
, int error
)
295 assert(is_saved_state32(regs
));
297 arm_saved_state32_t
*ss32
= saved_state32(regs
);
299 if (error
== ERESTART
) {
301 } else if (error
!= EJUSTRETURN
) {
303 ss32
->save_r0
= error
;
305 /* set the carry bit to execute cerror routine */
306 ss32
->cpsr
|= PSR_CF
;
307 unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
308 } else { /* (not error) */
309 switch (callp
->sy_return_type
) {
310 case _SYSCALL_RET_INT_T
:
311 case _SYSCALL_RET_UINT_T
:
312 case _SYSCALL_RET_OFF_T
:
313 case _SYSCALL_RET_ADDR_T
:
314 case _SYSCALL_RET_SIZE_T
:
315 case _SYSCALL_RET_SSIZE_T
:
316 case _SYSCALL_RET_UINT64_T
:
317 ss32
->save_r0
= uthread
->uu_rval
[0];
318 ss32
->save_r1
= uthread
->uu_rval
[1];
320 case _SYSCALL_RET_NONE
:
325 panic("unix_syscall: unknown return type");
330 /* else (error == EJUSTRETURN) { nothing } */
/*
 * arm_trace_u32_unix_syscall: emit the kdebug DBG_FUNC_START event for a
 * syscall made from 32-bit state.
 *
 * indirect is true when r12 is zero. Two KDBG_RELEASE calls are visible: one
 * logs r1..r4 and the other logs r0..r3. Presumably the first is used for
 * indirect syscalls, where r0 holds the syscall number rather than an
 * argument, but the lines that select between them are not visible in this
 * listing - confirm against the upstream file.
 */
334 arm_trace_u32_unix_syscall(int code
, arm_saved_state32_t
*regs
)
336 bool indirect
= (regs
->save_r12
== 0);
338 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
339 regs
->save_r1
, regs
->save_r2
, regs
->save_r3
, regs
->save_r4
);
341 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
342 regs
->save_r0
, regs
->save_r1
, regs
->save_r2
, regs
->save_r3
);
/*
 * arm_clear_u32_syscall_error: clear the carry flag (PSR_CF) in the cpsr of
 * a 32-bit saved state. The carry flag is what the error path of the return
 * code sets to signal failure to user space.
 */
347 arm_clear_u32_syscall_error(arm_saved_state32_t
*regs
)
349 regs
->cpsr
&= ~PSR_CF
;
/*
 * arm_get_syscall_args (32-bit kernel flavour): asserts that state is 32-bit
 * saved state and forwards to arm_get_u32_syscall_args(), returning its
 * result unchanged.
 */
355 arm_get_syscall_args(uthread_t uthread
, struct arm_saved_state
*state
, const struct sysent
*callp
)
357 assert(is_saved_state32(state
));
358 return arm_get_u32_syscall_args(uthread
, saved_state32(state
), callp
);
361 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
363 * For armv7k, the alignment constraints of the ABI mean we don't know how the userspace
364 * arguments are arranged without knowing the prototype of the syscall. So we use mungers
365 * to marshal the userspace data into the uu_arg. This also means we need the same convention
366 * as mach syscalls. That means we use r8 to pass arguments in the BSD case as well.
/*
 * arm_get_u32_syscall_args (munger-based variant, selected when
 * __BIGGEST_ALIGNMENT__ is greater than 4): marshal user arguments into
 * uthread->uu_arg using the per-syscall munger.
 *
 *   uthread - destination of the marshalled arguments (uu_arg).
 *   regs    - 32-bit saved user state handed to the munger.
 *   callp   - sysent entry supplying sy_arg_bytes and sy_arg_munge32.
 *
 * Visible behaviour: a check that sy_arg_bytes does not exceed the size of
 * uu_arg, an early branch taken when there is no munger or no argument
 * bytes, and otherwise the return value of munger(regs, uu_arg). The bodies
 * of the two early branches are not visible in this listing.
 */
369 arm_get_u32_syscall_args(uthread_t uthread
, arm_saved_state32_t
*regs
, const struct sysent
*callp
)
373 /* This check is probably not very useful since these both come from build-time */
374 if (callp
->sy_arg_bytes
> sizeof(uthread
->uu_arg
)) {
378 /* get the munger and use it to marshal in the data from userspace */
379 munger
= callp
->sy_arg_munge32
;
380 if (munger
== NULL
|| (callp
->sy_arg_bytes
== 0)) {
384 return munger(regs
, uthread
->uu_arg
);
388 * For an AArch32 kernel, where we know that we have only AArch32 userland,
389 * we do not do any munging (which is a little confusing, as it is a contrast
390 * to the i386 kernel, where, like the x86_64 kernel, we always munge
391 * arguments from a 32-bit userland out to 64-bit).
/*
 * arm_get_u32_syscall_args (AArch32 kernel, no munging): copy the raw 32-bit
 * argument words into uthread->uu_arg.
 *
 *   uthread - destination of the arguments (uu_arg).
 *   regs    - 32-bit saved user state (registers and sp).
 *   callp   - sysent entry supplying sy_arg_bytes.
 *
 * flavor is 1 when r12 is zero (the indirect form, where r0 carries the
 * syscall number) and 0 otherwise, so arguments start at r[flavor] and at
 * most 7 - flavor of them come from registers. If everything fits in
 * registers it is copied with one memcpy. Otherwise the register part is
 * copied first and the remainder is fetched with copyin() from the user
 * stack at sp + 7 * sizeof(int). The handling of a copyin failure is not
 * visible in this listing.
 *
 * NOTE(review): the memcpy source operands below contain a mis-decoded
 * character sequence where an address-of expression on regs is expected;
 * restore it from the upstream file.
 */
394 arm_get_u32_syscall_args(uthread_t uthread
, arm_saved_state32_t
*regs
, const struct sysent
*callp
)
397 int flavor
= (regs
->save_r12
== 0 ? 1 : 0);
399 regparams
= (7 - flavor
); /* Indirect value consumes a register */
401 assert((unsigned) callp
->sy_arg_bytes
<= sizeof(uthread
->uu_arg
));
403 if (callp
->sy_arg_bytes
<= (sizeof(uint32_t) * regparams
)) {
405 * Seven arguments or less are passed in registers.
407 memcpy(&uthread
->uu_arg
[0], ®s
->r
[flavor
], callp
->sy_arg_bytes
);
408 } else if (callp
->sy_arg_bytes
<= sizeof(uthread
->uu_arg
)) {
410 * In this case, we composite - take the first args from registers,
411 * the remainder from the stack (offset by the 7 regs therein).
413 unix_syscall_kprintf("%s: spillover...\n", __FUNCTION__
);
414 memcpy(&uthread
->uu_arg
[0], ®s
->r
[flavor
], regparams
* sizeof(int));
415 if (copyin((user_addr_t
)regs
->sp
+ 7 * sizeof(int), (int *)&uthread
->uu_arg
[0] + regparams
,
416 (callp
->sy_arg_bytes
- (sizeof(uint32_t) * regparams
))) != 0) {
/*
 * arm_get_syscall_number (32-bit kernel flavour): return the syscall number
 * from saved state, truncated to unsigned short. A non-zero r12 is the
 * number; when r12 is zero (the indirect form) the number is taken from r0.
 */
427 static unsigned short
428 arm_get_syscall_number(struct arm_saved_state
*regs
)
430 if (regs
->save_r12
!= 0) {
431 return (unsigned short)regs
->save_r12
;
433 return (unsigned short)regs
->save_r0
;
/*
 * arm_prepare_syscall_return (32-bit kernel flavour): asserts that state is
 * 32-bit saved state and delegates to arm_prepare_u32_syscall_return() with
 * the same arguments.
 */
438 arm_prepare_syscall_return(const struct sysent
*callp
, struct arm_saved_state
*state
, uthread_t uthread
, int error
)
440 assert(is_saved_state32(state
));
441 arm_prepare_u32_syscall_return(callp
, state
, uthread
, error
);
/*
 * arm_trace_unix_syscall (32-bit kernel flavour): asserts that state is
 * 32-bit saved state and delegates to arm_trace_u32_unix_syscall().
 */
445 arm_trace_unix_syscall(int code
, struct arm_saved_state
*state
)
447 assert(is_saved_state32(state
));
448 arm_trace_u32_unix_syscall(code
, saved_state32(state
));
/*
 * arm_clear_syscall_error (32-bit kernel flavour): asserts that state is
 * 32-bit saved state and delegates to arm_clear_u32_syscall_error().
 */
452 arm_clear_syscall_error(struct arm_saved_state
* state
)
454 assert(is_saved_state32(state
));
455 arm_clear_u32_syscall_error(saved_state32(state
));
458 #elif defined(__arm64__)
/*
 * Forward declarations of the 64-bit helpers that exist only in the
 * __arm64__ build: the return-register writer and the argument marshaller
 * for 64-bit saved state.
 */
459 static void arm_prepare_u64_syscall_return(const struct sysent
*, arm_saved_state_t
*, uthread_t
, int);
460 static int arm_get_u64_syscall_args(uthread_t
, arm_saved_state64_t
*, const struct sysent
*);
/*
 * arm_get_syscall_args (arm64 flavour): dispatch on the flavour of the saved
 * state. 32-bit state goes to arm_get_u32_syscall_args(); anything else goes
 * to arm_get_u64_syscall_args(). The callee result is returned unchanged.
 */
463 arm_get_syscall_args(uthread_t uthread
, struct arm_saved_state
*state
, const struct sysent
*callp
)
465 if (is_saved_state32(state
)) {
466 return arm_get_u32_syscall_args(uthread
, saved_state32(state
), callp
);
468 return arm_get_u64_syscall_args(uthread
, saved_state64(state
), callp
);
473 * 64-bit: all arguments in registers. We're willing to use x9, a temporary
474 * register per the ABI, to pass an argument to the kernel for one case,
475 * an indirect syscall with 8 arguments. No munging required, as all arguments
476 * are in 64-bit wide registers already.
/*
 * arm_get_u64_syscall_args: copy syscall arguments from 64-bit saved state
 * into uthread->uu_arg.
 *
 *   uthread - destination of the arguments (uu_arg); uu_thread is asserted
 *             non-NULL on the munging path.
 *   regs    - 64-bit saved user state.
 *   callp   - sysent entry supplying sy_narg and sy_arg_munge32.
 *
 * indirect_offset is 1 when x[ARM64_SYSCALL_CODE_REG_NUM] is zero (the
 * indirect form) and 0 otherwise, so sy_narg 64-bit values are copied
 * starting at x[indirect_offset]. A syscall declaring more arguments than
 * uu_arg has elements takes an early branch whose body is not visible in
 * this listing.
 *
 * With CONFIG_REQUIRES_U32_MUNGING, an indirect syscall from a thread that
 * is not 64-bit (per ml_thread_is64bit) additionally has its arguments run
 * through the 32-bit munger.
 *
 * NOTE(review): mungerp is called here without a NULL check, whereas the
 * 32-bit marshaller in the arm64 build tests mungerp against NULL first;
 * confirm that a NULL munger cannot reach this call.
 * NOTE(review): the memcpy source operand below contains a mis-decoded
 * character sequence where an address-of expression on regs is expected;
 * restore it from the upstream file.
 */
479 arm_get_u64_syscall_args(uthread_t uthread
, arm_saved_state64_t
*regs
, const struct sysent
*callp
)
483 #if CONFIG_REQUIRES_U32_MUNGING
487 indirect_offset
= (regs
->x
[ARM64_SYSCALL_CODE_REG_NUM
] == 0) ? 1 : 0;
490 * Everything should fit in registers for now.
492 if (callp
->sy_narg
> (int)(sizeof(uthread
->uu_arg
) / sizeof(uthread
->uu_arg
[0]))) {
496 memcpy(&uthread
->uu_arg
[0], ®s
->x
[indirect_offset
], callp
->sy_narg
* sizeof(uint64_t));
498 #if CONFIG_REQUIRES_U32_MUNGING
500 * The indirect system call interface is vararg based. For armv7k, arm64_32,
501 * and arm64, this means we simply lay the values down on the stack, padded to
502 * a width multiple (4 bytes for armv7k and arm64_32, 8 bytes for arm64).
503 * The arm64(_32) stub for syscall will load this data into the registers and
504 * then trap. This gives us register state that corresponds to what we would
505 * expect from a armv7 task, so in this particular case we need to munge the
508 * TODO: Is there a cleaner way to do this check? What we're actually
509 * interested in is whether the task is arm64_32. We don't appear to guarantee
510 * that uu_proc is populated here, which is why this currently uses the
513 mungerp
= callp
->sy_arg_munge32
;
514 assert(uthread
->uu_thread
);
516 if (indirect_offset
&& !ml_thread_is64bit(uthread
->uu_thread
)) {
517 (*mungerp
)(&uthread
->uu_arg
[0]);
524 * When the kernel is running AArch64, munge arguments from 32-bit
525 * userland out to 64-bit.
527 * flavor == 1 indicates an indirect syscall.
/*
 * arm_get_u32_syscall_args (arm64 kernel flavour): copy the 32-bit argument
 * words of a 32-bit caller into uthread->uu_arg, then widen them with the
 * per-syscall munger.
 *
 *   uthread - destination of the arguments (uu_arg).
 *   regs    - 32-bit saved user state (registers and sp).
 *   callp   - sysent entry supplying sy_arg_bytes and sy_arg_munge32.
 *
 * The copy mirrors the AArch32 version: flavor is 1 when r12 is zero (the
 * indirect form), arguments start at r[flavor], up to 7 - flavor words come
 * from registers, and any remainder is fetched with copyin() from the user
 * stack at sp + 7 * sizeof(int). The handling of a copyin failure is not
 * visible in this listing.
 *
 * With CONFIG_REQUIRES_U32_MUNGING the munger from sy_arg_munge32, when
 * non-NULL, is then applied in place to uu_arg. An #error line states that
 * 32-bit syscalls on an arm64 kernel require munging; presumably it sits in
 * the branch taken when that option is off.
 *
 * NOTE(review): the memcpy source operands below contain a mis-decoded
 * character sequence where an address-of expression on regs is expected;
 * restore it from the upstream file.
 */
530 arm_get_u32_syscall_args(uthread_t uthread
, arm_saved_state32_t
*regs
, const struct sysent
*callp
)
533 #if CONFIG_REQUIRES_U32_MUNGING
536 #error U32 syscalls on ARM64 kernel requires munging
538 int flavor
= (regs
->save_r12
== 0 ? 1 : 0);
540 regparams
= (7 - flavor
); /* Indirect value consumes a register */
542 assert((unsigned) callp
->sy_arg_bytes
<= sizeof(uthread
->uu_arg
));
544 if (callp
->sy_arg_bytes
<= (sizeof(uint32_t) * regparams
)) {
546 * Seven arguments or less are passed in registers.
548 memcpy(&uthread
->uu_arg
[0], ®s
->r
[flavor
], callp
->sy_arg_bytes
);
549 } else if (callp
->sy_arg_bytes
<= sizeof(uthread
->uu_arg
)) {
551 * In this case, we composite - take the first args from registers,
552 * the remainder from the stack (offset by the 7 regs therein).
554 unix_syscall_kprintf("%s: spillover...\n", __FUNCTION__
);
555 memcpy(&uthread
->uu_arg
[0], ®s
->r
[flavor
], regparams
* sizeof(int));
556 if (copyin((user_addr_t
)regs
->sp
+ 7 * sizeof(int), (int *)&uthread
->uu_arg
[0] + regparams
,
557 (callp
->sy_arg_bytes
- (sizeof(uint32_t) * regparams
))) != 0) {
564 #if CONFIG_REQUIRES_U32_MUNGING
566 mungerp
= callp
->sy_arg_munge32
;
567 if (mungerp
!= NULL
) {
568 (*mungerp
)(&uthread
->uu_arg
[0]);
/*
 * arm_get_syscall_number (arm64 flavour): return the syscall number from
 * saved state, truncated to unsigned short.
 *
 * For 32-bit state a non-zero r12 is the number, otherwise r0 is used. For
 * 64-bit state a non-zero x[ARM64_SYSCALL_CODE_REG_NUM] is the number,
 * otherwise x[0] is used. In both cases the fallback register serves the
 * indirect form, where the code register is zero.
 */
575 static unsigned short
576 arm_get_syscall_number(struct arm_saved_state
*state
)
578 if (is_saved_state32(state
)) {
579 if (saved_state32(state
)->save_r12
!= 0) {
580 return (unsigned short)saved_state32(state
)->save_r12
;
582 return (unsigned short)saved_state32(state
)->save_r0
;
585 if (saved_state64(state
)->x
[ARM64_SYSCALL_CODE_REG_NUM
] != 0) {
586 return (unsigned short)saved_state64(state
)->x
[ARM64_SYSCALL_CODE_REG_NUM
];
588 return (unsigned short)saved_state64(state
)->x
[0];
/*
 * arm_prepare_syscall_return (arm64 flavour): dispatch on the flavour of the
 * saved state. 32-bit state is handled by arm_prepare_u32_syscall_return(),
 * anything else by arm_prepare_u64_syscall_return(); all arguments are
 * passed through unchanged.
 */
594 arm_prepare_syscall_return(const struct sysent
*callp
, struct arm_saved_state
*state
, uthread_t uthread
, int error
)
596 if (is_saved_state32(state
)) {
597 arm_prepare_u32_syscall_return(callp
, state
, uthread
, error
);
599 arm_prepare_u64_syscall_return(callp
, state
, uthread
, error
);
/*
 * arm_prepare_u64_syscall_return: store a syscall outcome into 64-bit saved
 * user state.
 *
 *   callp   - sysent entry; its sy_return_type selects how results are laid
 *             out on success.
 *   regs    - saved state, asserted to be the 64-bit flavour.
 *   uthread - supplies uu_rval[0] and uu_rval[1].
 *   error   - result of the handler.
 *
 * Visible behaviour:
 *   - ERESTART: the saved pc is adjusted by -4 through add_saved_state_pc();
 *     presumably so the trapping instruction runs again - confirm.
 *   - Error path (not EJUSTRETURN): the carry flag (PSR_CF) is set in cpsr
 *     so the user runtime runs its cerror routine. The line storing the
 *     error value is not visible in this listing.
 *   - Success, _SYSCALL_RET_INT_T: x0 and x1 receive uu_rval[0] and
 *     uu_rval[1] directly.
 *   - Success, _SYSCALL_RET_UINT_T: the same two values, each cast through
 *     u_int first.
 *   - Success, offset, address, size and 64-bit types: x0 receives the two
 *     uu_rval words read together as one uint64_t.
 *   - There is a separate _SYSCALL_RET_NONE case, and an unrecognised return
 *     type panics.
 *   - EJUSTRETURN: registers are left untouched.
 */
604 arm_prepare_u64_syscall_return(const struct sysent
*callp
, arm_saved_state_t
*regs
, uthread_t uthread
, int error
)
606 assert(is_saved_state64(regs
));
608 arm_saved_state64_t
*ss64
= saved_state64(regs
);
610 if (error
== ERESTART
) {
611 add_saved_state_pc(regs
, -4);
612 } else if (error
!= EJUSTRETURN
) {
617 * Set the carry bit to execute cerror routine.
618 * ARM64_TODO: should we have a separate definition?
619 * The bits are the same.
621 ss64
->cpsr
|= PSR_CF
;
622 unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
623 } else { /* (not error) */
624 switch (callp
->sy_return_type
) {
625 case _SYSCALL_RET_INT_T
:
626 ss64
->x
[0] = uthread
->uu_rval
[0];
627 ss64
->x
[1] = uthread
->uu_rval
[1];
629 case _SYSCALL_RET_UINT_T
:
630 ss64
->x
[0] = (u_int
)uthread
->uu_rval
[0];
631 ss64
->x
[1] = (u_int
)uthread
->uu_rval
[1];
633 case _SYSCALL_RET_OFF_T
:
634 case _SYSCALL_RET_ADDR_T
:
635 case _SYSCALL_RET_SIZE_T
:
636 case _SYSCALL_RET_SSIZE_T
:
637 case _SYSCALL_RET_UINT64_T
:
638 ss64
->x
[0] = *((uint64_t *)(&uthread
->uu_rval
[0]));
641 case _SYSCALL_RET_NONE
:
644 panic("unix_syscall: unknown return type");
649 /* else (error == EJUSTRETURN) { nothing } */
/*
 * arm_trace_u64_unix_syscall: emit the kdebug DBG_FUNC_START event for a
 * syscall made from 64-bit state.
 *
 * indirect is true when x[ARM64_SYSCALL_CODE_REG_NUM] is zero. Two
 * KDBG_RELEASE calls are visible: one logs x1..x4 and the other logs x0..x3.
 * Presumably the first is used for indirect syscalls, where x0 holds the
 * syscall number rather than an argument, but the lines that select between
 * them are not visible in this listing - confirm against the upstream file.
 */
652 arm_trace_u64_unix_syscall(int code
, arm_saved_state64_t
*regs
)
654 bool indirect
= (regs
->x
[ARM64_SYSCALL_CODE_REG_NUM
] == 0);
656 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
657 regs
->x
[1], regs
->x
[2], regs
->x
[3], regs
->x
[4]);
659 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
660 regs
->x
[0], regs
->x
[1], regs
->x
[2], regs
->x
[3]);
/*
 * arm_trace_unix_syscall (arm64 flavour): dispatch on the flavour of the
 * saved state. 32-bit state is traced by arm_trace_u32_unix_syscall(),
 * anything else by arm_trace_u64_unix_syscall().
 */
665 arm_trace_unix_syscall(int code
, struct arm_saved_state
*state
)
667 if (is_saved_state32(state
)) {
668 arm_trace_u32_unix_syscall(code
, saved_state32(state
));
670 arm_trace_u64_unix_syscall(code
, saved_state64(state
));
/*
 * arm_clear_u64_syscall_error: clear the carry flag in the cpsr of a 64-bit
 * saved state. It reuses PSR_CF, the same constant the 32-bit helper uses.
 */
675 arm_clear_u64_syscall_error(arm_saved_state64_t
*regs
)
678 * ARM64_TODO: should we have a separate definition?
679 * The bits are the same.
681 regs
->cpsr
&= ~PSR_CF
;
/*
 * arm_clear_syscall_error (arm64 flavour): dispatch on the flavour of the
 * saved state. 32-bit state is handled by arm_clear_u32_syscall_error(),
 * anything else by arm_clear_u64_syscall_error().
 */
685 arm_clear_syscall_error(struct arm_saved_state
* state
)
687 if (is_saved_state32(state
)) {
688 arm_clear_u32_syscall_error(saved_state32(state
));
690 arm_clear_u64_syscall_error(saved_state64(state
));
695 #error Unknown architecture.