/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 */
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <arm/thread.h>
#include <arm/proc_reg.h>
#include <pexpert/pexpert.h>

#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>

#include <security/audit/audit.h>
#if CONFIG_DTRACE
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif /* CONFIG_DTRACE */
extern void
unix_syscall(struct arm_saved_state * regs, thread_t thread_act,
    struct uthread * uthread, struct proc * proc);
static int arm_get_syscall_args(uthread_t, struct arm_saved_state *, struct sysent *);
static int arm_get_u32_syscall_args(uthread_t, arm_saved_state32_t *, struct sysent *);
static void arm_prepare_u32_syscall_return(struct sysent *, arm_saved_state32_t *, uthread_t, int);
static void arm_prepare_syscall_return(struct sysent *, struct arm_saved_state *, uthread_t, int);
static int arm_get_syscall_number(struct arm_saved_state *);
static void arm_trace_unix_syscall(int, struct arm_saved_state *);
static void arm_clear_syscall_error(struct arm_saved_state *);
#define save_r10    r[10]
#define save_r11    r[11]
#define save_r12    r[12]
#define save_r13    r[13]
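/*
 * The save_rN macros above simply alias entries of the r[] array in the
 * 32-bit saved-state structure, so register state can be read and written
 * as regs->save_r0, regs->save_r12, and so on throughout this file.
 */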
__XNU_PRIVATE_EXTERN int do_count_syscalls = 1;
__XNU_PRIVATE_EXTERN int syscalls_log[SYS_MAXSYSCALL];
#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
    ((code) == SYS_kdebug_trace64) || \
    ((code) == SYS_kdebug_trace_string))
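/*
 * Note: the kdebug_trace* syscalls are excluded from the BSC entry/exit
 * tracepoints emitted below, presumably so that tools writing trace records
 * via kdebug_trace(2) do not also generate a syscall event for every record.
 */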
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs - pointer to Process Control Block
 */
__attribute__((noreturn))
void
unix_syscall(
    struct arm_saved_state * state,
    __unused thread_t thread_act,
    struct uthread * uthread,
    struct proc * proc)
{
    struct sysent  *callp;
    int             error;
    unsigned short  code;
    pid_t           pid;

#if defined(__arm__)
    assert(is_saved_state32(state));
#endif
    uthread_reset_proc_refcount(uthread);

    code = arm_get_syscall_number(state);

#define unix_syscall_kprintf(x...)	/* kprintf("unix_syscall: " x) */

#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
    if (kdebug_enable && !code_is_kdebug_trace(code)) {
        arm_trace_unix_syscall(code, state);
    }
#endif

    if ((uthread->uu_flag & UT_VFORK))
        proc = current_proc();

    callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
    /*
     * sy_narg is inaccurate on ARM if a 64 bit parameter is specified. Since user_addr_t
     * is currently a 32 bit type, this is really a long word count. See rdar://problem/6104668.
     */
    if (callp->sy_narg != 0) {
        if (arm_get_syscall_args(uthread, state, callp) != 0) {
            /* Too many arguments, or something failed */
            unix_syscall_kprintf("arm_get_syscall_args failed.\n");
            callp = &sysent[SYS_invalid];
        }
    }
    uthread->uu_flag |= UT_NOTCANCELPT;
    uthread->syscall_code = code;

    uthread->uu_rval[0] = 0;

    /*
     * r4 is volatile; if we set it to regs->save_r4 here the child
     * will have the parent's r4 after execve.
     */
    uthread->uu_rval[1] = 0;

    /*
     * ARM runtime will call cerror if the carry bit is set after a
     * system call, so clear it here for the common case of success.
     */
    arm_clear_syscall_error(state);
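    /*
     * The userspace half of this convention (a sketch of the libsyscall stub,
     * not the exact source): the wrapper traps, then branches to cerror when
     * the kernel left the carry flag set, roughly
     *
     *		mov	r12, #SYS_xxx	; syscall number
     *		svc	#0x80		; trap into the kernel
     *		bcc	1f		; carry clear: success, r0/r1 hold results
     *		bl	cerror		; carry set: r0 holds errno, cerror returns -1
     *	1:	bx	lr
     *
     * which is why the PSR_CF handling in this file must be exact.
     */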
    if (do_count_syscalls > 0) {
        syscalls_log[code]++;
    }

    pid = proc_pid(proc);
    uthread->uu_iocount = 0;
    uthread->uu_vpindex = 0;

    unix_syscall_kprintf("code %d (pid %d - %s, tid %lld)\n", code,
        pid, proc->p_comm, thread_tid(current_thread()));
    AUDIT_SYSCALL_ENTER(code, proc, uthread);
    error = (*(callp->sy_call))(proc, &uthread->uu_arg[0], &(uthread->uu_rval[0]));
    AUDIT_SYSCALL_EXIT(code, proc, uthread, error);

    unix_syscall_kprintf("code %d, error %d, results %x, %x (pid %d - %s, tid %lld)\n", code, error,
        uthread->uu_rval[0], uthread->uu_rval[1],
        pid, get_bsdtask_info(current_task()) ? proc->p_comm : "unknown", thread_tid(current_thread()));
    if (uthread->uu_iocount) {
        printf("system call returned with uu_iocount != 0");
    }
#if CONFIG_DTRACE
    uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
#if DEBUG || DEVELOPMENT
    kern_allocation_name_t
    prior __assert_only = thread_set_allocation_name(NULL);
    assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
#endif /* DEBUG || DEVELOPMENT */
    arm_prepare_syscall_return(callp, state, uthread, error);

    uthread->uu_flag &= ~UT_NOTCANCELPT;

    if (uthread->uu_lowpri_window) {
        /*
         * task is marked as a low priority I/O type
         * and the I/O we issued while in this system call
         * collided with normal I/O operations... we'll
         * delay in order to mitigate the impact of this
         * task on the normal operation of the system
         */
        throttle_lowpri_io(1);
    }
#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
    if (kdebug_enable && !code_is_kdebug_trace(code)) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
            error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);
    }
#endif

#if PROC_REF_DEBUG
    if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
        panic("system call returned with uu_proc_refcount != 0");
    }
#endif

    thread_exception_return();
}
void
unix_syscall_return(int error)
{
    thread_t        thread_act;
    struct uthread *uthread;
    struct proc    *proc;
    struct arm_saved_state *regs;
    unsigned short  code;
    struct sysent  *callp;

#define unix_syscall_return_kprintf(x...)	/* kprintf("unix_syscall_return: " x) */
    thread_act = current_thread();
    proc = current_proc();
    uthread = get_bsdthread_info(thread_act);

    regs = find_user_regs(thread_act);
    code = uthread->syscall_code;
    callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
#if CONFIG_DTRACE
    if (callp->sy_call == dtrace_systrace_syscall)
        dtrace_systrace_syscall_return(code, error, uthread->uu_rval);
#endif /* CONFIG_DTRACE */
#if DEBUG || DEVELOPMENT
    kern_allocation_name_t
    prior __assert_only = thread_set_allocation_name(NULL);
    assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
#endif /* DEBUG || DEVELOPMENT */
    AUDIT_SYSCALL_EXIT(code, proc, uthread, error);

    /*
     * Write the return values back out to the saved user register state.
     */
    arm_prepare_syscall_return(callp, regs, uthread, error);
    uthread->uu_flag &= ~UT_NOTCANCELPT;

    if (uthread->uu_lowpri_window) {
        /*
         * task is marked as a low priority I/O type
         * and the I/O we issued while in this system call
         * collided with normal I/O operations... we'll
         * delay in order to mitigate the impact of this
         * task on the normal operation of the system
         */
        throttle_lowpri_io(1);
    }
#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
    if (kdebug_enable && !code_is_kdebug_trace(code)) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
            error, uthread->uu_rval[0], uthread->uu_rval[1], proc->p_pid, 0);
    }
#endif

    thread_exception_return();
    /* NOTREACHED */
}
static void
arm_prepare_u32_syscall_return(struct sysent *callp, arm_saved_state32_t *regs, uthread_t uthread, int error)
{
    if (error == ERESTART) {
        regs->pc -= 4;
    } else if (error != EJUSTRETURN) {
        if (error) {
            regs->save_r0 = error;
            regs->save_r1 = 0;
            /* set the carry bit to execute cerror routine */
            regs->cpsr |= PSR_CF;
            unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
        } else { /* (not error) */
            switch (callp->sy_return_type) {
            case _SYSCALL_RET_INT_T:
            case _SYSCALL_RET_UINT_T:
            case _SYSCALL_RET_OFF_T:
            case _SYSCALL_RET_ADDR_T:
            case _SYSCALL_RET_SIZE_T:
            case _SYSCALL_RET_SSIZE_T:
            case _SYSCALL_RET_UINT64_T:
                regs->save_r0 = uthread->uu_rval[0];
                regs->save_r1 = uthread->uu_rval[1];
                break;
            case _SYSCALL_RET_NONE:
                break;
            default:
                panic("unix_syscall: unknown return type");
                break;
            }
        }
    }
    /* else (error == EJUSTRETURN) { nothing } */
}
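/*
 * Return protocol, as implemented above: ERESTART backs the PC up over the
 * trap instruction so the system call is re-issued; EJUSTRETURN leaves the
 * saved registers exactly as the handler set them (sigreturn and execve rely
 * on this); any other non-zero error lands in r0 with the carry flag set so
 * the userspace stub branches to cerror; success clears carry and delivers
 * up to two result words in r0/r1.
 */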
static void
arm_trace_u32_unix_syscall(int code, arm_saved_state32_t *regs)
{
    boolean_t indirect = (regs->save_r12 == 0);
    if (indirect)
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            regs->save_r1, regs->save_r2, regs->save_r3, regs->save_r4, 0);
    else
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            regs->save_r0, regs->save_r1, regs->save_r2, regs->save_r3, 0);
}
static void
arm_clear_u32_syscall_error(arm_saved_state32_t *regs)
{
    regs->cpsr &= ~PSR_CF;
}
#if defined(__arm__)

static int
arm_get_syscall_args(uthread_t uthread, struct arm_saved_state *state, struct sysent *callp)
{
    assert(is_saved_state32(state));
    return arm_get_u32_syscall_args(uthread, saved_state32(state), callp);
}
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/*
 * For armv7k, the alignment constraints of the ABI mean we don't know how the userspace
 * arguments are arranged without knowing the prototype of the syscall. So we use mungers
 * to marshal the userspace data into the uu_arg. This also means we need the same convention
 * as mach syscalls. That means we use r8 to pass arguments in the BSD case as well.
 */
static int
arm_get_u32_syscall_args(uthread_t uthread, arm_saved_state32_t *regs, struct sysent *callp)
{
    sy_munge_t *munger;

    /* This check is probably not very useful since these both come from build-time */
    if (callp->sy_arg_bytes > sizeof(uthread->uu_arg))
        return -1;

    /* get the munger and use it to marshal in the data from userspace */
    munger = callp->sy_arg_munge32;
    if (munger == NULL || (callp->sy_arg_bytes == 0))
        return 0;

    return munger(regs, uthread->uu_arg);
}
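/*
 * Illustrative sketch only (the real mungers are generated per syscall
 * prototype elsewhere in the tree): for a hypothetical prototype
 * (int fd, off_t offset), an armv7k-style munger has to account for the
 * AAPCS rule that a 64-bit argument occupies an aligned register pair, so
 * the packed uu_arg layout cannot be produced by a straight memcpy:
 *
 *	args[0] = regs->r[base + 0];			// fd
 *	args[1] = ((uint64_t)regs->r[base + 3] << 32)	// off_t reassembled
 *	        |  (uint64_t)regs->r[base + 2];		// from a register pair
 *
 * The register indices and the name `base` are purely for illustration.
 */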
#else
/*
 * For an AArch32 kernel, where we know that we have only AArch32 userland,
 * we do not do any munging (which is a little confusing, as it is a contrast
 * to the i386 kernel, where, like the x86_64 kernel, we always munge
 * arguments from a 32-bit userland out to 64-bit).
 */
static int
arm_get_u32_syscall_args(uthread_t uthread, arm_saved_state32_t *regs, struct sysent *callp)
{
    int regparams;
    int flavor = (regs->save_r12 == 0 ? 1 : 0);

    regparams = (7 - flavor); /* Indirect value consumes a register */

    assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));

    if (callp->sy_arg_bytes <= (sizeof(uint32_t) * regparams)) {
        /*
         * Seven arguments or less are passed in registers.
         */
        memcpy(&uthread->uu_arg[0], &regs->r[flavor], callp->sy_arg_bytes);
    } else if (callp->sy_arg_bytes <= sizeof(uthread->uu_arg)) {
        /*
         * In this case, we composite - take the first args from registers,
         * the remainder from the stack (offset by the 7 regs therein).
         */
        unix_syscall_kprintf("%s: spillover...\n", __FUNCTION__);
        memcpy(&uthread->uu_arg[0], &regs->r[flavor], regparams * sizeof(int));
        if (copyin((user_addr_t)regs->sp + 7 * sizeof(int), (int *)&uthread->uu_arg[0] + regparams,
            (callp->sy_arg_bytes - (sizeof(uint32_t) * regparams))) != 0) {
            return -1;
        }
    } else {
        return -1;
    }

    return 0;
}
#endif
static int
arm_get_syscall_number(struct arm_saved_state *regs)
{
    if (regs->save_r12 != 0) {
        return regs->save_r12;
    } else {
        return regs->save_r0;
    }
}
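/*
 * Syscall number convention on 32-bit ARM: the userspace stub passes the
 * number in r12 for an ordinary (direct) call.  r12 == 0 denotes the
 * indirect syscall(2) form, in which r0 carries the number and the real
 * arguments start at r1 -- which is why the argument copying above offsets
 * the register window by `flavor`.
 */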
static void
arm_prepare_syscall_return(struct sysent *callp, struct arm_saved_state *state, uthread_t uthread, int error)
{
    assert(is_saved_state32(state));
    arm_prepare_u32_syscall_return(callp, state, uthread, error);
}
static void
arm_trace_unix_syscall(int code, struct arm_saved_state *state)
{
    assert(is_saved_state32(state));
    arm_trace_u32_unix_syscall(code, saved_state32(state));
}
static void
arm_clear_syscall_error(struct arm_saved_state * state)
{
    assert(is_saved_state32(state));
    arm_clear_u32_syscall_error(saved_state32(state));
}
#elif defined(__arm64__)
static void arm_prepare_u64_syscall_return(struct sysent *, arm_saved_state64_t *, uthread_t, int);
static int arm_get_u64_syscall_args(uthread_t, arm_saved_state64_t *, struct sysent *);
static int
arm_get_syscall_args(uthread_t uthread, struct arm_saved_state *state, struct sysent *callp)
{
    if (is_saved_state32(state)) {
        return arm_get_u32_syscall_args(uthread, saved_state32(state), callp);
    } else {
        return arm_get_u64_syscall_args(uthread, saved_state64(state), callp);
    }
}
/*
 * 64-bit: all arguments in registers. We're willing to use x9, a temporary
 * register per the ABI, to pass an argument to the kernel for one case,
 * an indirect syscall with 8 arguments. No munging required, as all arguments
 * are in 64-bit wide registers already.
 */
static int
arm_get_u64_syscall_args(uthread_t uthread, arm_saved_state64_t *regs, struct sysent *callp)
{
    int indirect_offset, regparams;

    indirect_offset = (regs->x[ARM64_SYSCALL_CODE_REG_NUM] == 0) ? 1 : 0;
    regparams = 9 - indirect_offset;

    /*
     * Everything should fit in registers for now.
     */
    assert(callp->sy_narg <= 8);
    if (callp->sy_narg > regparams) {
        return -1;
    }

    memcpy(&uthread->uu_arg[0], &regs->x[indirect_offset], callp->sy_narg * sizeof(uint64_t));
    return 0;
}
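/*
 * Register layout this relies on: for a direct call the syscall number is in
 * x[ARM64_SYSCALL_CODE_REG_NUM] (x16) and arguments start at x0; for the
 * indirect syscall(2) form that register is 0, x0 holds the number, and
 * arguments start at x1, so eight user arguments occupy x1..x8.  Either way
 * the arguments are already 64-bit wide, hence the plain memcpy above.
 */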
/*
 * When the kernel is running AArch64, munge arguments from 32-bit
 * userland out to 64-bit.
 *
 * flavor == 1 indicates an indirect syscall.
 */
static int
arm_get_u32_syscall_args(uthread_t uthread, arm_saved_state32_t *regs, struct sysent *callp)
{
    int regparams;
#if CONFIG_REQUIRES_U32_MUNGING
    sy_munge_t *mungerp;
#else
#error U32 syscalls on ARM64 kernel requires munging
#endif
    int flavor = (regs->save_r12 == 0 ? 1 : 0);

    regparams = (7 - flavor); /* Indirect value consumes a register */

    assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));

    if (callp->sy_arg_bytes <= (sizeof(uint32_t) * regparams)) {
        /*
         * Seven arguments or less are passed in registers.
         */
        memcpy(&uthread->uu_arg[0], &regs->r[flavor], callp->sy_arg_bytes);
    } else if (callp->sy_arg_bytes <= sizeof(uthread->uu_arg)) {
        /*
         * In this case, we composite - take the first args from registers,
         * the remainder from the stack (offset by the 7 regs therein).
         */
        unix_syscall_kprintf("%s: spillover...\n", __FUNCTION__);
        memcpy(&uthread->uu_arg[0], &regs->r[flavor], regparams * sizeof(int));
        if (copyin((user_addr_t)regs->sp + 7 * sizeof(int), (int *)&uthread->uu_arg[0] + regparams,
            (callp->sy_arg_bytes - (sizeof(uint32_t) * regparams))) != 0) {
            return -1;
        }
    } else {
        return -1;
    }

#if CONFIG_REQUIRES_U32_MUNGING
    /* Munge here */
    mungerp = callp->sy_arg_munge32;
    if (mungerp != NULL) {
        (*mungerp)(&uthread->uu_arg[0]);
    }
#endif

    return 0;
}
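/*
 * A minimal sketch of what the munging step above does (the real mungers
 * live in the munge support code and are selected per prototype through
 * sy_arg_munge32): widen each packed 32-bit word into a 64-bit uu_arg slot,
 * in place, starting from the last argument so earlier words are not
 * clobbered.  For a two-word prototype that is roughly:
 *
 *	void munge_two_words_sketch(void *args) {
 *		uint64_t *out = (uint64_t *)args;
 *		const uint32_t *in = (const uint32_t *)args;
 *		out[1] = in[1];    // widen the second word first
 *		out[0] = in[0];    // then the first; expansion is done in place
 *	}
 *
 * The function name is hypothetical, for illustration only.
 */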
static int
arm_get_syscall_number(struct arm_saved_state *state)
{
    if (is_saved_state32(state)) {
        if (saved_state32(state)->save_r12 != 0) {
            return saved_state32(state)->save_r12;
        } else {
            return saved_state32(state)->save_r0;
        }
    } else {
        if (saved_state64(state)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
            return saved_state64(state)->x[ARM64_SYSCALL_CODE_REG_NUM];
        } else {
            return saved_state64(state)->x[0];
        }
    }
}
static void
arm_prepare_syscall_return(struct sysent *callp, struct arm_saved_state *state, uthread_t uthread, int error)
{
    if (is_saved_state32(state)) {
        arm_prepare_u32_syscall_return(callp, saved_state32(state), uthread, error);
    } else {
        arm_prepare_u64_syscall_return(callp, saved_state64(state), uthread, error);
    }
}
static void
arm_prepare_u64_syscall_return(struct sysent *callp, arm_saved_state64_t *regs, uthread_t uthread, int error)
{
    if (error == ERESTART) {
        regs->pc -= 4;
    } else if (error != EJUSTRETURN) {
        if (error) {
            regs->x[0] = error;
            regs->x[1] = 0;
            /*
             * Set the carry bit to execute cerror routine.
             * ARM64_TODO: should we have a separate definition?
             * The bits are the same.
             */
            regs->cpsr |= PSR_CF;
            unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
        } else { /* (not error) */
            switch (callp->sy_return_type) {
            case _SYSCALL_RET_INT_T:
                regs->x[0] = uthread->uu_rval[0];
                regs->x[1] = uthread->uu_rval[1];
                break;
            case _SYSCALL_RET_UINT_T:
                regs->x[0] = (u_int)uthread->uu_rval[0];
                regs->x[1] = (u_int)uthread->uu_rval[1];
                break;
            case _SYSCALL_RET_OFF_T:
            case _SYSCALL_RET_ADDR_T:
            case _SYSCALL_RET_SIZE_T:
            case _SYSCALL_RET_SSIZE_T:
            case _SYSCALL_RET_UINT64_T:
                regs->x[0] = *((uint64_t *)(&uthread->uu_rval[0]));
                regs->x[1] = 0;
                break;
            case _SYSCALL_RET_NONE:
                break;
            default:
                panic("unix_syscall: unknown return type");
                break;
            }
        }
    }
    /* else (error == EJUSTRETURN) { nothing } */
}
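/*
 * uu_rval is a pair of 32-bit words, so a 64-bit result (an off_t from
 * lseek(2) or an address from mmap(2), for instance) is produced by the
 * handler across uu_rval[0]/uu_rval[1] and reassembled above by reading the
 * pair back as a single uint64_t for x0, with x1 cleared.
 */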
static void
arm_trace_u64_unix_syscall(int code, arm_saved_state64_t *regs)
{
    boolean_t indirect = (regs->x[ARM64_SYSCALL_CODE_REG_NUM] == 0);
    if (indirect)
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            regs->x[1], regs->x[2], regs->x[3], regs->x[4], 0);
    else
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
            regs->x[0], regs->x[1], regs->x[2], regs->x[3], 0);
}
static void
arm_trace_unix_syscall(int code, struct arm_saved_state *state)
{
    if (is_saved_state32(state)) {
        arm_trace_u32_unix_syscall(code, saved_state32(state));
    } else {
        arm_trace_u64_unix_syscall(code, saved_state64(state));
    }
}
static void
arm_clear_u64_syscall_error(arm_saved_state64_t *regs)
{
    /*
     * ARM64_TODO: should we have a separate definition?
     * The bits are the same.
     */
    regs->cpsr &= ~PSR_CF;
}
static void
arm_clear_syscall_error(struct arm_saved_state * state)
{
    if (is_saved_state32(state)) {
        arm_clear_u32_syscall_error(saved_state32(state));
    } else {
        arm_clear_u64_syscall_error(saved_state64(state));
    }
}

#else
#error Unknown architecture.
#endif