/*
 * apple/xnu — bsd/dev/i386/systemcalls.c
 * (blob f2398a723f13afc261108d65bf93c86246db58ca)
 */
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37 #include <mach/branch_predicates.h>
38
39 #include <sys/kernel.h>
40 #include <sys/vm.h>
41 #include <sys/proc_internal.h>
42 #include <sys/syscall.h>
43 #include <sys/systm.h>
44 #include <sys/user.h>
45 #include <sys/errno.h>
46 #include <sys/kdebug.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/kauth.h>
50 #include <sys/systm.h>
51
52 #include <security/audit/audit.h>
53
54 #include <i386/seg.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
57
58 #include <machine/pal_routines.h>
59
#if CONFIG_DTRACE
/* systrace provider entry points, supplied by the dtrace kext */
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

/*
 * TRUE for the kdebug_trace family of syscalls; used below to suppress
 * emitting kdebug entry/exit records for the tracing syscalls themselves.
 */
#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
				    ((code) == SYS_kdebug_trace64) || \
				    ((code) == SYS_kdebug_trace_string))
75
76 /*
77 * Function: unix_syscall
78 *
79 * Inputs: regs - pointer to i386 save area
80 *
81 * Outputs: none
82 */
__attribute__((noreturn))
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;
	pid_t			pid;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if	DEBUG
	/* debug hook: syscall number 0x800 bails out immediately */
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

#if PROC_REF_DEBUG
	uthread_reset_proc_refcount(uthread);
#endif

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
		code, syscallnames[code >= nsysent ? SYS_invalid : code], (uint32_t)regs->eip);
	/* 32-bit args live on the user stack, just above the return address */
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	/* clear carry: assume success until an error path sets it */
	regs->efl &= ~(EFL_CF);

	/* out-of-range codes are clamped to the SYS_invalid entry */
	callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];

	if (__improbable(callp == sysent)) {
		/* indirect syscall (entry 0): real number is the first user arg */
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		/* pull the raw 32-bit argument words in from the user stack */
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			/* bad user stack: report the error and bail without dispatching */
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		/* entry trace with the first four arg words (skipped for kdebug_trace itself) */
		if (__probable(!code_is_kdebug_trace(code))) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		/* expand the packed 32-bit args into the layout sy_call expects */
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	/* mark the thread non-cancelable for the duration of the handler */
	uthread->uu_flag |= UT_NOTCANCELPT;
	/* remember the code for unix_syscall_return() and exit tracing */
	uthread->syscall_code = code;
	pid = proc_pid(p);

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */
		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	/* handler is done; the thread may be cancelled again */
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		/* successful execve: let the platform layer fix up the return context */
		pal_execve_return(thread);
	}

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
262
/*
 * Function:	unix_syscall64
 *
 * Inputs:	state - pointer to the 64-bit x86 save area
 *
 * Outputs:	none (returns to user mode via thread_exception_return)
 */
__attribute__((noreturn))
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;
	int			args_in_regs;
	boolean_t		args_start_at_rdi;
	int			error;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state64_t	*regs;
	pid_t			pid;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if	DEBUG
	/* debug hook: syscall number 0x2000800 bails out immediately */
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

#if PROC_REF_DEBUG
	uthread_reset_proc_refcount(uthread);
#endif

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= nsysent ? SYS_invalid : code], regs->isf.rip);
	/* out-of-range codes are clamped to the SYS_invalid entry */
	callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
		/* rdi held the number, so only 5 real args arrive in registers */
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		/* register args are contiguous in the save area starting at rdi (or rsi) */
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));


		/* entry trace with the first four args (skipped for kdebug_trace itself) */
		if (!code_is_kdebug_trace(code)) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			/* remaining args spill onto the user stack, above the return address */
			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				/* bad user stack: report the error and bail without dispatching */
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	/* mark the thread non-cancelable for the duration of the handler */
	uthread->uu_flag |= UT_NOTCANCELPT;
	/* remember the code for unix_syscall_return() and exit tracing */
	uthread->syscall_code = code;
	pid = proc_pid(p);

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... its 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

			/* marshal the result into rax/rdx according to the declared return type */
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				/* a 64-bit result spans both 32-bit rval words */
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;	/* clear carry on success */
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);

	/* handler is done; the thread may be cancelled again */
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(uthread))) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
463
464
/*
 * Function:	unix_syscall_return
 *
 * Inputs:	error - errno result to deliver to user mode
 *
 * Completes a Unix system call outside the normal unix_syscall[64]()
 * fall-through path: re-derives the saved user register state via
 * find_user_regs(), marshals error/uu_rval into the user registers
 * (using the syscall code stashed in uthread->syscall_code), emits the
 * exit audit/kdebug records, and returns to user mode.  Does not return.
 */
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned int	code;
	struct sysent	*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* user register state will be modified below */
	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		/* use the code recorded at dispatch time, not (possibly stale) rax */
		code = uthread->syscall_code;
		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */

				/* marshal the result into rax/rdx per the declared return type */
				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					/* a 64-bit result spans both 32-bit rval words */
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;	/* clear carry on success */
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t	*regs;

		regs = saved_state32(find_user_regs(thread));

		/* clear carry: assume success until an error path sets it */
		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/* back up the user pc so the syscall is reissued */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				/* 32-bit results are split across eax/edx (64-bit ABI pairing) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}


	/* handler is done; the thread may be cancelled again */
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (!code_is_kdebug_trace(code))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}