]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/systemcalls.c
xnu-4570.20.62.tar.gz
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37 #include <mach/branch_predicates.h>
38
39 #include <sys/kernel.h>
40 #include <sys/vm.h>
41 #include <sys/proc_internal.h>
42 #include <sys/syscall.h>
43 #include <sys/systm.h>
44 #include <sys/user.h>
45 #include <sys/errno.h>
46 #include <sys/kdebug.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/kauth.h>
50 #include <sys/systm.h>
51
52 #include <security/audit/audit.h>
53
54 #include <i386/seg.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
57
58 #include <machine/pal_routines.h>
59
60 #if CONFIG_DTRACE
61 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
62 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
63 #endif
64
65 extern void unix_syscall(x86_saved_state_t *);
66 extern void unix_syscall64(x86_saved_state_t *);
67 extern void *find_user_regs(thread_t);
68
69 /* dynamically generated at build time based on syscalls.master */
70 extern const char *syscallnames[];
71
72 #define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
73 ((code) == SYS_kdebug_trace64) || \
74 ((code) == SYS_kdebug_trace_string))
75
76 /*
77 * Function: unix_syscall
78 *
79 * Inputs: regs - pointer to i386 save area
80 *
81 * Outputs: none
82 */
83 __attribute__((noreturn))
84 void
85 unix_syscall(x86_saved_state_t *state)
86 {
87 thread_t thread;
88 void *vt;
89 unsigned int code;
90 struct sysent *callp;
91
92 int error;
93 vm_offset_t params;
94 struct proc *p;
95 struct uthread *uthread;
96 x86_saved_state32_t *regs;
97 boolean_t is_vfork;
98 pid_t pid;
99
100 assert(is_saved_state32(state));
101 regs = saved_state32(state);
102 #if DEBUG
/* Debug-only early out: syscall number 0x800 returns to user space untouched. */
103 if (regs->eax == 0x800)
104 thread_exception_return();
105 #endif
106 thread = current_thread();
107 uthread = get_bsdthread_info(thread);
108
109 uthread_reset_proc_refcount(uthread);
110
111 /* Get the appropriate proc; may be different from task's for vfork() */
112 is_vfork = uthread->uu_flag & UT_VFORK;
113 if (__improbable(is_vfork != 0))
114 p = current_proc();
115 else
116 p = (struct proc *)get_bsdtask_info(current_task());
117
/* Syscall number arrives in eax; out-of-range codes dispatch to SYS_invalid. */
118 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
119 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
120 code, syscallnames[code >= nsysent ? SYS_invalid : code], (uint32_t)regs->eip);
/* i386 ABI: arguments live on the user stack, just above the return address. */
121 params = (vm_offset_t) (regs->uesp + sizeof (int));
122
/* Pre-clear the carry flag; it is set again below only on error. */
123 regs->efl &= ~(EFL_CF);
124
125 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
126
/*
 * callp == sysent means code 0: the indirect syscall. The real syscall
 * number is the first word on the user stack; shift params past it.
 */
127 if (__improbable(callp == sysent)) {
128 code = fuword(params);
129 params += sizeof(int);
130 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
131 }
132
/* Arguments are staged into the uthread's scratch area before dispatch. */
133 vt = (void *)uthread->uu_arg;
134
135 if (callp->sy_arg_bytes != 0) {
136 #if CONFIG_REQUIRES_U32_MUNGING
137 sy_munge_t *mungerp;
138 #else
139 #error U32 syscalls on x86_64 kernel requires munging
140 #endif
141 uint32_t nargs;
142
143 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
144 nargs = callp->sy_arg_bytes;
/* Copy the raw 32-bit arguments in from the user stack. */
145 error = copyin((user_addr_t) params, (char *) vt, nargs);
146 if (error) {
/* Failed copyin: report the error via eax + carry flag and bail out. */
147 regs->eax = error;
148 regs->efl |= EFL_CF;
149 thread_exception_return();
150 /* NOTREACHED */
151 }
152
/* Emit the syscall-entry tracepoint, except for the kdebug trace
 * syscalls themselves (avoids tracing the tracing facility). */
153 if (__probable(!code_is_kdebug_trace(code))) {
154 int *ip = (int *)vt;
155
156 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
157 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
158 *ip, *(ip+1), *(ip+2), *(ip+3), 0);
159 }
160
161 #if CONFIG_REQUIRES_U32_MUNGING
/* Munge the packed 32-bit user arguments into the 64-bit kernel layout. */
162 mungerp = callp->sy_arg_munge32;
163
164 if (mungerp != NULL)
165 (*mungerp)(vt);
166 #endif
167 } else
168 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
169 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
170 0, 0, 0, 0, 0);
171
172 /*
173 * Delayed binding of thread credential to process credential, if we
174 * are not running with an explicitly set thread credential.
175 */
176 kauth_cred_uthread_update(uthread, p);
177
178 uthread->uu_rval[0] = 0;
179 uthread->uu_rval[1] = 0;
/* Block pthread cancellation for the duration of the syscall. */
180 uthread->uu_flag |= UT_NOTCANCELPT;
181 uthread->syscall_code = code;
/* Capture the pid now; used for the exit tracepoint below. */
182 pid = proc_pid(p);
183
184 #ifdef JOE_DEBUG
185 uthread->uu_iocount = 0;
186 uthread->uu_vpindex = 0;
187 #endif
188
/* Dispatch to the syscall implementation; results land in uu_rval[]. */
189 AUDIT_SYSCALL_ENTER(code, p, uthread);
190 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
191 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
192
193 #ifdef JOE_DEBUG
194 if (uthread->uu_iocount)
195 printf("system call returned with uu_iocount != 0\n");
196 #endif
197 #if CONFIG_DTRACE
198 uthread->t_dtrace_errno = error;
199 #endif /* CONFIG_DTRACE */
200
201 if (__improbable(error == ERESTART)) {
202 /*
203 * Move the user's pc back to repeat the syscall:
204 * 5 bytes for a sysenter, or 2 for an int 8x.
205 * The SYSENTER_TF_CS covers single-stepping over a sysenter
206 * - see debug trap handler in idt.s/idt64.s
207 */
208
209 pal_syscall_restart(thread, state);
210 }
/* EJUSTRETURN: the syscall set up the registers itself; leave them alone. */
211 else if (error != EJUSTRETURN) {
212 if (__improbable(error)) {
213 regs->eax = error;
214 regs->efl |= EFL_CF; /* carry bit */
215 } else { /* (not error) */
216 /*
217 * We split retval across two registers, in case the
218 * syscall had a 64-bit return value, in which case
219 * eax/edx matches the function call ABI.
220 */
221 regs->eax = uthread->uu_rval[0];
222 regs->edx = uthread->uu_rval[1];
223 }
224 }
225
226 DEBUG_KPRINT_SYSCALL_UNIX(
227 "unix_syscall: error=%d retval=(%u,%u)\n",
228 error, regs->eax, regs->edx);
229
/* Syscall is done; re-enable pthread cancellation points. */
230 uthread->uu_flag &= ~UT_NOTCANCELPT;
231
232 #if DEBUG || DEVELOPMENT
/* Catch callees that set an allocation name and forgot to clear it. */
233 kern_allocation_name_t
234 prior __assert_only = thread_set_allocation_name(NULL);
235 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
236 #endif /* DEBUG || DEVELOPMENT */
237
238 if (__improbable(uthread->uu_lowpri_window)) {
239 /*
240 * task is marked as a low priority I/O type
241 * and the I/O we issued while in this system call
242 * collided with normal I/O operations... we'll
243 * delay in order to mitigate the impact of this
244 * task on the normal operation of the system
245 */
246 throttle_lowpri_io(1);
247 }
248 if (__probable(!code_is_kdebug_trace(code)))
249 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
250 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
251 error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);
252
/* Successful execve (not under vfork): let the platform layer fix up
 * machine state before returning to the new image. */
253 if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
254 pal_execve_return(thread);
255 }
256
257 #if PROC_REF_DEBUG
258 if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
259 panic("system call returned with uu_proc_refcount != 0");
260 }
261 #endif
262
263 thread_exception_return();
264 /* NOTREACHED */
265 }
266
/*
 * Function: unix_syscall64
 *
 * 64-bit user syscall entry: arguments arrive in registers (rdi..r9),
 * with any overflow spilled onto the user stack. Does not return to
 * the caller; exits via thread_exception_return().
 */
267 __attribute__((noreturn))
268 void
269 unix_syscall64(x86_saved_state_t *state)
270 {
271 thread_t thread;
272 void *vt;
273 unsigned int code;
274 struct sysent *callp;
275 int args_in_regs;
276 boolean_t args_start_at_rdi;
277 int error;
278 struct proc *p;
279 struct uthread *uthread;
280 x86_saved_state64_t *regs;
281 pid_t pid;
282
283 assert(is_saved_state64(state));
284 regs = saved_state64(state);
285 #if DEBUG
/* Debug-only early out: syscall number 0x2000800 returns untouched. */
286 if (regs->rax == 0x2000800)
287 thread_exception_return();
288 #endif
289 thread = current_thread();
290 uthread = get_bsdthread_info(thread);
291
292 uthread_reset_proc_refcount(uthread);
293
294 /* Get the appropriate proc; may be different from task's for vfork() */
295 if (__probable(!(uthread->uu_flag & UT_VFORK)))
296 p = (struct proc *)get_bsdtask_info(current_task());
297 else
298 p = current_proc();
299
300 /* Verify that we are not being called from a task without a proc */
301 if (__improbable(p == NULL)) {
302 regs->rax = EPERM;
303 regs->isf.rflags |= EFL_CF;
304 task_terminate_internal(current_task());
305 thread_exception_return();
306 /* NOTREACHED */
307 }
308
/* Syscall number arrives in rax; out-of-range codes dispatch to SYS_invalid. */
309 code = regs->rax & SYSCALL_NUMBER_MASK;
310 DEBUG_KPRINT_SYSCALL_UNIX(
311 "unix_syscall64: code=%d(%s) rip=%llx\n",
312 code, syscallnames[code >= nsysent ? SYS_invalid : code], regs->isf.rip);
313 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
314
/* Arguments are staged into the uthread's scratch area before dispatch. */
315 vt = (void *)uthread->uu_arg;
316
317 if (__improbable(callp == sysent)) {
318 /*
319 * indirect system call... system call number
320 * passed as 'arg0'
321 */
322 code = regs->rdi;
323 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
/* rdi held the syscall number, so only 5 argument registers remain
 * starting at rsi. */
324 args_start_at_rdi = FALSE;
325 args_in_regs = 5;
326 } else {
/* Direct call: all 6 argument registers (rdi..r9) carry arguments. */
327 args_start_at_rdi = TRUE;
328 args_in_regs = 6;
329 }
330
331 if (callp->sy_narg != 0) {
332 assert(callp->sy_narg <= 8); /* size of uu_arg */
333
/* Registers are laid out contiguously in the saved state, so the
 * register-resident arguments can be bulk-copied. */
334 args_in_regs = MIN(args_in_regs, callp->sy_narg);
335 memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));
336
337
/* Syscall-entry tracepoint, except for the kdebug trace syscalls
 * themselves (avoids tracing the tracing facility). */
338 if (!code_is_kdebug_trace(code)) {
339 uint64_t *ip = (uint64_t *)vt;
340
341 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
342 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
343 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
344 }
345
/* Arguments beyond the register set are spilled on the user stack,
 * just above the return address; copy them in. */
346 if (__improbable(callp->sy_narg > args_in_regs)) {
347 int copyin_count;
348
349 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);
350
351 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
352 if (error) {
/* Failed copyin: report the error via rax + carry flag and bail out. */
353 regs->rax = error;
354 regs->isf.rflags |= EFL_CF;
355 thread_exception_return();
356 /* NOTREACHED */
357 }
358 }
359 } else
360 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
361 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
362 0, 0, 0, 0, 0);
363
364 /*
365 * Delayed binding of thread credential to process credential, if we
366 * are not running with an explicitly set thread credential.
367 */
368 kauth_cred_uthread_update(uthread, p);
369
370 uthread->uu_rval[0] = 0;
371 uthread->uu_rval[1] = 0;
/* Block pthread cancellation for the duration of the syscall. */
372 uthread->uu_flag |= UT_NOTCANCELPT;
373 uthread->syscall_code = code;
/* Capture the pid now; used for the exit tracepoint below. */
374 pid = proc_pid(p);
375
376 #ifdef JOE_DEBUG
377 uthread->uu_iocount = 0;
378 uthread->uu_vpindex = 0;
379 #endif
380
/* Dispatch to the syscall implementation; results land in uu_rval[]. */
381 AUDIT_SYSCALL_ENTER(code, p, uthread);
382 error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
383 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
384
385 #ifdef JOE_DEBUG
386 if (uthread->uu_iocount)
387 printf("system call returned with uu_iocount != 0\n");
388 #endif
389
390 #if CONFIG_DTRACE
391 uthread->t_dtrace_errno = error;
392 #endif /* CONFIG_DTRACE */
393
394 if (__improbable(error == ERESTART)) {
395 /*
396 * all system calls come through via the syscall instruction
397 * in 64 bit mode... its 2 bytes in length
398 * move the user's pc back to repeat the syscall:
399 */
400 pal_syscall_restart( thread, state );
401 }
/* EJUSTRETURN: the syscall set up the registers itself; leave them alone. */
402 else if (error != EJUSTRETURN) {
403 if (__improbable(error)) {
404 regs->rax = error;
405 regs->isf.rflags |= EFL_CF; /* carry bit */
406 } else { /* (not error) */
407
/* The return-register contents depend on the declared return type
 * from syscalls.master: sign-extend, zero-extend, or treat
 * uu_rval[0..1] as a single 64-bit value. */
408 switch (callp->sy_return_type) {
409 case _SYSCALL_RET_INT_T:
410 regs->rax = uthread->uu_rval[0];
411 regs->rdx = uthread->uu_rval[1];
412 break;
413 case _SYSCALL_RET_UINT_T:
414 regs->rax = ((u_int)uthread->uu_rval[0]);
415 regs->rdx = ((u_int)uthread->uu_rval[1]);
416 break;
417 case _SYSCALL_RET_OFF_T:
418 case _SYSCALL_RET_ADDR_T:
419 case _SYSCALL_RET_SIZE_T:
420 case _SYSCALL_RET_SSIZE_T:
421 case _SYSCALL_RET_UINT64_T:
422 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
423 regs->rdx = 0;
424 break;
425 case _SYSCALL_RET_NONE:
426 break;
427 default:
428 panic("unix_syscall: unknown return type");
429 break;
430 }
431 regs->isf.rflags &= ~EFL_CF;
432 }
433 }
434
435 DEBUG_KPRINT_SYSCALL_UNIX(
436 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
437 error, regs->rax, regs->rdx);
438
/* Syscall is done; re-enable pthread cancellation points. */
439 uthread->uu_flag &= ~UT_NOTCANCELPT;
440
441 #if DEBUG || DEVELOPMENT
/* Catch callees that set an allocation name and forgot to clear it. */
442 kern_allocation_name_t
443 prior __assert_only = thread_set_allocation_name(NULL);
444 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
445 #endif /* DEBUG || DEVELOPMENT */
446
447 if (__improbable(uthread->uu_lowpri_window)) {
448 /*
449 * task is marked as a low priority I/O type
450 * and the I/O we issued while in this system call
451 * collided with normal I/O operations... we'll
452 * delay in order to mitigate the impact of this
453 * task on the normal operation of the system
454 */
455 throttle_lowpri_io(1);
456 }
457 if (__probable(!code_is_kdebug_trace(code)))
458 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
459 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
460 error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);
461
462 #if PROC_REF_DEBUG
463 if (__improbable(uthread_get_proc_refcount(uthread))) {
464 panic("system call returned with uu_proc_refcount != 0");
465 }
466 #endif
467
468 thread_exception_return();
469 /* NOTREACHED */
470 }
471
472
/*
 * Function: unix_syscall_return
 *
 * Complete a system call whose result is delivered out of the normal
 * unix_syscall/unix_syscall64 return path (NOTE(review): presumably after
 * a thread continuation — confirm against callers). Replays the same
 * error/EJUSTRETURN/ERESTART return-register protocol as the two entry
 * points above, for whichever saved-state format (32/64-bit) the process
 * uses, then returns to user space via thread_exception_return().
 */
473 void
474 unix_syscall_return(int error)
475 {
476 thread_t thread;
477 struct uthread *uthread;
478 struct proc *p;
479 unsigned int code;
480 struct sysent *callp;
481
482 thread = current_thread();
483 uthread = get_bsdthread_info(thread);
484
485 pal_register_cache_state(thread, DIRTY);
486
487 p = current_proc();
488
489 if (proc_is64bit(p)) {
490 x86_saved_state64_t *regs;
491
492 regs = saved_state64(find_user_regs(thread));
493
/* The syscall number was stashed in the uthread at entry. */
494 code = uthread->syscall_code;
495 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
496
497 #if CONFIG_DTRACE
498 if (callp->sy_call == dtrace_systrace_syscall)
499 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
500 #endif /* CONFIG_DTRACE */
501 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
502
503 if (error == ERESTART) {
504 /*
505 * repeat the syscall
506 */
507 pal_syscall_restart( thread, find_user_regs(thread) );
508 }
/* EJUSTRETURN: registers were set up by the syscall itself; leave them. */
509 else if (error != EJUSTRETURN) {
510 if (error) {
511 regs->rax = error;
512 regs->isf.rflags |= EFL_CF; /* carry bit */
513 } else { /* (not error) */
514
/* Mirror of the return-type switch in unix_syscall64. */
515 switch (callp->sy_return_type) {
516 case _SYSCALL_RET_INT_T:
517 regs->rax = uthread->uu_rval[0];
518 regs->rdx = uthread->uu_rval[1];
519 break;
520 case _SYSCALL_RET_UINT_T:
521 regs->rax = ((u_int)uthread->uu_rval[0]);
522 regs->rdx = ((u_int)uthread->uu_rval[1]);
523 break;
524 case _SYSCALL_RET_OFF_T:
525 case _SYSCALL_RET_ADDR_T:
526 case _SYSCALL_RET_SIZE_T:
527 case _SYSCALL_RET_SSIZE_T:
528 case _SYSCALL_RET_UINT64_T:
529 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
530 regs->rdx = 0;
531 break;
532 case _SYSCALL_RET_NONE:
533 break;
534 default:
535 panic("unix_syscall: unknown return type");
536 break;
537 }
538 regs->isf.rflags &= ~EFL_CF;
539 }
540 }
541 DEBUG_KPRINT_SYSCALL_UNIX(
542 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
543 error, regs->rax, regs->rdx);
544 } else {
545 x86_saved_state32_t *regs;
546
547 regs = saved_state32(find_user_regs(thread));
548
/* Pre-clear carry; it is set again below only on error. */
549 regs->efl &= ~(EFL_CF);
550
/* The syscall number was stashed in the uthread at entry. */
551 code = uthread->syscall_code;
552 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
553
554 #if CONFIG_DTRACE
555 if (callp->sy_call == dtrace_systrace_syscall)
556 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
557 #endif /* CONFIG_DTRACE */
558 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
559
560 if (error == ERESTART) {
561 pal_syscall_restart( thread, find_user_regs(thread) );
562 }
563 else if (error != EJUSTRETURN) {
564 if (error) {
565 regs->eax = error;
566 regs->efl |= EFL_CF; /* carry bit */
567 } else { /* (not error) */
/* Mirror of the eax/edx split in unix_syscall (covers 64-bit
 * return values in the 32-bit ABI). */
568 regs->eax = uthread->uu_rval[0];
569 regs->edx = uthread->uu_rval[1];
570 }
571 }
572 DEBUG_KPRINT_SYSCALL_UNIX(
573 "unix_syscall_return: error=%d retval=(%u,%u)\n",
574 error, regs->eax, regs->edx);
575 }
576
577
/* Syscall is done; re-enable pthread cancellation points. */
578 uthread->uu_flag &= ~UT_NOTCANCELPT;
579
580 #if DEBUG || DEVELOPMENT
/* Catch callees that set an allocation name and forgot to clear it. */
581 kern_allocation_name_t
582 prior __assert_only = thread_set_allocation_name(NULL);
583 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
584 #endif /* DEBUG || DEVELOPMENT */
585
586 if (uthread->uu_lowpri_window) {
587 /*
588 * task is marked as a low priority I/O type
589 * and the I/O we issued while in this system call
590 * collided with normal I/O operations... we'll
591 * delay in order to mitigate the impact of this
592 * task on the normal operation of the system
593 */
594 throttle_lowpri_io(1);
595 }
596 if (!code_is_kdebug_trace(code))
597 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
598 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
599 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
600
601 thread_exception_return();
602 /* NOTREACHED */
603 }