]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/systemcalls.c
xnu-4903.221.2.tar.gz
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37
38 #include <sys/kernel.h>
39 #include <sys/vm.h>
40 #include <sys/proc_internal.h>
41 #include <sys/syscall.h>
42 #include <sys/systm.h>
43 #include <sys/user.h>
44 #include <sys/errno.h>
45 #include <sys/kdebug.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/kauth.h>
49 #include <sys/systm.h>
50
51 #include <security/audit/audit.h>
52
53 #include <i386/seg.h>
54 #include <i386/machine_routines.h>
55 #include <mach/i386/syscall_sw.h>
56
57 #include <machine/pal_routines.h>
58
59 #if CONFIG_DTRACE
60 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
61 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
62 #endif
63
64 extern void unix_syscall(x86_saved_state_t *);
65 extern void unix_syscall64(x86_saved_state_t *);
66 extern void *find_user_regs(thread_t);
67
68 /* dynamically generated at build time based on syscalls.master */
69 extern const char *syscallnames[];
70
/*
 * True when 'code' is one of the kdebug_trace family of system call
 * numbers.  The dispatchers in this file use it to avoid emitting
 * syscall trace events for those calls.  'code' may be evaluated up
 * to three times.
 */
#define code_is_kdebug_trace(code)			\
	((SYS_kdebug_trace == (code)) ||		\
	 (SYS_kdebug_trace64 == (code)) ||		\
	 (SYS_kdebug_trace_string == (code)))
74
75 /*
76 * Function: unix_syscall
77 *
78 * Inputs: regs - pointer to i386 save area
79 *
80 * Outputs: none
81 */
82 __attribute__((noreturn))
83 void
84 unix_syscall(x86_saved_state_t *state)
85 {
86 thread_t thread;
87 void *vt;
88 unsigned int code;
89 struct sysent *callp;
90
91 int error;
92 vm_offset_t params;
93 struct proc *p;
94 struct uthread *uthread;
95 x86_saved_state32_t *regs;
96 boolean_t is_vfork;
97 pid_t pid;
98
99 assert(is_saved_state32(state));
100 regs = saved_state32(state);
101 #if DEBUG
102 if (regs->eax == 0x800)
103 thread_exception_return();
104 #endif
105 thread = current_thread();
106 uthread = get_bsdthread_info(thread);
107
108 uthread_reset_proc_refcount(uthread);
109
110 /* Get the approriate proc; may be different from task's for vfork() */
111 is_vfork = uthread->uu_flag & UT_VFORK;
112 if (__improbable(is_vfork != 0))
113 p = current_proc();
114 else
115 p = (struct proc *)get_bsdtask_info(current_task());
116
117 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
118 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
119 code, syscallnames[code >= nsysent ? SYS_invalid : code], (uint32_t)regs->eip);
120 params = (vm_offset_t) (regs->uesp + sizeof (int));
121
122 regs->efl &= ~(EFL_CF);
123
124 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
125
126 if (__improbable(callp == sysent)) {
127 code = fuword(params);
128 params += sizeof(int);
129 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
130 }
131
132 vt = (void *)uthread->uu_arg;
133
134 if (callp->sy_arg_bytes != 0) {
135 #if CONFIG_REQUIRES_U32_MUNGING
136 sy_munge_t *mungerp;
137 #else
138 #error U32 syscalls on x86_64 kernel requires munging
139 #endif
140 uint32_t nargs;
141
142 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
143 nargs = callp->sy_arg_bytes;
144 error = copyin((user_addr_t) params, (char *) vt, nargs);
145 if (error) {
146 regs->eax = error;
147 regs->efl |= EFL_CF;
148 thread_exception_return();
149 /* NOTREACHED */
150 }
151
152 if (__probable(!code_is_kdebug_trace(code))) {
153 int *ip = (int *)vt;
154
155 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
156 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
157 *ip, *(ip+1), *(ip+2), *(ip+3), 0);
158 }
159
160 #if CONFIG_REQUIRES_U32_MUNGING
161 mungerp = callp->sy_arg_munge32;
162
163 if (mungerp != NULL)
164 (*mungerp)(vt);
165 #endif
166 } else
167 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
168 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
169 0, 0, 0, 0, 0);
170
171 /*
172 * Delayed binding of thread credential to process credential, if we
173 * are not running with an explicitly set thread credential.
174 */
175 kauth_cred_uthread_update(uthread, p);
176
177 uthread->uu_rval[0] = 0;
178 uthread->uu_rval[1] = 0;
179 uthread->uu_flag |= UT_NOTCANCELPT;
180 uthread->syscall_code = code;
181 pid = proc_pid(p);
182
183 #ifdef JOE_DEBUG
184 uthread->uu_iocount = 0;
185 uthread->uu_vpindex = 0;
186 #endif
187
188 AUDIT_SYSCALL_ENTER(code, p, uthread);
189 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
190 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
191
192 #ifdef JOE_DEBUG
193 if (uthread->uu_iocount)
194 printf("system call returned with uu_iocount != 0\n");
195 #endif
196 #if CONFIG_DTRACE
197 uthread->t_dtrace_errno = error;
198 #endif /* CONFIG_DTRACE */
199
200 if (__improbable(error == ERESTART)) {
201 /*
202 * Move the user's pc back to repeat the syscall:
203 * 5 bytes for a sysenter, or 2 for an int 8x.
204 * The SYSENTER_TF_CS covers single-stepping over a sysenter
205 * - see debug trap handler in idt.s/idt64.s
206 */
207
208 pal_syscall_restart(thread, state);
209 }
210 else if (error != EJUSTRETURN) {
211 if (__improbable(error)) {
212 regs->eax = error;
213 regs->efl |= EFL_CF; /* carry bit */
214 } else { /* (not error) */
215 /*
216 * We split retval across two registers, in case the
217 * syscall had a 64-bit return value, in which case
218 * eax/edx matches the function call ABI.
219 */
220 regs->eax = uthread->uu_rval[0];
221 regs->edx = uthread->uu_rval[1];
222 }
223 }
224
225 DEBUG_KPRINT_SYSCALL_UNIX(
226 "unix_syscall: error=%d retval=(%u,%u)\n",
227 error, regs->eax, regs->edx);
228
229 uthread->uu_flag &= ~UT_NOTCANCELPT;
230
231 #if DEBUG || DEVELOPMENT
232 kern_allocation_name_t
233 prior __assert_only = thread_set_allocation_name(NULL);
234 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
235 #endif /* DEBUG || DEVELOPMENT */
236
237 if (__improbable(uthread->uu_lowpri_window)) {
238 /*
239 * task is marked as a low priority I/O type
240 * and the I/O we issued while in this system call
241 * collided with normal I/O operations... we'll
242 * delay in order to mitigate the impact of this
243 * task on the normal operation of the system
244 */
245 throttle_lowpri_io(1);
246 }
247 if (__probable(!code_is_kdebug_trace(code)))
248 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
249 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
250 error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);
251
252 if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
253 pal_execve_return(thread);
254 }
255
256 #if PROC_REF_DEBUG
257 if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
258 panic("system call returned with uu_proc_refcount != 0");
259 }
260 #endif
261
262 thread_exception_return();
263 /* NOTREACHED */
264 }
265
266 __attribute__((noreturn))
267 void
268 unix_syscall64(x86_saved_state_t *state)
269 {
270 thread_t thread;
271 void *vt;
272 unsigned int code;
273 struct sysent *callp;
274 int args_in_regs;
275 boolean_t args_start_at_rdi;
276 int error;
277 struct proc *p;
278 struct uthread *uthread;
279 x86_saved_state64_t *regs;
280 pid_t pid;
281
282 assert(is_saved_state64(state));
283 regs = saved_state64(state);
284 #if DEBUG
285 if (regs->rax == 0x2000800)
286 thread_exception_return();
287 #endif
288 thread = current_thread();
289 uthread = get_bsdthread_info(thread);
290
291 uthread_reset_proc_refcount(uthread);
292
293 /* Get the approriate proc; may be different from task's for vfork() */
294 if (__probable(!(uthread->uu_flag & UT_VFORK)))
295 p = (struct proc *)get_bsdtask_info(current_task());
296 else
297 p = current_proc();
298
299 /* Verify that we are not being called from a task without a proc */
300 if (__improbable(p == NULL)) {
301 regs->rax = EPERM;
302 regs->isf.rflags |= EFL_CF;
303 task_terminate_internal(current_task());
304 thread_exception_return();
305 /* NOTREACHED */
306 }
307
308 code = regs->rax & SYSCALL_NUMBER_MASK;
309 DEBUG_KPRINT_SYSCALL_UNIX(
310 "unix_syscall64: code=%d(%s) rip=%llx\n",
311 code, syscallnames[code >= nsysent ? SYS_invalid : code], regs->isf.rip);
312 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
313
314 vt = (void *)uthread->uu_arg;
315
316 if (__improbable(callp == sysent)) {
317 /*
318 * indirect system call... system call number
319 * passed as 'arg0'
320 */
321 code = regs->rdi;
322 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
323 args_start_at_rdi = FALSE;
324 args_in_regs = 5;
325 } else {
326 args_start_at_rdi = TRUE;
327 args_in_regs = 6;
328 }
329
330 if (callp->sy_narg != 0) {
331 assert(callp->sy_narg <= 8); /* size of uu_arg */
332
333 args_in_regs = MIN(args_in_regs, callp->sy_narg);
334 memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));
335
336
337 if (!code_is_kdebug_trace(code)) {
338 uint64_t *ip = (uint64_t *)vt;
339
340 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
341 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
342 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
343 }
344
345 if (__improbable(callp->sy_narg > args_in_regs)) {
346 int copyin_count;
347
348 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);
349
350 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
351 if (error) {
352 regs->rax = error;
353 regs->isf.rflags |= EFL_CF;
354 thread_exception_return();
355 /* NOTREACHED */
356 }
357 }
358 } else
359 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
360 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
361 0, 0, 0, 0, 0);
362
363 /*
364 * Delayed binding of thread credential to process credential, if we
365 * are not running with an explicitly set thread credential.
366 */
367 kauth_cred_uthread_update(uthread, p);
368
369 uthread->uu_rval[0] = 0;
370 uthread->uu_rval[1] = 0;
371 uthread->uu_flag |= UT_NOTCANCELPT;
372 uthread->syscall_code = code;
373 pid = proc_pid(p);
374
375 #ifdef JOE_DEBUG
376 uthread->uu_iocount = 0;
377 uthread->uu_vpindex = 0;
378 #endif
379
380 AUDIT_SYSCALL_ENTER(code, p, uthread);
381 error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
382 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
383
384 #ifdef JOE_DEBUG
385 if (uthread->uu_iocount)
386 printf("system call returned with uu_iocount != 0\n");
387 #endif
388
389 #if CONFIG_DTRACE
390 uthread->t_dtrace_errno = error;
391 #endif /* CONFIG_DTRACE */
392
393 if (__improbable(error == ERESTART)) {
394 /*
395 * all system calls come through via the syscall instruction
396 * in 64 bit mode... its 2 bytes in length
397 * move the user's pc back to repeat the syscall:
398 */
399 pal_syscall_restart( thread, state );
400 }
401 else if (error != EJUSTRETURN) {
402 if (__improbable(error)) {
403 regs->rax = error;
404 regs->isf.rflags |= EFL_CF; /* carry bit */
405 } else { /* (not error) */
406
407 switch (callp->sy_return_type) {
408 case _SYSCALL_RET_INT_T:
409 regs->rax = uthread->uu_rval[0];
410 regs->rdx = uthread->uu_rval[1];
411 break;
412 case _SYSCALL_RET_UINT_T:
413 regs->rax = ((u_int)uthread->uu_rval[0]);
414 regs->rdx = ((u_int)uthread->uu_rval[1]);
415 break;
416 case _SYSCALL_RET_OFF_T:
417 case _SYSCALL_RET_ADDR_T:
418 case _SYSCALL_RET_SIZE_T:
419 case _SYSCALL_RET_SSIZE_T:
420 case _SYSCALL_RET_UINT64_T:
421 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
422 regs->rdx = 0;
423 break;
424 case _SYSCALL_RET_NONE:
425 break;
426 default:
427 panic("unix_syscall: unknown return type");
428 break;
429 }
430 regs->isf.rflags &= ~EFL_CF;
431 }
432 }
433
434 DEBUG_KPRINT_SYSCALL_UNIX(
435 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
436 error, regs->rax, regs->rdx);
437
438 uthread->uu_flag &= ~UT_NOTCANCELPT;
439
440 #if DEBUG || DEVELOPMENT
441 kern_allocation_name_t
442 prior __assert_only = thread_set_allocation_name(NULL);
443 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
444 #endif /* DEBUG || DEVELOPMENT */
445
446 if (__improbable(uthread->uu_lowpri_window)) {
447 /*
448 * task is marked as a low priority I/O type
449 * and the I/O we issued while in this system call
450 * collided with normal I/O operations... we'll
451 * delay in order to mitigate the impact of this
452 * task on the normal operation of the system
453 */
454 throttle_lowpri_io(1);
455 }
456 if (__probable(!code_is_kdebug_trace(code)))
457 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
458 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
459 error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0);
460
461 #if PROC_REF_DEBUG
462 if (__improbable(uthread_get_proc_refcount(uthread))) {
463 panic("system call returned with uu_proc_refcount != 0");
464 }
465 #endif
466
467 thread_exception_return();
468 /* NOTREACHED */
469 }
470
471
472 void
473 unix_syscall_return(int error)
474 {
475 thread_t thread;
476 struct uthread *uthread;
477 struct proc *p;
478 unsigned int code;
479 struct sysent *callp;
480
481 thread = current_thread();
482 uthread = get_bsdthread_info(thread);
483
484 pal_register_cache_state(thread, DIRTY);
485
486 p = current_proc();
487
488 if (proc_is64bit(p)) {
489 x86_saved_state64_t *regs;
490
491 regs = saved_state64(find_user_regs(thread));
492
493 code = uthread->syscall_code;
494 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
495
496 #if CONFIG_DTRACE
497 if (callp->sy_call == dtrace_systrace_syscall)
498 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
499 #endif /* CONFIG_DTRACE */
500 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
501
502 if (error == ERESTART) {
503 /*
504 * repeat the syscall
505 */
506 pal_syscall_restart( thread, find_user_regs(thread) );
507 }
508 else if (error != EJUSTRETURN) {
509 if (error) {
510 regs->rax = error;
511 regs->isf.rflags |= EFL_CF; /* carry bit */
512 } else { /* (not error) */
513
514 switch (callp->sy_return_type) {
515 case _SYSCALL_RET_INT_T:
516 regs->rax = uthread->uu_rval[0];
517 regs->rdx = uthread->uu_rval[1];
518 break;
519 case _SYSCALL_RET_UINT_T:
520 regs->rax = ((u_int)uthread->uu_rval[0]);
521 regs->rdx = ((u_int)uthread->uu_rval[1]);
522 break;
523 case _SYSCALL_RET_OFF_T:
524 case _SYSCALL_RET_ADDR_T:
525 case _SYSCALL_RET_SIZE_T:
526 case _SYSCALL_RET_SSIZE_T:
527 case _SYSCALL_RET_UINT64_T:
528 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
529 regs->rdx = 0;
530 break;
531 case _SYSCALL_RET_NONE:
532 break;
533 default:
534 panic("unix_syscall: unknown return type");
535 break;
536 }
537 regs->isf.rflags &= ~EFL_CF;
538 }
539 }
540 DEBUG_KPRINT_SYSCALL_UNIX(
541 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
542 error, regs->rax, regs->rdx);
543 } else {
544 x86_saved_state32_t *regs;
545
546 regs = saved_state32(find_user_regs(thread));
547
548 regs->efl &= ~(EFL_CF);
549
550 code = uthread->syscall_code;
551 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
552
553 #if CONFIG_DTRACE
554 if (callp->sy_call == dtrace_systrace_syscall)
555 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
556 #endif /* CONFIG_DTRACE */
557 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
558
559 if (error == ERESTART) {
560 pal_syscall_restart( thread, find_user_regs(thread) );
561 }
562 else if (error != EJUSTRETURN) {
563 if (error) {
564 regs->eax = error;
565 regs->efl |= EFL_CF; /* carry bit */
566 } else { /* (not error) */
567 regs->eax = uthread->uu_rval[0];
568 regs->edx = uthread->uu_rval[1];
569 }
570 }
571 DEBUG_KPRINT_SYSCALL_UNIX(
572 "unix_syscall_return: error=%d retval=(%u,%u)\n",
573 error, regs->eax, regs->edx);
574 }
575
576
577 uthread->uu_flag &= ~UT_NOTCANCELPT;
578
579 #if DEBUG || DEVELOPMENT
580 kern_allocation_name_t
581 prior __assert_only = thread_set_allocation_name(NULL);
582 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
583 #endif /* DEBUG || DEVELOPMENT */
584
585 if (uthread->uu_lowpri_window) {
586 /*
587 * task is marked as a low priority I/O type
588 * and the I/O we issued while in this system call
589 * collided with normal I/O operations... we'll
590 * delay in order to mitigate the impact of this
591 * task on the normal operation of the system
592 */
593 throttle_lowpri_io(1);
594 }
595 if (!code_is_kdebug_trace(code))
596 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
597 BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
598 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
599
600 thread_exception_return();
601 /* NOTREACHED */
602 }