]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/systemcalls.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37
38 #include <sys/kernel.h>
39 #include <sys/vm.h>
40 #include <sys/proc_internal.h>
41 #include <sys/syscall.h>
42 #include <sys/systm.h>
43 #include <sys/user.h>
44 #include <sys/errno.h>
45 #include <sys/kdebug.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/kauth.h>
49 #include <sys/systm.h>
50 #include <sys/bitstring.h>
51
52 #include <security/audit/audit.h>
53
54 #include <i386/seg.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
57
58 #include <machine/pal_routines.h>
59
60 #if CONFIG_MACF
61 #include <security/mac_framework.h>
62 #endif
63
64 #if CONFIG_DTRACE
65 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
66 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
67 #endif
68
69 extern void unix_syscall(x86_saved_state_t *);
70 extern void unix_syscall64(x86_saved_state_t *);
71 extern void *find_user_regs(thread_t);
72
73 /* dynamically generated at build time based on syscalls.master */
74 extern const char *syscallnames[];
75
/*
 * Evaluates to non-zero when `code` is one of the kdebug trace system calls
 * (SYS_kdebug_trace, SYS_kdebug_trace64 or SYS_kdebug_trace_string).
 * The argument may be evaluated up to three times, so it must be free of
 * side effects.
 */
#define code_is_kdebug_trace(code)                      \
	(((code) == SYS_kdebug_trace) ||                \
	 ((code) == SYS_kdebug_trace64) ||              \
	 ((code) == SYS_kdebug_trace_string))
79
80 /*
81 * Function: unix_syscall
82 *
83 * Inputs: regs - pointer to i386 save area
84 *
85 * Outputs: none
86 */
87 __attribute__((noreturn))
88 void
89 unix_syscall(x86_saved_state_t *state)
90 {
91 thread_t thread;
92 void *vt;
93 unsigned int code, syscode;
94 const struct sysent *callp;
95
96 int error;
97 vm_offset_t params;
98 struct proc *p;
99 struct uthread *uthread;
100 x86_saved_state32_t *regs;
101 boolean_t is_vfork;
102 pid_t pid;
103
104 assert(is_saved_state32(state));
105 regs = saved_state32(state);
106 #if DEBUG
107 if (regs->eax == 0x800) {
108 thread_exception_return();
109 }
110 #endif
111 thread = current_thread();
112 uthread = get_bsdthread_info(thread);
113
114 uthread_reset_proc_refcount(uthread);
115
116 /* Get the approriate proc; may be different from task's for vfork() */
117 is_vfork = uthread->uu_flag & UT_VFORK;
118 if (__improbable(is_vfork != 0)) {
119 p = current_proc();
120 } else {
121 p = (struct proc *)get_bsdtask_info(current_task());
122 }
123
124 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
125 syscode = (code < nsysent) ? code : SYS_invalid;
126 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
127 code, syscallnames[syscode], (uint32_t)regs->eip);
128 params = (vm_offset_t) (regs->uesp + sizeof(int));
129
130 regs->efl &= ~(EFL_CF);
131
132 callp = &sysent[syscode];
133
134 if (__improbable(callp == sysent)) {
135 code = fuword(params);
136 params += sizeof(int);
137 syscode = (code < nsysent) ? code : SYS_invalid;
138 callp = &sysent[syscode];
139 }
140
141 vt = (void *)uthread->uu_arg;
142
143 if (callp->sy_arg_bytes != 0) {
144 #if CONFIG_REQUIRES_U32_MUNGING
145 sy_munge_t *mungerp;
146 #else
147 #error U32 syscalls on x86_64 kernel requires munging
148 #endif
149 uint32_t nargs;
150
151 assert((unsigned) callp->sy_arg_bytes <= sizeof(uthread->uu_arg));
152 nargs = callp->sy_arg_bytes;
153 error = copyin((user_addr_t) params, (char *) vt, nargs);
154 if (error) {
155 regs->eax = error;
156 regs->efl |= EFL_CF;
157 thread_exception_return();
158 /* NOTREACHED */
159 }
160
161 if (__probable(!code_is_kdebug_trace(code))) {
162 uint32_t *uip = vt;
163 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
164 uip[0], uip[1], uip[2], uip[3]);
165 }
166
167 #if CONFIG_REQUIRES_U32_MUNGING
168 mungerp = callp->sy_arg_munge32;
169
170 if (mungerp != NULL) {
171 (*mungerp)(vt);
172 }
173 #endif
174 } else {
175 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
176 }
177
178 /*
179 * Delayed binding of thread credential to process credential, if we
180 * are not running with an explicitly set thread credential.
181 */
182 kauth_cred_uthread_update(uthread, p);
183
184 uthread->uu_rval[0] = 0;
185 uthread->uu_rval[1] = 0;
186 uthread->uu_flag |= UT_NOTCANCELPT;
187 uthread->syscall_code = code;
188 pid = proc_pid(p);
189
190 #ifdef JOE_DEBUG
191 uthread->uu_iocount = 0;
192 uthread->uu_vpindex = 0;
193 #endif
194
195 #if CONFIG_MACF
196 if (__improbable(p->syscall_filter_mask != NULL && !bitstr_test(p->syscall_filter_mask, syscode))) {
197 error = mac_proc_check_syscall_unix(p, syscode);
198 if (error) {
199 goto skip_syscall;
200 }
201 }
202 #endif /* CONFIG_MACF */
203
204 AUDIT_SYSCALL_ENTER(code, p, uthread);
205 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
206 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
207
208 #if CONFIG_MACF
209 skip_syscall:
210 #endif /* CONFIG_MACF */
211
212 #ifdef JOE_DEBUG
213 if (uthread->uu_iocount) {
214 printf("system call returned with uu_iocount != 0\n");
215 }
216 #endif
217 #if CONFIG_DTRACE
218 uthread->t_dtrace_errno = error;
219 #endif /* CONFIG_DTRACE */
220
221 if (__improbable(error == ERESTART)) {
222 /*
223 * Move the user's pc back to repeat the syscall:
224 * 5 bytes for a sysenter, or 2 for an int 8x.
225 * The SYSENTER_TF_CS covers single-stepping over a sysenter
226 * - see debug trap handler in idt.s/idt64.s
227 */
228
229 pal_syscall_restart(thread, state);
230 } else if (error != EJUSTRETURN) {
231 if (__improbable(error)) {
232 regs->eax = error;
233 regs->efl |= EFL_CF; /* carry bit */
234 } else { /* (not error) */
235 /*
236 * We split retval across two registers, in case the
237 * syscall had a 64-bit return value, in which case
238 * eax/edx matches the function call ABI.
239 */
240 regs->eax = uthread->uu_rval[0];
241 regs->edx = uthread->uu_rval[1];
242 }
243 }
244
245 DEBUG_KPRINT_SYSCALL_UNIX(
246 "unix_syscall: error=%d retval=(%u,%u)\n",
247 error, regs->eax, regs->edx);
248
249 uthread->uu_flag &= ~UT_NOTCANCELPT;
250 uthread->syscall_code = 0;
251
252 #if DEBUG || DEVELOPMENT
253 kern_allocation_name_t
254 prior __assert_only = thread_set_allocation_name(NULL);
255 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
256 #endif /* DEBUG || DEVELOPMENT */
257
258 if (__improbable(uthread->uu_lowpri_window)) {
259 /*
260 * task is marked as a low priority I/O type
261 * and the I/O we issued while in this system call
262 * collided with normal I/O operations... we'll
263 * delay in order to mitigate the impact of this
264 * task on the normal operation of the system
265 */
266 throttle_lowpri_io(1);
267 }
268 if (__probable(!code_is_kdebug_trace(code))) {
269 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
270 error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
271 }
272
273 if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
274 pal_execve_return(thread);
275 }
276
277 #if PROC_REF_DEBUG
278 if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
279 panic("system call returned with uu_proc_refcount != 0");
280 }
281 #endif
282
283 thread_exception_return();
284 /* NOTREACHED */
285 }
286
287 __attribute__((noreturn))
288 void
289 unix_syscall64(x86_saved_state_t *state)
290 {
291 thread_t thread;
292 void *vt;
293 unsigned int code, syscode;
294 const struct sysent *callp;
295 int args_in_regs;
296 boolean_t args_start_at_rdi;
297 int error;
298 struct proc *p;
299 struct uthread *uthread;
300 x86_saved_state64_t *regs;
301 pid_t pid;
302
303 assert(is_saved_state64(state));
304 regs = saved_state64(state);
305 #if DEBUG
306 if (regs->rax == 0x2000800) {
307 thread_exception_return();
308 }
309 #endif
310 thread = current_thread();
311 uthread = get_bsdthread_info(thread);
312
313 uthread_reset_proc_refcount(uthread);
314
315 /* Get the approriate proc; may be different from task's for vfork() */
316 if (__probable(!(uthread->uu_flag & UT_VFORK))) {
317 p = (struct proc *)get_bsdtask_info(current_task());
318 } else {
319 p = current_proc();
320 }
321
322 /* Verify that we are not being called from a task without a proc */
323 if (__improbable(p == NULL)) {
324 regs->rax = EPERM;
325 regs->isf.rflags |= EFL_CF;
326 task_terminate_internal(current_task());
327 thread_exception_return();
328 /* NOTREACHED */
329 }
330
331 code = regs->rax & SYSCALL_NUMBER_MASK;
332 syscode = (code < nsysent) ? code : SYS_invalid;
333 DEBUG_KPRINT_SYSCALL_UNIX(
334 "unix_syscall64: code=%d(%s) rip=%llx\n",
335 code, syscallnames[syscode], regs->isf.rip);
336 callp = &sysent[syscode];
337
338 vt = (void *)uthread->uu_arg;
339
340 if (__improbable(callp == sysent)) {
341 /*
342 * indirect system call... system call number
343 * passed as 'arg0'
344 */
345 code = regs->rdi;
346 syscode = (code < nsysent) ? code : SYS_invalid;
347 callp = &sysent[syscode];
348 args_start_at_rdi = FALSE;
349 args_in_regs = 5;
350 } else {
351 args_start_at_rdi = TRUE;
352 args_in_regs = 6;
353 }
354
355 if (callp->sy_narg != 0) {
356 assert(callp->sy_narg <= 8); /* size of uu_arg */
357
358 args_in_regs = MIN(args_in_regs, callp->sy_narg);
359 memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));
360
361 if (!code_is_kdebug_trace(code)) {
362 uint64_t *uip = vt;
363
364 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
365 uip[0], uip[1], uip[2], uip[3]);
366 }
367
368 if (__improbable(callp->sy_narg > args_in_regs)) {
369 int copyin_count;
370
371 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);
372
373 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
374 if (error) {
375 regs->rax = error;
376 regs->isf.rflags |= EFL_CF;
377 thread_exception_return();
378 /* NOTREACHED */
379 }
380 }
381 } else {
382 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
383 }
384
385 /*
386 * Delayed binding of thread credential to process credential, if we
387 * are not running with an explicitly set thread credential.
388 */
389 kauth_cred_uthread_update(uthread, p);
390
391 uthread->uu_rval[0] = 0;
392 uthread->uu_rval[1] = 0;
393 uthread->uu_flag |= UT_NOTCANCELPT;
394 uthread->syscall_code = code;
395 pid = proc_pid(p);
396
397 #ifdef JOE_DEBUG
398 uthread->uu_iocount = 0;
399 uthread->uu_vpindex = 0;
400 #endif
401
402 #if CONFIG_MACF
403 if (__improbable(p->syscall_filter_mask != NULL && !bitstr_test(p->syscall_filter_mask, syscode))) {
404 error = mac_proc_check_syscall_unix(p, syscode);
405 if (error) {
406 goto skip_syscall;
407 }
408 }
409 #endif /* CONFIG_MACF */
410
411 AUDIT_SYSCALL_ENTER(code, p, uthread);
412 error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
413 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
414
415 #if CONFIG_MACF
416 skip_syscall:
417 #endif /* CONFIG_MACF */
418
419 #ifdef JOE_DEBUG
420 if (uthread->uu_iocount) {
421 printf("system call returned with uu_iocount != 0\n");
422 }
423 #endif
424
425 #if CONFIG_DTRACE
426 uthread->t_dtrace_errno = error;
427 #endif /* CONFIG_DTRACE */
428
429 if (__improbable(error == ERESTART)) {
430 /*
431 * all system calls come through via the syscall instruction
432 * in 64 bit mode... its 2 bytes in length
433 * move the user's pc back to repeat the syscall:
434 */
435 pal_syscall_restart( thread, state );
436 } else if (error != EJUSTRETURN) {
437 if (__improbable(error)) {
438 regs->rax = error;
439 regs->isf.rflags |= EFL_CF; /* carry bit */
440 } else { /* (not error) */
441 switch (callp->sy_return_type) {
442 case _SYSCALL_RET_INT_T:
443 regs->rax = uthread->uu_rval[0];
444 regs->rdx = uthread->uu_rval[1];
445 break;
446 case _SYSCALL_RET_UINT_T:
447 regs->rax = ((u_int)uthread->uu_rval[0]);
448 regs->rdx = ((u_int)uthread->uu_rval[1]);
449 break;
450 case _SYSCALL_RET_OFF_T:
451 case _SYSCALL_RET_ADDR_T:
452 case _SYSCALL_RET_SIZE_T:
453 case _SYSCALL_RET_SSIZE_T:
454 case _SYSCALL_RET_UINT64_T:
455 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
456 regs->rdx = 0;
457 break;
458 case _SYSCALL_RET_NONE:
459 break;
460 default:
461 panic("unix_syscall: unknown return type");
462 break;
463 }
464 regs->isf.rflags &= ~EFL_CF;
465 }
466 }
467
468 DEBUG_KPRINT_SYSCALL_UNIX(
469 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
470 error, regs->rax, regs->rdx);
471
472 uthread->uu_flag &= ~UT_NOTCANCELPT;
473 uthread->syscall_code = 0;
474
475 #if DEBUG || DEVELOPMENT
476 kern_allocation_name_t
477 prior __assert_only = thread_set_allocation_name(NULL);
478 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
479 #endif /* DEBUG || DEVELOPMENT */
480
481 if (__improbable(uthread->uu_lowpri_window)) {
482 /*
483 * task is marked as a low priority I/O type
484 * and the I/O we issued while in this system call
485 * collided with normal I/O operations... we'll
486 * delay in order to mitigate the impact of this
487 * task on the normal operation of the system
488 */
489 throttle_lowpri_io(1);
490 }
491 if (__probable(!code_is_kdebug_trace(code))) {
492 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
493 error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
494 }
495
496 #if PROC_REF_DEBUG
497 if (__improbable(uthread_get_proc_refcount(uthread))) {
498 panic("system call returned with uu_proc_refcount != 0");
499 }
500 #endif
501
502 thread_exception_return();
503 /* NOTREACHED */
504 }
505
506
507 void
508 unix_syscall_return(int error)
509 {
510 thread_t thread;
511 struct uthread *uthread;
512 struct proc *p;
513 unsigned int code;
514 const struct sysent *callp;
515
516 thread = current_thread();
517 uthread = get_bsdthread_info(thread);
518
519 pal_register_cache_state(thread, DIRTY);
520
521 p = current_proc();
522
523 if (proc_is64bit(p)) {
524 x86_saved_state64_t *regs;
525
526 regs = saved_state64(find_user_regs(thread));
527
528 code = uthread->syscall_code;
529 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
530
531 #if CONFIG_DTRACE
532 if (callp->sy_call == dtrace_systrace_syscall) {
533 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
534 }
535 #endif /* CONFIG_DTRACE */
536 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
537
538 if (error == ERESTART) {
539 /*
540 * repeat the syscall
541 */
542 pal_syscall_restart( thread, find_user_regs(thread));
543 } else if (error != EJUSTRETURN) {
544 if (error) {
545 regs->rax = error;
546 regs->isf.rflags |= EFL_CF; /* carry bit */
547 } else { /* (not error) */
548 switch (callp->sy_return_type) {
549 case _SYSCALL_RET_INT_T:
550 regs->rax = uthread->uu_rval[0];
551 regs->rdx = uthread->uu_rval[1];
552 break;
553 case _SYSCALL_RET_UINT_T:
554 regs->rax = ((u_int)uthread->uu_rval[0]);
555 regs->rdx = ((u_int)uthread->uu_rval[1]);
556 break;
557 case _SYSCALL_RET_OFF_T:
558 case _SYSCALL_RET_ADDR_T:
559 case _SYSCALL_RET_SIZE_T:
560 case _SYSCALL_RET_SSIZE_T:
561 case _SYSCALL_RET_UINT64_T:
562 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
563 regs->rdx = 0;
564 break;
565 case _SYSCALL_RET_NONE:
566 break;
567 default:
568 panic("unix_syscall: unknown return type");
569 break;
570 }
571 regs->isf.rflags &= ~EFL_CF;
572 }
573 }
574 DEBUG_KPRINT_SYSCALL_UNIX(
575 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
576 error, regs->rax, regs->rdx);
577 } else {
578 x86_saved_state32_t *regs;
579
580 regs = saved_state32(find_user_regs(thread));
581
582 regs->efl &= ~(EFL_CF);
583
584 code = uthread->syscall_code;
585 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
586
587 #if CONFIG_DTRACE
588 if (callp->sy_call == dtrace_systrace_syscall) {
589 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
590 }
591 #endif /* CONFIG_DTRACE */
592 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
593
594 if (error == ERESTART) {
595 pal_syscall_restart( thread, find_user_regs(thread));
596 } else if (error != EJUSTRETURN) {
597 if (error) {
598 regs->eax = error;
599 regs->efl |= EFL_CF; /* carry bit */
600 } else { /* (not error) */
601 regs->eax = uthread->uu_rval[0];
602 regs->edx = uthread->uu_rval[1];
603 }
604 }
605 DEBUG_KPRINT_SYSCALL_UNIX(
606 "unix_syscall_return: error=%d retval=(%u,%u)\n",
607 error, regs->eax, regs->edx);
608 }
609
610
611 uthread->uu_flag &= ~UT_NOTCANCELPT;
612
613 #if DEBUG || DEVELOPMENT
614 kern_allocation_name_t
615 prior __assert_only = thread_set_allocation_name(NULL);
616 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
617 #endif /* DEBUG || DEVELOPMENT */
618
619 if (uthread->uu_lowpri_window) {
620 /*
621 * task is marked as a low priority I/O type
622 * and the I/O we issued while in this system call
623 * collided with normal I/O operations... we'll
624 * delay in order to mitigate the impact of this
625 * task on the normal operation of the system
626 */
627 throttle_lowpri_io(1);
628 }
629 if (!code_is_kdebug_trace(code)) {
630 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
631 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid);
632 }
633
634 thread_exception_return();
635 /* NOTREACHED */
636 }