/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <mach/branch_predicates.h>

#include <sys/kernel.h>
#include <sys/vm.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>
#include <sys/systm.h>

#include <security/audit/audit.h>

#include <i386/seg.h>
#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>

#include <machine/pal_routines.h>
#if CONFIG_DTRACE
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

/*
 * This needs to be a single switch so that it's "all on" or "all off",
 * rather than being turned on for some code paths and not others, as this
 * has a tendency to introduce "blame the next guy" bugs.
 */
#if DEBUG
#define FUNNEL_DEBUG 1  /* Check for funnel held on exit */
#endif

/*
 * Function:    unix_syscall
 *
 * Inputs:      regs - pointer to i386 save area
 *
 * Outputs:     none
 */
void
unix_syscall(x86_saved_state_t *state)
{
        thread_t        thread;
        void            *vt;
        unsigned int    code;
        struct sysent   *callp;

        int             error;
        vm_offset_t     params;
        struct proc     *p;
        struct uthread  *uthread;
        x86_saved_state32_t *regs;
        boolean_t       is_vfork;

        assert(is_saved_state32(state));
        regs = saved_state32(state);
#if DEBUG
        if (regs->eax == 0x800)
                thread_exception_return();
#endif
        thread = current_thread();
        uthread = get_bsdthread_info(thread);

        /* Get the appropriate proc; may be different from task's for vfork() */
        is_vfork = uthread->uu_flag & UT_VFORK;
        if (__improbable(is_vfork != 0))
                p = current_proc();
        else
                p = (struct proc *)get_bsdtask_info(current_task());

        /* Verify that we are not being called from a task without a proc */
        if (__improbable(p == NULL)) {
                regs->eax = EPERM;
                regs->efl |= EFL_CF;
                task_terminate_internal(current_task());
                thread_exception_return();
                /* NOTREACHED */
        }

        code = regs->eax & I386_SYSCALL_NUMBER_MASK;
        DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
                code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
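        /*
         * In the 32-bit ABI all system call arguments are passed on the user
         * stack; skip the word at uesp (the return address pushed by the call
         * into the syscall stub) to reach the argument block.
         */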
        params = (vm_offset_t) (regs->uesp + sizeof (int));

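        /*
         * The carry flag reports success/failure to the user-mode stub:
         * clear it now; the error paths below set it again with errno in eax.
         */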
        regs->efl &= ~(EFL_CF);

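        /* Out-of-range codes are clamped to sysent[63] rather than indexing past the table. */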
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

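        /*
         * Syscall #0 is the indirect syscall (SYS_syscall): the real call
         * number is passed as the first user argument, so re-fetch the code
         * from the user stack and redispatch.
         */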
        if (__improbable(callp == sysent)) {
                code = fuword(params);
                params += sizeof(int);
                callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
        }

        vt = (void *)uthread->uu_arg;
        uthread->uu_ap = vt;

        if (callp->sy_arg_bytes != 0) {
                sy_munge_t      *mungerp;
                uint32_t        nargs;

                assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
                nargs = callp->sy_arg_bytes;
                error = copyin((user_addr_t) params, (char *) vt, nargs);
                if (error) {
                        regs->eax = error;
                        regs->efl |= EFL_CF;
                        thread_exception_return();
                        /* NOTREACHED */
                }

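                /*
                 * Syscall 180 is kdebug_trace; skip emitting a kdebug entry
                 * record for the tracing call itself.
                 */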
                if (__probable(code != 180)) {
                        int *ip = (int *)vt;

                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                                *ip, *(ip+1), *(ip+2), *(ip+3), 0);
                }
                mungerp = callp->sy_arg_munge32;

                /*
                 * If non-NULL, call the syscall argument munger to rewrite
                 * the arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s). The
                 * first argument is NULL because we munge in place after the
                 * copyin above: the ABI currently doesn't pass system call
                 * arguments in registers.
                 */
                if (mungerp != NULL)
                        (*mungerp)(NULL, vt);
        } else
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                        0, 0, 0, 0, 0);

        /*
         * Delayed binding of thread credential to process credential, if we
         * are not running with an explicitly set thread credential.
         */
        kauth_cred_uthread_update(uthread, p);

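        /*
         * rval[1] is seeded from the caller's edx so that calls which set
         * only one return value leave the second register unchanged;
         * two-value calls (e.g. pipe) overwrite it via uu_rval[1].
         */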
        uthread->uu_rval[0] = 0;
        uthread->uu_rval[1] = regs->edx;
        uthread->uu_flag |= UT_NOTCANCELPT;


#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

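        /*
         * Dispatch: each sysent entry is called with the proc, the argument
         * block, and a pointer to the uu_rval pair, bracketed by the audit
         * hooks.
         */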
        AUDIT_SYSCALL_ENTER(code, p, uthread);
        error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
        AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
                printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
        uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

        if (__improbable(error == ERESTART)) {
                /*
                 * Move the user's pc back to repeat the syscall:
                 * 5 bytes for a sysenter, or 2 for an int 8x.
                 * The SYSENTER_TF_CS covers single-stepping over a sysenter
                 * - see debug trap handler in idt.s/idt64.s
                 */

                pal_syscall_restart(thread, state);
        }
        else if (error != EJUSTRETURN) {
                if (__improbable(error)) {
                        regs->eax = error;
                        regs->efl |= EFL_CF;    /* carry bit */
                } else { /* (not error) */
                        regs->eax = uthread->uu_rval[0];
                        regs->edx = uthread->uu_rval[1];
                }
        }

        DEBUG_KPRINT_SYSCALL_UNIX(
                "unix_syscall: error=%d retval=(%u,%u)\n",
                error, regs->eax, regs->edx);

        uthread->uu_flag &= ~UT_NOTCANCELPT;
#if FUNNEL_DEBUG
        /*
         * Panic if we are still holding the funnel on exit.
         */
        syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */

        if (__improbable(uthread->uu_lowpri_window)) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
                 * collided with normal I/O operations... we'll
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
                throttle_lowpri_io(1);
        }
        if (__probable(code != 180))
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
                        error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

        if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
                pal_execve_return(thread);
        }

        thread_exception_return();
        /* NOTREACHED */
}

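/*
 * Function:    unix_syscall64
 *
 * Inputs:      regs - pointer to x86_64 save area
 *
 * Outputs:     none
 */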
void
unix_syscall64(x86_saved_state_t *state)
{
        thread_t        thread;
        unsigned int    code;
        struct sysent   *callp;
        void            *uargp;
        int             args_in_regs;
        int             error;
        struct proc     *p;
        struct uthread  *uthread;
        x86_saved_state64_t *regs;

        assert(is_saved_state64(state));
        regs = saved_state64(state);
#if DEBUG
        if (regs->rax == 0x2000800)
                thread_exception_return();
#endif
        thread = current_thread();
        uthread = get_bsdthread_info(thread);

        /* Get the appropriate proc; may be different from task's for vfork() */
        if (__probable(!(uthread->uu_flag & UT_VFORK)))
                p = (struct proc *)get_bsdtask_info(current_task());
        else
                p = current_proc();

        /* Verify that we are not being called from a task without a proc */
        if (__improbable(p == NULL)) {
                regs->rax = EPERM;
                regs->isf.rflags |= EFL_CF;
                task_terminate_internal(current_task());
                thread_exception_return();
                /* NOTREACHED */
        }
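        /*
         * In the 64-bit ABI the first six syscall arguments arrive in
         * registers, which the save area lays out contiguously starting at
         * rdi; anything beyond that is copied in from the user stack below.
         */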
        args_in_regs = 6;

        code = regs->rax & SYSCALL_NUMBER_MASK;
        DEBUG_KPRINT_SYSCALL_UNIX(
                "unix_syscall64: code=%d(%s) rip=%llx\n",
                code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
        callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
        uargp = (void *)(&regs->rdi);

        if (__improbable(callp == sysent)) {
                /*
                 * indirect system call... system call number
                 * passed as 'arg0'
                 */
                code = regs->rdi;
                callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
                uargp = (void *)(&regs->rsi);
                args_in_regs = 5;
        }
        uthread->uu_ap = uargp;

        if (callp->sy_narg != 0) {
                if (code != 180) {
                        uint64_t *ip = (uint64_t *)uargp;

                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                                (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
                }
                assert(callp->sy_narg <= 8);

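                /*
                 * Arguments beyond the six register slots live on the user
                 * stack just above the return address; copy them into the
                 * contiguous v_arg6.. slots of the save area so the handler
                 * sees a single argument block.
                 */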
                if (__improbable(callp->sy_narg > args_in_regs)) {
                        int copyin_count;

                        copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);

                        error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
                        if (error) {
                                regs->rax = error;
                                regs->isf.rflags |= EFL_CF;
                                thread_exception_return();
                                /* NOTREACHED */
                        }
                }
        } else
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
                        0, 0, 0, 0, 0);

        /*
         * Delayed binding of thread credential to process credential, if we
         * are not running with an explicitly set thread credential.
         */
        kauth_cred_uthread_update(uthread, p);

        uthread->uu_rval[0] = 0;
        uthread->uu_rval[1] = 0;


        uthread->uu_flag |= UT_NOTCANCELPT;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

        AUDIT_SYSCALL_ENTER(code, p, uthread);
        error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
        AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
                printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
        uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

        if (__improbable(error == ERESTART)) {
                /*
                 * In 64-bit mode all system calls enter via the syscall
                 * instruction, which is 2 bytes long; move the user's pc
                 * back by that much to repeat the syscall.
                 */
                pal_syscall_restart(thread, state);
        }
        else if (error != EJUSTRETURN) {
                if (__improbable(error)) {
                        regs->rax = error;
                        regs->isf.rflags |= EFL_CF;     /* carry bit */
                } else { /* (not error) */

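                        /*
                         * Widen the result according to the return type
                         * declared in syscalls.master: 64-bit results go out
                         * in rax alone, 32-bit pairs in rax/rdx.
                         */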
                        switch (callp->sy_return_type) {
                        case _SYSCALL_RET_INT_T:
                                regs->rax = uthread->uu_rval[0];
                                regs->rdx = uthread->uu_rval[1];
                                break;
                        case _SYSCALL_RET_UINT_T:
                                regs->rax = ((u_int)uthread->uu_rval[0]);
                                regs->rdx = ((u_int)uthread->uu_rval[1]);
                                break;
                        case _SYSCALL_RET_OFF_T:
                        case _SYSCALL_RET_ADDR_T:
                        case _SYSCALL_RET_SIZE_T:
                        case _SYSCALL_RET_SSIZE_T:
                        case _SYSCALL_RET_UINT64_T:
                                regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
                                regs->rdx = 0;
                                break;
                        case _SYSCALL_RET_NONE:
                                break;
                        default:
                                panic("unix_syscall: unknown return type");
                                break;
                        }
                        regs->isf.rflags &= ~EFL_CF;
                }
        }

        DEBUG_KPRINT_SYSCALL_UNIX(
                "unix_syscall64: error=%d retval=(%llu,%llu)\n",
                error, regs->rax, regs->rdx);

        uthread->uu_flag &= ~UT_NOTCANCELPT;

#if FUNNEL_DEBUG
        /*
         * Panic if we are still holding the funnel on exit.
         */
        syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */

        if (__improbable(uthread->uu_lowpri_window)) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
                 * collided with normal I/O operations... we'll
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
                throttle_lowpri_io(1);
        }
        if (__probable(code != 180))
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
                        error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

        thread_exception_return();
        /* NOTREACHED */
}

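/*
 * Function:    unix_syscall_return
 *
 * Inputs:      error - error/status returned by the completing system call
 *
 * Outputs:     none
 *
 * Notes:       Completion path used by system calls that finish on a
 *              continuation rather than returning to the dispatcher;
 *              performs the same error/return-value handling as
 *              unix_syscall{,64} above, then returns to user mode.
 */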
void
unix_syscall_return(int error)
{
        thread_t        thread;
        struct uthread  *uthread;
        struct proc     *p;
        unsigned int    code;
        vm_offset_t     params;
        struct sysent   *callp;

        thread = current_thread();
        uthread = get_bsdthread_info(thread);

        pal_register_cache_state(thread, DIRTY);

        p = current_proc();

        if (proc_is64bit(p)) {
                x86_saved_state64_t *regs;

                regs = saved_state64(find_user_regs(thread));

                /* reconstruct code for tracing before blasting rax */
                code = regs->rax & SYSCALL_NUMBER_MASK;
                callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

                if (callp == sysent)
                        /*
                         * indirect system call... system call number
                         * passed as 'arg0'
                         */
                        code = regs->rdi;

#if CONFIG_DTRACE
                if (callp->sy_call == dtrace_systrace_syscall)
                        dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
                AUDIT_SYSCALL_EXIT(code, p, uthread, error);

                if (error == ERESTART) {
                        /*
                         * repeat the syscall
                         */
                        pal_syscall_restart( thread, find_user_regs(thread) );
                }
                else if (error != EJUSTRETURN) {
                        if (error) {
                                regs->rax = error;
                                regs->isf.rflags |= EFL_CF;     /* carry bit */
                        } else { /* (not error) */

                                switch (callp->sy_return_type) {
                                case _SYSCALL_RET_INT_T:
                                        regs->rax = uthread->uu_rval[0];
                                        regs->rdx = uthread->uu_rval[1];
                                        break;
                                case _SYSCALL_RET_UINT_T:
                                        regs->rax = ((u_int)uthread->uu_rval[0]);
                                        regs->rdx = ((u_int)uthread->uu_rval[1]);
                                        break;
                                case _SYSCALL_RET_OFF_T:
                                case _SYSCALL_RET_ADDR_T:
                                case _SYSCALL_RET_SIZE_T:
                                case _SYSCALL_RET_SSIZE_T:
                                case _SYSCALL_RET_UINT64_T:
                                        regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
                                        regs->rdx = 0;
                                        break;
                                case _SYSCALL_RET_NONE:
                                        break;
                                default:
                                        panic("unix_syscall: unknown return type");
                                        break;
                                }
                                regs->isf.rflags &= ~EFL_CF;
                        }
                }
                DEBUG_KPRINT_SYSCALL_UNIX(
                        "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
                        error, regs->rax, regs->rdx);
        } else {
                x86_saved_state32_t *regs;

                regs = saved_state32(find_user_regs(thread));

                regs->efl &= ~(EFL_CF);
                /* reconstruct code for tracing before blasting eax */
                code = regs->eax & I386_SYSCALL_NUMBER_MASK;
                callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
                if (callp->sy_call == dtrace_systrace_syscall)
                        dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
                AUDIT_SYSCALL_EXIT(code, p, uthread, error);

                if (callp == sysent) {
                        params = (vm_offset_t) (regs->uesp + sizeof (int));
                        code = fuword(params);
                }
                if (error == ERESTART) {
                        pal_syscall_restart( thread, find_user_regs(thread) );
                }
                else if (error != EJUSTRETURN) {
                        if (error) {
                                regs->eax = error;
                                regs->efl |= EFL_CF;    /* carry bit */
                        } else { /* (not error) */
                                regs->eax = uthread->uu_rval[0];
                                regs->edx = uthread->uu_rval[1];
                        }
                }
                DEBUG_KPRINT_SYSCALL_UNIX(
                        "unix_syscall_return: error=%d retval=(%u,%u)\n",
                        error, regs->eax, regs->edx);
        }


        uthread->uu_flag &= ~UT_NOTCANCELPT;

#if FUNNEL_DEBUG
        /*
         * Panic if we are still holding the funnel on exit.
         */
        syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */

        if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
                 * collided with normal I/O operations... we'll
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
                throttle_lowpri_io(1);
        }
        if (code != 180)
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
                        error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

        thread_exception_return();
        /* NOTREACHED */
}