apple/xnu xnu-3248.60.10 - bsd/dev/i386/systemcalls.c
/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <mach/branch_predicates.h>

#include <sys/kernel.h>
#include <sys/vm.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>

#include <security/audit/audit.h>

#include <i386/seg.h>
#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>

#include <machine/pal_routines.h>
#if CONFIG_DTRACE
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
				    ((code) == SYS_kdebug_trace64) || \
				    ((code) == SYS_kdebug_trace_string))
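/*
 * code_is_kdebug_trace() is used below to skip the BSD syscall kdebug
 * tracepoints for the kdebug_trace syscalls themselves, so that the
 * tracing facility does not generate records about its own control calls.
 */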

/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;

	int		error;
	vm_offset_t	params;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state32_t	*regs;
	boolean_t	is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

#if PROC_REF_DEBUG
	uthread_reset_proc_refcount(uthread);
#endif

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
				  code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
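	/*
	 * For a 32-bit process the syscall arguments live on the user stack:
	 * the word at uesp is the return address pushed by the user-mode
	 * syscall stub, so the first argument starts at uesp + sizeof(int).
	 */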
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}
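	/*
	 * sysent[0] is the indirect syscall(2) entry: when it is selected,
	 * the real syscall number is fetched from the first user argument
	 * and the argument pointer is advanced past it.  Out-of-range
	 * numbers fall back to sysent[63], a reserved slot that simply
	 * fails the call.
	 */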

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	 nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		if (__probable(!code_is_kdebug_trace(code))) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

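		/*
		 * On a 64-bit kernel the munge routine widens the packed
		 * 32-bit user arguments in uu_arg into the 64-bit argument
		 * slots the sysent handler expects, sign- or zero-extending
		 * as the argument types require.
		 */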
#if CONFIG_REQUIRES_U32_MUNGING
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
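			/*
			 * Setting the carry flag is the syscall error
			 * convention on x86: the user-mode stub checks CF to
			 * decide whether eax holds an errno or a return value.
			 */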
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}


void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if DEBUG
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

#if PROC_REF_DEBUG
	uthread_reset_proc_refcount(uthread);
#endif

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}
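	/*
	 * 64-bit syscalls pass up to six arguments in registers, following
	 * the System V AMD64 convention with r10 standing in for rcx (which
	 * the syscall instruction clobbers): rdi, rsi, rdx, r10, r8, r9.
	 * For the indirect syscall(2) form, rdi carries the syscall number,
	 * leaving five argument registers starting at rsi.
	 */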

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
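		/*
		 * The saved-state area lays out rdi, rsi, rdx, r10, r8 and r9
		 * contiguously and in argument order, so the register
		 * arguments can be block-copied into uu_arg with one memcpy.
		 */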
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));

		if (!code_is_kdebug_trace(code)) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

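		/*
		 * Arguments beyond those passed in registers spill onto the
		 * user stack; the copyin below skips past the return address
		 * at the top of the user stack before reading them.
		 */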
		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * All system calls come through via the syscall instruction
		 * in 64-bit mode, which is 2 bytes in length; move the
		 * user's pc back by that much to repeat the syscall.
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

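			/*
			 * uu_rval is a pair of 32-bit slots; 64-bit-wide
			 * return types span both slots and are read back here
			 * as a single 64-bit value.
			 */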
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(uthread))) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}

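/*
 * unix_syscall_return is the completion path for system calls that do not
 * return through unix_syscall()/unix_syscall64() directly, e.g. calls that
 * blocked and were resumed on a continuation.  It performs the same
 * error/return-value handling and kdebug accounting before returning to
 * user mode.
 */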
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned int	code;
	struct sysent	*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);
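	/*
	 * Tell the platform layer that the thread's saved user register
	 * state is about to be modified.
	 */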

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */

				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (!code_is_kdebug_trace(code))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}