/* bsd/dev/i386/systemcalls.c (apple/xnu, xnu-2782.40.9) */
/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <mach/branch_predicates.h>

#include <sys/kernel.h>
#include <sys/vm.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>

#include <security/audit/audit.h>

#include <i386/seg.h>
#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>

#include <machine/pal_routines.h>

#if CONFIG_DTRACE
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || ((code) == SYS_kdebug_trace64))

/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
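/*
 * Entry-convention note (an assumption about the 32-bit Darwin user side,
 * not stated in this file): the BSD system call number arrives in eax,
 * the trap is taken via sysenter or an "int 8x" gate, and the arguments
 * sit on the user stack exactly as for an ordinary C call; Mach traps use
 * negative numbers and are dispatched separately from this BSD path.
 */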
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;

	int		error;
	vm_offset_t	params;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state32_t *regs;
	boolean_t	is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
	params = (vm_offset_t) (regs->uesp + sizeof (int));

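	/*
	 * Stack-layout sketch (an assumption about the user-side ABI, not
	 * taken from this file): at entry uesp points at the return-address
	 * slot pushed by the libc stub, with the syscall arguments laid out
	 * above it as for a C call, which is why the first argument is read
	 * from uesp + sizeof(int).
	 */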
	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		if (__probable(!code_is_kdebug_trace(code))) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
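		/*
		 * Munging sketch (hypothetical example, not a syscall taken
		 * from syscalls.master): a handler declared as
		 * foo(int fd, off_t off) arrives from a 32-bit caller as a
		 * 4-byte fd followed by an 8-byte off; the generated munger
		 * widens each 32-bit slot and reassembles the 64-bit one into
		 * the uu_arg layout the kernel-side handler expects.
		 */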
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}
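	/*
	 * Example (a standard syscall used only for illustration, not singled
	 * out by this file): lseek() returns a 64-bit off_t, so its low 32
	 * bits travel in uu_rval[0]/eax and the high 32 bits in
	 * uu_rval[1]/edx, matching the i386 convention for 64-bit return
	 * values described above.
	 */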

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
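	/*
	 * A task commonly ends up in the window above after opting into
	 * throttled I/O from userspace, e.g. (sketch, assuming the standard
	 * Darwin API in <sys/resource.h>):
	 *
	 *	setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_THROTTLE);
	 *
	 * after which I/O issued during a syscall that collides with
	 * normal-priority I/O is delayed here via throttle_lowpri_io().
	 */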
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}


void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if DEBUG
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
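	/*
	 * Note (an assumption from the x86_64 Darwin syscall numbering, not
	 * stated in this file): 0x2000000 is the UNIX syscall class prefix
	 * (SYSCALL_CLASS_UNIX << 24), so 0x2000800 is the 64-bit spelling of
	 * the same 0x800 debug magic checked in the 32-bit path above.
	 */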
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}
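	/*
	 * The indirect branch above is what the syscall(2) wrapper exercises,
	 * e.g. (userspace sketch, not from this file):
	 *
	 *	#include <sys/syscall.h>
	 *	#include <unistd.h>
	 *
	 *	pid_t pid = syscall(SYS_getpid);
	 *
	 * The real number then arrives as 'arg0' in rdi, so the argument
	 * registers shift over by one and only five remain in registers.
	 */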

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));

		if (!code_is_kdebug_trace(code)) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
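		/*
		 * Arguments past the register complement are assumed to follow
		 * the x86_64 Darwin convention of spilling to the user stack
		 * just above the return-address slot, hence the read from
		 * rsp + sizeof(user_addr_t).
		 */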
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * All system calls come through via the syscall instruction
		 * in 64-bit mode... it's 2 bytes in length.
		 * Move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(!code_is_kdebug_trace(code)))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}

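/*
 * Assumed usage note (not documented in this file): unix_syscall_return()
 * is the exit path taken when a system call completes away from its
 * original entry, e.g. after a handler blocks and later resumes via a
 * continuation; like the entry points above, it finishes with
 * thread_exception_return() and never returns to its caller.
 */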
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned int	code;
	struct sysent	*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */

				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (!code_is_kdebug_trace(code))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}