]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/systemcalls.c
6b24ccf6cc8b7e7fa87447c11bcbe648e1d17804
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <mach/machine/thread_status.h>
35 #include <mach/thread_act.h>
36
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/syscall.h>
41 #include <sys/systm.h>
42 #include <sys/user.h>
43 #include <sys/errno.h>
44 #include <sys/kdebug.h>
45 #include <sys/sysent.h>
46 #include <sys/sysproto.h>
47 #include <sys/kauth.h>
48 #include <sys/systm.h>
49
50 #include <bsm/audit_kernel.h>
51
52 #include <i386/seg.h>
53 #include <i386/machine_routines.h>
54 #include <mach/i386/syscall_sw.h>
55
56 #if CONFIG_DTRACE
57 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
58 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
59 #endif
60
61 extern void unix_syscall(x86_saved_state_t *);
62 extern void unix_syscall64(x86_saved_state_t *);
63 extern void *find_user_regs(thread_t);
64
65 extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
66 extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
67 /*
68 * Function: unix_syscall
69 *
70 * Inputs: regs - pointer to i386 save area
71 *
72 * Outputs: none
73 */
74 void
75 unix_syscall(x86_saved_state_t *state)
76 {
77 thread_t thread;
78 void *vt;
79 unsigned int code;
80 struct sysent *callp;
81
82 int error;
83 vm_offset_t params;
84 struct proc *p;
85 struct uthread *uthread;
86 x86_saved_state32_t *regs;
87 boolean_t args_in_uthread;
88
89 assert(is_saved_state32(state));
90 regs = saved_state32(state);
91 #if DEBUG
92 if (regs->eax == 0x800)
93 thread_exception_return();
94 #endif
95 thread = current_thread();
96 uthread = get_bsdthread_info(thread);
97
98 /* Get the approriate proc; may be different from task's for vfork() */
99 if (!(uthread->uu_flag & UT_VFORK))
100 p = (struct proc *)get_bsdtask_info(current_task());
101 else
102 p = current_proc();
103
104 /* Verify that we are not being called from a task without a proc */
105 if (p == NULL) {
106 regs->eax = EPERM;
107 regs->efl |= EFL_CF;
108 task_terminate_internal(current_task());
109 thread_exception_return();
110 /* NOTREACHED */
111 }
112
113 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
114 args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
115 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
116
117 regs->efl &= ~(EFL_CF);
118
119 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
120
121 if (callp == sysent) {
122 code = fuword(params);
123 params += sizeof(int);
124 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
125 }
126
127 vt = (void *)uthread->uu_arg;
128
129 if (callp->sy_arg_bytes != 0) {
130 sy_munge_t *mungerp;
131
132 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
133 if (!args_in_uthread)
134 {
135 uint32_t nargs;
136 nargs = callp->sy_arg_bytes;
137 error = copyin((user_addr_t) params, (char *) vt, nargs);
138 if (error) {
139 regs->eax = error;
140 regs->efl |= EFL_CF;
141 thread_exception_return();
142 /* NOTREACHED */
143 }
144 }
145
146 if (code != 180) {
147 int *ip = (int *)vt;
148
149 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
150 *ip, *(ip+1), *(ip+2), *(ip+3), 0);
151 }
152 mungerp = callp->sy_arg_munge32;
153
154 /*
155 * If non-NULL, then call the syscall argument munger to
156 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
157 * first argument is NULL because we are munging in place
158 * after a copyin because the ABI currently doesn't use
159 * registers to pass system call arguments.
160 */
161 if (mungerp != NULL)
162 (*mungerp)(NULL, vt);
163 } else
164 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
165 0, 0, 0, 0, 0);
166
167 /*
168 * Delayed binding of thread credential to process credential, if we
169 * are not running with an explicitly set thread credential.
170 */
171 kauth_cred_uthread_update(uthread, p);
172
173 uthread->uu_rval[0] = 0;
174 uthread->uu_rval[1] = regs->edx;
175 uthread->uu_flag |= UT_NOTCANCELPT;
176
177
178 #ifdef JOE_DEBUG
179 uthread->uu_iocount = 0;
180 uthread->uu_vpindex = 0;
181 #endif
182
183 AUDIT_SYSCALL_ENTER(code, p, uthread);
184 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
185 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
186
187 #ifdef JOE_DEBUG
188 if (uthread->uu_iocount)
189 joe_debug("system call returned with uu_iocount != 0");
190 #endif
191 #if CONFIG_DTRACE
192 uthread->t_dtrace_errno = error;
193 #endif /* CONFIG_DTRACE */
194
195 if (error == ERESTART) {
196 /*
197 * Move the user's pc back to repeat the syscall:
198 * 5 bytes for a sysenter, or 2 for an int 8x.
199 * The SYSENTER_TF_CS covers single-stepping over a sysenter
200 * - see debug trap handler in idt.s/idt64.s
201 */
202 if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
203 regs->eip -= 5;
204 }
205 else
206 regs->eip -= 2;
207 }
208 else if (error != EJUSTRETURN) {
209 if (error) {
210 regs->eax = error;
211 regs->efl |= EFL_CF; /* carry bit */
212 } else { /* (not error) */
213 regs->eax = uthread->uu_rval[0];
214 regs->edx = uthread->uu_rval[1];
215 }
216 }
217
218 uthread->uu_flag &= ~UT_NOTCANCELPT;
219 #if DEBUG
220 /*
221 * if we're holding the funnel panic
222 */
223 syscall_exit_funnelcheck();
224 #endif /* DEBUG */
225 if (uthread->uu_lowpri_window) {
226 /*
227 * task is marked as a low priority I/O type
228 * and the I/O we issued while in this system call
229 * collided with normal I/O operations... we'll
230 * delay in order to mitigate the impact of this
231 * task on the normal operation of the system
232 */
233 throttle_lowpri_io(TRUE);
234 }
235 if (code != 180)
236 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
237 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
238
239 thread_exception_return();
240 /* NOTREACHED */
241 }
242
243
244 void
245 unix_syscall64(x86_saved_state_t *state)
246 {
247 thread_t thread;
248 unsigned int code;
249 struct sysent *callp;
250 void *uargp;
251 int args_in_regs;
252 int error;
253 struct proc *p;
254 struct uthread *uthread;
255 x86_saved_state64_t *regs;
256
257 assert(is_saved_state64(state));
258 regs = saved_state64(state);
259
260 if (regs->rax == 0x2000800)
261 thread_exception_return();
262
263 thread = current_thread();
264 uthread = get_bsdthread_info(thread);
265
266 /* Get the approriate proc; may be different from task's for vfork() */
267 if (!(uthread->uu_flag & UT_VFORK))
268 p = (struct proc *)get_bsdtask_info(current_task());
269 else
270 p = current_proc();
271
272 /* Verify that we are not being called from a task without a proc */
273 if (p == NULL) {
274 regs->rax = EPERM;
275 regs->isf.rflags |= EFL_CF;
276 task_terminate_internal(current_task());
277 thread_exception_return();
278 /* NOTREACHED */
279 }
280 args_in_regs = 6;
281
282 code = regs->rax & SYSCALL_NUMBER_MASK;
283 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
284 uargp = (void *)(&regs->rdi);
285
286 if (callp == sysent) {
287 /*
288 * indirect system call... system call number
289 * passed as 'arg0'
290 */
291 code = regs->rdi;
292 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
293 uargp = (void *)(&regs->rsi);
294 args_in_regs = 5;
295 }
296
297 if (callp->sy_narg != 0) {
298 if (code != 180) {
299 uint64_t *ip = (uint64_t *)uargp;
300
301 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
302 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
303 }
304 assert(callp->sy_narg <= 8);
305
306 if (callp->sy_narg > args_in_regs) {
307 int copyin_count;
308
309 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
310
311 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
312 if (error) {
313 regs->rax = error;
314 regs->isf.rflags |= EFL_CF;
315 thread_exception_return();
316 /* NOTREACHED */
317 }
318 }
319 /*
320 * XXX Turn 64 bit unsafe calls into nosys()
321 */
322 if (callp->sy_flags & UNSAFE_64BIT) {
323 callp = &sysent[63];
324 goto unsafe;
325 }
326 } else
327 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
328 0, 0, 0, 0, 0);
329 unsafe:
330
331 /*
332 * Delayed binding of thread credential to process credential, if we
333 * are not running with an explicitly set thread credential.
334 */
335 kauth_cred_uthread_update(uthread, p);
336
337 uthread->uu_rval[0] = 0;
338 uthread->uu_rval[1] = 0;
339
340
341 uthread->uu_flag |= UT_NOTCANCELPT;
342
343
344 AUDIT_SYSCALL_ENTER(code, p, uthread);
345 error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
346 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
347
348 #if CONFIG_DTRACE
349 uthread->t_dtrace_errno = error;
350 #endif /* CONFIG_DTRACE */
351
352 if (error == ERESTART) {
353 /*
354 * all system calls come through via the syscall instruction
355 * in 64 bit mode... its 2 bytes in length
356 * move the user's pc back to repeat the syscall:
357 */
358 regs->isf.rip -= 2;
359 }
360 else if (error != EJUSTRETURN) {
361 if (error) {
362 regs->rax = error;
363 regs->isf.rflags |= EFL_CF; /* carry bit */
364 } else { /* (not error) */
365
366 switch (callp->sy_return_type) {
367 case _SYSCALL_RET_INT_T:
368 regs->rax = uthread->uu_rval[0];
369 regs->rdx = uthread->uu_rval[1];
370 break;
371 case _SYSCALL_RET_UINT_T:
372 regs->rax = ((u_int)uthread->uu_rval[0]);
373 regs->rdx = ((u_int)uthread->uu_rval[1]);
374 break;
375 case _SYSCALL_RET_OFF_T:
376 case _SYSCALL_RET_ADDR_T:
377 case _SYSCALL_RET_SIZE_T:
378 case _SYSCALL_RET_SSIZE_T:
379 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
380 regs->rdx = 0;
381 break;
382 case _SYSCALL_RET_NONE:
383 break;
384 default:
385 panic("unix_syscall: unknown return type");
386 break;
387 }
388 regs->isf.rflags &= ~EFL_CF;
389 }
390 }
391
392
393 uthread->uu_flag &= ~UT_NOTCANCELPT;
394
395 /*
396 * if we're holding the funnel panic
397 */
398 syscall_exit_funnelcheck();
399
400 if (uthread->uu_lowpri_window) {
401 /*
402 * task is marked as a low priority I/O type
403 * and the I/O we issued while in this system call
404 * collided with normal I/O operations... we'll
405 * delay in order to mitigate the impact of this
406 * task on the normal operation of the system
407 */
408 throttle_lowpri_io(TRUE);
409 }
410 if (code != 180)
411 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
412 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
413
414 thread_exception_return();
415 /* NOTREACHED */
416 }
417
418
419 void
420 unix_syscall_return(int error)
421 {
422 thread_t thread;
423 struct uthread *uthread;
424 struct proc *p;
425 unsigned int code;
426 vm_offset_t params;
427 struct sysent *callp;
428
429 thread = current_thread();
430 uthread = get_bsdthread_info(thread);
431
432 p = current_proc();
433
434 if (proc_is64bit(p)) {
435 x86_saved_state64_t *regs;
436
437 regs = saved_state64(find_user_regs(thread));
438
439 /* reconstruct code for tracing before blasting rax */
440 code = regs->rax & SYSCALL_NUMBER_MASK;
441 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
442
443 if (callp == sysent)
444 /*
445 * indirect system call... system call number
446 * passed as 'arg0'
447 */
448 code = regs->rdi;
449
450 #if CONFIG_DTRACE
451 if (callp->sy_call == dtrace_systrace_syscall)
452 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
453 #endif /* CONFIG_DTRACE */
454
455 if (error == ERESTART) {
456 /*
457 * all system calls come through via the syscall instruction
458 * in 64 bit mode... its 2 bytes in length
459 * move the user's pc back to repeat the syscall:
460 */
461 regs->isf.rip -= 2;
462 }
463 else if (error != EJUSTRETURN) {
464 if (error) {
465 regs->rax = error;
466 regs->isf.rflags |= EFL_CF; /* carry bit */
467 } else { /* (not error) */
468
469 switch (callp->sy_return_type) {
470 case _SYSCALL_RET_INT_T:
471 regs->rax = uthread->uu_rval[0];
472 regs->rdx = uthread->uu_rval[1];
473 break;
474 case _SYSCALL_RET_UINT_T:
475 regs->rax = ((u_int)uthread->uu_rval[0]);
476 regs->rdx = ((u_int)uthread->uu_rval[1]);
477 break;
478 case _SYSCALL_RET_OFF_T:
479 case _SYSCALL_RET_ADDR_T:
480 case _SYSCALL_RET_SIZE_T:
481 case _SYSCALL_RET_SSIZE_T:
482 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
483 regs->rdx = 0;
484 break;
485 case _SYSCALL_RET_NONE:
486 break;
487 default:
488 panic("unix_syscall: unknown return type");
489 break;
490 }
491 regs->isf.rflags &= ~EFL_CF;
492 }
493 }
494 } else {
495 x86_saved_state32_t *regs;
496
497 regs = saved_state32(find_user_regs(thread));
498
499 regs->efl &= ~(EFL_CF);
500 /* reconstruct code for tracing before blasting eax */
501 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
502 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
503
504 #if CONFIG_DTRACE
505 if (callp->sy_call == dtrace_systrace_syscall)
506 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
507 #endif /* CONFIG_DTRACE */
508
509 if (callp == sysent) {
510 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
511 code = fuword(params);
512 }
513 if (error == ERESTART) {
514 regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
515 }
516 else if (error != EJUSTRETURN) {
517 if (error) {
518 regs->eax = error;
519 regs->efl |= EFL_CF; /* carry bit */
520 } else { /* (not error) */
521 regs->eax = uthread->uu_rval[0];
522 regs->edx = uthread->uu_rval[1];
523 }
524 }
525 }
526
527
528 uthread->uu_flag &= ~UT_NOTCANCELPT;
529
530 /*
531 * if we're holding the funnel panic
532 */
533 syscall_exit_funnelcheck();
534
535 if (uthread->uu_lowpri_window) {
536 /*
537 * task is marked as a low priority I/O type
538 * and the I/O we issued while in this system call
539 * collided with normal I/O operations... we'll
540 * delay in order to mitigate the impact of this
541 * task on the normal operation of the system
542 */
543 throttle_lowpri_io(TRUE);
544 }
545 if (code != 180)
546 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
547 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
548
549 thread_exception_return();
550 /* NOTREACHED */
551 }
552
/*
 * munge_wwwlww
 *
 * Widen, in place, a packed 32-bit argument block of the form
 * word, word, word, long, word, word (seven 32-bit cells) into six
 * 64-bit slots.  Each word is zero-extended into its own slot; the two
 * halves of the long are moved, unchanged and in the same order, into
 * the fourth slot.
 *
 * in32  - ignored; the input is read from out64
 * out64 - buffer holding the packed arguments on entry and the widened
 *         arguments on return; must have room for six 64-bit slots
 */
void
munge_wwwlww(
	const void	*in32,
	void		*out64)
{
	uint32_t	*cells = (uint32_t *) out64;
	uint64_t	*slots = (uint64_t *) out64;
	uint32_t	w0, w1, w2, long_lo, long_hi, w4, w5;

	(void) in32;		/* we convert in place in out64 */

	/* fetch every input cell before any slot is overwritten */
	w0 = cells[0];
	w1 = cells[1];
	w2 = cells[2];
	long_lo = cells[3];
	long_hi = cells[4];
	w4 = cells[5];
	w5 = cells[6];

	slots[0] = w0;		/* Wwwlww */
	slots[1] = w1;		/* wWwlww */
	slots[2] = w2;		/* wwWlww */
	cells[6] = long_lo;	/* wwwLww (lo) */
	cells[7] = long_hi;	/* wwwLww (hi) */
	slots[4] = w4;		/* wwwlWw */
	slots[5] = w5;		/* wwwlwW */
}
573
574
/*
 * munge_wwlwww
 *
 * Widen, in place, a packed 32-bit argument block of the form
 * word, word, long, word, word, word (seven 32-bit cells) into six
 * 64-bit slots.  Each word is zero-extended into its own slot; the two
 * halves of the long are moved, unchanged and in the same order, into
 * the third slot.
 *
 * in32  - ignored; the input is read from out64
 * out64 - buffer holding the packed arguments on entry and the widened
 *         arguments on return; must have room for six 64-bit slots
 */
void
munge_wwlwww(
	const void	*in32,
	void		*out64)
{
	uint32_t	*cells = (uint32_t *) out64;
	uint64_t	*slots = (uint64_t *) out64;
	uint32_t	w0, w1, long_lo, long_hi, w3, w4, w5;

	(void) in32;		/* we convert in place in out64 */

	/* fetch every input cell before any slot is overwritten */
	w0 = cells[0];
	w1 = cells[1];
	long_lo = cells[2];
	long_hi = cells[3];
	w3 = cells[4];
	w4 = cells[5];
	w5 = cells[6];

	slots[0] = w0;		/* Wwlwww */
	slots[1] = w1;		/* wWlwww */
	cells[4] = long_lo;	/* wwLwww (lo) */
	cells[5] = long_hi;	/* wwLwww (hi) */
	slots[3] = w3;		/* wwlWww */
	slots[4] = w4;		/* wwlwWw */
	slots[5] = w5;		/* wwlwwW */
}
595
#ifdef JOE_DEBUG
/*
 * joe_debug
 *
 * Debug aid, built only when JOE_DEBUG is defined: print the given
 * message followed by a newline.
 *
 * The return type is spelled out as int (the definition previously
 * relied on implicit int, which C99 removed) and a value is now
 * returned, so the function no longer falls off the end of a non-void
 * function.  int is kept rather than void so the definition stays
 * compatible with the implicit declaration created by the call in
 * unix_syscall(), which appears before this definition.
 *
 * p - NUL-terminated message to print
 *
 * Returns 0 always.
 */
int
joe_debug(char *p)
{
	printf("%s\n", p);
	return (0);
}
#endif
602
603