]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/systemcalls.c
xnu-1228.5.18.tar.gz
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <mach/machine/thread_status.h>
35 #include <mach/thread_act.h>
36
37 #include <sys/kernel.h>
38 #include <sys/vm.h>
39 #include <sys/proc_internal.h>
40 #include <sys/syscall.h>
41 #include <sys/systm.h>
42 #include <sys/user.h>
43 #include <sys/errno.h>
44 #include <sys/kdebug.h>
45 #include <sys/sysent.h>
46 #include <sys/sysproto.h>
47 #include <sys/kauth.h>
48 #include <sys/systm.h>
49
50 #include <bsm/audit_kernel.h>
51
52 #include <i386/seg.h>
53 #include <i386/machine_routines.h>
54 #include <mach/i386/syscall_sw.h>
55
56 #if CONFIG_DTRACE
57 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
58 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
59 #endif
60
61 extern void unix_syscall(x86_saved_state_t *);
62 extern void unix_syscall64(x86_saved_state_t *);
63 extern void *find_user_regs(thread_t);
64 extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
65
66 extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
67 extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
68 /*
69 * Function: unix_syscall
70 *
71 * Inputs: regs - pointer to i386 save area
72 *
73 * Outputs: none
74 */
75 void
76 unix_syscall(x86_saved_state_t *state)
77 {
78 thread_t thread;
79 void *vt;
80 unsigned int code;
81 struct sysent *callp;
82
83 int error;
84 vm_offset_t params;
85 struct proc *p;
86 struct uthread *uthread;
87 x86_saved_state32_t *regs;
88 boolean_t args_in_uthread;
89
90 assert(is_saved_state32(state));
91 regs = saved_state32(state);
92 #if DEBUG
93 if (regs->eax == 0x800)
94 thread_exception_return();
95 #endif
96 thread = current_thread();
97 uthread = get_bsdthread_info(thread);
98
99 /* Get the approriate proc; may be different from task's for vfork() */
100 if (!(uthread->uu_flag & UT_VFORK))
101 p = (struct proc *)get_bsdtask_info(current_task());
102 else
103 p = current_proc();
104
105 /* Verify that we are not being called from a task without a proc */
106 if (p == NULL) {
107 regs->eax = EPERM;
108 regs->efl |= EFL_CF;
109 task_terminate_internal(current_task());
110 thread_exception_return();
111 /* NOTREACHED */
112 }
113
114 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
115 args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
116 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
117
118 regs->efl &= ~(EFL_CF);
119
120 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
121
122 if (callp == sysent) {
123 code = fuword(params);
124 params += sizeof(int);
125 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
126 }
127
128 vt = (void *)uthread->uu_arg;
129
130 if (callp->sy_arg_bytes != 0) {
131 sy_munge_t *mungerp;
132
133 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
134 if (!args_in_uthread)
135 {
136 uint32_t nargs;
137 nargs = callp->sy_arg_bytes;
138 error = copyin((user_addr_t) params, (char *) vt, nargs);
139 if (error) {
140 regs->eax = error;
141 regs->efl |= EFL_CF;
142 thread_exception_return();
143 /* NOTREACHED */
144 }
145 }
146
147 if (code != 180) {
148 int *ip = (int *)vt;
149
150 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
151 *ip, *(ip+1), *(ip+2), *(ip+3), 0);
152 }
153 mungerp = callp->sy_arg_munge32;
154
155 /*
156 * If non-NULL, then call the syscall argument munger to
157 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
158 * first argument is NULL because we are munging in place
159 * after a copyin because the ABI currently doesn't use
160 * registers to pass system call arguments.
161 */
162 if (mungerp != NULL)
163 (*mungerp)(NULL, vt);
164 } else
165 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
166 0, 0, 0, 0, 0);
167
168 /*
169 * Delayed binding of thread credential to process credential, if we
170 * are not running with an explicitly set thread credential.
171 */
172 kauth_cred_uthread_update(uthread, p);
173
174 uthread->uu_rval[0] = 0;
175 uthread->uu_rval[1] = regs->edx;
176 uthread->uu_flag |= UT_NOTCANCELPT;
177
178
179 #ifdef JOE_DEBUG
180 uthread->uu_iocount = 0;
181 uthread->uu_vpindex = 0;
182 #endif
183
184 AUDIT_SYSCALL_ENTER(code, p, uthread);
185 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
186 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
187
188 #ifdef JOE_DEBUG
189 if (uthread->uu_iocount)
190 joe_debug("system call returned with uu_iocount != 0");
191 #endif
192 #if CONFIG_DTRACE
193 uthread->t_dtrace_errno = error;
194 #endif /* CONFIG_DTRACE */
195
196 if (error == ERESTART) {
197 /*
198 * Move the user's pc back to repeat the syscall:
199 * 5 bytes for a sysenter, or 2 for an int 8x.
200 * The SYSENTER_TF_CS covers single-stepping over a sysenter
201 * - see debug trap handler in idt.s/idt64.s
202 */
203 if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
204 regs->eip -= 5;
205 }
206 else
207 regs->eip -= 2;
208 }
209 else if (error != EJUSTRETURN) {
210 if (error) {
211 regs->eax = error;
212 regs->efl |= EFL_CF; /* carry bit */
213 } else { /* (not error) */
214 regs->eax = uthread->uu_rval[0];
215 regs->edx = uthread->uu_rval[1];
216 }
217 }
218
219 uthread->uu_flag &= ~UT_NOTCANCELPT;
220 #if DEBUG
221 /*
222 * if we're holding the funnel panic
223 */
224 syscall_exit_funnelcheck();
225 #endif /* DEBUG */
226 if (uthread->uu_lowpri_window && uthread->v_mount) {
227 /*
228 * task is marked as a low priority I/O type
229 * and the I/O we issued while in this system call
230 * collided with normal I/O operations... we'll
231 * delay in order to mitigate the impact of this
232 * task on the normal operation of the system
233 */
234 throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
235 }
236 if (code != 180)
237 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
238 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
239
240 thread_exception_return();
241 /* NOTREACHED */
242 }
243
244
245 void
246 unix_syscall64(x86_saved_state_t *state)
247 {
248 thread_t thread;
249 unsigned int code;
250 struct sysent *callp;
251 void *uargp;
252 int args_in_regs;
253 int error;
254 struct proc *p;
255 struct uthread *uthread;
256 x86_saved_state64_t *regs;
257
258 assert(is_saved_state64(state));
259 regs = saved_state64(state);
260
261 if (regs->rax == 0x2000800)
262 thread_exception_return();
263
264 thread = current_thread();
265 uthread = get_bsdthread_info(thread);
266
267 /* Get the approriate proc; may be different from task's for vfork() */
268 if (!(uthread->uu_flag & UT_VFORK))
269 p = (struct proc *)get_bsdtask_info(current_task());
270 else
271 p = current_proc();
272
273 /* Verify that we are not being called from a task without a proc */
274 if (p == NULL) {
275 regs->rax = EPERM;
276 regs->isf.rflags |= EFL_CF;
277 task_terminate_internal(current_task());
278 thread_exception_return();
279 /* NOTREACHED */
280 }
281 args_in_regs = 6;
282
283 code = regs->rax & SYSCALL_NUMBER_MASK;
284 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
285 uargp = (void *)(&regs->rdi);
286
287 if (callp == sysent) {
288 /*
289 * indirect system call... system call number
290 * passed as 'arg0'
291 */
292 code = regs->rdi;
293 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
294 uargp = (void *)(&regs->rsi);
295 args_in_regs = 5;
296 }
297
298 if (callp->sy_narg != 0) {
299 if (code != 180) {
300 uint64_t *ip = (uint64_t *)uargp;
301
302 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
303 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
304 }
305 assert(callp->sy_narg <= 8);
306
307 if (callp->sy_narg > args_in_regs) {
308 int copyin_count;
309
310 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
311
312 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
313 if (error) {
314 regs->rax = error;
315 regs->isf.rflags |= EFL_CF;
316 thread_exception_return();
317 /* NOTREACHED */
318 }
319 }
320 /*
321 * XXX Turn 64 bit unsafe calls into nosys()
322 */
323 if (callp->sy_flags & UNSAFE_64BIT) {
324 callp = &sysent[63];
325 goto unsafe;
326 }
327 } else
328 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
329 0, 0, 0, 0, 0);
330 unsafe:
331
332 /*
333 * Delayed binding of thread credential to process credential, if we
334 * are not running with an explicitly set thread credential.
335 */
336 kauth_cred_uthread_update(uthread, p);
337
338 uthread->uu_rval[0] = 0;
339 uthread->uu_rval[1] = 0;
340
341
342 uthread->uu_flag |= UT_NOTCANCELPT;
343
344
345 AUDIT_SYSCALL_ENTER(code, p, uthread);
346 error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
347 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
348
349 #if CONFIG_DTRACE
350 uthread->t_dtrace_errno = error;
351 #endif /* CONFIG_DTRACE */
352
353 if (error == ERESTART) {
354 /*
355 * all system calls come through via the syscall instruction
356 * in 64 bit mode... its 2 bytes in length
357 * move the user's pc back to repeat the syscall:
358 */
359 regs->isf.rip -= 2;
360 }
361 else if (error != EJUSTRETURN) {
362 if (error) {
363 regs->rax = error;
364 regs->isf.rflags |= EFL_CF; /* carry bit */
365 } else { /* (not error) */
366
367 switch (callp->sy_return_type) {
368 case _SYSCALL_RET_INT_T:
369 regs->rax = uthread->uu_rval[0];
370 regs->rdx = uthread->uu_rval[1];
371 break;
372 case _SYSCALL_RET_UINT_T:
373 regs->rax = ((u_int)uthread->uu_rval[0]);
374 regs->rdx = ((u_int)uthread->uu_rval[1]);
375 break;
376 case _SYSCALL_RET_OFF_T:
377 case _SYSCALL_RET_ADDR_T:
378 case _SYSCALL_RET_SIZE_T:
379 case _SYSCALL_RET_SSIZE_T:
380 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
381 regs->rdx = 0;
382 break;
383 case _SYSCALL_RET_NONE:
384 break;
385 default:
386 panic("unix_syscall: unknown return type");
387 break;
388 }
389 regs->isf.rflags &= ~EFL_CF;
390 }
391 }
392
393
394 uthread->uu_flag &= ~UT_NOTCANCELPT;
395
396 /*
397 * if we're holding the funnel panic
398 */
399 syscall_exit_funnelcheck();
400
401 if (uthread->uu_lowpri_window && uthread->v_mount) {
402 /*
403 * task is marked as a low priority I/O type
404 * and the I/O we issued while in this system call
405 * collided with normal I/O operations... we'll
406 * delay in order to mitigate the impact of this
407 * task on the normal operation of the system
408 */
409 throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
410 }
411 if (code != 180)
412 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
413 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
414
415 thread_exception_return();
416 /* NOTREACHED */
417 }
418
419
420 void
421 unix_syscall_return(int error)
422 {
423 thread_t thread;
424 struct uthread *uthread;
425 struct proc *p;
426 unsigned int code;
427 vm_offset_t params;
428 struct sysent *callp;
429
430 thread = current_thread();
431 uthread = get_bsdthread_info(thread);
432
433 p = current_proc();
434
435 if (proc_is64bit(p)) {
436 x86_saved_state64_t *regs;
437
438 regs = saved_state64(find_user_regs(thread));
439
440 /* reconstruct code for tracing before blasting rax */
441 code = regs->rax & SYSCALL_NUMBER_MASK;
442 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
443
444 if (callp == sysent)
445 /*
446 * indirect system call... system call number
447 * passed as 'arg0'
448 */
449 code = regs->rdi;
450
451 #if CONFIG_DTRACE
452 if (callp->sy_call == dtrace_systrace_syscall)
453 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
454 #endif /* CONFIG_DTRACE */
455
456 if (error == ERESTART) {
457 /*
458 * all system calls come through via the syscall instruction
459 * in 64 bit mode... its 2 bytes in length
460 * move the user's pc back to repeat the syscall:
461 */
462 regs->isf.rip -= 2;
463 }
464 else if (error != EJUSTRETURN) {
465 if (error) {
466 regs->rax = error;
467 regs->isf.rflags |= EFL_CF; /* carry bit */
468 } else { /* (not error) */
469
470 switch (callp->sy_return_type) {
471 case _SYSCALL_RET_INT_T:
472 regs->rax = uthread->uu_rval[0];
473 regs->rdx = uthread->uu_rval[1];
474 break;
475 case _SYSCALL_RET_UINT_T:
476 regs->rax = ((u_int)uthread->uu_rval[0]);
477 regs->rdx = ((u_int)uthread->uu_rval[1]);
478 break;
479 case _SYSCALL_RET_OFF_T:
480 case _SYSCALL_RET_ADDR_T:
481 case _SYSCALL_RET_SIZE_T:
482 case _SYSCALL_RET_SSIZE_T:
483 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
484 regs->rdx = 0;
485 break;
486 case _SYSCALL_RET_NONE:
487 break;
488 default:
489 panic("unix_syscall: unknown return type");
490 break;
491 }
492 regs->isf.rflags &= ~EFL_CF;
493 }
494 }
495 } else {
496 x86_saved_state32_t *regs;
497
498 regs = saved_state32(find_user_regs(thread));
499
500 regs->efl &= ~(EFL_CF);
501 /* reconstruct code for tracing before blasting eax */
502 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
503 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
504
505 #if CONFIG_DTRACE
506 if (callp->sy_call == dtrace_systrace_syscall)
507 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
508 #endif /* CONFIG_DTRACE */
509
510 if (callp == sysent) {
511 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
512 code = fuword(params);
513 }
514 if (error == ERESTART) {
515 regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
516 }
517 else if (error != EJUSTRETURN) {
518 if (error) {
519 regs->eax = error;
520 regs->efl |= EFL_CF; /* carry bit */
521 } else { /* (not error) */
522 regs->eax = uthread->uu_rval[0];
523 regs->edx = uthread->uu_rval[1];
524 }
525 }
526 }
527
528
529 uthread->uu_flag &= ~UT_NOTCANCELPT;
530
531 /*
532 * if we're holding the funnel panic
533 */
534 syscall_exit_funnelcheck();
535
536 if (uthread->uu_lowpri_window && uthread->v_mount) {
537 /*
538 * task is marked as a low priority I/O type
539 * and the I/O we issued while in this system call
540 * collided with normal I/O operations... we'll
541 * delay in order to mitigate the impact of this
542 * task on the normal operation of the system
543 */
544 throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
545 }
546 if (code != 180)
547 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
548 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
549
550 thread_exception_return();
551 /* NOTREACHED */
552 }
553
/*
 * munge_wwwlww
 *
 * Widen a 32-bit argument list laid out as word, word, word, long, word,
 * word (seven 32-bit slots) into six 64-bit slots, in place in out64.
 * Each word is zero-extended; the two halves of the long are moved
 * unchanged into the fourth 64-bit slot.
 *
 * in32 is unused.  out64 must have room for six 64-bit values.
 *
 * NOTE(review): the buffer is accessed through both uint32_t and uint64_t
 * pointers; presumably the build does not enable strict-aliasing
 * optimizations - confirm.
 */
void
munge_wwwlww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*narrow = (uint32_t *) out64;
	uint64_t	*wide = (uint64_t *) out64;
	int		slot;

	/*
	 * Work from the last argument towards the first so that no 32-bit
	 * source slot is overwritten before it has been read.
	 */

	/* the two words after the long: 32-bit slots 5..6 -> 64-bit slots 4..5 */
	for (slot = 5; slot >= 4; slot--)
		wide[slot] = narrow[slot + 1];

	/* the long: 32-bit slots 3..4 -> 64-bit slot 3, high half first */
	narrow[7] = narrow[4];
	narrow[6] = narrow[3];

	/* the three leading words keep their index */
	for (slot = 2; slot >= 0; slot--)
		wide[slot] = narrow[slot];
}
574
575
/*
 * munge_wwlwww
 *
 * Widen a 32-bit argument list laid out as word, word, long, word, word,
 * word (seven 32-bit slots) into six 64-bit slots, in place in out64.
 * Each word is zero-extended; the two halves of the long are moved
 * unchanged into the third 64-bit slot.
 *
 * in32 is unused.  out64 must have room for six 64-bit values.
 *
 * NOTE(review): the buffer is accessed through both uint32_t and uint64_t
 * pointers; presumably the build does not enable strict-aliasing
 * optimizations - confirm.
 */
void
munge_wwlwww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*narrow = (uint32_t *) out64;
	uint64_t	*wide = (uint64_t *) out64;
	int		slot;

	/*
	 * Work from the last argument towards the first so that no 32-bit
	 * source slot is overwritten before it has been read.
	 */

	/* the three words after the long: 32-bit slots 4..6 -> 64-bit slots 3..5 */
	for (slot = 5; slot >= 3; slot--)
		wide[slot] = narrow[slot + 1];

	/* the long: 32-bit slots 2..3 -> 64-bit slot 2, high half first */
	narrow[5] = narrow[3];
	narrow[4] = narrow[2];

	/* the two leading words keep their index */
	for (slot = 1; slot >= 0; slot--)
		wide[slot] = narrow[slot];
}
596
#ifdef JOE_DEBUG
/*
 * joe_debug
 *
 * Debug aid, built only when JOE_DEBUG is defined: prints the message p
 * followed by a newline.
 *
 * The return type is spelled out as int (the definition previously relied
 * on implicit int and fell off the end without a value).  int rather than
 * void is used so the definition stays compatible with the call in
 * unix_syscall(), which is made without a prototype in scope.
 *
 * Returns:	always 0
 */
int
joe_debug(char *p)
{
	printf("%s\n", p);
	return (0);
}
#endif
603
604