]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/i386/systemcalls.c
xnu-792.10.96.tar.gz
[apple/xnu.git] / bsd / dev / i386 / systemcalls.c
CommitLineData
c0fea474
A
1/*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22#include <kern/task.h>
23#include <kern/thread.h>
24#include <kern/assert.h>
25#include <kern/clock.h>
26#include <kern/locks.h>
27#include <kern/sched_prim.h>
28#include <mach/machine/thread_status.h>
29#include <mach/thread_act.h>
30
31#include <sys/kernel.h>
32#include <sys/vm.h>
33#include <sys/proc_internal.h>
34#include <sys/syscall.h>
35#include <sys/systm.h>
36#include <sys/user.h>
37#include <sys/errno.h>
38#include <sys/ktrace.h>
39#include <sys/kdebug.h>
40#include <sys/sysent.h>
41#include <sys/sysproto.h>
42#include <sys/kauth.h>
43#include <sys/systm.h>
44
45#include <bsm/audit_kernel.h>
46
47#include <i386/seg.h>
48#include <i386/machine_routines.h>
49#include <mach/i386/syscall_sw.h>
50
51extern void unix_syscall(x86_saved_state_t *);
52extern void unix_syscall64(x86_saved_state_t *);
53extern void unix_syscall_return(int);
54extern void *find_user_regs(thread_t);
55extern void IOSleep(int);
56extern void exit_funnel_section(void);
57
58extern void Debugger(const char * message);
59
60/*
61 * Function: unix_syscall
62 *
63 * Inputs: regs - pointer to i386 save area
64 *
65 * Outputs: none
66 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;		/* -> uthread->uu_arg: kernel copy of the user args */
	unsigned short		code;		/* system call number */
	struct sysent		*callp;		/* dispatch table entry for 'code' */
	int			nargs;		/* argument byte count to copy in */
	int			error;
	int			funnel_type;
	vm_offset_t		params;		/* user-space address of the argument words */
	struct proc		*p;
	struct uthread		*uthread;
	unsigned int		cancel_enable;	/* cancellation class of this syscall */
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	/*
	 * NOTE(review): magic trap number 0x800 returns straight to user
	 * space with nothing dispatched -- presumably a reserved/ignored
	 * trap value; confirm against the trap entry code in osfmk.
	 */
	if (regs->eax == 0x800)
		thread_exception_return();

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (!(uthread->uu_flag & UT_VFORK))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (p == NULL) {
		/* No BSD process context: fail with EPERM and kill the task. */
		regs->eax = EPERM;
		regs->efl |= EFL_CF;	/* carry flag set = error return convention */
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	//printf("[scall : eax %x]", regs->eax);
	code = regs->eax;
	/* i386 ABI: args are on the user stack, just above the return address. */
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	/* sysent[63] is nosys() (see the UNSAFE_64BIT handling in unix_syscall64) */
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];

	if (callp == sysent) {
		/*
		 * Indirect system call (syscall(2), code 0): the real system
		 * call number is the first word of the argument list.
		 */
		code = fuword(params);
		params += sizeof (int);
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	}
	vt = (void *)uthread->uu_arg;

	nargs = callp->sy_narg * sizeof (syscall_arg_t);
	if (nargs != 0) {
		sy_munge_t *mungerp;

		/*
		 * NOTE(review): nargs is a byte count (sy_narg scaled by
		 * sizeof(syscall_arg_t)) yet is asserted <= 8, which only
		 * allows two 32-bit words -- looks like it should bound
		 * sy_narg, or be <= 8 * sizeof(syscall_arg_t); verify
		 * against the upstream xnu sources.
		 */
		assert(nargs <= 8);

		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			/* Bad user argument pointer: report the copyin error. */
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}
		/*
		 * Emit the syscall-entry trace record, except for code 180 --
		 * presumably the kdebug trace syscall itself, excluded to
		 * avoid recursive trace records; TODO(review): confirm
		 * 180 == SYS_kdebug_trace in syscalls.master.
		 */
		if (code != 180) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}
		mungerp = callp->sy_arg_munge32;

		/*
		 * If non-NULL, then call the syscall argument munger to
		 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
		 * first argument is NULL because we are munging in place
		 * after a copyin because the ABI currently doesn't use
		 * registers to pass system call arguments.
		 */
		if (mungerp != NULL)
			(*mungerp)(NULL, vt);
	} else
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);
	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	if (uthread->uu_ucred != p->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		proc_lock(p);
		uthread->uu_ucred = p->p_ucred;
		kauth_cred_ref(uthread->uu_ucred);
		proc_unlock(p);
		/* drop the reference on the credential we replaced */
		if (old != NOCRED)
			kauth_cred_rele(old);
	}

	/* default return values; rval[1] preserves user %edx if untouched */
	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = regs->edx;

	cancel_enable = callp->sy_cancel;

	if (cancel_enable == _SYSCALL_CANCEL_NONE) {
		/* mark the thread as not at a cancellation point */
		uthread->uu_flag |= UT_NOTCANCELPT;
	} else {
		/* pending cancel, not disabled and not already acted upon? */
		if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
			if (cancel_enable == _SYSCALL_CANCEL_PRE) {
				/* system call cancelled; return to handle cancellation */
				regs->eax = (long long)EINTR;	/* NOTE(review): cast is a no-op into a 32-bit reg */
				regs->efl |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			} else {
				/* _SYSCALL_CANCEL_POST: abort the call in progress */
				thread_abort_safely(thread);
			}
		}
	}

	/* take the kernel funnel if this syscall requires it */
	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
	if (funnel_type == KERNEL_FUNNEL)
		thread_funnel_set(kernel_flock, TRUE);

	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, callp->sy_narg, vt);

	/* dispatch to the actual system call implementation */
	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(error, p, uthread);

	if (error == ERESTART) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */
		if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS)
			regs->eip -= 5;
		else
			regs->eip -= 2;
	}
	else if (error != EJUSTRETURN) {
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/* success: results in eax/edx, carry cleared */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
			regs->efl &= ~EFL_CF;
		}
	}

	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, code, error, uthread->uu_rval[0]);

	if (cancel_enable == _SYSCALL_CANCEL_NONE)
		uthread->uu_flag &= ~UT_NOTCANCELPT;

	/*
	 * if we're holding the funnel
	 * than drop it regardless of whether
	 * we took it on system call entry
	 */
	exit_funnel_section();

	if (uthread->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(uthread->uu_lowpri_delay);
		uthread->uu_lowpri_delay = 0;
	}
	/* syscall-exit trace record (same code-180 exclusion as above) */
	if (code != 180)
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);

	thread_exception_return();
	/* NOTREACHED */
}
253
254
/*
 * unix_syscall64: 64-bit (x86_64) BSD system call dispatcher.
 *
 * Like unix_syscall(), but the first six arguments arrive in registers
 * (rdi, rsi, rdx, rcx/r10, r8, r9 per the saved-state layout) instead of
 * on the user stack; any remaining arguments are copied in from the
 * user stack into the saved state starting at v_arg6.
 */
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	unsigned short	code;		/* system call number */
	struct sysent	*callp;		/* dispatch table entry for 'code' */
	void		*uargp;		/* -> first argument register in the save area */
	int		args_in_regs;	/* how many args arrive in registers */
	int		error;
	int		funnel_type;
	struct proc	*p;
	struct uthread	*uthread;
	unsigned int	cancel_enable;	/* cancellation class of this syscall */
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	/*
	 * NOTE(review): 0x2000800 is the 64-bit analog of the 0x800 magic
	 * value in unix_syscall() (with the UNIX syscall class bits in the
	 * high word) -- returns to user space with nothing dispatched;
	 * confirm against the trap entry code.
	 */
	if (regs->rax == 0x2000800)
		thread_exception_return();

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (!(uthread->uu_flag & UT_VFORK))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (p == NULL) {
		/* No BSD process context: fail with EPERM and kill the task. */
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;	/* carry flag set = error return convention */
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}
	args_in_regs = 6;

	/* strip the syscall-class bits out of rax to get the call number */
	code = regs->rax & SYSCALL_NUMBER_MASK;
	/* sysent[63] is nosys() (see the UNSAFE_64BIT conversion below) */
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	uargp = (void *)(&regs->rdi);

	if (callp == sysent) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
		/* args shift down one register; only 5 remain in registers */
		uargp = (void *)(&regs->rsi);
		args_in_regs = 5;
	}

	if (callp->sy_narg != 0) {
		/*
		 * Syscall-entry trace record, except for code 180 --
		 * presumably the kdebug trace syscall itself (avoids
		 * recursive trace records); TODO(review): confirm
		 * 180 == SYS_kdebug_trace.
		 */
		if (code != 180) {
			uint64_t *ip = (uint64_t *)uargp;

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}
		assert(callp->sy_narg <= 8);

		if (callp->sy_narg > args_in_regs) {
			int copyin_count;

			/* spill args live on the user stack, 8 bytes each */
			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);

			/*
			 * +sizeof(user_addr_t) presumably skips the return
			 * address on the user stack -- TODO(review): confirm
			 * against the libsyscall stubs.
			 */
			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
			if (error) {
				/* Bad user stack pointer: report the copyin error. */
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
		/*
		 * XXX Turn 64 bit unsafe calls into nosys()
		 *
		 * NOTE(review): because this check sits inside the
		 * sy_narg != 0 branch, a zero-argument syscall marked
		 * UNSAFE_64BIT would NOT be converted -- verify that no
		 * such entry exists in the syscall table.
		 */
		if (callp->sy_funnel & UNSAFE_64BIT) {
			callp = &sysent[63];
			goto unsafe;
		}

	} else
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);
unsafe:

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	if (uthread->uu_ucred != p->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		proc_lock(p);
		uthread->uu_ucred = p->p_ucred;
		kauth_cred_ref(uthread->uu_ucred);
		proc_unlock(p);
		/* drop the reference on the credential we replaced */
		if (old != NOCRED)
			kauth_cred_rele(old);
	}

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;

	cancel_enable = callp->sy_cancel;

	if (cancel_enable == _SYSCALL_CANCEL_NONE) {
		/* mark the thread as not at a cancellation point */
		uthread->uu_flag |= UT_NOTCANCELPT;
	} else {
		/* pending cancel, not disabled and not already acted upon? */
		if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
			if (cancel_enable == _SYSCALL_CANCEL_PRE) {
				/* system call cancelled; return to handle cancellation */
				regs->rax = EINTR;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			} else {
				/* _SYSCALL_CANCEL_POST: abort the call in progress */
				thread_abort_safely(thread);
			}
		}
	}

	/* take the kernel funnel if this syscall requires it */
	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
	if (funnel_type == KERNEL_FUNNEL)
		thread_funnel_set(kernel_flock, TRUE);

	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p, code, callp->sy_narg, uargp);

	/* dispatch to the actual system call implementation */
	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(error, p, uthread);

	if (error == ERESTART) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... its 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		regs->isf.rip -= 2;
	}
	else if (error != EJUSTRETURN) {
		if (error) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/* success: marshal the result per declared return type */
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
				/* 64-bit result stored across both 32-bit rval slots */
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}

	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, code, error, uthread->uu_rval[0]);

	if (cancel_enable == _SYSCALL_CANCEL_NONE)
		uthread->uu_flag &= ~UT_NOTCANCELPT;

	/*
	 * if we're holding the funnel
	 * than drop it regardless of whether
	 * we took it on system call entry
	 */
	exit_funnel_section();

	if (uthread->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(uthread->uu_lowpri_delay);
		uthread->uu_lowpri_delay = 0;
	}
	/* syscall-exit trace record (same code-180 exclusion as above) */
	if (code != 180)
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);

	thread_exception_return();
	/* NOTREACHED */
}
463
464
/*
 * unix_syscall_return: finish a system call on behalf of a thread that
 * did not return through the normal dispatcher tail -- presumably used
 * by continuation-based syscalls that block and resume later (TODO
 * confirm callers).  Reconstructs the syscall number from the saved
 * user registers, stores the result/error, and mirrors the common exit
 * path of unix_syscall()/unix_syscall64() (ktrace, cancellation flag,
 * funnel drop, low-priority I/O throttle, kdebug exit record).
 */
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned short	code;		/* reconstructed syscall number, for tracing */
	vm_offset_t	params;		/* 32-bit path: user stack argument address */
	struct sysent	*callp;
	unsigned int	cancel_enable;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		/* reconstruct code for tracing before blasting rax */
		code = regs->rax & SYSCALL_NUMBER_MASK;
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];

		if (callp == sysent)
			/*
			 * indirect system call... system call number
			 * passed as 'arg0'
			 */
			code = regs->rdi;

		if (error == ERESTART) {
			/*
			 * all system calls come through via the syscall instruction
			 * in 64 bit mode... its 2 bytes in length
			 * move the user's pc back to repeat the syscall:
			 */
			regs->isf.rip -= 2;
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				/* success: marshal the result per declared return type */
				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
					/* 64-bit result stored across both 32-bit rval slots */
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(find_user_regs(thread));

		/* reconstruct code for tracing before blasting eax */
		code = regs->eax;
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];

		if (callp == sysent) {
			/* indirect syscall: real number is on the user stack */
			params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
			code = fuword(params);
		}
		if (error == ERESTART) {
			/*
			 * back up 5 bytes for a sysenter, 2 for an int 8x.
			 * NOTE(review): unlike unix_syscall(), this does not
			 * also match SYSENTER_TF_CS, so a single-stepped
			 * sysenter restart would back up only 2 bytes --
			 * verify against the debug trap handler.
			 */
			regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				/* success: results in eax/edx, carry cleared */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
				regs->efl &= ~EFL_CF;
			}
		}
	}
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p, code, error, uthread->uu_rval[0]);

	cancel_enable = callp->sy_cancel;

	if (cancel_enable == _SYSCALL_CANCEL_NONE)
		uthread->uu_flag &= ~UT_NOTCANCELPT;

	/*
	 * if we're holding the funnel
	 * than drop it regardless of whether
	 * we took it on system call entry
	 */
	exit_funnel_section();

	if (uthread->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(uthread->uu_lowpri_delay);
		uthread->uu_lowpri_delay = 0;
	}
	/* syscall-exit trace record (code 180 excluded, as in the dispatchers) */
	if (code != 180)
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);

	thread_exception_return();
	/* NOTREACHED */
}
596
/*
 * munge_wwwlww
 *
 * In-place expansion of a 32-bit argument list with layout "wwwlww"
 * (three 32-bit words, one 64-bit long, two 32-bit words) into an
 * array of 64-bit argument slots.
 *
 * in32 is unused: the seven 32-bit input words have already been
 * copied to the front of the out64 buffer, so we read them out first
 * and then store each argument into its widened slot.  The 64-bit 'l'
 * argument keeps its two 32-bit halves: low half in word 6, high half
 * in word 7 of the buffer (i.e. 64-bit slot 3).
 */
void
munge_wwwlww(
	__unused const void *in32,
	void *out64)
{
	uint32_t *words = (uint32_t *) out64;
	uint64_t *slots = (uint64_t *) out64;

	/* snapshot all seven input words before any slot is overwritten */
	uint32_t w0 = words[0];
	uint32_t w1 = words[1];
	uint32_t w2 = words[2];
	uint32_t l_lo = words[3];
	uint32_t l_hi = words[4];
	uint32_t w4 = words[5];
	uint32_t w5 = words[6];

	slots[0] = w0;		/* Wwwlww */
	slots[1] = w1;		/* wWwlww */
	slots[2] = w2;		/* wwWlww */
	words[6] = l_lo;	/* wwwLww (lo half of slot 3) */
	words[7] = l_hi;	/* wwwLww (hi half of slot 3) */
	slots[4] = w4;		/* wwwlWw */
	slots[5] = w5;		/* wwwlwW */
}