/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28 | #include <kern/task.h> | |
29 | #include <kern/thread.h> | |
30 | #include <kern/assert.h> | |
31 | #include <kern/clock.h> | |
32 | #include <kern/locks.h> | |
33 | #include <kern/sched_prim.h> | |
b0d623f7 | 34 | #include <kern/debug.h> |
0c530ab8 A |
35 | #include <mach/machine/thread_status.h> |
36 | #include <mach/thread_act.h> | |
6d2010ae | 37 | #include <mach/branch_predicates.h> |
0c530ab8 A |
38 | |
39 | #include <sys/kernel.h> | |
40 | #include <sys/vm.h> | |
41 | #include <sys/proc_internal.h> | |
42 | #include <sys/syscall.h> | |
43 | #include <sys/systm.h> | |
44 | #include <sys/user.h> | |
45 | #include <sys/errno.h> | |
0c530ab8 A |
46 | #include <sys/kdebug.h> |
47 | #include <sys/sysent.h> | |
48 | #include <sys/sysproto.h> | |
49 | #include <sys/kauth.h> | |
50 | #include <sys/systm.h> | |
51 | ||
b0d623f7 | 52 | #include <security/audit/audit.h> |
0c530ab8 A |
53 | |
54 | #include <i386/seg.h> | |
55 | #include <i386/machine_routines.h> | |
56 | #include <mach/i386/syscall_sw.h> | |
57 | ||
6d2010ae A |
58 | #include <machine/pal_routines.h> |
59 | ||
2d21ac55 A |
60 | #if CONFIG_DTRACE |
61 | extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); | |
62 | extern void dtrace_systrace_syscall_return(unsigned short, int, int *); | |
63 | #endif | |
64 | ||
0c530ab8 A |
65 | extern void unix_syscall(x86_saved_state_t *); |
66 | extern void unix_syscall64(x86_saved_state_t *); | |
0c530ab8 | 67 | extern void *find_user_regs(thread_t); |
0c530ab8 | 68 | |
2d21ac55 A |
69 | extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); |
70 | extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); | |
b0d623f7 A |
71 | |
72 | /* dynamically generated at build time based on syscalls.master */ | |
73 | extern const char *syscallnames[]; | |
74 | ||
6d2010ae A |
75 | /* |
76 | * This needs to be a single switch so that it's "all on" or "all off", | |
77 | * rather than being turned on for some code paths and not others, as this | |
78 | * has a tendency to introduce "blame the next guy" bugs. | |
79 | */ | |
80 | #if DEBUG | |
81 | #define FUNNEL_DEBUG 1 /* Check for funnel held on exit */ | |
82 | #endif | |
83 | ||
0c530ab8 A |
84 | /* |
85 | * Function: unix_syscall | |
86 | * | |
87 | * Inputs: regs - pointer to i386 save area | |
88 | * | |
89 | * Outputs: none | |
90 | */ | |
91 | void | |
92 | unix_syscall(x86_saved_state_t *state) | |
93 | { | |
2d21ac55 A |
94 | thread_t thread; |
95 | void *vt; | |
96 | unsigned int code; | |
97 | struct sysent *callp; | |
98 | ||
99 | int error; | |
100 | vm_offset_t params; | |
101 | struct proc *p; | |
102 | struct uthread *uthread; | |
0c530ab8 | 103 | x86_saved_state32_t *regs; |
2d21ac55 | 104 | boolean_t args_in_uthread; |
6d2010ae | 105 | boolean_t is_vfork; |
0c530ab8 A |
106 | |
107 | assert(is_saved_state32(state)); | |
108 | regs = saved_state32(state); | |
2d21ac55 | 109 | #if DEBUG |
0c530ab8 A |
110 | if (regs->eax == 0x800) |
111 | thread_exception_return(); | |
2d21ac55 | 112 | #endif |
0c530ab8 A |
113 | thread = current_thread(); |
114 | uthread = get_bsdthread_info(thread); | |
115 | ||
116 | /* Get the approriate proc; may be different from task's for vfork() */ | |
6d2010ae A |
117 | is_vfork = uthread->uu_flag & UT_VFORK; |
118 | if (__improbable(is_vfork != 0)) | |
0c530ab8 | 119 | p = current_proc(); |
6d2010ae A |
120 | else |
121 | p = (struct proc *)get_bsdtask_info(current_task()); | |
0c530ab8 A |
122 | |
123 | /* Verify that we are not being called from a task without a proc */ | |
6d2010ae | 124 | if (__improbable(p == NULL)) { |
0c530ab8 A |
125 | regs->eax = EPERM; |
126 | regs->efl |= EFL_CF; | |
127 | task_terminate_internal(current_task()); | |
128 | thread_exception_return(); | |
129 | /* NOTREACHED */ | |
130 | } | |
131 | ||
2d21ac55 | 132 | code = regs->eax & I386_SYSCALL_NUMBER_MASK; |
b0d623f7 A |
133 | DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n", |
134 | code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip); | |
2d21ac55 | 135 | args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread); |
b0d623f7 | 136 | params = (vm_offset_t) (regs->uesp + sizeof (int)); |
2d21ac55 A |
137 | |
138 | regs->efl &= ~(EFL_CF); | |
139 | ||
140 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; | |
0c530ab8 | 141 | |
6d2010ae | 142 | if (__improbable(callp == sysent)) { |
0c530ab8 | 143 | code = fuword(params); |
2d21ac55 A |
144 | params += sizeof(int); |
145 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; | |
0c530ab8 | 146 | } |
2d21ac55 | 147 | |
0c530ab8 A |
148 | vt = (void *)uthread->uu_arg; |
149 | ||
2d21ac55 | 150 | if (callp->sy_arg_bytes != 0) { |
0c530ab8 A |
151 | sy_munge_t *mungerp; |
152 | ||
2d21ac55 A |
153 | assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg)); |
154 | if (!args_in_uthread) | |
155 | { | |
156 | uint32_t nargs; | |
157 | nargs = callp->sy_arg_bytes; | |
158 | error = copyin((user_addr_t) params, (char *) vt, nargs); | |
159 | if (error) { | |
160 | regs->eax = error; | |
161 | regs->efl |= EFL_CF; | |
162 | thread_exception_return(); | |
163 | /* NOTREACHED */ | |
164 | } | |
0c530ab8 | 165 | } |
2d21ac55 | 166 | |
6d2010ae | 167 | if (__probable(code != 180)) { |
2d21ac55 | 168 | int *ip = (int *)vt; |
0c530ab8 A |
169 | |
170 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, | |
2d21ac55 | 171 | *ip, *(ip+1), *(ip+2), *(ip+3), 0); |
0c530ab8 A |
172 | } |
173 | mungerp = callp->sy_arg_munge32; | |
174 | ||
175 | /* | |
176 | * If non-NULL, then call the syscall argument munger to | |
b0d623f7 | 177 | * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the |
0c530ab8 A |
178 | * first argument is NULL because we are munging in place |
179 | * after a copyin because the ABI currently doesn't use | |
180 | * registers to pass system call arguments. | |
181 | */ | |
182 | if (mungerp != NULL) | |
183 | (*mungerp)(NULL, vt); | |
184 | } else | |
2d21ac55 A |
185 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, |
186 | 0, 0, 0, 0, 0); | |
187 | ||
0c530ab8 A |
188 | /* |
189 | * Delayed binding of thread credential to process credential, if we | |
190 | * are not running with an explicitly set thread credential. | |
191 | */ | |
2d21ac55 | 192 | kauth_cred_uthread_update(uthread, p); |
0c530ab8 A |
193 | |
194 | uthread->uu_rval[0] = 0; | |
195 | uthread->uu_rval[1] = regs->edx; | |
2d21ac55 | 196 | uthread->uu_flag |= UT_NOTCANCELPT; |
0c530ab8 | 197 | |
0c530ab8 | 198 | |
2d21ac55 A |
199 | #ifdef JOE_DEBUG |
200 | uthread->uu_iocount = 0; | |
201 | uthread->uu_vpindex = 0; | |
202 | #endif | |
0c530ab8 A |
203 | |
204 | AUDIT_SYSCALL_ENTER(code, p, uthread); | |
205 | error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); | |
2d21ac55 A |
206 | AUDIT_SYSCALL_EXIT(code, p, uthread, error); |
207 | ||
208 | #ifdef JOE_DEBUG | |
209 | if (uthread->uu_iocount) | |
b0d623f7 | 210 | printf("system call returned with uu_iocount != 0\n"); |
2d21ac55 A |
211 | #endif |
212 | #if CONFIG_DTRACE | |
213 | uthread->t_dtrace_errno = error; | |
214 | #endif /* CONFIG_DTRACE */ | |
215 | ||
6d2010ae | 216 | if (__improbable(error == ERESTART)) { |
0c530ab8 A |
217 | /* |
218 | * Move the user's pc back to repeat the syscall: | |
219 | * 5 bytes for a sysenter, or 2 for an int 8x. | |
220 | * The SYSENTER_TF_CS covers single-stepping over a sysenter | |
221 | * - see debug trap handler in idt.s/idt64.s | |
222 | */ | |
b0d623f7 | 223 | |
6d2010ae | 224 | pal_syscall_restart(thread, state); |
0c530ab8 A |
225 | } |
226 | else if (error != EJUSTRETURN) { | |
6d2010ae | 227 | if (__improbable(error)) { |
0c530ab8 A |
228 | regs->eax = error; |
229 | regs->efl |= EFL_CF; /* carry bit */ | |
230 | } else { /* (not error) */ | |
231 | regs->eax = uthread->uu_rval[0]; | |
232 | regs->edx = uthread->uu_rval[1]; | |
0c530ab8 A |
233 | } |
234 | } | |
235 | ||
b0d623f7 A |
236 | DEBUG_KPRINT_SYSCALL_UNIX( |
237 | "unix_syscall: error=%d retval=(%u,%u)\n", | |
238 | error, regs->eax, regs->edx); | |
239 | ||
2d21ac55 | 240 | uthread->uu_flag &= ~UT_NOTCANCELPT; |
6d2010ae | 241 | #if FUNNEL_DEBUG |
0c530ab8 | 242 | /* |
2d21ac55 | 243 | * if we're holding the funnel panic |
0c530ab8 | 244 | */ |
2d21ac55 | 245 | syscall_exit_funnelcheck(); |
6d2010ae A |
246 | #endif /* FUNNEL_DEBUG */ |
247 | ||
248 | if (__improbable(uthread->uu_lowpri_window)) { | |
0c530ab8 A |
249 | /* |
250 | * task is marked as a low priority I/O type | |
251 | * and the I/O we issued while in this system call | |
252 | * collided with normal I/O operations... we'll | |
253 | * delay in order to mitigate the impact of this | |
254 | * task on the normal operation of the system | |
255 | */ | |
593a1d5f | 256 | throttle_lowpri_io(TRUE); |
0c530ab8 | 257 | } |
6d2010ae | 258 | if (__probable(code != 180)) |
0c530ab8 | 259 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, |
b0d623f7 A |
260 | error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); |
261 | ||
6d2010ae A |
262 | if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) { |
263 | pal_execve_return(thread); | |
264 | } | |
0c530ab8 A |
265 | |
266 | thread_exception_return(); | |
267 | /* NOTREACHED */ | |
268 | } | |
269 | ||
270 | ||
271 | void | |
272 | unix_syscall64(x86_saved_state_t *state) | |
273 | { | |
274 | thread_t thread; | |
275 | unsigned int code; | |
276 | struct sysent *callp; | |
277 | void *uargp; | |
278 | int args_in_regs; | |
279 | int error; | |
0c530ab8 A |
280 | struct proc *p; |
281 | struct uthread *uthread; | |
0c530ab8 A |
282 | x86_saved_state64_t *regs; |
283 | ||
284 | assert(is_saved_state64(state)); | |
285 | regs = saved_state64(state); | |
6d2010ae | 286 | #if DEBUG |
0c530ab8 A |
287 | if (regs->rax == 0x2000800) |
288 | thread_exception_return(); | |
6d2010ae | 289 | #endif |
0c530ab8 A |
290 | thread = current_thread(); |
291 | uthread = get_bsdthread_info(thread); | |
292 | ||
293 | /* Get the approriate proc; may be different from task's for vfork() */ | |
6d2010ae | 294 | if (__probable(!(uthread->uu_flag & UT_VFORK))) |
0c530ab8 A |
295 | p = (struct proc *)get_bsdtask_info(current_task()); |
296 | else | |
297 | p = current_proc(); | |
298 | ||
299 | /* Verify that we are not being called from a task without a proc */ | |
6d2010ae | 300 | if (__improbable(p == NULL)) { |
0c530ab8 A |
301 | regs->rax = EPERM; |
302 | regs->isf.rflags |= EFL_CF; | |
303 | task_terminate_internal(current_task()); | |
304 | thread_exception_return(); | |
305 | /* NOTREACHED */ | |
306 | } | |
307 | args_in_regs = 6; | |
308 | ||
309 | code = regs->rax & SYSCALL_NUMBER_MASK; | |
b0d623f7 A |
310 | DEBUG_KPRINT_SYSCALL_UNIX( |
311 | "unix_syscall64: code=%d(%s) rip=%llx\n", | |
312 | code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip); | |
2d21ac55 | 313 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; |
0c530ab8 A |
314 | uargp = (void *)(®s->rdi); |
315 | ||
6d2010ae | 316 | if (__improbable(callp == sysent)) { |
0c530ab8 A |
317 | /* |
318 | * indirect system call... system call number | |
319 | * passed as 'arg0' | |
320 | */ | |
2d21ac55 A |
321 | code = regs->rdi; |
322 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; | |
0c530ab8 A |
323 | uargp = (void *)(®s->rsi); |
324 | args_in_regs = 5; | |
325 | } | |
326 | ||
327 | if (callp->sy_narg != 0) { | |
328 | if (code != 180) { | |
2d21ac55 | 329 | uint64_t *ip = (uint64_t *)uargp; |
0c530ab8 A |
330 | |
331 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, | |
2d21ac55 | 332 | (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); |
0c530ab8 | 333 | } |
2d21ac55 | 334 | assert(callp->sy_narg <= 8); |
0c530ab8 | 335 | |
6d2010ae | 336 | if (__improbable(callp->sy_narg > args_in_regs)) { |
2d21ac55 | 337 | int copyin_count; |
0c530ab8 | 338 | |
2d21ac55 | 339 | copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); |
0c530ab8 | 340 | |
2d21ac55 | 341 | error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count); |
0c530ab8 | 342 | if (error) { |
2d21ac55 | 343 | regs->rax = error; |
0c530ab8 A |
344 | regs->isf.rflags |= EFL_CF; |
345 | thread_exception_return(); | |
346 | /* NOTREACHED */ | |
347 | } | |
348 | } | |
349 | /* | |
350 | * XXX Turn 64 bit unsafe calls into nosys() | |
351 | */ | |
6d2010ae | 352 | if (__improbable(callp->sy_flags & UNSAFE_64BIT)) { |
2d21ac55 | 353 | callp = &sysent[63]; |
0c530ab8 A |
354 | goto unsafe; |
355 | } | |
0c530ab8 A |
356 | } else |
357 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, | |
358 | 0, 0, 0, 0, 0); | |
359 | unsafe: | |
360 | ||
361 | /* | |
362 | * Delayed binding of thread credential to process credential, if we | |
363 | * are not running with an explicitly set thread credential. | |
364 | */ | |
2d21ac55 | 365 | kauth_cred_uthread_update(uthread, p); |
0c530ab8 A |
366 | |
367 | uthread->uu_rval[0] = 0; | |
368 | uthread->uu_rval[1] = 0; | |
369 | ||
0c530ab8 | 370 | |
2d21ac55 | 371 | uthread->uu_flag |= UT_NOTCANCELPT; |
0c530ab8 | 372 | |
6d2010ae A |
373 | #ifdef JOE_DEBUG |
374 | uthread->uu_iocount = 0; | |
375 | uthread->uu_vpindex = 0; | |
376 | #endif | |
0c530ab8 A |
377 | |
378 | AUDIT_SYSCALL_ENTER(code, p, uthread); | |
379 | error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0])); | |
2d21ac55 A |
380 | AUDIT_SYSCALL_EXIT(code, p, uthread, error); |
381 | ||
6d2010ae A |
382 | #ifdef JOE_DEBUG |
383 | if (uthread->uu_iocount) | |
384 | printf("system call returned with uu_iocount != 0\n"); | |
385 | #endif | |
386 | ||
2d21ac55 A |
387 | #if CONFIG_DTRACE |
388 | uthread->t_dtrace_errno = error; | |
389 | #endif /* CONFIG_DTRACE */ | |
0c530ab8 | 390 | |
6d2010ae | 391 | if (__improbable(error == ERESTART)) { |
0c530ab8 A |
392 | /* |
393 | * all system calls come through via the syscall instruction | |
394 | * in 64 bit mode... its 2 bytes in length | |
395 | * move the user's pc back to repeat the syscall: | |
396 | */ | |
6d2010ae | 397 | pal_syscall_restart( thread, state ); |
0c530ab8 A |
398 | } |
399 | else if (error != EJUSTRETURN) { | |
6d2010ae | 400 | if (__improbable(error)) { |
2d21ac55 | 401 | regs->rax = error; |
0c530ab8 A |
402 | regs->isf.rflags |= EFL_CF; /* carry bit */ |
403 | } else { /* (not error) */ | |
404 | ||
405 | switch (callp->sy_return_type) { | |
406 | case _SYSCALL_RET_INT_T: | |
407 | regs->rax = uthread->uu_rval[0]; | |
408 | regs->rdx = uthread->uu_rval[1]; | |
409 | break; | |
410 | case _SYSCALL_RET_UINT_T: | |
411 | regs->rax = ((u_int)uthread->uu_rval[0]); | |
412 | regs->rdx = ((u_int)uthread->uu_rval[1]); | |
413 | break; | |
414 | case _SYSCALL_RET_OFF_T: | |
415 | case _SYSCALL_RET_ADDR_T: | |
416 | case _SYSCALL_RET_SIZE_T: | |
417 | case _SYSCALL_RET_SSIZE_T: | |
d1ecb069 | 418 | case _SYSCALL_RET_UINT64_T: |
0c530ab8 A |
419 | regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); |
420 | regs->rdx = 0; | |
421 | break; | |
422 | case _SYSCALL_RET_NONE: | |
423 | break; | |
424 | default: | |
425 | panic("unix_syscall: unknown return type"); | |
426 | break; | |
427 | } | |
428 | regs->isf.rflags &= ~EFL_CF; | |
429 | } | |
430 | } | |
431 | ||
b0d623f7 A |
432 | DEBUG_KPRINT_SYSCALL_UNIX( |
433 | "unix_syscall64: error=%d retval=(%llu,%llu)\n", | |
434 | error, regs->rax, regs->rdx); | |
435 | ||
2d21ac55 | 436 | uthread->uu_flag &= ~UT_NOTCANCELPT; |
0c530ab8 | 437 | |
6d2010ae | 438 | #if FUNNEL_DEBUG |
0c530ab8 | 439 | /* |
2d21ac55 | 440 | * if we're holding the funnel panic |
0c530ab8 | 441 | */ |
2d21ac55 | 442 | syscall_exit_funnelcheck(); |
6d2010ae | 443 | #endif /* FUNNEL_DEBUG */ |
0c530ab8 | 444 | |
6d2010ae | 445 | if (__improbable(uthread->uu_lowpri_window)) { |
0c530ab8 A |
446 | /* |
447 | * task is marked as a low priority I/O type | |
448 | * and the I/O we issued while in this system call | |
449 | * collided with normal I/O operations... we'll | |
450 | * delay in order to mitigate the impact of this | |
451 | * task on the normal operation of the system | |
452 | */ | |
593a1d5f | 453 | throttle_lowpri_io(TRUE); |
0c530ab8 | 454 | } |
6d2010ae | 455 | if (__probable(code != 180)) |
0c530ab8 | 456 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, |
b0d623f7 | 457 | error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); |
0c530ab8 A |
458 | |
459 | thread_exception_return(); | |
460 | /* NOTREACHED */ | |
461 | } | |
462 | ||
463 | ||
464 | void | |
465 | unix_syscall_return(int error) | |
466 | { | |
467 | thread_t thread; | |
468 | struct uthread *uthread; | |
469 | struct proc *p; | |
470 | unsigned int code; | |
471 | vm_offset_t params; | |
472 | struct sysent *callp; | |
0c530ab8 A |
473 | |
474 | thread = current_thread(); | |
475 | uthread = get_bsdthread_info(thread); | |
476 | ||
6d2010ae | 477 | pal_register_cache_state(thread, DIRTY); |
b0d623f7 | 478 | |
0c530ab8 A |
479 | p = current_proc(); |
480 | ||
481 | if (proc_is64bit(p)) { | |
2d21ac55 | 482 | x86_saved_state64_t *regs; |
0c530ab8 A |
483 | |
484 | regs = saved_state64(find_user_regs(thread)); | |
485 | ||
2d21ac55 | 486 | /* reconstruct code for tracing before blasting rax */ |
0c530ab8 | 487 | code = regs->rax & SYSCALL_NUMBER_MASK; |
2d21ac55 | 488 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; |
0c530ab8 A |
489 | |
490 | if (callp == sysent) | |
2d21ac55 | 491 | /* |
0c530ab8 A |
492 | * indirect system call... system call number |
493 | * passed as 'arg0' | |
494 | */ | |
2d21ac55 A |
495 | code = regs->rdi; |
496 | ||
497 | #if CONFIG_DTRACE | |
498 | if (callp->sy_call == dtrace_systrace_syscall) | |
499 | dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); | |
500 | #endif /* CONFIG_DTRACE */ | |
b0d623f7 | 501 | AUDIT_SYSCALL_EXIT(code, p, uthread, error); |
0c530ab8 A |
502 | |
503 | if (error == ERESTART) { | |
2d21ac55 | 504 | /* |
6d2010ae | 505 | * repeat the syscall |
0c530ab8 | 506 | */ |
6d2010ae | 507 | pal_syscall_restart( thread, find_user_regs(thread) ); |
0c530ab8 A |
508 | } |
509 | else if (error != EJUSTRETURN) { | |
2d21ac55 A |
510 | if (error) { |
511 | regs->rax = error; | |
0c530ab8 A |
512 | regs->isf.rflags |= EFL_CF; /* carry bit */ |
513 | } else { /* (not error) */ | |
514 | ||
2d21ac55 | 515 | switch (callp->sy_return_type) { |
0c530ab8 | 516 | case _SYSCALL_RET_INT_T: |
2d21ac55 | 517 | regs->rax = uthread->uu_rval[0]; |
0c530ab8 A |
518 | regs->rdx = uthread->uu_rval[1]; |
519 | break; | |
520 | case _SYSCALL_RET_UINT_T: | |
2d21ac55 | 521 | regs->rax = ((u_int)uthread->uu_rval[0]); |
0c530ab8 A |
522 | regs->rdx = ((u_int)uthread->uu_rval[1]); |
523 | break; | |
524 | case _SYSCALL_RET_OFF_T: | |
525 | case _SYSCALL_RET_ADDR_T: | |
526 | case _SYSCALL_RET_SIZE_T: | |
527 | case _SYSCALL_RET_SSIZE_T: | |
d1ecb069 | 528 | case _SYSCALL_RET_UINT64_T: |
2d21ac55 | 529 | regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); |
0c530ab8 A |
530 | regs->rdx = 0; |
531 | break; | |
532 | case _SYSCALL_RET_NONE: | |
2d21ac55 | 533 | break; |
0c530ab8 | 534 | default: |
2d21ac55 | 535 | panic("unix_syscall: unknown return type"); |
0c530ab8 A |
536 | break; |
537 | } | |
538 | regs->isf.rflags &= ~EFL_CF; | |
539 | } | |
540 | } | |
b0d623f7 A |
541 | DEBUG_KPRINT_SYSCALL_UNIX( |
542 | "unix_syscall_return: error=%d retval=(%llu,%llu)\n", | |
543 | error, regs->rax, regs->rdx); | |
0c530ab8 | 544 | } else { |
2d21ac55 | 545 | x86_saved_state32_t *regs; |
0c530ab8 A |
546 | |
547 | regs = saved_state32(find_user_regs(thread)); | |
548 | ||
2d21ac55 | 549 | regs->efl &= ~(EFL_CF); |
0c530ab8 | 550 | /* reconstruct code for tracing before blasting eax */ |
2d21ac55 A |
551 | code = regs->eax & I386_SYSCALL_NUMBER_MASK; |
552 | callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; | |
553 | ||
554 | #if CONFIG_DTRACE | |
555 | if (callp->sy_call == dtrace_systrace_syscall) | |
556 | dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); | |
557 | #endif /* CONFIG_DTRACE */ | |
b0d623f7 | 558 | AUDIT_SYSCALL_EXIT(code, p, uthread, error); |
0c530ab8 A |
559 | |
560 | if (callp == sysent) { | |
b0d623f7 | 561 | params = (vm_offset_t) (regs->uesp + sizeof (int)); |
2d21ac55 | 562 | code = fuword(params); |
0c530ab8 A |
563 | } |
564 | if (error == ERESTART) { | |
6d2010ae | 565 | pal_syscall_restart( thread, find_user_regs(thread) ); |
0c530ab8 A |
566 | } |
567 | else if (error != EJUSTRETURN) { | |
2d21ac55 A |
568 | if (error) { |
569 | regs->eax = error; | |
0c530ab8 A |
570 | regs->efl |= EFL_CF; /* carry bit */ |
571 | } else { /* (not error) */ | |
2d21ac55 | 572 | regs->eax = uthread->uu_rval[0]; |
0c530ab8 | 573 | regs->edx = uthread->uu_rval[1]; |
0c530ab8 A |
574 | } |
575 | } | |
b0d623f7 A |
576 | DEBUG_KPRINT_SYSCALL_UNIX( |
577 | "unix_syscall_return: error=%d retval=(%u,%u)\n", | |
578 | error, regs->eax, regs->edx); | |
0c530ab8 | 579 | } |
0c530ab8 | 580 | |
0c530ab8 | 581 | |
2d21ac55 | 582 | uthread->uu_flag &= ~UT_NOTCANCELPT; |
0c530ab8 | 583 | |
6d2010ae | 584 | #if FUNNEL_DEBUG |
0c530ab8 | 585 | /* |
2d21ac55 | 586 | * if we're holding the funnel panic |
0c530ab8 | 587 | */ |
2d21ac55 | 588 | syscall_exit_funnelcheck(); |
6d2010ae | 589 | #endif /* FUNNEL_DEBUG */ |
0c530ab8 | 590 | |
593a1d5f | 591 | if (uthread->uu_lowpri_window) { |
0c530ab8 A |
592 | /* |
593 | * task is marked as a low priority I/O type | |
594 | * and the I/O we issued while in this system call | |
595 | * collided with normal I/O operations... we'll | |
596 | * delay in order to mitigate the impact of this | |
597 | * task on the normal operation of the system | |
598 | */ | |
593a1d5f | 599 | throttle_lowpri_io(TRUE); |
0c530ab8 A |
600 | } |
601 | if (code != 180) | |
602 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, | |
b0d623f7 | 603 | error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); |
0c530ab8 A |
604 | |
605 | thread_exception_return(); | |
606 | /* NOTREACHED */ | |
607 | } | |
608 | ||
/*
 * Function:	munge_wwwlww
 *
 * Purpose:	Expand, in place, a packed 32-bit argument list of the shape
 *		w w w l w w (w = 32-bit word, l = 64-bit value occupying two
 *		words, low word first) into six 64-bit slots.
 *
 * Inputs:	in32	- unused; the conversion is done in place
 *		out64	- on entry holds the seven packed 32-bit words; must
 *			  have room for six uint64_t values
 *
 * Outputs:	out64[0..5] hold the six arguments; each 32-bit word is
 *		zero-extended and the 64-bit value is carried over unchanged.
 *
 * Notes:	Slots are filled from the highest index down so that no packed
 *		word is overwritten before it has been read.
 */
void
munge_wwwlww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*arg32;
	uint64_t	*arg64;

	/* we convert in place in out64 */
	arg32 = (uint32_t *) out64;
	arg64 = (uint64_t *) out64;

	arg64[5] = arg32[6];	/* wwwlwW */
	arg64[4] = arg32[5];	/* wwwlWw */
	arg32[7] = arg32[4];	/* wwwLww (hi) */
	arg32[6] = arg32[3];	/* wwwLww (lo) */
	arg64[2] = arg32[2];	/* wwWlww */
	arg64[1] = arg32[1];	/* wWwlww */
	arg64[0] = arg32[0];	/* Wwwlww */
}
2d21ac55 A |
629 | |
630 | ||
/*
 * Function:	munge_wwlwww
 *
 * Purpose:	Expand, in place, a packed 32-bit argument list of the shape
 *		w w l w w w (w = 32-bit word, l = 64-bit value occupying two
 *		words, low word first) into six 64-bit slots.
 *
 * Inputs:	in32	- unused; the conversion is done in place
 *		out64	- on entry holds the seven packed 32-bit words; must
 *			  have room for six uint64_t values
 *
 * Outputs:	out64[0..5] hold the six arguments; each 32-bit word is
 *		zero-extended and the 64-bit value is carried over unchanged.
 *
 * Notes:	Slots are filled from the highest index down so that no packed
 *		word is overwritten before it has been read.
 */
void
munge_wwlwww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*arg32;
	uint64_t	*arg64;

	/* we convert in place in out64 */
	arg32 = (uint32_t *) out64;
	arg64 = (uint64_t *) out64;

	arg64[5] = arg32[6];	/* wwlwwW */
	arg64[4] = arg32[5];	/* wwlwWw */
	arg64[3] = arg32[4];	/* wwlWww  */
	arg32[5] = arg32[3];	/* wwLwww (hi) */
	arg32[4] = arg32[2];	/* wwLwww (lo) */
	arg64[1] = arg32[1];	/* wWlwww */
	arg64[0] = arg32[0];	/* Wwlwww */
}
651 |