2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 #include <mach_debug.h>
31 #include <mach_ldebug.h>
33 #include <mach/kern_return.h>
34 #include <mach/mach_traps.h>
35 #include <mach/thread_status.h>
36 #include <mach/vm_param.h>
38 #include <kern/counters.h>
39 #include <kern/cpu_data.h>
40 #include <kern/mach_param.h>
41 #include <kern/task.h>
42 #include <kern/thread.h>
43 #include <kern/sched_prim.h>
44 #include <kern/misc_protos.h>
45 #include <kern/assert.h>
46 #include <kern/debug.h>
48 #include <kern/syscall_sw.h>
49 #include <ipc/ipc_port.h>
50 #include <vm/vm_kern.h>
53 #include <i386/cpu_number.h>
54 #include <i386/eflags.h>
55 #include <i386/proc_reg.h>
57 #include <i386/user_ldt.h>
59 #include <i386/machdep_call.h>
60 #include <i386/vmparam.h>
61 #include <i386/mp_desc.h>
62 #include <i386/misc_protos.h>
63 #include <i386/thread.h>
64 #include <i386/trap.h>
66 #include <mach/i386/syscall_sw.h>
67 #include <sys/syscall.h>
68 #include <sys/kdebug.h>
69 #include <sys/errno.h>
70 #include <../bsd/sys/sysent.h>
/* Defined elsewhere; called below just before a Mach trap handler is dispatched. */
extern void mach_kauth_cred_uthread_update(void);

/* Defined later in this file; returns USER_STATE(thread). */
void * find_user_regs(thread_t);

/* Not defined in the visible part of this file. */
unsigned int get_msr_exportmask(void);

unsigned int get_msr_nbits(void);

unsigned int get_msr_rbits(void);

/* Defined elsewhere; called with TRUE on the normal exit path of each system-call handler below. */
extern void throttle_lowpri_io(boolean_t);
109 * Return the user stack pointer from the machine
110 * dependent thread state info.
114 __unused thread_t thread
,
116 thread_state_t tstate
,
117 __unused
unsigned int count
,
118 user_addr_t
*user_stack
,
126 case x86_THREAD_STATE32
:
128 x86_thread_state32_t
*state25
;
130 state25
= (x86_thread_state32_t
*) tstate
;
133 *user_stack
= state25
->esp
;
135 *user_stack
= VM_USRSTACK32
;
136 if (customstack
&& state25
->esp
)
143 case x86_THREAD_STATE64
:
145 x86_thread_state64_t
*state25
;
147 state25
= (x86_thread_state64_t
*) tstate
;
150 *user_stack
= state25
->rsp
;
152 *user_stack
= VM_USRSTACK64
;
153 if (customstack
&& state25
->rsp
)
161 return (KERN_INVALID_ARGUMENT
);
164 return (KERN_SUCCESS
);
170 __unused thread_t thread
,
172 thread_state_t tstate
,
173 __unused
unsigned int count
,
174 mach_vm_offset_t
*entry_point
180 if (*entry_point
== 0)
181 *entry_point
= VM_MIN_ADDRESS
;
184 case x86_THREAD_STATE32
:
186 x86_thread_state32_t
*state25
;
188 state25
= (i386_thread_state_t
*) tstate
;
189 *entry_point
= state25
->eip
? state25
->eip
: VM_MIN_ADDRESS
;
193 case x86_THREAD_STATE64
:
195 x86_thread_state64_t
*state25
;
197 state25
= (x86_thread_state64_t
*) tstate
;
198 *entry_point
= state25
->rip
? state25
->rip
: VM_MIN_ADDRESS64
;
202 return (KERN_SUCCESS
);
206 * Duplicate parent state in child
219 if ((child_pcb
= child
->machine
.pcb
) == NULL
||
220 (parent_pcb
= parent
->machine
.pcb
) == NULL
)
221 return (KERN_FAILURE
);
223 * Copy over the x86_saved_state registers
225 if (cpu_mode_is64bit()) {
226 if (thread_is_64bit(parent
))
227 bcopy(USER_REGS64(parent
), USER_REGS64(child
), sizeof(x86_saved_state64_t
));
229 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state_compat32_t
));
231 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state32_t
));
234 * Check to see if parent is using floating point
235 * and if so, copy the registers to the child
237 fpu_dup_fxstate(parent
, child
);
241 * Copy the parent's cthread id and USER_CTHREAD descriptor, if 32-bit.
243 child_pcb
->cthread_self
= parent_pcb
->cthread_self
;
244 if (!thread_is_64bit(parent
))
245 child_pcb
->cthread_desc
= parent_pcb
->cthread_desc
;
248 * FIXME - should a user specified LDT, TSS and V86 info
249 * be duplicated as well?? - probably not.
251 // duplicate any use LDT entry that was set I think this is appropriate.
252 if (parent_pcb
->uldt_selector
!= 0) {
253 child_pcb
->uldt_selector
= parent_pcb
->uldt_selector
;
254 child_pcb
->uldt_desc
= parent_pcb
->uldt_desc
;
258 return (KERN_SUCCESS
);
262 * FIXME - thread_set_child
265 void thread_set_child(thread_t child
, int pid
);
267 thread_set_child(thread_t child
, int pid
)
270 if (thread_is_64bit(child
)) {
271 x86_saved_state64_t
*iss64
;
273 iss64
= USER_REGS64(child
);
277 iss64
->isf
.rflags
&= ~EFL_CF
;
279 x86_saved_state32_t
*iss32
;
281 iss32
= USER_REGS32(child
);
285 iss32
->efl
&= ~EFL_CF
;
290 void thread_set_parent(thread_t parent
, int pid
);
293 thread_set_parent(thread_t parent
, int pid
)
296 if (thread_is_64bit(parent
)) {
297 x86_saved_state64_t
*iss64
;
299 iss64
= USER_REGS64(parent
);
303 iss64
->isf
.rflags
&= ~EFL_CF
;
305 x86_saved_state32_t
*iss32
;
307 iss32
= USER_REGS32(parent
);
311 iss32
->efl
&= ~EFL_CF
;
317 * System Call handling code
320 extern long fuword(vm_offset_t
);
325 machdep_syscall(x86_saved_state_t
*state
)
327 int args
[machdep_call_count
];
330 machdep_call_t
*entry
;
331 x86_saved_state32_t
*regs
;
333 assert(is_saved_state32(state
));
334 regs
= saved_state32(state
);
338 kprintf("machdep_syscall(0x%08x) code=%d\n", regs
, trapno
);
341 DEBUG_KPRINT_SYSCALL_MDEP(
342 "machdep_syscall: trapno=%d\n", trapno
);
344 if (trapno
< 0 || trapno
>= machdep_call_count
) {
345 regs
->eax
= (unsigned int)kern_invalid(NULL
);
347 thread_exception_return();
350 entry
= &machdep_call_table
[trapno
];
351 nargs
= entry
->nargs
;
354 if (copyin((user_addr_t
) regs
->uesp
+ sizeof (int),
355 (char *) args
, (nargs
* sizeof (int)))) {
356 regs
->eax
= KERN_INVALID_ADDRESS
;
358 thread_exception_return();
364 regs
->eax
= (*entry
->routine
.args_0
)();
367 regs
->eax
= (*entry
->routine
.args_1
)(args
[0]);
370 regs
->eax
= (*entry
->routine
.args_2
)(args
[0],args
[1]);
373 if (!entry
->bsd_style
)
374 regs
->eax
= (*entry
->routine
.args_3
)(args
[0],args
[1],args
[2]);
379 error
= (*entry
->routine
.args_bsd_3
)(&rval
, args
[0], args
[1], args
[2]);
382 regs
->efl
|= EFL_CF
; /* carry bit */
385 regs
->efl
&= ~EFL_CF
;
390 regs
->eax
= (*entry
->routine
.args_4
)(args
[0], args
[1], args
[2], args
[3]);
394 panic("machdep_syscall: too many args");
396 if (current_thread()->funnel_lock
)
397 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
399 DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs
->eax
);
401 throttle_lowpri_io(TRUE
);
403 thread_exception_return();
409 machdep_syscall64(x86_saved_state_t
*state
)
412 machdep_call_t
*entry
;
413 x86_saved_state64_t
*regs
;
415 assert(is_saved_state64(state
));
416 regs
= saved_state64(state
);
418 trapno
= (int)(regs
->rax
& SYSCALL_NUMBER_MASK
);
420 DEBUG_KPRINT_SYSCALL_MDEP(
421 "machdep_syscall64: trapno=%d\n", trapno
);
423 if (trapno
< 0 || trapno
>= machdep_call_count
) {
424 regs
->rax
= (unsigned int)kern_invalid(NULL
);
426 thread_exception_return();
429 entry
= &machdep_call_table64
[trapno
];
431 switch (entry
->nargs
) {
433 regs
->rax
= (*entry
->routine
.args_0
)();
436 regs
->rax
= (*entry
->routine
.args64_1
)(regs
->rdi
);
439 panic("machdep_syscall64: too many args");
441 if (current_thread()->funnel_lock
)
442 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
444 DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs
->rax
);
446 throttle_lowpri_io(TRUE
);
448 thread_exception_return();
453 * thread_fast_set_cthread_self: Sets the machine kernel thread ID of the
454 * current thread to the given thread ID; fast version for 32-bit processes
456 * Parameters: self Thread ID to set
462 thread_fast_set_cthread_self(uint32_t self
)
464 thread_t thread
= current_thread();
465 pcb_t pcb
= thread
->machine
.pcb
;
466 struct real_descriptor desc
= {
469 .base_low
= self
& 0xffff,
470 .base_med
= (self
>> 16) & 0xff,
471 .base_high
= (self
>> 24) & 0xff,
472 .access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
,
473 .granularity
= SZ_32
|SZ_G
,
476 current_thread()->machine
.pcb
->cthread_self
= (uint64_t) self
; /* preserve old func too */
478 /* assign descriptor */
479 mp_disable_preemption();
480 pcb
->cthread_desc
= desc
;
481 *ldt_desc_p(USER_CTHREAD
) = desc
;
482 saved_state32(pcb
->iss
)->gs
= USER_CTHREAD
;
483 mp_enable_preemption();
485 return (USER_CTHREAD
);
489 * thread_fast_set_cthread_self64: Sets the machine kernel thread ID of the
490 * current thread to the given thread ID; fast version for 64-bit processes
492 * Parameters: self Thread ID
498 thread_fast_set_cthread_self64(uint64_t self
)
500 pcb_t pcb
= current_thread()->machine
.pcb
;
502 /* check for canonical address, set 0 otherwise */
503 if (!IS_USERADDR64_CANONICAL(self
))
506 pcb
->cthread_self
= self
;
507 mp_disable_preemption();
508 #if defined(__x86_64__)
509 if (current_cpu_datap()->cpu_uber
.cu_user_gs_base
!= self
)
510 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, self
);
512 current_cpu_datap()->cpu_uber
.cu_user_gs_base
= self
;
513 mp_enable_preemption();
514 return (USER_CTHREAD
);
518 * thread_set_user_ldt routine is the interface for the user level
519 * settable ldt entry feature. allowing a user to create arbitrary
520 * ldt entries seems to be too large of a security hole, so instead
521 * this mechanism is in place to allow user level processes to have
522 * an ldt entry that can be used in conjunction with the FS register.
524 * Swapping occurs inside the pcb.c file along with initialization
525 * when a thread is created. The basic functioning theory is that the
526 * pcb->uldt_selector variable will contain either 0 meaning the
527 * process has not set up any entry, or the selector to be used in
528 * the FS register. pcb->uldt_desc contains the actual descriptor the
529 * user has set up stored in machine usable ldt format.
531 * Currently one entry is shared by all threads (USER_SETTABLE), but
532 * this could be changed in the future by changing how this routine
533 * allocates the selector. There seems to be no real reason at this
534 * time to have this added feature, but in the future it might be
537 * address is the linear address of the start of the data area size
538 * is the size in bytes of the area flags should always be set to 0
539 * for now. in the future it could be used to set R/W permisions or
540 * other functions. Currently the segment is created as a data segment
541 * up to 1 megabyte in size with full read/write permisions only.
543 * this call returns the segment selector or -1 if any error occurs
546 thread_set_user_ldt(uint32_t address
, uint32_t size
, uint32_t flags
)
549 struct fake_descriptor temp
;
553 return -1; // flags not supported
555 return -1; // size too big, 1 meg is the limit
557 mp_disable_preemption();
558 mycpu
= cpu_number();
560 // create a "fake" descriptor so we can use fix_desc()
561 // to build a real one...
562 // 32 bit default operation size
563 // standard read/write perms for a data segment
564 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
565 temp
.offset
= address
;
566 temp
.lim_or_seg
= size
;
567 temp
.size_or_wdct
= SZ_32
;
568 temp
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
570 // turn this into a real descriptor
573 // set up our data in the pcb
574 pcb
->uldt_desc
= *(struct real_descriptor
*)&temp
;
575 pcb
->uldt_selector
= USER_SETTABLE
; // set the selector value
577 // now set it up in the current table...
578 *ldt_desc_p(USER_SETTABLE
) = *(struct real_descriptor
*)&temp
;
580 mp_enable_preemption();
582 return USER_SETTABLE
;
585 #endif /* MACH_BSD */
/*
 * Signature through which the mungers below invoke a Mach trap handler:
 * one pointer to the argument block, returning kern_return_t.
 */
typedef kern_return_t (*mach_call_t)(void *);
590 struct mach_call_args
{
603 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
);
607 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
)
609 unsigned int args32
[9];
611 if (copyin((user_addr_t
)(sp
+ sizeof(int)), (char *)args32
, nargs
* sizeof (int)))
612 return KERN_INVALID_ARGUMENT
;
615 case 9: args
->arg9
= args32
[8];
616 case 8: args
->arg8
= args32
[7];
617 case 7: args
->arg7
= args32
[6];
618 case 6: args
->arg6
= args32
[5];
619 case 5: args
->arg5
= args32
[4];
620 case 4: args
->arg4
= args32
[3];
621 case 3: args
->arg3
= args32
[2];
622 case 2: args
->arg2
= args32
[1];
623 case 1: args
->arg1
= args32
[0];
625 if (call_number
== 90) {
626 /* munge_l for mach_wait_until_trap() */
627 args
->arg1
= (((uint64_t)(args32
[0])) | ((((uint64_t)(args32
[1]))<<32)));
629 if (call_number
== 93) {
630 /* munge_wl for mk_timer_arm_trap() */
631 args
->arg2
= (((uint64_t)(args32
[1])) | ((((uint64_t)(args32
[2]))<<32)));
638 __private_extern__
void mach_call_munger(x86_saved_state_t
*state
);
640 extern const char *mach_syscall_name_table
[];
643 mach_call_munger(x86_saved_state_t
*state
)
647 mach_call_t mach_call
;
648 kern_return_t retval
;
649 struct mach_call_args args
= { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
650 x86_saved_state32_t
*regs
;
652 assert(is_saved_state32(state
));
653 regs
= saved_state32(state
);
655 call_number
= -(regs
->eax
);
657 DEBUG_KPRINT_SYSCALL_MACH(
658 "mach_call_munger: code=%d(%s)\n",
659 call_number
, mach_syscall_name_table
[call_number
]);
661 kprintf("mach_call_munger(0x%08x) code=%d\n", regs
, call_number
);
664 if (call_number
< 0 || call_number
>= mach_trap_count
) {
665 i386_exception(EXC_SYSCALL
, call_number
, 1);
668 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
670 if (mach_call
== (mach_call_t
)kern_invalid
) {
671 DEBUG_KPRINT_SYSCALL_MACH(
672 "mach_call_munger: kern_invalid 0x%x\n", regs
->eax
);
673 i386_exception(EXC_SYSCALL
, call_number
, 1);
677 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
679 retval
= mach_call_arg_munger32(regs
->uesp
, argc
, call_number
, &args
);
680 if (retval
!= KERN_SUCCESS
) {
683 DEBUG_KPRINT_SYSCALL_MACH(
684 "mach_call_munger: retval=0x%x\n", retval
);
686 thread_exception_return();
692 mach_kauth_cred_uthread_update();
694 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
695 args
.arg1
, args
.arg2
, args
.arg3
, args
.arg4
, 0);
697 retval
= mach_call(&args
);
699 DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval
);
701 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
705 throttle_lowpri_io(TRUE
);
707 thread_exception_return();
712 __private_extern__
void mach_call_munger64(x86_saved_state_t
*regs
);
715 mach_call_munger64(x86_saved_state_t
*state
)
719 mach_call_t mach_call
;
720 x86_saved_state64_t
*regs
;
722 assert(is_saved_state64(state
));
723 regs
= saved_state64(state
);
725 call_number
= (int)(regs
->rax
& SYSCALL_NUMBER_MASK
);
727 DEBUG_KPRINT_SYSCALL_MACH(
728 "mach_call_munger64: code=%d(%s)\n",
729 call_number
, mach_syscall_name_table
[call_number
]);
731 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,
732 (call_number
)) | DBG_FUNC_START
,
733 regs
->rdi
, regs
->rsi
,
734 regs
->rdx
, regs
->r10
, 0);
736 if (call_number
< 0 || call_number
>= mach_trap_count
) {
737 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
740 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
742 if (mach_call
== (mach_call_t
)kern_invalid
) {
743 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
746 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
751 copyin_count
= (argc
- 6) * (int)sizeof(uint64_t);
753 if (copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)®s
->v_arg6
, copyin_count
)) {
754 regs
->rax
= KERN_INVALID_ARGUMENT
;
756 thread_exception_return();
762 mach_kauth_cred_uthread_update();
765 regs
->rax
= (uint64_t)mach_call((void *)(®s
->rdi
));
767 DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs
->rax
);
769 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,
770 (call_number
)) | DBG_FUNC_END
,
771 regs
->rax
, 0, 0, 0, 0);
773 throttle_lowpri_io(TRUE
);
775 thread_exception_return();
781 * thread_setuserstack:
783 * Sets the user stack pointer into the machine
784 * dependent thread state info.
789 mach_vm_address_t user_stack
)
791 if (thread_is_64bit(thread
)) {
792 x86_saved_state64_t
*iss64
;
794 iss64
= USER_REGS64(thread
);
796 iss64
->isf
.rsp
= (uint64_t)user_stack
;
798 x86_saved_state32_t
*iss32
;
800 iss32
= USER_REGS32(thread
);
802 iss32
->uesp
= CAST_DOWN_EXPLICIT(unsigned int, user_stack
);
807 * thread_adjuserstack:
809 * Returns the adjusted user stack pointer from the machine
810 * dependent thread state info. Used for small (<2G) deltas.
817 if (thread_is_64bit(thread
)) {
818 x86_saved_state64_t
*iss64
;
820 iss64
= USER_REGS64(thread
);
822 iss64
->isf
.rsp
+= adjust
;
824 return iss64
->isf
.rsp
;
826 x86_saved_state32_t
*iss32
;
828 iss32
= USER_REGS32(thread
);
830 iss32
->uesp
+= adjust
;
832 return CAST_USER_ADDR_T(iss32
->uesp
);
837 * thread_setentrypoint:
839 * Sets the user PC into the machine
840 * dependent thread state info.
843 thread_setentrypoint(thread_t thread
, mach_vm_address_t entry
)
845 if (thread_is_64bit(thread
)) {
846 x86_saved_state64_t
*iss64
;
848 iss64
= USER_REGS64(thread
);
850 iss64
->isf
.rip
= (uint64_t)entry
;
852 x86_saved_state32_t
*iss32
;
854 iss32
= USER_REGS32(thread
);
856 iss32
->eip
= CAST_DOWN_EXPLICIT(unsigned int, entry
);
862 thread_setsinglestep(thread_t thread
, int on
)
864 if (thread_is_64bit(thread
)) {
865 x86_saved_state64_t
*iss64
;
867 iss64
= USER_REGS64(thread
);
870 iss64
->isf
.rflags
|= EFL_TF
;
872 iss64
->isf
.rflags
&= ~EFL_TF
;
874 x86_saved_state32_t
*iss32
;
876 iss32
= USER_REGS32(thread
);
879 iss32
->efl
|= EFL_TF
;
881 if (iss32
->cs
== SYSENTER_CS
)
882 iss32
->cs
= SYSENTER_TF_CS
;
885 iss32
->efl
&= ~EFL_TF
;
888 return (KERN_SUCCESS
);
/* XXX this should be a struct savearea so that CHUD will work better on x86 */
/* Returns USER_STATE(thread) as an untyped pointer; no check is made on the thread. */
void *
find_user_regs(thread_t thread)
{
	return USER_STATE(thread);
}
901 get_user_regs(thread_t th
)
904 return(USER_STATE(th
));
906 printf("[get_user_regs: thread does not have pcb]");
913 * DTrace would like to have a peek at the kernel interrupt state, if available.
914 * Based on osfmk/chud/i386/chud_thread_i386.c:chudxnu_thread_get_state(), which see.
916 x86_saved_state_t
*find_kern_regs(thread_t
);
919 find_kern_regs(thread_t thread
)
921 if (thread
== current_thread() &&
922 NULL
!= current_cpu_datap()->cpu_int_state
&&
923 !(USER_STATE(thread
) == current_cpu_datap()->cpu_int_state
&&
924 current_cpu_datap()->cpu_interrupt_level
== 1)) {
926 return current_cpu_datap()->cpu_int_state
;
vm_offset_t dtrace_get_cpu_int_stack_top(void);

/* Returns the cpu_int_stack_top field of the current cpu's data area. */
vm_offset_t
dtrace_get_cpu_int_stack_top(void)
{
	return current_cpu_datap()->cpu_int_stack_top;
}