2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 #include <mach_debug.h>
31 #include <mach_ldebug.h>
33 #include <mach/kern_return.h>
34 #include <mach/mach_traps.h>
35 #include <mach/thread_status.h>
36 #include <mach/vm_param.h>
38 #include <kern/counters.h>
39 #include <kern/cpu_data.h>
40 #include <kern/mach_param.h>
41 #include <kern/task.h>
42 #include <kern/thread.h>
43 #include <kern/sched_prim.h>
44 #include <kern/misc_protos.h>
45 #include <kern/assert.h>
47 #include <kern/syscall_sw.h>
48 #include <ipc/ipc_port.h>
49 #include <vm/vm_kern.h>
52 #include <i386/cpu_data.h>
53 #include <i386/cpu_number.h>
54 #include <i386/thread.h>
55 #include <i386/eflags.h>
56 #include <i386/proc_reg.h>
59 #include <i386/user_ldt.h>
61 #include <i386/machdep_call.h>
62 #include <i386/misc_protos.h>
63 #include <i386/cpu_data.h>
64 #include <i386/cpu_number.h>
65 #include <i386/mp_desc.h>
66 #include <i386/vmparam.h>
67 #include <i386/trap.h>
68 #include <mach/i386/syscall_sw.h>
69 #include <sys/syscall.h>
70 #include <sys/kdebug.h>
71 #include <sys/errno.h>
72 #include <../bsd/sys/sysent.h>
93 void * find_user_regs(thread_t
);
95 unsigned int get_msr_exportmask(void);
97 unsigned int get_msr_nbits(void);
99 unsigned int get_msr_rbits(void);
102 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
);
106 void thread_set_cthreadself(thread_t thread
, uint64_t pself
, int isLP64
);
111 * Return the user stack pointer from the machine
112 * dependent thread state info.
116 __unused thread_t thread
,
118 thread_state_t tstate
,
119 __unused
unsigned int count
,
120 user_addr_t
*user_stack
,
128 case x86_THREAD_STATE32
:
130 x86_thread_state32_t
*state25
;
132 state25
= (x86_thread_state32_t
*) tstate
;
135 *user_stack
= state25
->esp
;
137 *user_stack
= VM_USRSTACK32
;
138 if (customstack
&& state25
->esp
)
145 case x86_THREAD_STATE64
:
147 x86_thread_state64_t
*state25
;
149 state25
= (x86_thread_state64_t
*) tstate
;
152 *user_stack
= state25
->rsp
;
154 *user_stack
= VM_USRSTACK64
;
155 if (customstack
&& state25
->rsp
)
163 return (KERN_INVALID_ARGUMENT
);
166 return (KERN_SUCCESS
);
172 __unused thread_t thread
,
174 thread_state_t tstate
,
175 __unused
unsigned int count
,
176 mach_vm_offset_t
*entry_point
182 if (*entry_point
== 0)
183 *entry_point
= VM_MIN_ADDRESS
;
186 case x86_THREAD_STATE32
:
188 x86_thread_state32_t
*state25
;
190 state25
= (i386_thread_state_t
*) tstate
;
191 *entry_point
= state25
->eip
? state25
->eip
: VM_MIN_ADDRESS
;
195 case x86_THREAD_STATE64
:
197 x86_thread_state64_t
*state25
;
199 state25
= (x86_thread_state64_t
*) tstate
;
200 *entry_point
= state25
->rip
? state25
->rip
: VM_MIN_ADDRESS64
;
204 return (KERN_SUCCESS
);
209 * Duplicate parent state in child
222 if ((child_pcb
= child
->machine
.pcb
) == NULL
||
223 (parent_pcb
= parent
->machine
.pcb
) == NULL
)
224 return (KERN_FAILURE
);
226 * Copy over the x86_saved_state registers
228 if (cpu_mode_is64bit()) {
229 if (thread_is_64bit(parent
))
230 bcopy(USER_REGS64(parent
), USER_REGS64(child
), sizeof(x86_saved_state64_t
));
232 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state_compat32_t
));
234 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state32_t
));
237 * Check to see if parent is using floating point
238 * and if so, copy the registers to the child
240 fpu_dup_fxstate(parent
, child
);
244 * Copy the parent's cthread id and USER_CTHREAD descriptor, if 32-bit.
246 child_pcb
->cthread_self
= parent_pcb
->cthread_self
;
247 if (!thread_is_64bit(parent
))
248 child_pcb
->cthread_desc
= parent_pcb
->cthread_desc
;
251 * FIXME - should a user specified LDT, TSS and V86 info
252 * be duplicated as well?? - probably not.
254 // Duplicate any user LDT entry that was set; I think this is appropriate.
255 if (parent_pcb
->uldt_selector
!= 0) {
256 child_pcb
->uldt_selector
= parent_pcb
->uldt_selector
;
257 child_pcb
->uldt_desc
= parent_pcb
->uldt_desc
;
261 return (KERN_SUCCESS
);
265 * FIXME - thread_set_child
268 void thread_set_child(thread_t child
, int pid
);
270 thread_set_child(thread_t child
, int pid
)
272 if (thread_is_64bit(child
)) {
273 x86_saved_state64_t
*iss64
;
275 iss64
= USER_REGS64(child
);
279 iss64
->isf
.rflags
&= ~EFL_CF
;
281 x86_saved_state32_t
*iss32
;
283 iss32
= USER_REGS32(child
);
287 iss32
->efl
&= ~EFL_CF
;
292 void thread_set_parent(thread_t parent
, int pid
);
295 thread_set_parent(thread_t parent
, int pid
)
297 if (thread_is_64bit(parent
)) {
298 x86_saved_state64_t
*iss64
;
300 iss64
= USER_REGS64(parent
);
304 iss64
->isf
.rflags
&= ~EFL_CF
;
306 x86_saved_state32_t
*iss32
;
308 iss32
= USER_REGS32(parent
);
312 iss32
->efl
&= ~EFL_CF
;
318 * System Call handling code
321 extern long fuword(vm_offset_t
);
326 machdep_syscall(x86_saved_state_t
*state
)
328 int args
[machdep_call_count
];
331 machdep_call_t
*entry
;
332 x86_saved_state32_t
*regs
;
334 assert(is_saved_state32(state
));
335 regs
= saved_state32(state
);
339 kprintf("machdep_syscall(0x%08x) code=%d\n", regs
, trapno
);
342 if (trapno
< 0 || trapno
>= machdep_call_count
) {
343 regs
->eax
= (unsigned int)kern_invalid(NULL
);
345 thread_exception_return();
348 entry
= &machdep_call_table
[trapno
];
349 nargs
= entry
->nargs
;
352 if (copyin((user_addr_t
) regs
->uesp
+ sizeof (int),
353 (char *) args
, (nargs
* sizeof (int)))) {
354 regs
->eax
= KERN_INVALID_ADDRESS
;
356 thread_exception_return();
362 regs
->eax
= (*entry
->routine
.args_0
)();
365 regs
->eax
= (*entry
->routine
.args_1
)(args
[0]);
368 regs
->eax
= (*entry
->routine
.args_2
)(args
[0],args
[1]);
371 if (!entry
->bsd_style
)
372 regs
->eax
= (*entry
->routine
.args_3
)(args
[0],args
[1],args
[2]);
377 error
= (*entry
->routine
.args_bsd_3
)(&rval
, args
[0], args
[1], args
[2]);
380 regs
->efl
|= EFL_CF
; /* carry bit */
383 regs
->efl
&= ~EFL_CF
;
388 regs
->eax
= (*entry
->routine
.args_4
)(args
[0], args
[1], args
[2], args
[3]);
392 panic("machdep_syscall: too many args");
394 if (current_thread()->funnel_lock
)
395 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
397 thread_exception_return();
403 machdep_syscall64(x86_saved_state_t
*state
)
406 machdep_call_t
*entry
;
407 x86_saved_state64_t
*regs
;
409 assert(is_saved_state64(state
));
410 regs
= saved_state64(state
);
412 trapno
= regs
->rax
& SYSCALL_NUMBER_MASK
;
414 if (trapno
< 0 || trapno
>= machdep_call_count
) {
415 regs
->rax
= (unsigned int)kern_invalid(NULL
);
417 thread_exception_return();
420 entry
= &machdep_call_table64
[trapno
];
422 switch (entry
->nargs
) {
424 regs
->rax
= (*entry
->routine
.args_0
)();
427 regs
->rax
= (*entry
->routine
.args64_1
)(regs
->rdi
);
430 panic("machdep_syscall64: too many args");
432 if (current_thread()->funnel_lock
)
433 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
435 thread_exception_return();
441 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
)
443 struct real_descriptor desc
;
445 mp_disable_preemption();
449 desc
.base_low
= addr
& 0xffff;
450 desc
.base_med
= (addr
>> 16) & 0xff;
451 desc
.base_high
= (addr
>> 24) & 0xff;
452 desc
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
453 desc
.granularity
= SZ_32
|SZ_G
;
454 pcb
->cthread_desc
= desc
;
455 *ldt_desc_p(USER_CTHREAD
) = desc
;
457 mp_enable_preemption();
459 return(KERN_SUCCESS
);
463 thread_set_cthread_self(uint32_t self
)
465 current_thread()->machine
.pcb
->cthread_self
= (uint64_t) self
;
467 return (KERN_SUCCESS
);
471 thread_get_cthread_self(void)
473 return ((kern_return_t
)current_thread()->machine
.pcb
->cthread_self
);
477 thread_fast_set_cthread_self(uint32_t self
)
480 x86_saved_state32_t
*iss
;
482 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
483 thread_compose_cthread_desc(self
, pcb
);
484 pcb
->cthread_self
= (uint64_t) self
; /* preserve old func too */
485 iss
= saved_state32(pcb
->iss
);
486 iss
->gs
= USER_CTHREAD
;
488 return (USER_CTHREAD
);
492 thread_set_cthreadself(thread_t thread
, uint64_t pself
, int isLP64
)
496 x86_saved_state32_t
*iss
;
498 pcb
= (pcb_t
)thread
->machine
.pcb
;
499 thread_compose_cthread_desc(pself
, pcb
);
500 pcb
->cthread_self
= (uint64_t) pself
; /* preserve old func too */
501 iss
= saved_state32(pcb
->iss
);
502 iss
->gs
= USER_CTHREAD
;
505 x86_saved_state64_t
*iss
;
507 pcb
= thread
->machine
.pcb
;
509 /* check for canonical address, set 0 otherwise */
510 if (!IS_USERADDR64_CANONICAL(pself
))
512 pcb
->cthread_self
= pself
;
514 /* XXX for 64-in-32 */
515 iss
= saved_state64(pcb
->iss
);
516 iss
->gs
= USER_CTHREAD
;
517 thread_compose_cthread_desc((uint32_t) pself
, pcb
);
523 thread_fast_set_cthread_self64(uint64_t self
)
526 x86_saved_state64_t
*iss
;
528 pcb
= current_thread()->machine
.pcb
;
530 /* check for canonical address, set 0 otherwise */
531 if (!IS_USERADDR64_CANONICAL(self
))
533 pcb
->cthread_self
= self
;
534 current_cpu_datap()->cpu_uber
.cu_user_gs_base
= self
;
536 /* XXX for 64-in-32 */
537 iss
= saved_state64(pcb
->iss
);
538 iss
->gs
= USER_CTHREAD
;
539 thread_compose_cthread_desc((uint32_t) self
, pcb
);
541 return (USER_CTHREAD
);
545 * thread_set_user_ldt routine is the interface for the user level
546 * settable ldt entry feature. Allowing a user to create arbitrary
547 * ldt entries seems to be too large of a security hole, so instead
548 * this mechanism is in place to allow user level processes to have
549 * an ldt entry that can be used in conjunction with the FS register.
551 * Swapping occurs inside the pcb.c file along with initialization
552 * when a thread is created. The basic functioning theory is that the
553 * pcb->uldt_selector variable will contain either 0 meaning the
554 * process has not set up any entry, or the selector to be used in
555 * the FS register. pcb->uldt_desc contains the actual descriptor the
556 * user has set up stored in machine usable ldt format.
558 * Currently one entry is shared by all threads (USER_SETTABLE), but
559 * this could be changed in the future by changing how this routine
560 * allocates the selector. There seems to be no real reason at this
561 * time to have this added feature, but in the future it might be needed.
564 * address is the linear address of the start of the data area; size
565 * is the size in bytes of the area; flags should always be set to 0
566 * for now. In the future it could be used to set R/W permissions or
567 * other functions. Currently the segment is created as a data segment
568 * up to 1 megabyte in size with full read/write permissions only.
570 * this call returns the segment selector or -1 if any error occurs
573 thread_set_user_ldt(uint32_t address
, uint32_t size
, uint32_t flags
)
576 struct fake_descriptor temp
;
580 return -1; // flags not supported
582 return -1; // size too big, 1 meg is the limit
584 mp_disable_preemption();
585 mycpu
= cpu_number();
587 // create a "fake" descriptor so we can use fix_desc()
588 // to build a real one...
589 // 32 bit default operation size
590 // standard read/write perms for a data segment
591 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
592 temp
.offset
= address
;
593 temp
.lim_or_seg
= size
;
594 temp
.size_or_wdct
= SZ_32
;
595 temp
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
597 // turn this into a real descriptor
600 // set up our data in the pcb
601 pcb
->uldt_desc
= *(struct real_descriptor
*)&temp
;
602 pcb
->uldt_selector
= USER_SETTABLE
; // set the selector value
604 // now set it up in the current table...
605 *ldt_desc_p(USER_SETTABLE
) = *(struct real_descriptor
*)&temp
;
607 mp_enable_preemption();
609 return USER_SETTABLE
;
612 #endif /* MACH_BSD */
615 typedef kern_return_t (*mach_call_t
)(void *);
617 struct mach_call_args
{
630 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
);
634 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
)
636 unsigned int args32
[9];
638 if (copyin((user_addr_t
)(sp
+ sizeof(int)), (char *)args32
, nargs
* sizeof (int)))
639 return KERN_INVALID_ARGUMENT
;
642 case 9: args
->arg9
= args32
[8];
643 case 8: args
->arg8
= args32
[7];
644 case 7: args
->arg7
= args32
[6];
645 case 6: args
->arg6
= args32
[5];
646 case 5: args
->arg5
= args32
[4];
647 case 4: args
->arg4
= args32
[3];
648 case 3: args
->arg3
= args32
[2];
649 case 2: args
->arg2
= args32
[1];
650 case 1: args
->arg1
= args32
[0];
652 if (call_number
== 90) {
653 /* munge_l for mach_wait_until_trap() */
654 args
->arg1
= (((uint64_t)(args32
[0])) | ((((uint64_t)(args32
[1]))<<32)));
656 if (call_number
== 93) {
657 /* munge_wl for mk_timer_arm_trap() */
658 args
->arg2
= (((uint64_t)(args32
[1])) | ((((uint64_t)(args32
[2]))<<32)));
665 __private_extern__
void mach_call_munger(x86_saved_state_t
*state
);
668 mach_call_munger(x86_saved_state_t
*state
)
672 mach_call_t mach_call
;
673 kern_return_t retval
;
674 struct mach_call_args args
= { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
675 x86_saved_state32_t
*regs
;
677 assert(is_saved_state32(state
));
678 regs
= saved_state32(state
);
680 call_number
= -(regs
->eax
);
682 kprintf("mach_call_munger(0x%08x) code=%d\n", regs
, call_number
);
685 if (call_number
< 0 || call_number
>= mach_trap_count
) {
686 i386_exception(EXC_SYSCALL
, call_number
, 1);
689 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
691 if (mach_call
== (mach_call_t
)kern_invalid
) {
692 i386_exception(EXC_SYSCALL
, call_number
, 1);
696 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
698 retval
= mach_call_arg_munger32(regs
->uesp
, argc
, call_number
, &args
);
699 if (retval
!= KERN_SUCCESS
) {
702 thread_exception_return();
706 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
707 (int) args
.arg1
, (int) args
.arg2
, (int) args
.arg3
, (int) args
.arg4
, 0);
709 retval
= mach_call(&args
);
711 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
715 thread_exception_return();
720 __private_extern__
void mach_call_munger64(x86_saved_state_t
*regs
);
723 mach_call_munger64(x86_saved_state_t
*state
)
727 mach_call_t mach_call
;
728 x86_saved_state64_t
*regs
;
730 assert(is_saved_state64(state
));
731 regs
= saved_state64(state
);
733 call_number
= regs
->rax
& SYSCALL_NUMBER_MASK
;
735 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,
736 (call_number
)) | DBG_FUNC_START
,
737 (int) regs
->rdi
, (int) regs
->rsi
,
738 (int) regs
->rdx
, (int) regs
->r10
, 0);
740 if (call_number
< 0 || call_number
>= mach_trap_count
) {
741 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
744 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
746 if (mach_call
== (mach_call_t
)kern_invalid
) {
747 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
750 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
755 copyin_count
= (argc
- 6) * sizeof(uint64_t);
757 if (copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)&regs
->v_arg6
, copyin_count
)) {
758 regs
->rax
= KERN_INVALID_ARGUMENT
;
760 thread_exception_return();
764 regs
->rax
= (uint64_t)mach_call((void *)(&regs
->rdi
));
766 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,
767 (call_number
)) | DBG_FUNC_END
,
768 (int)regs
->rax
, 0, 0, 0, 0);
770 thread_exception_return();
776 * thread_setuserstack:
778 * Sets the user stack pointer into the machine
779 * dependent thread state info.
784 mach_vm_address_t user_stack
)
786 if (thread_is_64bit(thread
)) {
787 x86_saved_state64_t
*iss64
;
789 iss64
= USER_REGS64(thread
);
791 iss64
->isf
.rsp
= (uint64_t)user_stack
;
793 x86_saved_state32_t
*iss32
;
795 iss32
= USER_REGS32(thread
);
797 iss32
->uesp
= CAST_DOWN(unsigned int, user_stack
);
802 * thread_adjuserstack:
804 * Returns the adjusted user stack pointer from the machine
805 * dependent thread state info. Used for small (<2G) deltas.
812 if (thread_is_64bit(thread
)) {
813 x86_saved_state64_t
*iss64
;
815 iss64
= USER_REGS64(thread
);
817 iss64
->isf
.rsp
+= adjust
;
819 return iss64
->isf
.rsp
;
821 x86_saved_state32_t
*iss32
;
823 iss32
= USER_REGS32(thread
);
825 iss32
->uesp
+= adjust
;
827 return CAST_USER_ADDR_T(iss32
->uesp
);
832 * thread_setentrypoint:
834 * Sets the user PC into the machine
835 * dependent thread state info.
838 thread_setentrypoint(thread_t thread
, mach_vm_address_t entry
)
840 if (thread_is_64bit(thread
)) {
841 x86_saved_state64_t
*iss64
;
843 iss64
= USER_REGS64(thread
);
845 iss64
->isf
.rip
= (uint64_t)entry
;
847 x86_saved_state32_t
*iss32
;
849 iss32
= USER_REGS32(thread
);
851 iss32
->eip
= CAST_DOWN(unsigned int, entry
);
857 thread_setsinglestep(thread_t thread
, int on
)
859 if (thread_is_64bit(thread
)) {
860 x86_saved_state64_t
*iss64
;
862 iss64
= USER_REGS64(thread
);
865 iss64
->isf
.rflags
|= EFL_TF
;
867 iss64
->isf
.rflags
&= ~EFL_TF
;
869 x86_saved_state32_t
*iss32
;
871 iss32
= USER_REGS32(thread
);
874 iss32
->efl
|= EFL_TF
;
876 iss32
->efl
&= ~EFL_TF
;
879 return (KERN_SUCCESS
);
884 /* XXX this should be a struct savearea so that CHUD will work better on x86 */
886 find_user_regs(thread_t thread
)
888 return USER_STATE(thread
);
892 get_user_regs(thread_t th
)
895 return(USER_STATE(th
));
897 printf("[get_user_regs: thread does not have pcb]");
904 * DTrace would like to have a peek at the kernel interrupt state, if available.
905 * Based on osfmk/chud/i386/chud_thread_i386.c:chudxnu_thread_get_state(), which see.
907 x86_saved_state32_t
*find_kern_regs(thread_t
);
909 x86_saved_state32_t
*
910 find_kern_regs(thread_t thread
)
912 if (thread
== current_thread() &&
913 NULL
!= current_cpu_datap()->cpu_int_state
&&
914 !(USER_STATE(thread
) == current_cpu_datap()->cpu_int_state
&&
915 current_cpu_datap()->cpu_interrupt_level
== 1)) {
917 return saved_state32(current_cpu_datap()->cpu_int_state
);
923 vm_offset_t
dtrace_get_cpu_int_stack_top(void);
926 dtrace_get_cpu_int_stack_top(void)
928 return current_cpu_datap()->cpu_int_stack_top
;