2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
24 #include <mach_debug.h>
25 #include <mach_ldebug.h>
27 #include <mach/kern_return.h>
28 #include <mach/mach_traps.h>
29 #include <mach/thread_status.h>
30 #include <mach/vm_param.h>
32 #include <kern/counters.h>
33 #include <kern/cpu_data.h>
34 #include <kern/mach_param.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/sched_prim.h>
38 #include <kern/misc_protos.h>
39 #include <kern/assert.h>
41 #include <kern/syscall_sw.h>
42 #include <ipc/ipc_port.h>
43 #include <vm/vm_kern.h>
46 #include <i386/cpu_data.h>
47 #include <i386/cpu_number.h>
48 #include <i386/thread.h>
49 #include <i386/eflags.h>
50 #include <i386/proc_reg.h>
53 #include <i386/user_ldt.h>
55 #include <i386/iopb_entries.h>
56 #include <i386/machdep_call.h>
57 #include <i386/misc_protos.h>
58 #include <i386/cpu_data.h>
59 #include <i386/cpu_number.h>
60 #include <i386/mp_desc.h>
61 #include <i386/vmparam.h>
62 #include <i386/trap.h>
63 #include <mach/i386/syscall_sw.h>
64 #include <sys/syscall.h>
65 #include <sys/kdebug.h>
66 #include <sys/ktrace.h>
67 #include <sys/errno.h>
68 #include <../bsd/sys/sysent.h>
/*
 * Forward declarations for the BSD/Mach glue below.
 * NOTE(review): this chunk is extraction-garbled -- original line
 * numbers are fused into the text and statements are split across
 * lines; tokens are preserved byte-for-byte here.
 */
/* Returns the proc structure of the current BSD process. */
70 extern struct proc
*current_proc(void);
/* The kernel's own proc structure. */
71 extern struct proc
* kernproc
;
/* Returns a pointer to a thread's saved user-mode register state. */
92 void * find_user_regs(thread_t
);
/* MSR accessor prototypes (definitions not visible in this chunk). */
94 unsigned int get_msr_exportmask(void);
96 unsigned int get_msr_nbits(void);
98 unsigned int get_msr_rbits(void);
/* Builds and installs the USER_CTHREAD LDT descriptor; defined below.
 * NOTE(review): the return-type line (kern_return_t per the definition
 * later in this chunk) appears to have been dropped here. */
101 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
);
/*
 * Fragment of the routine that extracts the user stack pointer from a
 * flavored thread-state blob (thread_userstack-style interface).
 * NOTE(review): extraction-damaged -- the function-name line, the
 * switch-on-flavor header, braces, the 'customstack' parameter
 * declaration, and the bodies of the customstack conditionals are
 * missing.  Tokens below are preserved byte-for-byte.
 */
108 * Return the user stack pointer from the machine
109 * dependent thread state info.
113 __unused thread_t thread
,
115 thread_state_t tstate
,
116 __unused
unsigned int count
,
117 user_addr_t
*user_stack
,
/* 32-bit state flavors: take esp from the supplied state. */
125 case OLD_i386_THREAD_STATE
:
126 case x86_THREAD_STATE32
:
128 x86_thread_state32_t
*state25
;
130 state25
= (x86_thread_state32_t
*) tstate
;
133 *user_stack
= state25
->esp
;
/* Fallback when no esp was supplied: default 32-bit user stack top. */
135 *user_stack
= VM_USRSTACK32
;
/* NOTE(review): body of this conditional (presumably *customstack
 * assignment) is elided. */
136 if (customstack
&& state25
->esp
)
/* 64-bit state flavor: same scheme using rsp / VM_USRSTACK64. */
143 case x86_THREAD_STATE64
:
145 x86_thread_state64_t
*state25
;
147 state25
= (x86_thread_state64_t
*) tstate
;
150 *user_stack
= state25
->rsp
;
152 *user_stack
= VM_USRSTACK64
;
153 if (customstack
&& state25
->rsp
)
/* Unknown flavor -> invalid argument; otherwise success. */
161 return (KERN_INVALID_ARGUMENT
);
164 return (KERN_SUCCESS
);
/*
 * Fragment of the routine that extracts the user program counter from a
 * flavored thread-state blob (thread_entrypoint-style interface).
 * NOTE(review): extraction-damaged -- the function-name line, the
 * switch-on-flavor header, braces and break statements are missing.
 * Tokens below are preserved byte-for-byte.
 */
170 __unused thread_t thread
,
172 thread_state_t tstate
,
173 __unused
unsigned int count
,
174 mach_vm_offset_t
*entry_point
/* Guard: never leave a zero entry point. */
180 if (*entry_point
== 0)
181 *entry_point
= VM_MIN_ADDRESS
;
/* 32-bit flavors: use eip, defaulting to VM_MIN_ADDRESS. */
184 case OLD_i386_THREAD_STATE
:
185 case x86_THREAD_STATE32
:
187 x86_thread_state32_t
*state25
;
189 state25
= (x86_thread_state32_t
*) tstate
;
190 *entry_point
= state25
->eip
? state25
->eip
: VM_MIN_ADDRESS
;
/* 64-bit flavor: use rip, defaulting to VM_MIN_ADDRESS64. */
194 case x86_THREAD_STATE64
:
196 x86_thread_state64_t
*state25
;
198 state25
= (x86_thread_state64_t
*) tstate
;
199 *entry_point
= state25
->rip
? state25
->rip
: VM_MIN_ADDRESS64
;
203 return (KERN_SUCCESS
);
/*
 * Fragment of the machine-level thread duplication routine (fork path):
 * copies the parent's saved user registers, FPU state, and any user-set
 * LDT entry into the child.
 * NOTE(review): extraction-damaged -- the signature (parent/child
 * thread_t parameters), else keywords and braces are missing.  Tokens
 * below are preserved byte-for-byte.
 */
208 * Duplicate parent state in child
/* Both threads must have a pcb before anything can be copied. */
217 if (child
->machine
.pcb
== NULL
|| parent
->machine
.pcb
== NULL
)
218 return (KERN_FAILURE
);
220 * Copy over the i386_saved_state registers
/* On 64-bit-capable CPUs pick the saved-state layout by the parent's
 * bitness; otherwise plain 32-bit saved state. */
222 if (cpu_mode_is64bit()) {
223 if (thread_is_64bit(parent
))
224 bcopy(USER_REGS64(parent
), USER_REGS64(child
), sizeof(x86_saved_state64_t
));
226 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state_compat32_t
));
228 bcopy(USER_REGS32(parent
), USER_REGS32(child
), sizeof(x86_saved_state32_t
));
231 * Check to see if parent is using floating point
232 * and if so, copy the registers to the child
234 fpu_dup_fxstate(parent
, child
);
237 * FIXME - should a user specified LDT, TSS and V86 info
238 * be duplicated as well?? - probably not.
240 // duplicate any use LDT entry that was set I think this is appropriate.
242 if (parent
->machine
.pcb
->uldt_selector
!= 0) {
243 child
->machine
.pcb
->uldt_selector
= parent
->machine
.pcb
->uldt_selector
;
244 child
->machine
.pcb
->uldt_desc
= parent
->machine
.pcb
->uldt_desc
;
248 return (KERN_SUCCESS
);
/*
 * thread_set_child -- fragment.
 * Clears the carry flag (EFL_CF) in the child's saved user register
 * state after fork, selecting the 64- or 32-bit saved-state layout via
 * thread_is_64bit().
 * NOTE(review): extraction-damaged -- the statements between fetching
 * USER_REGS and clearing the flag (presumably the fork return-value
 * register stores) are elided; compare with upstream before relying on
 * this text.  Tokens below are preserved byte-for-byte.
 */
252 * FIXME - thread_set_child
255 void thread_set_child(thread_t child
, int pid
);
257 thread_set_child(thread_t child
, int pid
)
260 if (thread_is_64bit(child
)) {
261 x86_saved_state64_t
*iss64
;
263 iss64
= USER_REGS64(child
);
/* Clear carry: indicates syscall success to the returning child. */
267 iss64
->isf
.rflags
&= ~EFL_CF
;
269 x86_saved_state32_t
*iss32
;
271 iss32
= USER_REGS32(child
);
275 iss32
->efl
&= ~EFL_CF
;
/*
 * thread_set_parent -- fragment.
 * Clears the carry flag (EFL_CF) in the parent's saved user register
 * state after fork, mirroring thread_set_child above.
 * NOTE(review): extraction-damaged -- the register stores between the
 * USER_REGS fetch and the flag clear are elided; compare with upstream.
 * Tokens below are preserved byte-for-byte.
 */
280 void thread_set_parent(thread_t parent
, int pid
);
282 thread_set_parent(thread_t parent
, int pid
)
285 if (thread_is_64bit(parent
)) {
286 x86_saved_state64_t
*iss64
;
288 iss64
= USER_REGS64(parent
);
292 iss64
->isf
.rflags
&= ~EFL_CF
;
294 x86_saved_state32_t
*iss32
;
296 iss32
= USER_REGS32(parent
);
300 iss32
->efl
&= ~EFL_CF
;
307 * System Call handling code
/* BSD-side helpers used by the syscall dispatchers below. */
310 extern struct proc
* i386_current_proc(void);
/* Fetch a word from user space (legacy interface). */
312 extern long fuword(vm_offset_t
);
315 /* following implemented in bsd/dev/i386/unix_signal.c */
316 int __pthread_cset(struct sysent
*);
318 void __pthread_creset(struct sysent
*);
/*
 * machdep_syscall -- fragment.
 * Dispatcher for 32-bit machine-dependent syscalls: validates the trap
 * number against machdep_call_table, copies the int-sized arguments in
 * from the user stack (skipping the return-address slot), calls the
 * handler with the right arity, stores the result in saved eax, drops
 * any held funnel, and returns to user mode via
 * thread_exception_return() (does not return to its caller).
 * NOTE(review): extraction-damaged -- the return-type/brace lines, the
 * declarations of trapno/nargs/error/rval, the arity switch header and
 * its case labels, and several closing braces are missing.  Tokens
 * below are preserved byte-for-byte.
 */
322 machdep_syscall(x86_saved_state_t
*state
)
324 int args
[machdep_call_count
];
327 machdep_call_t
*entry
;
328 x86_saved_state32_t
*regs
;
330 assert(is_saved_state32(state
));
331 regs
= saved_state32(state
);
/* Debug trace of the incoming call (trapno declaration elided). */
335 kprintf("machdep_syscall(0x%08x) code=%d\n", regs
, trapno
);
/* Range-check the trap number; out of range -> kern_invalid result. */
338 if (trapno
< 0 || trapno
>= machdep_call_count
) {
339 regs
->eax
= (unsigned int)kern_invalid(NULL
);
341 thread_exception_return();
344 entry
= &machdep_call_table
[trapno
];
345 nargs
= entry
->nargs
;
/* Copy user args from just above the return-address slot on the user
 * stack; failure -> KERN_INVALID_ADDRESS in eax. */
348 if (copyin((user_addr_t
) regs
->uesp
+ sizeof (int),
349 (char *) args
, (nargs
* sizeof (int)))) {
350 regs
->eax
= KERN_INVALID_ADDRESS
;
352 thread_exception_return();
/* Arity dispatch (switch header and case labels elided). */
358 regs
->eax
= (*entry
->routine
.args_0
)();
361 regs
->eax
= (*entry
->routine
.args_1
)(args
[0]);
364 regs
->eax
= (*entry
->routine
.args_2
)(args
[0], args
[1]);
/* Three-arg calls may be BSD-style (error + by-reference rval). */
367 if (!entry
->bsd_style
)
368 regs
->eax
= (*entry
->routine
.args_3
)(args
[0], args
[1], args
[2]);
373 error
= (*entry
->routine
.args_bsd_3
)(&rval
, args
[0], args
[1], args
[2]);
/* BSD convention: carry flag signals error to user space. */
376 regs
->efl
|= EFL_CF
; /* carry bit */
379 regs
->efl
&= ~EFL_CF
;
384 regs
->eax
= (*entry
->routine
.args_4
)(args
[0], args
[1], args
[2], args
[3]);
388 panic("machdep_syscall: too many args");
/* Release the funnel, if held, before returning to user mode. */
390 if (current_thread()->funnel_lock
)
391 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
393 thread_exception_return();
/*
 * machdep_syscall64 -- fragment.
 * 64-bit counterpart of machdep_syscall: trap number comes from rax
 * (masked with SYSCALL_NUMBER_MASK), arguments arrive in registers
 * (rdi for the one-arg form), result goes to saved rax.  Ends with
 * thread_exception_return().
 * NOTE(review): extraction-damaged -- return type, braces, the trapno
 * declaration, and the case labels of the arity switch are missing.
 * Tokens below are preserved byte-for-byte.
 */
399 machdep_syscall64(x86_saved_state_t
*state
)
402 machdep_call_t
*entry
;
403 x86_saved_state64_t
*regs
;
405 assert(is_saved_state64(state
));
406 regs
= saved_state64(state
);
408 trapno
= regs
->rax
& SYSCALL_NUMBER_MASK
;
410 if (trapno
< 0 || trapno
>= machdep_call_count
) {
411 regs
->rax
= (unsigned int)kern_invalid(NULL
);
413 thread_exception_return();
416 entry
= &machdep_call_table64
[trapno
];
/* Arity dispatch (case labels elided). */
418 switch (entry
->nargs
) {
420 regs
->rax
= (*entry
->routine
.args_0
)();
423 regs
->rax
= (*entry
->routine
.args64_1
)(regs
->rdi
);
426 panic("machdep_syscall64: too many args");
/* Release the funnel, if held, before returning to user mode. */
428 if (current_thread()->funnel_lock
)
429 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
431 thread_exception_return();
437 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
)
439 struct real_descriptor desc
;
441 mp_disable_preemption();
445 desc
.base_low
= addr
& 0xffff;
446 desc
.base_med
= (addr
>> 16) & 0xff;
447 desc
.base_high
= (addr
>> 24) & 0xff;
448 desc
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
449 desc
.granularity
= SZ_32
|SZ_G
;
450 pcb
->cthread_desc
= desc
;
451 *ldt_desc_p(USER_CTHREAD
) = desc
;
453 mp_enable_preemption();
455 return(KERN_SUCCESS
);
459 thread_set_cthread_self(uint32_t self
)
461 current_thread()->machine
.pcb
->cthread_self
= (uint64_t) self
;
463 return (KERN_SUCCESS
);
467 thread_get_cthread_self(void)
469 return ((kern_return_t
)current_thread()->machine
.pcb
->cthread_self
);
473 thread_fast_set_cthread_self(uint32_t self
)
476 x86_saved_state32_t
*iss
;
478 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
479 thread_compose_cthread_desc(self
, pcb
);
480 pcb
->cthread_self
= (uint64_t) self
; /* preserve old func too */
481 iss
= saved_state32(pcb
->iss
);
482 iss
->gs
= USER_CTHREAD
;
484 return (USER_CTHREAD
);
488 thread_fast_set_cthread_self64(uint64_t self
)
491 x86_saved_state64_t
*iss
;
493 pcb
= current_thread()->machine
.pcb
;
495 /* check for canonical address, set 0 otherwise */
496 if (!IS_USERADDR64_CANONICAL(self
))
498 pcb
->cthread_self
= self
;
499 current_cpu_datap()->cpu_uber
.cu_user_gs_base
= self
;
501 /* XXX for 64-in-32 */
502 iss
= saved_state64(pcb
->iss
);
503 iss
->gs
= USER_CTHREAD
;
504 thread_compose_cthread_desc((uint32_t) self
, pcb
);
506 return (USER_CTHREAD
);
510 * thread_set_user_ldt routine is the interface for the user level
511 * settable ldt entry feature. allowing a user to create arbitrary
512 * ldt entries seems to be too large of a security hole, so instead
513 * this mechanism is in place to allow user level processes to have
514 * an ldt entry that can be used in conjunction with the FS register.
516 * Swapping occurs inside the pcb.c file along with initialization
517 * when a thread is created. The basic functioning theory is that the
518 * pcb->uldt_selector variable will contain either 0 meaning the
519 * process has not set up any entry, or the selector to be used in
520 * the FS register. pcb->uldt_desc contains the actual descriptor the
521 * user has set up stored in machine usable ldt format.
523 * Currently one entry is shared by all threads (USER_SETTABLE), but
524 * this could be changed in the future by changing how this routine
525 * allocates the selector. There seems to be no real reason at this
526 * time to have this added feature, but in the future it might be
529 * address is the linear address of the start of the data area; size
530 * is the size in bytes of the area; flags should always be set to 0
531 * for now. In the future it could be used to set R/W permissions or
532 * other functions. Currently the segment is created as a data segment
533 * up to 1 megabyte in size with full read/write permissions only.
535 * this call returns the segment selector or -1 if any error occurs
/*
 * thread_set_user_ldt -- fragment.
 * Installs a single user-settable LDT data segment (USER_SETTABLE)
 * based at 'address' with limit 'size', for use with the FS register;
 * see the long comment above.  Records the descriptor and selector in
 * the pcb and writes the live LDT slot under disabled preemption.
 * NOTE(review): extraction-damaged -- the return-type line, the
 * flags/size guard conditions (only their 'return -1' bodies remain),
 * the pcb/mycpu declarations, and the fix_desc() conversion call the
 * comments refer to are missing.  Tokens below are preserved
 * byte-for-byte.
 */
538 thread_set_user_ldt(uint32_t address
, uint32_t size
, uint32_t flags
)
541 struct fake_descriptor temp
;
/* Guard conditions elided; the trailing comments state their intent. */
545 return -1; // flags not supported
547 return -1; // size too big, 1 meg is the limit
549 mp_disable_preemption();
550 mycpu
= cpu_number();
552 // create a "fake" descriptor so we can use fix_desc()
553 // to build a real one...
554 // 32 bit default operation size
555 // standard read/write perms for a data segment
556 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
557 temp
.offset
= address
;
558 temp
.lim_or_seg
= size
;
559 temp
.size_or_wdct
= SZ_32
;
560 temp
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
/* NOTE(review): the fix_desc() call referenced here is elided. */
562 // turn this into a real descriptor
565 // set up our data in the pcb
566 pcb
->uldt_desc
= *(struct real_descriptor
*)&temp
;
567 pcb
->uldt_selector
= USER_SETTABLE
; // set the selector value
569 // now set it up in the current table...
570 *ldt_desc_p(USER_SETTABLE
) = *(struct real_descriptor
*)&temp
;
572 mp_enable_preemption();
574 return USER_SETTABLE
;
577 #endif /* MACH_BSD */
/* Generic Mach trap handler signature used by the dispatchers below. */
580 typedef kern_return_t (*mach_call_t
)(void *);
/* Argument block passed to Mach traps.
 * NOTE(review): the member declarations (arg1..arg9 per the usage in
 * mach_call_arg_munger32 below) and the closing brace are elided in
 * this chunk. */
582 struct mach_call_args
{
/* Prototype for the 32-bit argument munger defined below.
 * NOTE(review): its return-type line is elided here. */
596 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
);
600 mach_call_arg_munger32(uint32_t sp
, int nargs
, int call_number
, struct mach_call_args
*args
)
602 unsigned int args32
[9];
604 if (copyin((user_addr_t
)(sp
+ sizeof(int)), (char *)args32
, nargs
* sizeof (int)))
605 return KERN_INVALID_ARGUMENT
;
608 case 9: args
->arg9
= args32
[8];
609 case 8: args
->arg8
= args32
[7];
610 case 7: args
->arg7
= args32
[6];
611 case 6: args
->arg6
= args32
[5];
612 case 5: args
->arg5
= args32
[4];
613 case 4: args
->arg4
= args32
[3];
614 case 3: args
->arg3
= args32
[2];
615 case 2: args
->arg2
= args32
[1];
616 case 1: args
->arg1
= args32
[0];
618 if (call_number
== 90) {
619 /* munge_l for mach_wait_until_trap() */
620 args
->arg1
= (((uint64_t)(args32
[0])) | ((((uint64_t)(args32
[1]))<<32)));
622 if (call_number
== 93) {
623 /* munge_wl for mk_timer_arm_trap() */
624 args
->arg2
= (((uint64_t)(args32
[1])) | ((((uint64_t)(args32
[2]))<<32)));
/*
 * mach_call_munger -- fragment.
 * 32-bit Mach trap dispatcher: the (negated) trap number arrives in
 * saved eax, the handler is looked up in mach_trap_table, arguments are
 * munged in from the user stack via mach_call_arg_munger32, and control
 * returns to user mode via thread_exception_return().
 * NOTE(review): extraction-damaged -- the call_number/argc
 * declarations, closing braces, the retval-error store into eax, the
 * trailing KERNEL_DEBUG arguments, and the final store of retval into
 * saved eax are missing.  Tokens below are preserved byte-for-byte.
 */
631 __private_extern__
void
632 mach_call_munger(x86_saved_state_t
*state
);
637 mach_call_munger(x86_saved_state_t
*state
)
641 mach_call_t mach_call
;
642 kern_return_t retval
;
643 struct mach_call_args args
= { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
644 x86_saved_state32_t
*regs
;
646 assert(is_saved_state32(state
));
647 regs
= saved_state32(state
);
/* Mach traps are encoded as negative numbers in eax. */
649 call_number
= -(regs
->eax
);
651 kprintf("mach_call_munger(0x%08x) code=%d\n", regs
, call_number
);
/* Out-of-range trap numbers raise EXC_SYSCALL. */
654 if (call_number
< 0 || call_number
>= mach_trap_count
) {
655 i386_exception(EXC_SYSCALL
, call_number
, 1);
658 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
660 if (mach_call
== (mach_call_t
)kern_invalid
) {
661 i386_exception(EXC_SYSCALL
, call_number
, 1);
664 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
667 retval
= mach_call_arg_munger32(regs
->uesp
, argc
, call_number
, &args
);
/* NOTE(review): the error store into eax inside this branch is elided. */
669 if (retval
!= KERN_SUCCESS
) {
672 thread_exception_return();
676 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
677 (int) args
.arg1
, (int) args
.arg2
, (int) args
.arg3
, (int) args
.arg4
, 0);
679 retval
= mach_call(&args
);
681 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
685 thread_exception_return();
/*
 * mach_call_munger64 -- fragment.
 * 64-bit Mach trap dispatcher: trap number comes from rax masked with
 * SYSCALL_NUMBER_MASK; up to six arguments arrive in registers, and
 * any beyond six are copied in from the user stack into the saved
 * state's v_arg6 area so the handler can treat &regs->rdi as the
 * argument block.  Result goes to saved rax; ends with
 * thread_exception_return().
 * NOTE(review): extraction-damaged -- the call_number/argc/copyin_count
 * declarations, the 'argc > 6' guard around the copyin, and closing
 * braces are missing.  The '®s' sequences below are mojibake for
 * '&regs'.  Tokens are otherwise preserved byte-for-byte.
 */
691 __private_extern__
void
692 mach_call_munger64(x86_saved_state_t
*state
);
697 mach_call_munger64(x86_saved_state_t
*state
)
701 mach_call_t mach_call
;
702 x86_saved_state64_t
*regs
;
704 assert(is_saved_state64(state
));
705 regs
= saved_state64(state
);
707 call_number
= regs
->rax
& SYSCALL_NUMBER_MASK
;
709 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
710 (int) regs
->rdi
, (int) regs
->rsi
, (int) regs
->rdx
, (int) regs
->r10
, 0);
712 if (call_number
< 0 || call_number
>= mach_trap_count
) {
713 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
716 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
718 if (mach_call
== (mach_call_t
)kern_invalid
) {
719 i386_exception(EXC_SYSCALL
, regs
->rax
, 1);
722 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
/* Copy in the stack-resident arguments past the first six. */
727 copyin_count
= (argc
- 6) * sizeof(uint64_t);
729 if (copyin((user_addr_t
)(regs
->isf
.rsp
+ sizeof(user_addr_t
)), (char *)®s
->v_arg6
, copyin_count
)) {
730 regs
->rax
= KERN_INVALID_ARGUMENT
;
732 thread_exception_return();
736 regs
->rax
= (uint64_t)mach_call((void *)(®s
->rdi
));
738 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
739 (int)regs
->rax
, 0, 0, 0, 0);
741 thread_exception_return();
748 * thread_setuserstack:
750 * Sets the user stack pointer into the machine
751 * dependent thread state info.
756 mach_vm_address_t user_stack
)
758 if (thread_is_64bit(thread
)) {
759 x86_saved_state64_t
*iss64
;
761 iss64
= USER_REGS64(thread
);
763 iss64
->isf
.rsp
= (uint64_t)user_stack
;
765 x86_saved_state32_t
*iss32
;
767 iss32
= USER_REGS32(thread
);
769 iss32
->uesp
= CAST_DOWN(unsigned int, user_stack
);
774 * thread_adjuserstack:
776 * Returns the adjusted user stack pointer from the machine
777 * dependent thread state info. Used for small (<2G) deltas.
784 if (thread_is_64bit(thread
)) {
785 x86_saved_state64_t
*iss64
;
787 iss64
= USER_REGS64(thread
);
789 iss64
->isf
.rsp
+= adjust
;
791 return iss64
->isf
.rsp
;
793 x86_saved_state32_t
*iss32
;
795 iss32
= USER_REGS32(thread
);
797 iss32
->uesp
+= adjust
;
799 return CAST_USER_ADDR_T(iss32
->uesp
);
804 * thread_setentrypoint:
806 * Sets the user PC into the machine
807 * dependent thread state info.
810 thread_setentrypoint(thread_t thread
, mach_vm_address_t entry
)
812 if (thread_is_64bit(thread
)) {
813 x86_saved_state64_t
*iss64
;
815 iss64
= USER_REGS64(thread
);
817 iss64
->isf
.rip
= (uint64_t)entry
;
819 x86_saved_state32_t
*iss32
;
821 iss32
= USER_REGS32(thread
);
823 iss32
->eip
= CAST_DOWN(unsigned int, entry
);
829 thread_setsinglestep(thread_t thread
, int on
)
831 if (thread_is_64bit(thread
)) {
832 x86_saved_state64_t
*iss64
;
834 iss64
= USER_REGS64(thread
);
837 iss64
->isf
.rflags
|= EFL_TF
;
839 iss64
->isf
.rflags
&= ~EFL_TF
;
841 x86_saved_state32_t
*iss32
;
843 iss32
= USER_REGS32(thread
);
846 iss32
->efl
|= EFL_TF
;
848 iss32
->efl
&= ~EFL_TF
;
854 /* XXX this should be a struct savearea so that CHUD will work better on x86 */
859 return USER_STATE(thread
);