2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 #include <mach_debug.h>
26 #include <mach_ldebug.h>
28 #include <mach/kern_return.h>
29 #include <mach/mach_traps.h>
30 #include <mach/thread_status.h>
31 #include <mach/vm_param.h>
33 #include <kern/counters.h>
34 #include <kern/cpu_data.h>
35 #include <kern/mach_param.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/sched_prim.h>
39 #include <kern/misc_protos.h>
40 #include <kern/assert.h>
42 #include <kern/syscall_sw.h>
43 #include <ipc/ipc_port.h>
44 #include <vm/vm_kern.h>
47 #include <i386/cpu_data.h>
48 #include <i386/cpu_number.h>
49 #include <i386/thread.h>
50 #include <i386/eflags.h>
51 #include <i386/proc_reg.h>
54 #include <i386/user_ldt.h>
56 #include <i386/iopb_entries.h>
57 #include <i386/machdep_call.h>
58 #include <i386/misc_protos.h>
59 #include <i386/cpu_data.h>
60 #include <i386/cpu_number.h>
61 #include <i386/mp_desc.h>
62 #include <i386/vmparam.h>
63 #include <sys/syscall.h>
64 #include <sys/kdebug.h>
65 #include <sys/ktrace.h>
66 #include <../bsd/sys/sysent.h>
/*
 * NOTE(review): this file is a lossy extract -- the leading "NN " tokens
 * below are original line numbers fused into the text, and many original
 * lines are missing. Text is preserved byte-for-byte; confirm any fragment
 * against the original source before relying on it.
 */
/* Forward declarations for BSD/Mach helpers used later in this file. */
68 extern struct proc
*current_proc(void);
/* MSR accessors -- implementations not visible in this extract. */
89 unsigned int get_msr_exportmask(void);
91 unsigned int get_msr_nbits(void);
93 unsigned int get_msr_rbits(void);
/* Builds the per-thread cthread LDT data-segment descriptor (body appears later). */
96 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
);
/*
 * Fragment of thread_userstack() -- extracts the user stack pointer from a
 * flavored thread state (i386_THREAD_STATE or i386_NEW_THREAD_STATE).
 * NOTE(review): the function signature, switch header, break statements and
 * several branch lines are missing from this extract; do not treat the
 * control flow below as complete.
 */
103 * Return the user stack pointer from the machine
104 * dependent thread state info.
108 __unused thread_t thread
,
110 thread_state_t tstate
,
112 user_addr_t
*user_stack
,
/* Locals: both state layouts this routine can decode. */
116 struct i386_saved_state
*state
;
117 i386_thread_state_t
*state25
;
124 case i386_THREAD_STATE
: /* FIXME */
125 state25
= (i386_thread_state_t
*) tstate
;
/* Use the esp supplied in the thread state... */
127 *user_stack
= state25
->esp
;
/* ...or fall back to the default USRSTACK (ordering of these two
 * assignments relative to the customstack test is lost in the extract). */
129 *user_stack
= USRSTACK
;
130 if (customstack
&& state25
->esp
)
136 case i386_NEW_THREAD_STATE
:
137 if (count
< i386_NEW_THREAD_STATE_COUNT
)
138 return (KERN_INVALID_ARGUMENT
);
140 state
= (struct i386_saved_state
*) tstate
;
144 /* If a valid user stack is specified, use it. */
148 *user_stack
= USRSTACK
;
149 if (customstack
&& uesp
)
/* Unknown flavor presumably falls through to this error return. */
155 return (KERN_INVALID_ARGUMENT
);
158 return (KERN_SUCCESS
);
/*
 * Fragment of thread_entrypoint() -- extracts the initial user PC from a
 * flavored thread state, defaulting to VM_MIN_ADDRESS when the supplied
 * eip is zero. NOTE(review): signature, switch header and break lines are
 * missing from this extract.
 */
163 __unused thread_t thread
,
165 thread_state_t tstate
,
167 mach_vm_offset_t
*entry_point
170 struct i386_saved_state
*state
;
171 i386_thread_state_t
*state25
;
/* Default the entry point if the caller passed none. */
176 if (*entry_point
== 0)
177 *entry_point
= VM_MIN_ADDRESS
;
180 case i386_THREAD_STATE
:
181 state25
= (i386_thread_state_t
*) tstate
;
182 *entry_point
= state25
->eip
? state25
->eip
: VM_MIN_ADDRESS
;
185 case i386_NEW_THREAD_STATE
:
186 if (count
< i386_THREAD_STATE_COUNT
)
187 return (KERN_INVALID_ARGUMENT
);
189 state
= (struct i386_saved_state
*) tstate
;
192 * If a valid entry point is specified, use it.
194 *entry_point
= state
->eip
? state
->eip
: VM_MIN_ADDRESS
;
199 return (KERN_SUCCESS
);
/*
 * Fragment of get_user_regs() -- returns the saved user register state
 * (USER_REGS) for a thread. NOTE(review): the pcb-presence check and the
 * failure-path return around the printf are missing from this extract.
 */
202 struct i386_saved_state
*
203 get_user_regs(thread_t th
)
206 return(USER_REGS(th
));
/* Diagnostic emitted when the thread has no pcb (guard line missing here). */
208 printf("[get_user_regs: thread does not have pcb]");
/*
 * Fragment of a machine-level thread duplication routine (fork support):
 * copies the parent's saved registers, FPU state, and any user-set LDT
 * entry into the child. NOTE(review): the function signature and several
 * closing braces are missing from this extract.
 */
214 * Duplicate parent state in child
223 struct i386_float_state floatregs
;
/* Flush the live FPU context to the parent pcb if this CPU owns it. */
226 /* Save the FPU state */
227 if ((pcb_t
)(per_proc_info
[cpu_number()].fpu_pcb
) == parent
->machine
.pcb
) {
228 fp_state_save(parent
);
/* Both threads must already have pcbs. */
232 if (child
->machine
.pcb
== NULL
|| parent
->machine
.pcb
== NULL
)
233 return (KERN_FAILURE
);
235 /* Copy over the i386_saved_state registers */
236 child
->machine
.pcb
->iss
= parent
->machine
.pcb
->iss
;
238 /* Check to see if parent is using floating point
239 * and if so, copy the registers to the child
240 * FIXME - make sure this works.
243 if (parent
->machine
.pcb
->ims
.ifps
) {
244 if (fpu_get_state(parent
, &floatregs
) == KERN_SUCCESS
)
245 fpu_set_state(child
, &floatregs
);
248 /* FIXME - should a user specified LDT, TSS and V86 info
249 * be duplicated as well?? - probably not.
251 // duplicate any use LDT entry that was set I think this is appropriate.
253 if (parent
->machine
.pcb
->uldt_selector
!= 0) {
254 child
->machine
.pcb
->uldt_selector
= parent
->machine
.pcb
->uldt_selector
;
255 child
->machine
.pcb
->uldt_desc
= parent
->machine
.pcb
->uldt_desc
;
260 return (KERN_SUCCESS
);
264 * FIXME - thread_set_child
267 void thread_set_child(thread_t child
, int pid
);
269 thread_set_child(thread_t child
, int pid
)
271 child
->machine
.pcb
->iss
.eax
= pid
;
272 child
->machine
.pcb
->iss
.edx
= 1;
273 child
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
275 void thread_set_parent(thread_t parent
, int pid
);
277 thread_set_parent(thread_t parent
, int pid
)
279 parent
->machine
.pcb
->iss
.eax
= pid
;
280 parent
->machine
.pcb
->iss
.edx
= 0;
281 parent
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
/*
 * System-call handling support: error sentinels, funnel declarations, and
 * externs for the BSD uthread argument/return-value accessors used by
 * unix_syscall() / unix_syscall_return() below.
 */
287 * System Call handling code
/* In-kernel error sentinels (not returned to user space as-is). */
290 #define ERESTART -1 /* restart syscall */
291 #define EJUSTRETURN -2 /* don't modify regs, just return */
295 #define KERNEL_FUNNEL 1
297 extern funnel_t
* kernel_flock
;
299 extern int set_bsduthreadargs (thread_t
, struct i386_saved_state
*, void *);
300 extern void * get_bsduthreadarg(thread_t
);
301 extern int * get_bsduthreadrval(thread_t th
);
302 extern int * get_bsduthreadlowpridelay(thread_t th
);
304 extern long fuword(vm_offset_t
);
306 extern void unix_syscall(struct i386_saved_state
*);
307 extern void unix_syscall_return(int);
309 /* following implemented in bsd/dev/i386/unix_signal.c */
310 int __pthread_cset(struct sysent
*);
312 void __pthread_creset(struct sysent
*);
/*
 * Fragment of unix_syscall_return() -- completes a BSD system call on the
 * return path: translates the error code into the user-visible eax/carry
 * convention, releases the funnel, applies the low-priority I/O throttle
 * delay, emits trace records, and returns to user mode. NOTE(review):
 * many lines (locals such as thread/rval/params/code, brace structure,
 * the eax assignments inside the error branches) are missing from this
 * extract.
 */
316 unix_syscall_return(int error
)
320 struct i386_saved_state
*regs
;
324 struct sysent
*callp
;
325 volatile int *lowpri_delay
;
327 thread
= current_thread();
328 rval
= get_bsduthreadrval(thread
);
329 lowpri_delay
= get_bsduthreadlowpridelay(thread
);
332 regs
= USER_REGS(thread
);
334 /* reconstruct code for tracing before blasting eax */
336 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
/* Out-of-range syscall numbers map to the sysent[63] stub. */
337 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
/* sysent[0] is the indirect-syscall entry: real code is on the user stack. */
338 if (callp
== sysent
) {
339 code
= fuword(params
);
342 if (error
== ERESTART
) {
345 else if (error
!= EJUSTRETURN
) {
348 regs
->efl
|= EFL_CF
; /* carry bit */
349 } else { /* (not error) */
352 regs
->efl
&= ~EFL_CF
;
356 ktrsysret(p
, code
, error
, rval
[0], (callp
->sy_funnel
& FUNNEL_MASK
));
358 __pthread_creset(callp
);
360 if ((callp
->sy_funnel
& FUNNEL_MASK
) != NO_FUNNEL
)
361 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
365 * task is marked as a low priority I/O type
366 * and the I/O we issued while in this system call
367 * collided with normal I/O operations... we'll
368 * delay in order to mitigate the impact of this
369 * task on the normal operation of the system
371 IOSleep(*lowpri_delay
);
374 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
375 error
, rval
[0], rval
[1], 0, 0);
/* Does not return: resumes user-mode execution. */
377 thread_exception_return();
/*
 * Fragment of unix_syscall() -- the BSD system-call dispatcher invoked on
 * trap entry: decodes the syscall number from eax (indirect syscalls read
 * it from the user stack), copies in arguments, takes the kernel funnel
 * when required, calls the sysent handler, and converts the result to the
 * eax/carry user convention before returning to user mode. NOTE(review):
 * numerous lines (locals, switch/brace structure, eax stores in the error
 * branches) are missing from this extract.
 */
383 unix_syscall(struct i386_saved_state
*regs
)
388 struct sysent
*callp
;
395 volatile int *lowpri_delay
;
397 thread
= current_thread();
399 rval
= get_bsduthreadrval(thread
);
400 lowpri_delay
= get_bsduthreadlowpridelay(thread
);
402 thread
->task
->syscalls_unix
++; /* MP-safety ignored */
404 //printf("[scall : eax %x]", regs->eax);
/* Arguments start one int above the saved user stack pointer. */
406 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
407 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
/* Indirect syscall (sysent[0]): fetch the real code from the user stack. */
408 if (callp
== sysent
) {
409 code
= fuword(params
);
410 params
+= sizeof (int);
411 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
414 vt
= get_bsduthreadarg(thread
);
/* Copy in the declared number of int-sized arguments, if any. */
416 if ((nargs
= (callp
->sy_narg
* sizeof (int))) &&
417 (error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
)) != 0) {
420 thread_exception_return();
/* pthread cancellation point check. */
427 if ((error
= __pthread_cset(callp
))) {
428 /* cancelled system call; let it returned with EINTR for handling */
431 thread_exception_return();
435 funnel_type
= (callp
->sy_funnel
& FUNNEL_MASK
);
436 if(funnel_type
== KERNEL_FUNNEL
)
437 (void) thread_funnel_set(kernel_flock
, TRUE
);
439 (void) set_bsduthreadargs(thread
, regs
, NULL
);
441 if (callp
->sy_narg
> 8)
442 panic("unix_syscall max arg count exceeded (%d)", callp
->sy_narg
);
444 ktrsyscall(p
, code
, callp
->sy_narg
, vt
, funnel_type
);
448 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
449 *ip
, *(ip
+1), *(ip
+2), *(ip
+3), 0);
/* Dispatch to the actual syscall implementation. */
452 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &rval
[0]);
455 /* May be needed with vfork changes */
456 regs
= USER_REGS(thread
);
458 if (error
== ERESTART
) {
461 else if (error
!= EJUSTRETURN
) {
464 regs
->efl
|= EFL_CF
; /* carry bit */
465 } else { /* (not error) */
468 regs
->efl
&= ~EFL_CF
;
472 ktrsysret(p
, code
, error
, rval
[0], funnel_type
);
474 __pthread_creset(callp
);
476 if(funnel_type
!= NO_FUNNEL
)
477 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
481 * task is marked as a low priority I/O type
482 * and the I/O we issued while in this system call
483 * collided with normal I/O operations... we'll
484 * delay in order to mitigate the impact of this
485 * task on the normal operation of the system
487 IOSleep(*lowpri_delay
);
490 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
491 error
, rval
[0], rval
[1], 0, 0);
/* Does not return: resumes user-mode execution. */
493 thread_exception_return();
/*
 * Fragment of machdep_syscall() -- dispatches machine-dependent syscalls
 * through machdep_call_table: validates the trap number, copies in the
 * declared number of int arguments from the user stack, and invokes the
 * routine variant matching the arg count, storing the result in eax.
 * NOTE(review): the switch header, case labels, break statements, and the
 * args[] declaration are missing from this extract.
 */
499 machdep_syscall( struct i386_saved_state
*regs
)
502 machdep_call_t
*entry
;
/* Reject out-of-range trap numbers with KERN_INVALID_ARGUMENT semantics. */
505 if (trapno
< 0 || trapno
>= machdep_call_count
) {
506 regs
->eax
= (unsigned int)kern_invalid(NULL
);
508 thread_exception_return();
512 entry
= &machdep_call_table
[trapno
];
513 nargs
= entry
->nargs
;
/* Copy the user-supplied arguments (skipping the return address slot). */
518 if (copyin((user_addr_t
) regs
->uesp
+ sizeof (int),
520 nargs
* sizeof (int))) {
522 regs
->eax
= KERN_INVALID_ADDRESS
;
524 thread_exception_return();
/* Dispatch on argument count (case labels lost in this extract). */
530 regs
->eax
= (*entry
->routine
.args_1
)(args
[0]);
533 regs
->eax
= (*entry
->routine
.args_2
)(args
[0],args
[1]);
536 regs
->eax
= (*entry
->routine
.args_3
)(args
[0],args
[1],args
[2]);
539 regs
->eax
= (*entry
->routine
.args_4
)(args
[0],args
[1],args
[2],args
[3]);
542 panic("machdep_syscall(): too many args");
546 regs
->eax
= (*entry
->routine
.args_0
)();
/* Drop any funnel still held before returning to user mode. */
548 if (current_thread()->funnel_lock
)
549 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
551 thread_exception_return();
/*
 * Fragment of thread_compose_cthread_desc() -- builds a user-mode
 * read/write data-segment descriptor based at 'addr', stores it in the
 * pcb, and installs it in the LDT slot for USER_CTHREAD, with preemption
 * disabled around the update. NOTE(review): the limit-field assignments
 * and braces are missing from this extract.
 */
557 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
)
559 struct real_descriptor desc
;
/* Keep the pcb and the live LDT slot consistent: no preemption. */
561 mp_disable_preemption();
/* Split the 32-bit base address across the descriptor's base fields. */
565 desc
.base_low
= addr
& 0xffff;
566 desc
.base_med
= (addr
>> 16) & 0xff;
567 desc
.base_high
= (addr
>> 24) & 0xff;
568 desc
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
569 desc
.granularity
= SZ_32
|SZ_G
;
570 pcb
->cthread_desc
= desc
;
571 *ldt_desc_p(USER_CTHREAD
) = desc
;
573 mp_enable_preemption();
575 return(KERN_SUCCESS
);
579 thread_set_cthread_self(uint32_t self
)
581 current_thread()->machine
.pcb
->cthread_self
= self
;
583 return (KERN_SUCCESS
);
587 thread_get_cthread_self(void)
589 return ((kern_return_t
)current_thread()->machine
.pcb
->cthread_self
);
/*
 * Fragment of thread_fast_set_cthread_self() -- stores the cthread self
 * value in the pcb AND rebuilds/installs the cthread LDT descriptor so
 * user space can address it via a segment register; returns the
 * USER_CTHREAD selector. NOTE(review): the return type, braces, and the
 * pcb local declaration are missing from this extract.
 */
593 thread_fast_set_cthread_self(uint32_t self
)
596 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
597 thread_compose_cthread_desc(self
, pcb
);
598 pcb
->cthread_self
= self
; /* preserve old func too */
599 return (USER_CTHREAD
);
/*
 * Fragment of thread_set_user_ldt() and its original design commentary.
 * Builds a user data-segment descriptor from (address, size), stores it in
 * the pcb, and installs it in the shared USER_SETTABLE LDT slot, returning
 * that selector or -1 on error. NOTE(review): the flags/size validation
 * `if` headers, the fix_desc() call, and the function's return type are
 * missing from this extract.
 */
603 * thread_set_user_ldt routine is the interface for the user level
604 * settable ldt entry feature. allowing a user to create arbitrary
605 * ldt entries seems to be too large of a security hole, so instead
606 * this mechanism is in place to allow user level processes to have
607 * an ldt entry that can be used in conjunction with the FS register.
609 * Swapping occurs inside the pcb.c file along with initialization
610 * when a thread is created. The basic functioning theory is that the
611 * pcb->uldt_selector variable will contain either 0 meaning the
612 * process has not set up any entry, or the selector to be used in
613 * the FS register. pcb->uldt_desc contains the actual descriptor the
614 * user has set up stored in machine usable ldt format.
616 * Currently one entry is shared by all threads (USER_SETTABLE), but
617 * this could be changed in the future by changing how this routine
618 * allocates the selector. There seems to be no real reason at this
619 * time to have this added feature, but in the future it might be
622 * address is the linear address of the start of the data area size
623 * is the size in bytes of the area flags should always be set to 0
624 * for now. in the future it could be used to set R/W permisions or
625 * other functions. Currently the segment is created as a data segment
626 * up to 1 megabyte in size with full read/write permisions only.
628 * this call returns the segment selector or -1 if any error occurs
631 thread_set_user_ldt(uint32_t address
, uint32_t size
, uint32_t flags
)
634 struct fake_descriptor temp
;
/* Validation guards (their `if` headers are lost in this extract). */
638 return -1; // flags not supported
640 return -1; // size too big, 1 meg is the limit
642 mp_disable_preemption();
643 mycpu
= cpu_number();
645 // create a "fake" descriptor so we can use fix_desc()
646 // to build a real one...
647 // 32 bit default operation size
648 // standard read/write perms for a data segment
649 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
650 temp
.offset
= address
;
651 temp
.lim_or_seg
= size
;
652 temp
.size_or_wdct
= SZ_32
;
653 temp
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
655 // turn this into a real descriptor
658 // set up our data in the pcb
659 pcb
->uldt_desc
= *(struct real_descriptor
*)&temp
;
660 pcb
->uldt_selector
= USER_SETTABLE
; // set the selector value
662 // now set it up in the current table...
663 *ldt_desc_p(USER_SETTABLE
) = *(struct real_descriptor
*)&temp
;
665 mp_enable_preemption();
667 return USER_SETTABLE
;
/*
 * Fragment of mach25_syscall() -- diagnostic stub for obsolete Mach 2.5
 * system calls: logs the faulting EIP and the (negated) trap number from
 * eax. NOTE(review): the return type, braces, and the trailing failure
 * return are missing from this extract. ("Atttempt" typo is in the
 * original runtime string; left untouched here.)
 */
670 mach25_syscall(struct i386_saved_state
*regs
)
672 printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n",
673 regs
->eip
, regs
->eax
, -regs
->eax
);
676 #endif /* MACH_BSD */
/*
 * Fragment of mach_call_start() -- trace hook run from assembly before
 * every mach trap: bumps the task's mach syscall counter, snapshots up to
 * 3 arguments, and emits the DBG_FUNC_START kdebug record. Note the trap
 * number arrives pre-shifted (call_number >> 4 recovers the table index).
 * NOTE(review): locals (argc, i) and brace structure are missing from
 * this extract.
 */
679 /* This routine is called from assembly before each and every mach trap.
682 extern unsigned int mach_call_start(unsigned int, unsigned int *);
686 mach_call_start(unsigned int call_number
, unsigned int *args
)
689 unsigned int kdarg
[3];
691 current_thread()->task
->syscalls_mach
++; /* MP-safety ignored */
693 /* Always prepare to trace mach system calls */
699 argc
= mach_trap_table
[call_number
>>4].mach_trap_arg_count
;
/* Capture at most the first 3 args for the trace record. */
704 for (i
=0; i
< argc
; i
++)
705 kdarg
[i
] = (int)*(args
+ i
);
707 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
>>4)) | DBG_FUNC_START
,
708 kdarg
[0], kdarg
[1], kdarg
[2], 0, 0);
710 return call_number
; /* pass this back thru */
/*
 * Fragment of mach_call_end() -- trace hook run from assembly after each
 * mach trap: emits the DBG_FUNC_END kdebug record and passes the trap's
 * return value straight through. NOTE(review): the kdebug call's argument
 * list and braces are missing from this extract.
 */
713 /* This routine is called from assembly after each mach system call
716 extern unsigned int mach_call_end(unsigned int, unsigned int);
720 mach_call_end(unsigned int call_number
, unsigned int retval
)
722 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
>>4)) | DBG_FUNC_END
,
724 return retval
; /* pass this back thru */
/*
 * Fragment of mach_call_munger() and supporting declarations -- the mach
 * trap dispatcher: zero-initializes a 9-slot argument struct, fills it
 * via a fall-through switch on the table's declared argument count, emits
 * kdebug START/END records, and calls the trap function through a
 * mach_call_t pointer. NOTE(review): the switch header, the arg1..arg9
 * parameters and struct fields, and the final return are missing from
 * this extract. The case labels rely on intentional fall-through.
 */
727 typedef kern_return_t (*mach_call_t
)(void *);
729 extern __attribute__((regparm(1))) kern_return_t
730 mach_call_munger(unsigned int call_number
,
742 struct mach_call_args
{
754 __attribute__((regparm(1))) kern_return_t
755 mach_call_munger(unsigned int call_number
,
768 mach_call_t mach_call
;
769 kern_return_t retval
;
770 struct mach_call_args args
= { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
772 current_thread()->task
->syscalls_mach
++; /* MP-safety ignored */
775 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
/* Intentional fall-through: copy exactly argc arguments, top down. */
777 case 9: args
.arg9
= arg9
;
778 case 8: args
.arg8
= arg8
;
779 case 7: args
.arg7
= arg7
;
780 case 6: args
.arg6
= arg6
;
781 case 5: args
.arg5
= arg5
;
782 case 4: args
.arg4
= arg4
;
783 case 3: args
.arg3
= arg3
;
784 case 2: args
.arg2
= arg2
;
785 case 1: args
.arg1
= arg1
;
788 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
789 args
.arg1
, args
.arg2
, args
.arg3
, 0, 0);
791 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
792 retval
= mach_call(&args
);
794 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
/*
 * Fragments of three small accessors over the saved user register state
 * (via get_user_regs): thread_setuserstack writes uesp, thread_adjuserstack
 * returns/adjusts uesp, thread_setentrypoint writes eip. NOTE(review):
 * return types, the thread_t parameter lines, the adjuserstack signature
 * and its adjustment statement, and braces are missing from this extract.
 */
801 * thread_setuserstack:
803 * Sets the user stack pointer into the machine
804 * dependent thread state info.
809 mach_vm_address_t user_stack
)
811 struct i386_saved_state
*ss
= get_user_regs(thread
);
/* Truncate the 64-bit address to the 32-bit uesp slot. */
813 ss
->uesp
= CAST_DOWN(unsigned int,user_stack
);
817 * thread_adjuserstack:
819 * Returns the adjusted user stack pointer from the machine
820 * dependent thread state info. Used for small (<2G) deltas.
827 struct i386_saved_state
*ss
= get_user_regs(thread
);
830 return CAST_USER_ADDR_T(ss
->uesp
);
834 * thread_setentrypoint:
836 * Sets the user PC into the machine
837 * dependent thread state info.
840 thread_setentrypoint(
842 mach_vm_address_t entry
)
844 struct i386_saved_state
*ss
= get_user_regs(thread
);
846 ss
->eip
= CAST_DOWN(unsigned int,entry
);