2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
24 #include <mach_debug.h>
25 #include <mach_ldebug.h>
27 #include <mach/kern_return.h>
28 #include <mach/mach_traps.h>
29 #include <mach/thread_status.h>
30 #include <mach/vm_param.h>
32 #include <kern/counters.h>
33 #include <kern/cpu_data.h>
34 #include <kern/mach_param.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/sched_prim.h>
38 #include <kern/misc_protos.h>
39 #include <kern/assert.h>
41 #include <kern/syscall_sw.h>
42 #include <ipc/ipc_port.h>
43 #include <vm/vm_kern.h>
46 #include <i386/cpu_data.h>
47 #include <i386/cpu_number.h>
48 #include <i386/thread.h>
49 #include <i386/eflags.h>
50 #include <i386/proc_reg.h>
53 #include <i386/user_ldt.h>
55 #include <i386/iopb_entries.h>
56 #include <i386/machdep_call.h>
57 #include <i386/misc_protos.h>
58 #include <i386/cpu_data.h>
59 #include <i386/cpu_number.h>
60 #include <i386/mp_desc.h>
61 #include <i386/vmparam.h>
62 #include <sys/syscall.h>
63 #include <sys/kdebug.h>
64 #include <sys/ktrace.h>
65 #include <../bsd/sys/sysent.h>
/* Forward declarations for routines defined elsewhere. */
extern struct proc *current_proc(void);

/* MSR mask accessors; definitions are not visible in this chunk. */
unsigned int get_msr_exportmask(void);
unsigned int get_msr_nbits(void);
unsigned int get_msr_rbits(void);

/* Builds an LDT data-segment descriptor for the cthread self pointer.
 * NOTE(review): the return type line was lost in extraction (the
 * definition below returns KERN_SUCCESS, so presumably kern_return_t
 * -- confirm against the original source). */
thread_compose_cthread_desc(unsigned int addr, pcb_t pcb);
/*
 * Return the user stack pointer from the machine
 * dependent thread state info.
 *
 * NOTE(review): extraction dropped lines in this function (the
 * signature/name line, the switch statement, braces, and some
 * control flow). The visible code tokens are kept verbatim below;
 * only comments were added. Presumably this is thread_userstack()
 * dispatching on a thread-state "flavor" -- confirm against the
 * original source before relying on structure.
 */
	__unused thread_t thread,
	thread_state_t tstate,
	user_addr_t *user_stack,
	struct i386_saved_state *state;
	i386_thread_state_t *state25;

	/* Old-style flat thread state: stack pointer lives in esp. */
	case i386_THREAD_STATE:	/* FIXME */
		state25 = (i386_thread_state_t *) tstate;
		*user_stack = state25->esp;
		/* Default to the standard user stack base when no custom
		 * stack was supplied.  NOTE(review): the branch structure
		 * tying these three statements together was dropped; the
		 * order shown here is not the original control flow. */
		*user_stack = USRSTACK;
		if (customstack && state25->esp)

	case i386_NEW_THREAD_STATE:
		/* Reject undersized state buffers. */
		if (count < i386_NEW_THREAD_STATE_COUNT)
			return (KERN_INVALID_ARGUMENT);

		state = (struct i386_saved_state *) tstate;

		/* If a valid user stack is specified, use it. */
		*user_stack = USRSTACK;
		if (customstack && uesp)

	/* Unknown flavor. */
	return (KERN_INVALID_ARGUMENT);

	return (KERN_SUCCESS);
/*
 * Extract the user-mode entry point (PC) from machine-dependent
 * thread state, falling back to VM_MIN_ADDRESS when none is given.
 *
 * NOTE(review): the function name/return-type lines and the switch
 * statement were dropped during extraction (presumably this is
 * thread_entrypoint() -- confirm).  Code tokens kept verbatim.
 */
	__unused thread_t thread,
	thread_state_t tstate,
	mach_vm_offset_t *entry_point
	struct i386_saved_state *state;
	i386_thread_state_t *state25;

	/* Seed a default so callers never see 0. */
	if (*entry_point == 0)
		*entry_point = VM_MIN_ADDRESS;

	case i386_THREAD_STATE:
		state25 = (i386_thread_state_t *) tstate;
		*entry_point = state25->eip ? state25->eip : VM_MIN_ADDRESS;

	case i386_NEW_THREAD_STATE:
		/* NOTE(review): this case compares count against
		 * i386_THREAD_STATE_COUNT rather than
		 * i386_NEW_THREAD_STATE_COUNT -- looks like a copy/paste
		 * slip; verify against the original source. */
		if (count < i386_THREAD_STATE_COUNT)
			return (KERN_INVALID_ARGUMENT);

		state = (struct i386_saved_state *) tstate;

		/*
		 * If a valid entry point is specified, use it.
		 */
		*entry_point = state->eip ? state->eip : VM_MIN_ADDRESS;

	return (KERN_SUCCESS);
/*
 * Return a pointer to the thread's saved user-mode register state.
 *
 * NOTE(review): the conditional guarding the two visible statements
 * was dropped in extraction -- presumably `if (th->machine.pcb)`
 * with the printf on the else path (and some return for that path).
 * Code tokens kept verbatim.
 */
struct i386_saved_state *
get_user_regs(thread_t th)
	return(USER_REGS(th));
	printf("[get_user_regs: thread does not have pcb]");
/*
 * Duplicate parent state in child
 * (integer registers, FPU state, and any user-set LDT entry).
 *
 * NOTE(review): signature, braces, and some lines were dropped in
 * extraction (presumably machine_thread_dup(parent, child) -- confirm).
 * Code tokens kept verbatim; comments only were added/edited.
 */
	struct i386_float_state floatregs;

	/* Flush the parent's live FPU context into its pcb if this CPU
	 * currently owns it, so the copy below sees current values.
	 * NOTE(review): this runs before the NULL pcb check below --
	 * verify parent->machine.pcb cannot be NULL here. */
	if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) {
		fp_state_save(parent);

	if (child->machine.pcb == NULL || parent->machine.pcb == NULL)
		return (KERN_FAILURE);

	/* Copy over the i386_saved_state registers */
	child->machine.pcb->iss = parent->machine.pcb->iss;

	/* Check to see if parent is using floating point
	 * and if so, copy the registers to the child
	 * FIXME - make sure this works.
	 */
	if (parent->machine.pcb->ims.ifps) {
		if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS)
			fpu_set_state(child, &floatregs);

	/* FIXME - should a user specified LDT, TSS and V86 info
	 * be duplicated as well? - probably not.
	 */
	/* Duplicate any user LDT entry that was set; a selector of 0
	 * means the parent never configured one. */
	if (parent->machine.pcb->uldt_selector != 0) {
		child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector;
		child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc;

	return (KERN_SUCCESS);
263 * FIXME - thread_set_child
266 void thread_set_child(thread_t child
, int pid
);
268 thread_set_child(thread_t child
, int pid
)
270 child
->machine
.pcb
->iss
.eax
= pid
;
271 child
->machine
.pcb
->iss
.edx
= 1;
272 child
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
274 void thread_set_parent(thread_t parent
, int pid
);
276 thread_set_parent(thread_t parent
, int pid
)
278 parent
->machine
.pcb
->iss
.eax
= pid
;
279 parent
->machine
.pcb
->iss
.edx
= 0;
280 parent
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
/*
 * System Call handling code
 */

/* In-kernel sentinel error codes steering syscall return handling
 * (mirroring the BSD errno conventions). */
#define ERESTART	-1		/* restart syscall */
#define EJUSTRETURN	-2		/* don't modify regs, just return */

#define KERNEL_FUNNEL	1

extern funnel_t * kernel_flock;

/* uthread argument/return-value plumbing, implemented in BSD land. */
extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *);
extern void * get_bsduthreadarg(thread_t);
extern int * get_bsduthreadrval(thread_t th);
extern int * get_bsduthreadlowpridelay(thread_t th);

extern long fuword(vm_offset_t);

extern void unix_syscall(struct i386_saved_state *);
extern void unix_syscall_return(int);

/* following implemented in bsd/dev/i386/unix_signal.c */
int __pthread_cset(struct sysent *);
void __pthread_creset(struct sysent *);
/*
 * unix_syscall_return: finish a BSD system call that completed
 * asynchronously (e.g. after a continuation), translating the error
 * code into user-visible register state and never returning.
 *
 * NOTE(review): extraction dropped lines throughout (local
 * declarations of thread/rval/code/params/p, braces, the ERESTART
 * body, the kdebug/ktrace guards).  Code tokens kept verbatim.
 */
unix_syscall_return(int error)
	struct i386_saved_state *regs;
	struct sysent *callp;
	volatile int *lowpri_delay;

	thread = current_thread();
	rval = get_bsduthreadrval(thread);
	lowpri_delay = get_bsduthreadlowpridelay(thread);

	regs = USER_REGS(thread);

	/* reconstruct code for tracing before blasting eax */
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
		/* Indirect syscall (syscall #0): real number is the first
		 * word on the user stack. */
		code = fuword(params);

	if (error == ERESTART) {
	else if (error != EJUSTRETURN) {
		/* Error path: carry set tells libc the call failed. */
		regs->efl |= EFL_CF;	/* carry bit */
	} else { /* (not error) */
		regs->efl &= ~EFL_CF;

	ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK));

	/* Re-arm pthread cancellation state taken in unix_syscall(). */
	__pthread_creset(callp);

	if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	/*
	 * task is marked as a low priority I/O type
	 * and the I/O we issued while in this system call
	 * collided with normal I/O operations... we'll
	 * delay in order to mitigate the impact of this
	 * task on the normal operation of the system
	 */
	IOSleep(*lowpri_delay);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, rval[0], rval[1], 0, 0);

	/* Does not return: resumes the thread in user mode. */
	thread_exception_return();
/*
 * unix_syscall: main entry for BSD system calls on i386, called from
 * the trap path with the saved user register state.  Decodes the
 * syscall number, copies in arguments from the user stack, runs the
 * handler, and encodes the result back into eax/edx/eflags.
 *
 * NOTE(review): extraction dropped lines throughout (declarations of
 * thread/code/params/vt/nargs/error/funnel_type/p/ip, braces, the
 * error-path register writes, the ERESTART body, trace guards).
 * Code tokens kept verbatim.
 */
unix_syscall(struct i386_saved_state *regs)
	struct sysent *callp;
	volatile int *lowpri_delay;

	thread = current_thread();
	rval = get_bsduthreadrval(thread);
	lowpri_delay = get_bsduthreadlowpridelay(thread);

	thread->task->syscalls_unix++;		/* MP-safety ignored */

	//printf("[scall : eax %x]", regs->eax);
	/* Arguments start one word above uesp (past the return address). */
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
		/* Indirect syscall: fetch the real number from user stack
		 * and advance past it. */
		code = fuword(params);
		params += sizeof (int);
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];

	vt = get_bsduthreadarg(thread);

	/* Copy the declared number of argument words from user space;
	 * bail back to user mode on fault. */
	if ((nargs = (callp->sy_narg * sizeof (int))) &&
	    (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) {
		thread_exception_return();

	if ((error = __pthread_cset(callp))) {
		/* cancelled system call; let it return with EINTR for handling */
		thread_exception_return();

	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
	if(funnel_type == KERNEL_FUNNEL)
		(void) thread_funnel_set(kernel_flock, TRUE);

	(void) set_bsduthreadargs(thread, regs, NULL);

	if (callp->sy_narg > 8)
		panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg);

	ktrsyscall(p, code, callp->sy_narg, vt, funnel_type);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
		*ip, *(ip+1), *(ip+2), *(ip+3), 0);

	/* Dispatch to the actual handler. */
	error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]);

	/* May be needed with vfork changes */
	regs = USER_REGS(thread);

	if (error == ERESTART) {
	else if (error != EJUSTRETURN) {
		regs->efl |= EFL_CF;	/* carry bit */
	} else { /* (not error) */
		regs->efl &= ~EFL_CF;

	ktrsysret(p, code, error, rval[0], funnel_type);

	__pthread_creset(callp);

	if(funnel_type != NO_FUNNEL)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	/*
	 * task is marked as a low priority I/O type
	 * and the I/O we issued while in this system call
	 * collided with normal I/O operations... we'll
	 * delay in order to mitigate the impact of this
	 * task on the normal operation of the system
	 */
	IOSleep(*lowpri_delay);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, rval[0], rval[1], 0, 0);

	/* Does not return. */
	thread_exception_return();
/*
 * machdep_syscall: dispatch machine-dependent syscalls through
 * machdep_call_table, copying the declared number of int arguments
 * from the user stack and storing the result in eax.
 *
 * NOTE(review): extraction dropped lines here -- the return type and
 * trapno/nargs/args declarations, the `switch (nargs)` wrapper around
 * the args_1..args_4 calls, the copyin destination argument, and
 * braces.  Code tokens kept verbatim.
 */
machdep_syscall( struct i386_saved_state *regs)
	machdep_call_t *entry;

	/* Reject out-of-range trap numbers. */
	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->eax = (unsigned int)kern_invalid(NULL);
		thread_exception_return();

	entry = &machdep_call_table[trapno];
	nargs = entry->nargs;

	/* Copy args from just above the user return address.
	 * NOTE(review): copyin's middle (destination) argument was
	 * lost in extraction -- presumably a local args buffer. */
	if (copyin((user_addr_t) regs->uesp + sizeof (int),
			nargs * sizeof (int))) {
		regs->eax = KERN_INVALID_ADDRESS;
		thread_exception_return();

	/* Arity-specific dispatch (originally a switch on nargs). */
		regs->eax = (*entry->routine.args_1)(args[0]);
		regs->eax = (*entry->routine.args_2)(args[0],args[1]);
		regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]);
		regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]);
		panic("machdep_syscall(): too many args");

		regs->eax = (*entry->routine.args_0)();

	/* Drop the funnel if this thread still holds one. */
	if (current_thread()->funnel_lock)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	/* Does not return. */
	thread_exception_return();
/*
 * Build a flat, user-privilege, read/write data-segment descriptor
 * whose base is `addr`, store it in the pcb, and install it in the
 * LDT slot used for the cthread self pointer.
 *
 * NOTE(review): the return type line, braces, and a few comment
 * lines were dropped in extraction.  Code tokens kept verbatim.
 */
thread_compose_cthread_desc(unsigned int addr, pcb_t pcb)
	struct real_descriptor desc;

	/* Keep descriptor construction + LDT install atomic w.r.t.
	 * preemption so we don't migrate CPUs mid-update. */
	mp_disable_preemption();

	/* Split the 32-bit base across the descriptor's three base
	 * fields per the IA-32 descriptor layout. */
	desc.base_low = addr & 0xffff;
	desc.base_med = (addr >> 16) & 0xff;
	desc.base_high = (addr >> 24) & 0xff;
	desc.access = ACC_P|ACC_PL_U|ACC_DATA_W;
	desc.granularity = SZ_32|SZ_G;
	pcb->cthread_desc = desc;
	*ldt_desc_p(USER_CTHREAD) = desc;

	mp_enable_preemption();

	return(KERN_SUCCESS);
/*
 * Record the user thread's "self" pointer in the current pcb.
 * NOTE(review): return type and braces dropped in extraction
 * (returns KERN_SUCCESS, so presumably kern_return_t).
 */
thread_set_cthread_self(uint32_t self)
	current_thread()->machine.pcb->cthread_self = self;
	return (KERN_SUCCESS);
/*
 * Return the "self" pointer previously stored by
 * thread_set_cthread_self/thread_fast_set_cthread_self, smuggled
 * through a kern_return_t cast.
 * NOTE(review): return type and braces dropped in extraction.
 */
thread_get_cthread_self(void)
	return ((kern_return_t)current_thread()->machine.pcb->cthread_self);
/*
 * Fast path for setting the cthread self pointer: rebuilds the
 * USER_CTHREAD LDT descriptor around `self` and records the value
 * in the pcb.  Returns the USER_CTHREAD selector for the caller to
 * load into its segment register.
 *
 * NOTE(review): return type, braces, and the local `pcb` declaration
 * were dropped in extraction.  Code tokens kept verbatim.
 */
thread_fast_set_cthread_self(uint32_t self)
	pcb = (pcb_t)current_thread()->machine.pcb;
	thread_compose_cthread_desc(self, pcb);
	pcb->cthread_self = self;	/* preserve old func too */
	return (USER_CTHREAD);
/*
 * The thread_set_user_ldt routine is the interface for the user-level
 * settable LDT entry feature.  Allowing a user to create arbitrary
 * LDT entries seems to be too large a security hole, so instead this
 * mechanism lets user-level processes have one LDT entry that can be
 * used in conjunction with the FS register.
 *
 * Swapping occurs inside pcb.c along with initialization when a
 * thread is created.  The basic theory of operation: pcb->uldt_selector
 * contains either 0, meaning the process has not set up any entry, or
 * the selector to be used in the FS register.  pcb->uldt_desc contains
 * the descriptor the user set up, stored in machine-usable LDT format.
 *
 * Currently one entry is shared by all threads (USER_SETTABLE), but
 * this could be changed in the future by changing how this routine
 * allocates the selector.
 *
 * `address` is the linear address of the start of the data area;
 * `size` is the size in bytes; `flags` should always be 0 for now
 * (reserved for future R/W permission control).  The segment is
 * created as a data segment, up to 1 megabyte, full read/write.
 *
 * Returns the segment selector, or -1 on any error.
 *
 * NOTE(review): extraction dropped lines here -- the return type,
 * braces, local declarations (mycpu, pcb), the guard conditions in
 * front of the two early `return -1` statements, and the fix_desc()
 * call referenced by the comment.  Code tokens kept verbatim.
 */
thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags)
	struct fake_descriptor temp;

		return -1;	// flags not supported
		return -1;	// size too big, 1 meg is the limit

	mp_disable_preemption();
	mycpu = cpu_number();

	// create a "fake" descriptor so we can use fix_desc()
	// to build a real one...
	// 32 bit default operation size
	// standard read/write perms for a data segment
	pcb = (pcb_t)current_thread()->machine.pcb;
	temp.offset = address;
	temp.lim_or_seg = size;
	temp.size_or_wdct = SZ_32;
	temp.access = ACC_P|ACC_PL_U|ACC_DATA_W;

	// turn this into a real descriptor

	// set up our data in the pcb
	pcb->uldt_desc = *(struct real_descriptor *)&temp;
	pcb->uldt_selector = USER_SETTABLE;	// set the selector value

	// now set it up in the current table...
	*ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor *)&temp;

	mp_enable_preemption();

	return USER_SETTABLE;
/*
 * Trap handler for obsolete Mach 2.5 system calls: log the offending
 * EIP and call number.  NOTE(review): return type, braces, and at
 * least one following line were dropped in extraction; also note the
 * "Atttempt" typo in the message string (runtime text, left as-is).
 */
mach25_syscall(struct i386_saved_state *regs)
	printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n",
		regs->eip, regs->eax, -regs->eax);
#endif	/* MACH_BSD */
/* This routine is called from assembly before each and every mach
 * trap; it snapshots up to the first three arguments for kdebug
 * tracing and passes the call number straight back through.
 */
extern unsigned int mach_call_start(unsigned int, unsigned int *);

/* NOTE(review): return type, braces, and local declarations (argc, i)
 * were dropped in extraction.  kdarg has room for 3 words but argc
 * comes from the trap table -- the clamping of argc to 3 appears to
 * be among the dropped lines; verify against the original source. */
mach_call_start(unsigned int call_number, unsigned int *args)
	unsigned int kdarg[3];

	current_thread()->task->syscalls_mach++;	/* MP-safety ignored */

	/* Always prepare to trace mach system calls */
	/* call_number is pre-shifted by 4 on entry from assembly. */
	argc = mach_trap_table[call_number>>4].mach_trap_arg_count;
	for (i=0; i < argc; i++)
		kdarg[i] = (int)*(args + i);

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number>>4)) | DBG_FUNC_START,
		kdarg[0], kdarg[1], kdarg[2], 0, 0);

	return call_number; /* pass this back thru */
/* This routine is called from assembly after each mach system call;
 * it emits the matching kdebug end event and passes the return value
 * straight back through.
 */
extern unsigned int mach_call_end(unsigned int, unsigned int);

/* NOTE(review): return type, braces, and the tail of the
 * KERNEL_DEBUG_CONSTANT argument list were dropped in extraction. */
mach_call_end(unsigned int call_number, unsigned int retval)
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number>>4)) | DBG_FUNC_END,
	return retval;		/* pass this back thru */
/* Generic signature through which every mach trap handler is invoked. */
typedef kern_return_t (*mach_call_t)(void *);

/* NOTE(review): the remaining parameters of this prototype
 * (presumably unsigned int arg1..arg9 and the trailing paren) were
 * dropped in extraction. */
extern __attribute__((regparm(1))) kern_return_t
mach_call_munger(unsigned int call_number,

/* Argument marshalling block handed to the trap function.
 * NOTE(review): the field declarations (arg1..arg9) and closing brace
 * were dropped in extraction; the nine-element initializer below
 * corroborates nine fields. */
struct mach_call_args {

/*
 * mach_call_munger: repack the register/stack-passed trap arguments
 * into a mach_call_args struct and invoke the handler from
 * mach_trap_table.  regparm(1) => call_number arrives in a register.
 *
 * NOTE(review): the rest of the parameter list, braces, the local
 * argc declaration, and the `switch (argc)` wrapper around the case
 * labels were dropped in extraction.  Code tokens kept verbatim.
 */
__attribute__((regparm(1))) kern_return_t
mach_call_munger(unsigned int call_number,
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };

	current_thread()->task->syscalls_mach++;	/* MP-safety ignored */

	argc = mach_trap_table[call_number].mach_trap_arg_count;

	/* Deliberate fallthrough: starting at `argc`, copy every
	 * argument from argN down to arg1. */
	case 9: args.arg9 = arg9;
	case 8: args.arg8 = arg8;
	case 7: args.arg7 = arg7;
	case 6: args.arg6 = arg6;
	case 5: args.arg5 = arg5;
	case 4: args.arg4 = arg4;
	case 3: args.arg3 = arg3;
	case 2: args.arg2 = arg2;
	case 1: args.arg1 = arg1;

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, 0, 0);

	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;
	retval = mach_call(&args);

	/* NOTE(review): the end-event argument list (and presumably a
	 * `return retval;`) were dropped in extraction. */
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
/*
 * thread_setuserstack:
 *
 * Sets the user stack pointer into the machine
 * dependent thread state info.
 *
 * NOTE(review): the signature lines (return type, name, thread
 * parameter) and braces were dropped in extraction.
 */
	mach_vm_address_t user_stack)
	struct i386_saved_state *ss = get_user_regs(thread);

	/* Truncate to 32 bits -- i386 user addresses fit. */
	ss->uesp = CAST_DOWN(unsigned int,user_stack);
/*
 * thread_adjuserstack:
 *
 * Returns the adjusted user stack pointer from the machine
 * dependent thread state info.  Used for small (<2G) deltas.
 *
 * NOTE(review): the signature lines and, evidently, the statement
 * that actually applies the adjustment to ss->uesp were dropped in
 * extraction -- only the final read-back is visible below.
 */
	struct i386_saved_state *ss = get_user_regs(thread);

	return CAST_USER_ADDR_T(ss->uesp);
/*
 * thread_setentrypoint:
 *
 * Sets the user PC into the machine
 * dependent thread state info.
 *
 * NOTE(review): the return type, the thread parameter line, braces,
 * and the closing of this function (beyond the visible chunk) were
 * dropped in extraction.
 */
thread_setentrypoint(
	mach_vm_address_t entry)
	struct i386_saved_state *ss = get_user_regs(thread);

	/* Truncate to 32 bits -- i386 user addresses fit. */
	ss->eip = CAST_DOWN(unsigned int,entry);