/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifdef	MACH_BSD
#include <mach_debug.h>
#include <mach_ldebug.h>

#include <mach/kern_return.h>
#include <mach/mach_traps.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>

#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/syscall_sw.h>
#include <ipc/ipc_port.h>
#include <vm/vm_kern.h>

#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/thread.h>
#include <i386/eflags.h>
#include <i386/proc_reg.h>
#include <i386/user_ldt.h>
#include <i386/iopb_entries.h>
#include <i386/machdep_call.h>
#include <i386/misc_protos.h>
#include <i386/mp_desc.h>
#include <i386/vmparam.h>
#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/ktrace.h>
#include <../bsd/sys/sysent.h>
extern struct proc *current_proc(void);

unsigned int get_msr_exportmask(void);

unsigned int get_msr_nbits(void);

unsigned int get_msr_rbits(void);

kern_return_t
thread_compose_cthread_desc(unsigned int addr, pcb_t pcb);
/*
 * thread_userstack:
 *
 * Return the user stack pointer from the machine
 * dependent thread state info.
 */
kern_return_t
thread_userstack(
	__unused thread_t	thread,
	int			flavor,
	thread_state_t		tstate,
	unsigned int		count,
	user_addr_t		*user_stack,
	int			*customstack
)
{
	struct i386_saved_state	*state;
	i386_thread_state_t	*state25;
	vm_offset_t		uesp;

	switch (flavor) {
	case i386_THREAD_STATE:	/* FIXME */
		state25 = (i386_thread_state_t *) tstate;
		if (state25->esp)
			*user_stack = state25->esp;
		else
			*user_stack = USRSTACK;
		if (customstack && state25->esp)
			*customstack = 1;
		else
			*customstack = 0;
		break;

	case i386_NEW_THREAD_STATE:
		if (count < i386_NEW_THREAD_STATE_COUNT)
			return (KERN_INVALID_ARGUMENT);
		state = (struct i386_saved_state *) tstate;
		uesp = state->uesp;

		/* If a valid user stack is specified, use it. */
		if (uesp)
			*user_stack = uesp;
		else
			*user_stack = USRSTACK;
		if (customstack && uesp)
			*customstack = 1;
		else
			*customstack = 0;
		break;

	default:
		return (KERN_INVALID_ARGUMENT);
	}

	return (KERN_SUCCESS);
}
kern_return_t
thread_entrypoint(
	__unused thread_t	thread,
	int			flavor,
	thread_state_t		tstate,
	unsigned int		count,
	mach_vm_offset_t	*entry_point
)
{
	struct i386_saved_state	*state;
	i386_thread_state_t	*state25;

	if (*entry_point == 0)
		*entry_point = VM_MIN_ADDRESS;

	switch (flavor) {
	case i386_THREAD_STATE:
		state25 = (i386_thread_state_t *) tstate;
		*entry_point = state25->eip ? state25->eip : VM_MIN_ADDRESS;
		break;

	case i386_NEW_THREAD_STATE:
		if (count < i386_THREAD_STATE_COUNT)
			return (KERN_INVALID_ARGUMENT);
		state = (struct i386_saved_state *) tstate;

		/*
		 * If a valid entry point is specified, use it.
		 */
		*entry_point = state->eip ? state->eip : VM_MIN_ADDRESS;
		break;
	}

	return (KERN_SUCCESS);
}
struct i386_saved_state *
get_user_regs(thread_t th)
{
	if (th->machine.pcb)
		return (USER_REGS(th));
	else {
		printf("[get_user_regs: thread does not have pcb]");
		return NULL;
	}
}
/*
 * Duplicate parent state in child
 */
kern_return_t
machine_thread_dup(
	thread_t	parent,
	thread_t	child
)
{
	struct i386_float_state	floatregs;

	/* Save the FPU state */
	if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) {
		fp_state_save(parent);
	}

	if (child->machine.pcb == NULL || parent->machine.pcb == NULL)
		return (KERN_FAILURE);

	/* Copy over the i386_saved_state registers */
	child->machine.pcb->iss = parent->machine.pcb->iss;

	/* Check to see if parent is using floating point
	 * and if so, copy the registers to the child
	 * FIXME - make sure this works.
	 */
	if (parent->machine.pcb->ims.ifps) {
		if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS)
			fpu_set_state(child, &floatregs);
	}

	/* FIXME - should a user specified LDT, TSS and V86 info
	 * be duplicated as well?? - probably not.
	 */
	/* Duplicate any user LDT entry that was set; this seems appropriate. */
	if (parent->machine.pcb->uldt_selector != 0) {
		child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector;
		child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc;
	}

	return (KERN_SUCCESS);
}
/*
 * FIXME - thread_set_child
 */

void thread_set_child(thread_t child, int pid);
void
thread_set_child(thread_t child, int pid)
{
	child->machine.pcb->iss.eax = pid;
	child->machine.pcb->iss.edx = 1;
	child->machine.pcb->iss.efl &= ~EFL_CF;
}

void thread_set_parent(thread_t parent, int pid);
void
thread_set_parent(thread_t parent, int pid)
{
	parent->machine.pcb->iss.eax = pid;
	parent->machine.pcb->iss.edx = 0;
	parent->machine.pcb->iss.efl &= ~EFL_CF;
}
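
/*
 * A note on the convention encoded above (stated as an assumption about the
 * user-level side, which lives outside this file): fork-style traps return
 * the pid in eax to both parent and child, use edx as a "which side am I"
 * flag (1 in the child, 0 in the parent), and clear the carry flag to signal
 * success.  A libc fork stub can then do, roughly:
 *
 *	pid = <trap>;		// eax after returning to user mode
 *	if (is_child)		// edx was 1
 *		pid = 0;	// child sees fork() == 0
 *
 * so the kernel-side helpers above only need to patch eax, edx and EFL_CF
 * in the saved state.
 */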
/*
 * System Call handling code
 */

#define	ERESTART	-1		/* restart syscall */
#define	EJUSTRETURN	-2		/* don't modify regs, just return */

#define	NO_FUNNEL	0
#define	KERNEL_FUNNEL	1

extern funnel_t * kernel_flock;

extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *);
extern void * get_bsduthreadarg(thread_t);
extern int * get_bsduthreadrval(thread_t th);
extern int * get_bsduthreadlowpridelay(thread_t th);

extern long fuword(vm_offset_t);

extern void unix_syscall(struct i386_saved_state *);
extern void unix_syscall_return(int);

/* following implemented in bsd/dev/i386/unix_signal.c */
int __pthread_cset(struct sysent *);

void __pthread_creset(struct sysent *);
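
/*
 * unix_syscall_return() is the tail half of the BSD syscall path.  It is
 * assumed here (its callers live outside this file) that system calls which
 * block and later resume on a continuation finish through this routine
 * rather than falling out of unix_syscall(), which is why the error
 * disposition, ktrace, funnel and low-priority-I/O handling below mirror
 * the end of unix_syscall().
 */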
void
unix_syscall_return(int error)
{
	thread_t thread;
	volatile int *rval;
	struct i386_saved_state *regs;
	struct proc *p;
	unsigned int code;
	vm_offset_t params;
	struct sysent *callp;
	volatile int *lowpri_delay;

	thread = current_thread();
	rval = get_bsduthreadrval(thread);
	lowpri_delay = get_bsduthreadlowpridelay(thread);
	p = current_proc();

	regs = USER_REGS(thread);
	/* reconstruct code for tracing before blasting eax */
	code = regs->eax;
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
		code = fuword(params);
	}
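
	/*
	 * Error disposition below follows the usual BSD/i386 convention (noted
	 * here as an assumption for readability, not a change in behavior):
	 * ERESTART backs the saved EIP up so the trapping instruction is
	 * re-executed, EJUSTRETURN leaves the register state untouched, a
	 * nonzero errno is returned in eax with the carry flag set, and
	 * success clears the carry flag with results in eax/edx.
	 */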
	if (error == ERESTART) {
		regs->eip -= 7;
	}
	else if (error != EJUSTRETURN) {
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			regs->eax = rval[0];
			regs->edx = rval[1];
			regs->efl &= ~EFL_CF;
		}
	}
	ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK));

	__pthread_creset(callp);

	if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	if (*lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(*lowpri_delay);
		*lowpri_delay = 0;
	}
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, rval[0], rval[1], 0, 0);

	thread_exception_return();
	/* NOTREACHED */
}
void
unix_syscall(struct i386_saved_state *regs)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		nargs;
	int		error;
	int		*rval;
	int		funnel_type;
	vm_offset_t	params;
	struct proc	*p;
	volatile int	*lowpri_delay;

	thread = current_thread();
	p = current_proc();
	rval = get_bsduthreadrval(thread);
	lowpri_delay = get_bsduthreadlowpridelay(thread);

	thread->task->syscalls_unix++;		/* MP-safety ignored */
	//printf("[scall : eax %x]", regs->eax);
	code = regs->eax;
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
		/* indirect system call: the real code is the first word on the user stack */
		code = fuword(params);
		params += sizeof (int);
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	}
	vt = get_bsduthreadarg(thread);

	if ((nargs = (callp->sy_narg * sizeof (int))) &&
	    (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) {
		regs->eax = error;
		regs->efl |= EFL_CF;
		thread_exception_return();
		/* NOTREACHED */
	}

	rval[0] = 0;
	rval[1] = regs->edx;

	if ((error = __pthread_cset(callp))) {
		/* cancelled system call; let it return with EINTR for handling */
		regs->eax = error;
		regs->efl |= EFL_CF;
		thread_exception_return();
		/* NOTREACHED */
	}
	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
	if (funnel_type == KERNEL_FUNNEL)
		(void) thread_funnel_set(kernel_flock, TRUE);

	(void) set_bsduthreadargs(thread, regs, NULL);

	if (callp->sy_narg > 8)
		panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg);

	ktrsyscall(p, code, callp->sy_narg, vt, funnel_type);
	{
		int *ip = (int *)vt;

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			*ip, *(ip+1), *(ip+2), *(ip+3), 0);
	}

	error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]);
	/* May be needed with vfork changes */
	regs = USER_REGS(thread);

	if (error == ERESTART) {
		regs->eip -= 7;
	}
	else if (error != EJUSTRETURN) {
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			regs->eax = rval[0];
			regs->edx = rval[1];
			regs->efl &= ~EFL_CF;
		}
	}
	ktrsysret(p, code, error, rval[0], funnel_type);

	__pthread_creset(callp);

	if (funnel_type != NO_FUNNEL)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	if (*lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(*lowpri_delay);
		*lowpri_delay = 0;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, rval[0], rval[1], 0, 0);

	thread_exception_return();
	/* NOTREACHED */
}
void
machdep_syscall( struct i386_saved_state *regs)
{
	int			trapno, nargs;
	machdep_call_t		*entry;

	trapno = regs->eax;
	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->eax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}

	entry = &machdep_call_table[trapno];
	nargs = entry->nargs;

	if (nargs > 0) {
		int	args[nargs];

		if (copyin((user_addr_t) regs->uesp + sizeof (int),
			    (char *) args,
			    nargs * sizeof (int))) {

			regs->eax = KERN_INVALID_ADDRESS;

			thread_exception_return();
			/* NOTREACHED */
		}

		switch (nargs) {
		case 1:
			regs->eax = (*entry->routine.args_1)(args[0]);
			break;
		case 2:
			regs->eax = (*entry->routine.args_2)(args[0],args[1]);
			break;
		case 3:
			regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]);
			break;
		case 4:
			regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]);
			break;
		default:
			panic("machdep_syscall(): too many args");
		}
	}
	else
		regs->eax = (*entry->routine.args_0)();

	if (current_thread()->funnel_lock)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	thread_exception_return();
	/* NOTREACHED */
}
kern_return_t
thread_compose_cthread_desc(unsigned int addr, pcb_t pcb)
{
	struct real_descriptor desc;

	mp_disable_preemption();
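
	/*
	 * Descriptor layout note (added commentary; this is the standard IA-32
	 * segment descriptor format rather than anything specific to this
	 * routine): the 32-bit base is split across base_low/base_med/base_high,
	 * the access byte marks the segment present, user-privilege, writable
	 * data (ACC_P|ACC_PL_U|ACC_DATA_W), and the granularity byte selects a
	 * 32-bit segment with page granularity (SZ_32|SZ_G).
	 */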
	desc.base_low = addr & 0xffff;
	desc.base_med = (addr >> 16) & 0xff;
	desc.base_high = (addr >> 24) & 0xff;
	desc.access = ACC_P|ACC_PL_U|ACC_DATA_W;
	desc.granularity = SZ_32|SZ_G;
	pcb->cthread_desc = desc;
	*ldt_desc_p(USER_CTHREAD) = desc;

	mp_enable_preemption();

	return (KERN_SUCCESS);
}
kern_return_t
thread_set_cthread_self(uint32_t self)
{
	current_thread()->machine.pcb->cthread_self = self;

	return (KERN_SUCCESS);
}
kern_return_t
thread_get_cthread_self(void)
{
	return ((kern_return_t)current_thread()->machine.pcb->cthread_self);
}
kern_return_t
thread_fast_set_cthread_self(uint32_t self)
{
	pcb_t pcb;

	pcb = (pcb_t)current_thread()->machine.pcb;
	thread_compose_cthread_desc(self, pcb);
	pcb->cthread_self = self;	/* preserve old func too */
	return (USER_CTHREAD);
}
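
/*
 * Usage sketch (an assumption about the user-level side, not code from this
 * file): a threading library stores its per-thread "self" pointer with the
 * fast trap above and then addresses it through the USER_CTHREAD segment
 * built by thread_compose_cthread_desc(), e.g. by loading the returned
 * selector into %gs and reading %gs:0, so pthread_self()-style lookups need
 * no further kernel involvement.
 */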
/*
 * thread_set_user_ldt routine is the interface for the user level
 * settable ldt entry feature.  Allowing a user to create arbitrary
 * ldt entries seems to be too large of a security hole, so instead
 * this mechanism is in place to allow user level processes to have
 * an ldt entry that can be used in conjunction with the FS register.
 *
 * Swapping occurs inside the pcb.c file along with initialization
 * when a thread is created. The basic functioning theory is that the
 * pcb->uldt_selector variable will contain either 0, meaning the
 * process has not set up any entry, or the selector to be used in
 * the FS register. pcb->uldt_desc contains the actual descriptor the
 * user has set up, stored in machine-usable ldt format.
 *
 * Currently one entry is shared by all threads (USER_SETTABLE), but
 * this could be changed in the future by changing how this routine
 * allocates the selector. There seems to be no real reason at this
 * time to have this added feature, but in the future it might be
 * needed.
 *
 * address is the linear address of the start of the data area,
 * size is the size in bytes of the area, and flags should always be
 * set to 0 for now; in the future it could be used to set R/W
 * permissions or other functions. Currently the segment is created
 * as a data segment up to 1 megabyte in size with full read/write
 * permissions only.
 *
 * This call returns the segment selector or -1 if any error occurs.
 */
kern_return_t
thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags)
{
	pcb_t pcb;
	struct fake_descriptor temp;
	int mycpu;

	if (flags != 0)
		return -1;		// flags not supported
	if (size > 0xFFFFF)
		return -1;		// size too big, 1 meg is the limit

	mp_disable_preemption();
	mycpu = cpu_number();

	// create a "fake" descriptor so we can use fix_desc()
	// to build a real one...
	// 32 bit default operation size
	// standard read/write perms for a data segment
	pcb = (pcb_t)current_thread()->machine.pcb;
	temp.offset = address;
	temp.lim_or_seg = size;
	temp.size_or_wdct = SZ_32;
	temp.access = ACC_P|ACC_PL_U|ACC_DATA_W;

	// turn this into a real descriptor
	fix_desc(&temp, 1);

	// set up our data in the pcb
	pcb->uldt_desc = *(struct real_descriptor *)&temp;
	pcb->uldt_selector = USER_SETTABLE;	// set the selector value

	// now set it up in the current table...
	*ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor *)&temp;

	mp_enable_preemption();

	return USER_SETTABLE;
}
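
/*
 * Usage sketch (assumptions, not code from this file): a process would
 * typically reach thread_set_user_ldt() through the machine-dependent
 * syscall path handled by machdep_syscall() above, passing the linear
 * address and size of its data area with flags == 0.  On success the
 * returned USER_SETTABLE selector can be loaded into %fs, after which
 * %fs-relative accesses resolve inside the described segment.
 */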
void
mach25_syscall(struct i386_saved_state *regs)
{
	printf("*** Attempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n",
			regs->eip, regs->eax, -regs->eax);
	panic("FIXME!");
}
#endif	/* MACH_BSD */
/* This routine is called from assembly before each and every mach trap.
 */

extern unsigned int mach_call_start(unsigned int, unsigned int *);

unsigned int
mach_call_start(unsigned int call_number, unsigned int *args)
{
	int i, argc;
	unsigned int kdarg[3];

	current_thread()->task->syscalls_mach++;	/* MP-safety ignored */

	/* Always prepare to trace mach system calls */
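
	/*
	 * Note (an assumption recorded for clarity): call_number arrives from
	 * the assembly trap stub pre-scaled by the size of a mach_trap_table
	 * entry, which is why it is shifted right by 4 wherever it is used as
	 * a table index below; mach_call_munger() makes the same adjustment.
	 */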
	kdarg[0] = 0;
	kdarg[1] = 0;
	kdarg[2] = 0;

	argc = mach_trap_table[call_number>>4].mach_trap_arg_count;

	if (argc > 3)
		argc = 3;

	for (i = 0; i < argc; i++)
		kdarg[i] = (int)*(args + i);

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number>>4)) | DBG_FUNC_START,
		kdarg[0], kdarg[1], kdarg[2], 0, 0);

	return call_number;	/* pass this back thru */
}
/* This routine is called from assembly after each mach system call
 */

extern unsigned int mach_call_end(unsigned int, unsigned int);

unsigned int
mach_call_end(unsigned int call_number, unsigned int retval)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number>>4)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	return retval;	/* pass this back thru */
}
typedef kern_return_t (*mach_call_t)(void *);

extern __attribute__((regparm(1))) kern_return_t
mach_call_munger(unsigned int call_number,
		 unsigned int arg1, unsigned int arg2, unsigned int arg3,
		 unsigned int arg4, unsigned int arg5, unsigned int arg6,
		 unsigned int arg7, unsigned int arg8, unsigned int arg9);

struct mach_call_args {
	unsigned int arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9;
};

__attribute__((regparm(1))) kern_return_t
mach_call_munger(unsigned int call_number,
		 unsigned int arg1, unsigned int arg2, unsigned int arg3,
		 unsigned int arg4, unsigned int arg5, unsigned int arg6,
		 unsigned int arg7, unsigned int arg8, unsigned int arg9)
{
	int argc;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	current_thread()->task->syscalls_mach++;	/* MP-safety ignored */
	call_number >>= 4;
= mach_trap_table
[call_number
].mach_trap_arg_count
;
782 case 9: args
.arg9
= arg9
;
783 case 8: args
.arg8
= arg8
;
784 case 7: args
.arg7
= arg7
;
785 case 6: args
.arg6
= arg6
;
786 case 5: args
.arg5
= arg5
;
787 case 4: args
.arg4
= arg4
;
788 case 3: args
.arg3
= arg3
;
789 case 2: args
.arg2
= arg2
;
790 case 1: args
.arg1
= arg1
;
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, 0, 0);

	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;
	retval = mach_call(&args);

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	return retval;
}
/*
 * thread_setuserstack:
 *
 * Sets the user stack pointer into the machine
 * dependent thread state info.
 */
void
thread_setuserstack(
	thread_t		thread,
	mach_vm_address_t	user_stack)
{
	struct i386_saved_state *ss = get_user_regs(thread);

	ss->uesp = CAST_DOWN(unsigned int, user_stack);
}
/*
 * thread_adjuserstack:
 *
 * Returns the adjusted user stack pointer from the machine
 * dependent thread state info.  Used for small (<2G) deltas.
 */
uint64_t
thread_adjuserstack(
	thread_t	thread,
	int		adjust)
{
	struct i386_saved_state *ss = get_user_regs(thread);

	ss->uesp += adjust;

	return CAST_USER_ADDR_T(ss->uesp);
}
/*
 * thread_setentrypoint:
 *
 * Sets the user PC into the machine
 * dependent thread state info.
 */
void
thread_setentrypoint(
	thread_t		thread,
	mach_vm_address_t	entry)
{
	struct i386_saved_state *ss = get_user_regs(thread);

	ss->eip = CAST_DOWN(unsigned int, entry);
}