2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
32 #include <mach_debug.h>
33 #include <mach_ldebug.h>
35 #include <mach/kern_return.h>
36 #include <mach/mach_traps.h>
37 #include <mach/thread_status.h>
38 #include <mach/vm_param.h>
40 #include <kern/counters.h>
41 #include <kern/cpu_data.h>
42 #include <kern/mach_param.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/sched_prim.h>
46 #include <kern/misc_protos.h>
47 #include <kern/assert.h>
49 #include <kern/syscall_sw.h>
50 #include <ipc/ipc_port.h>
51 #include <vm/vm_kern.h>
54 #include <i386/cpu_data.h>
55 #include <i386/cpu_number.h>
56 #include <i386/thread.h>
57 #include <i386/eflags.h>
58 #include <i386/proc_reg.h>
61 #include <i386/user_ldt.h>
63 #include <i386/iopb_entries.h>
64 #include <i386/machdep_call.h>
65 #include <i386/misc_protos.h>
66 #include <i386/cpu_data.h>
67 #include <i386/cpu_number.h>
68 #include <i386/mp_desc.h>
69 #include <i386/vmparam.h>
70 #include <sys/syscall.h>
71 #include <sys/kdebug.h>
72 #include <sys/ktrace.h>
73 #include <../bsd/sys/sysent.h>
75 extern struct proc
*current_proc(void);
96 unsigned int get_msr_exportmask(void);
98 unsigned int get_msr_nbits(void);
100 unsigned int get_msr_rbits(void);
103 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
);
110 * Return the user stack pointer from the machine
111 * dependent thread state info.
115 __unused thread_t thread
,
117 thread_state_t tstate
,
119 user_addr_t
*user_stack
,
123 struct i386_saved_state
*state
;
124 i386_thread_state_t
*state25
;
131 case i386_THREAD_STATE
: /* FIXME */
132 state25
= (i386_thread_state_t
*) tstate
;
134 *user_stack
= state25
->esp
;
136 *user_stack
= USRSTACK
;
137 if (customstack
&& state25
->esp
)
143 case i386_NEW_THREAD_STATE
:
144 if (count
< i386_NEW_THREAD_STATE_COUNT
)
145 return (KERN_INVALID_ARGUMENT
);
147 state
= (struct i386_saved_state
*) tstate
;
151 /* If a valid user stack is specified, use it. */
155 *user_stack
= USRSTACK
;
156 if (customstack
&& uesp
)
162 return (KERN_INVALID_ARGUMENT
);
165 return (KERN_SUCCESS
);
170 __unused thread_t thread
,
172 thread_state_t tstate
,
174 mach_vm_offset_t
*entry_point
177 struct i386_saved_state
*state
;
178 i386_thread_state_t
*state25
;
183 if (*entry_point
== 0)
184 *entry_point
= VM_MIN_ADDRESS
;
187 case i386_THREAD_STATE
:
188 state25
= (i386_thread_state_t
*) tstate
;
189 *entry_point
= state25
->eip
? state25
->eip
: VM_MIN_ADDRESS
;
192 case i386_NEW_THREAD_STATE
:
193 if (count
< i386_THREAD_STATE_COUNT
)
194 return (KERN_INVALID_ARGUMENT
);
196 state
= (struct i386_saved_state
*) tstate
;
199 * If a valid entry point is specified, use it.
201 *entry_point
= state
->eip
? state
->eip
: VM_MIN_ADDRESS
;
206 return (KERN_SUCCESS
);
209 struct i386_saved_state
*
210 get_user_regs(thread_t th
)
213 return(USER_REGS(th
));
215 printf("[get_user_regs: thread does not have pcb]");
221 * Duplicate parent state in child
230 struct i386_float_state floatregs
;
233 /* Save the FPU state */
234 if ((pcb_t
)(per_proc_info
[cpu_number()].fpu_pcb
) == parent
->machine
.pcb
) {
235 fp_state_save(parent
);
239 if (child
->machine
.pcb
== NULL
|| parent
->machine
.pcb
== NULL
)
240 return (KERN_FAILURE
);
242 /* Copy over the i386_saved_state registers */
243 child
->machine
.pcb
->iss
= parent
->machine
.pcb
->iss
;
245 /* Check to see if parent is using floating point
246 * and if so, copy the registers to the child
247 * FIXME - make sure this works.
250 if (parent
->machine
.pcb
->ims
.ifps
) {
251 if (fpu_get_state(parent
, &floatregs
) == KERN_SUCCESS
)
252 fpu_set_state(child
, &floatregs
);
255 /* FIXME - should a user specified LDT, TSS and V86 info
256 * be duplicated as well?? - probably not.
258 // duplicate any user LDT entry that was set; I think this is appropriate.
260 if (parent
->machine
.pcb
->uldt_selector
!= 0) {
261 child
->machine
.pcb
->uldt_selector
= parent
->machine
.pcb
->uldt_selector
;
262 child
->machine
.pcb
->uldt_desc
= parent
->machine
.pcb
->uldt_desc
;
267 return (KERN_SUCCESS
);
271 * FIXME - thread_set_child
274 void thread_set_child(thread_t child
, int pid
);
276 thread_set_child(thread_t child
, int pid
)
278 child
->machine
.pcb
->iss
.eax
= pid
;
279 child
->machine
.pcb
->iss
.edx
= 1;
280 child
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
282 void thread_set_parent(thread_t parent
, int pid
);
284 thread_set_parent(thread_t parent
, int pid
)
286 parent
->machine
.pcb
->iss
.eax
= pid
;
287 parent
->machine
.pcb
->iss
.edx
= 0;
288 parent
->machine
.pcb
->iss
.efl
&= ~EFL_CF
;
294 * System Call handling code
297 #define ERESTART -1 /* restart syscall */
298 #define EJUSTRETURN -2 /* don't modify regs, just return */
302 #define KERNEL_FUNNEL 1
304 extern funnel_t
* kernel_flock
;
306 extern int set_bsduthreadargs (thread_t
, struct i386_saved_state
*, void *);
307 extern void * get_bsduthreadarg(thread_t
);
308 extern int * get_bsduthreadrval(thread_t th
);
309 extern int * get_bsduthreadlowpridelay(thread_t th
);
311 extern long fuword(vm_offset_t
);
313 extern void unix_syscall(struct i386_saved_state
*);
314 extern void unix_syscall_return(int);
316 /* following implemented in bsd/dev/i386/unix_signal.c */
317 int __pthread_cset(struct sysent
*);
319 void __pthread_creset(struct sysent
*);
323 unix_syscall_return(int error
)
327 struct i386_saved_state
*regs
;
331 struct sysent
*callp
;
332 volatile int *lowpri_delay
;
334 thread
= current_thread();
335 rval
= get_bsduthreadrval(thread
);
336 lowpri_delay
= get_bsduthreadlowpridelay(thread
);
339 regs
= USER_REGS(thread
);
341 /* reconstruct code for tracing before blasting eax */
343 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
344 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
345 if (callp
== sysent
) {
346 code
= fuword(params
);
349 if (error
== ERESTART
) {
352 else if (error
!= EJUSTRETURN
) {
355 regs
->efl
|= EFL_CF
; /* carry bit */
356 } else { /* (not error) */
359 regs
->efl
&= ~EFL_CF
;
363 ktrsysret(p
, code
, error
, rval
[0], (callp
->sy_funnel
& FUNNEL_MASK
));
365 __pthread_creset(callp
);
367 if ((callp
->sy_funnel
& FUNNEL_MASK
) != NO_FUNNEL
)
368 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
372 * task is marked as a low priority I/O type
373 * and the I/O we issued while in this system call
374 * collided with normal I/O operations... we'll
375 * delay in order to mitigate the impact of this
376 * task on the normal operation of the system
378 IOSleep(*lowpri_delay
);
381 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
382 error
, rval
[0], rval
[1], 0, 0);
384 thread_exception_return();
390 unix_syscall(struct i386_saved_state
*regs
)
395 struct sysent
*callp
;
402 volatile int *lowpri_delay
;
404 thread
= current_thread();
406 rval
= get_bsduthreadrval(thread
);
407 lowpri_delay
= get_bsduthreadlowpridelay(thread
);
409 thread
->task
->syscalls_unix
++; /* MP-safety ignored */
411 //printf("[scall : eax %x]", regs->eax);
413 params
= (vm_offset_t
) ((caddr_t
)regs
->uesp
+ sizeof (int));
414 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
415 if (callp
== sysent
) {
416 code
= fuword(params
);
417 params
+= sizeof (int);
418 callp
= (code
>= nsysent
) ? &sysent
[63] : &sysent
[code
];
421 vt
= get_bsduthreadarg(thread
);
423 if ((nargs
= (callp
->sy_narg
* sizeof (int))) &&
424 (error
= copyin((user_addr_t
) params
, (char *) vt
, nargs
)) != 0) {
427 thread_exception_return();
434 if ((error
= __pthread_cset(callp
))) {
435 /* cancelled system call; let it return with EINTR for handling */
438 thread_exception_return();
442 funnel_type
= (callp
->sy_funnel
& FUNNEL_MASK
);
443 if(funnel_type
== KERNEL_FUNNEL
)
444 (void) thread_funnel_set(kernel_flock
, TRUE
);
446 (void) set_bsduthreadargs(thread
, regs
, NULL
);
448 if (callp
->sy_narg
> 8)
449 panic("unix_syscall max arg count exceeded (%d)", callp
->sy_narg
);
451 ktrsyscall(p
, code
, callp
->sy_narg
, vt
, funnel_type
);
455 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_START
,
456 *ip
, *(ip
+1), *(ip
+2), *(ip
+3), 0);
459 error
= (*(callp
->sy_call
))((void *) p
, (void *) vt
, &rval
[0]);
462 /* May be needed with vfork changes */
463 regs
= USER_REGS(thread
);
465 if (error
== ERESTART
) {
468 else if (error
!= EJUSTRETURN
) {
471 regs
->efl
|= EFL_CF
; /* carry bit */
472 } else { /* (not error) */
475 regs
->efl
&= ~EFL_CF
;
479 ktrsysret(p
, code
, error
, rval
[0], funnel_type
);
481 __pthread_creset(callp
);
483 if(funnel_type
!= NO_FUNNEL
)
484 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
488 * task is marked as a low priority I/O type
489 * and the I/O we issued while in this system call
490 * collided with normal I/O operations... we'll
491 * delay in order to mitigate the impact of this
492 * task on the normal operation of the system
494 IOSleep(*lowpri_delay
);
497 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC
, code
) | DBG_FUNC_END
,
498 error
, rval
[0], rval
[1], 0, 0);
500 thread_exception_return();
506 machdep_syscall( struct i386_saved_state
*regs
)
509 machdep_call_t
*entry
;
512 if (trapno
< 0 || trapno
>= machdep_call_count
) {
513 regs
->eax
= (unsigned int)kern_invalid(NULL
);
515 thread_exception_return();
519 entry
= &machdep_call_table
[trapno
];
520 nargs
= entry
->nargs
;
525 if (copyin((user_addr_t
) regs
->uesp
+ sizeof (int),
527 nargs
* sizeof (int))) {
529 regs
->eax
= KERN_INVALID_ADDRESS
;
531 thread_exception_return();
537 regs
->eax
= (*entry
->routine
.args_1
)(args
[0]);
540 regs
->eax
= (*entry
->routine
.args_2
)(args
[0],args
[1]);
543 regs
->eax
= (*entry
->routine
.args_3
)(args
[0],args
[1],args
[2]);
546 regs
->eax
= (*entry
->routine
.args_4
)(args
[0],args
[1],args
[2],args
[3]);
549 panic("machdep_syscall(): too many args");
553 regs
->eax
= (*entry
->routine
.args_0
)();
555 if (current_thread()->funnel_lock
)
556 (void) thread_funnel_set(current_thread()->funnel_lock
, FALSE
);
558 thread_exception_return();
564 thread_compose_cthread_desc(unsigned int addr
, pcb_t pcb
)
566 struct real_descriptor desc
;
568 mp_disable_preemption();
572 desc
.base_low
= addr
& 0xffff;
573 desc
.base_med
= (addr
>> 16) & 0xff;
574 desc
.base_high
= (addr
>> 24) & 0xff;
575 desc
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
576 desc
.granularity
= SZ_32
|SZ_G
;
577 pcb
->cthread_desc
= desc
;
578 *ldt_desc_p(USER_CTHREAD
) = desc
;
580 mp_enable_preemption();
582 return(KERN_SUCCESS
);
586 thread_set_cthread_self(uint32_t self
)
588 current_thread()->machine
.pcb
->cthread_self
= self
;
590 return (KERN_SUCCESS
);
594 thread_get_cthread_self(void)
596 return ((kern_return_t
)current_thread()->machine
.pcb
->cthread_self
);
600 thread_fast_set_cthread_self(uint32_t self
)
603 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
604 thread_compose_cthread_desc(self
, pcb
);
605 pcb
->cthread_self
= self
; /* preserve old func too */
606 return (USER_CTHREAD
);
610 * thread_set_user_ldt routine is the interface for the user level
611 * settable ldt entry feature. allowing a user to create arbitrary
612 * ldt entries seems to be too large of a security hole, so instead
613 * this mechanism is in place to allow user level processes to have
614 * an ldt entry that can be used in conjunction with the FS register.
616 * Swapping occurs inside the pcb.c file along with initialization
617 * when a thread is created. The basic functioning theory is that the
618 * pcb->uldt_selector variable will contain either 0 meaning the
619 * process has not set up any entry, or the selector to be used in
620 * the FS register. pcb->uldt_desc contains the actual descriptor the
621 * user has set up stored in machine usable ldt format.
623 * Currently one entry is shared by all threads (USER_SETTABLE), but
624 * this could be changed in the future by changing how this routine
625 * allocates the selector. There seems to be no real reason at this
626 * time to have this added feature, but in the future it might be
629 * address is the linear address of the start of the data area; size
630 * is the size in bytes of the area; flags should always be set to 0
631 * for now. In the future it could be used to set R/W permissions or
632 * other functions. Currently the segment is created as a data segment
633 * up to 1 megabyte in size with full read/write permissions only.
635 * this call returns the segment selector or -1 if any error occurs
638 thread_set_user_ldt(uint32_t address
, uint32_t size
, uint32_t flags
)
641 struct fake_descriptor temp
;
645 return -1; // flags not supported
647 return -1; // size too big, 1 meg is the limit
649 mp_disable_preemption();
650 mycpu
= cpu_number();
652 // create a "fake" descriptor so we can use fix_desc()
653 // to build a real one...
654 // 32 bit default operation size
655 // standard read/write perms for a data segment
656 pcb
= (pcb_t
)current_thread()->machine
.pcb
;
657 temp
.offset
= address
;
658 temp
.lim_or_seg
= size
;
659 temp
.size_or_wdct
= SZ_32
;
660 temp
.access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
;
662 // turn this into a real descriptor
665 // set up our data in the pcb
666 pcb
->uldt_desc
= *(struct real_descriptor
*)&temp
;
667 pcb
->uldt_selector
= USER_SETTABLE
; // set the selector value
669 // now set it up in the current table...
670 *ldt_desc_p(USER_SETTABLE
) = *(struct real_descriptor
*)&temp
;
672 mp_enable_preemption();
674 return USER_SETTABLE
;
677 mach25_syscall(struct i386_saved_state
*regs
)
679 printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n",
680 regs
->eip
, regs
->eax
, -regs
->eax
);
683 #endif /* MACH_BSD */
686 /* This routine is called from assembly before each and every mach trap.
689 extern unsigned int mach_call_start(unsigned int, unsigned int *);
693 mach_call_start(unsigned int call_number
, unsigned int *args
)
696 unsigned int kdarg
[3];
698 current_thread()->task
->syscalls_mach
++; /* MP-safety ignored */
700 /* Always prepare to trace mach system calls */
706 argc
= mach_trap_table
[call_number
>>4].mach_trap_arg_count
;
711 for (i
=0; i
< argc
; i
++)
712 kdarg
[i
] = (int)*(args
+ i
);
714 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
>>4)) | DBG_FUNC_START
,
715 kdarg
[0], kdarg
[1], kdarg
[2], 0, 0);
717 return call_number
; /* pass this back thru */
720 /* This routine is called from assembly after each mach system call
723 extern unsigned int mach_call_end(unsigned int, unsigned int);
727 mach_call_end(unsigned int call_number
, unsigned int retval
)
729 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
>>4)) | DBG_FUNC_END
,
731 return retval
; /* pass this back thru */
734 typedef kern_return_t (*mach_call_t
)(void *);
736 extern __attribute__((regparm(1))) kern_return_t
737 mach_call_munger(unsigned int call_number
,
749 struct mach_call_args
{
761 __attribute__((regparm(1))) kern_return_t
762 mach_call_munger(unsigned int call_number
,
775 mach_call_t mach_call
;
776 kern_return_t retval
;
777 struct mach_call_args args
= { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
779 current_thread()->task
->syscalls_mach
++; /* MP-safety ignored */
782 argc
= mach_trap_table
[call_number
].mach_trap_arg_count
;
784 case 9: args
.arg9
= arg9
;
785 case 8: args
.arg8
= arg8
;
786 case 7: args
.arg7
= arg7
;
787 case 6: args
.arg6
= arg6
;
788 case 5: args
.arg5
= arg5
;
789 case 4: args
.arg4
= arg4
;
790 case 3: args
.arg3
= arg3
;
791 case 2: args
.arg2
= arg2
;
792 case 1: args
.arg1
= arg1
;
795 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
, (call_number
)) | DBG_FUNC_START
,
796 args
.arg1
, args
.arg2
, args
.arg3
, 0, 0);
798 mach_call
= (mach_call_t
)mach_trap_table
[call_number
].mach_trap_function
;
799 retval
= mach_call(&args
);
801 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC
,(call_number
)) | DBG_FUNC_END
,
808 * thread_setuserstack:
810 * Sets the user stack pointer into the machine
811 * dependent thread state info.
816 mach_vm_address_t user_stack
)
818 struct i386_saved_state
*ss
= get_user_regs(thread
);
820 ss
->uesp
= CAST_DOWN(unsigned int,user_stack
);
824 * thread_adjuserstack:
826 * Returns the adjusted user stack pointer from the machine
827 * dependent thread state info. Used for small (<2G) deltas.
834 struct i386_saved_state
*ss
= get_user_regs(thread
);
837 return CAST_USER_ADDR_T(ss
->uesp
);
841 * thread_setentrypoint:
843 * Sets the user PC into the machine
844 * dependent thread state info.
847 thread_setentrypoint(
849 mach_vm_address_t entry
)
851 struct i386_saved_state
*ss
= get_user_regs(thread
);
853 ss
->eip
= CAST_DOWN(unsigned int,entry
);