2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
58 #include <mach_debug.h>
59 #include <mach_ldebug.h>
61 #include <sys/kdebug.h>
63 #include <mach/kern_return.h>
64 #include <mach/thread_status.h>
65 #include <mach/vm_param.h>
67 #include <kern/counters.h>
68 #include <kern/kalloc.h>
69 #include <kern/mach_param.h>
70 #include <kern/processor.h>
71 #include <kern/cpu_data.h>
72 #include <kern/cpu_number.h>
73 #include <kern/task.h>
74 #include <kern/thread.h>
75 #include <kern/sched_prim.h>
76 #include <kern/misc_protos.h>
77 #include <kern/assert.h>
79 #include <kern/machine.h>
80 #include <ipc/ipc_port.h>
81 #include <vm/vm_kern.h>
82 #include <vm/vm_map.h>
84 #include <vm/vm_protos.h>
86 #include <i386/commpage/commpage.h>
87 #include <i386/cpu_data.h>
88 #include <i386/cpu_number.h>
89 #include <i386/eflags.h>
90 #include <i386/proc_reg.h>
92 #include <i386/user_ldt.h>
94 #include <i386/mp_desc.h>
95 #include <i386/misc_protos.h>
96 #include <i386/thread.h>
100 #include <i386/seg.h>
101 #include <i386/machine_routines.h>
/*
 * ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_): compile-time size check.
 * Declares an extern char array named after _type_ whose length is 1 when
 * sizeof(_type_) is a multiple of 16 and -1 otherwise, so a type with any
 * other size yields a negative array length and fails to compile.
 */
103 #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
104 extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
105 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
107 /* Compile-time checks for vital save area sizing: */
108 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t
);
109 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_sframe64_t
);
110 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_compat32_t
);
111 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t
);
/*
 * NOTE(review): this macro is defined unconditionally, so any `#ifdef`
 * test on it is always true regardless of the values of DEBUG and
 * DEVELOPMENT; only an `#if` test would evaluate the expression.
 */
113 #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
/* Zones and helper defined outside this file. */
115 extern zone_t iss_zone
; /* zone for saved_state area */
116 extern zone_t ids_zone
; /* zone for debug_state area */
118 extern void *get_bsduthreadarg(thread_t
);
/*
 * act_machine_switch_pcb: load per-CPU machine state for thread `new`.
 * The `old` parameter is marked __unused.
 *
 * What the visible statements do:
 *  - assert that `new` has a kernel stack and that interrupts are disabled;
 *  - on __x86_64__, if get_gs() is not NULL_SEG, swapgs() twice and zero the
 *    cached cu_user_gs_base in the current cpu data;
 *  - for a 64-bit saved state: point cpu_uber.cu_isf at the PCB's interrupt
 *    stack frame, set ktss64 rsp0 and the sysenter stack top to the address
 *    just past the saved state (asserted 16-byte aligned), take cpu_task_map
 *    from new->map->pmap->pm_task_map, set ACC_PL_U on USER64_CS and clear it
 *    on USER_CS, and rewrite MSR_IA32_KERNEL_GS_BASE with pcb->cthread_self
 *    when either the cached value or the MSR differs from it;
 *  - for a 32-bit (compat) saved state: the same stack/isf setup using
 *    x86_saved_state_compat32_t, cpu_task_map = TASK_MAP_32BIT, syscall
 *    argument-store pointers cached in cpu data, ACC_PL_U moved to USER_CS,
 *    and the USER_CTHREAD (and optional uldt_selector) LDT entries copied
 *    from the PCB;
 *  - call ml_cpu_set_ldt(KERNEL_LDT) when the task is TASK_NULL or has no
 *    i386_ldt;
 *  - the `#else` (!__x86_64__) half repeats this with UBER64() conversions
 *    and adds a !cpu_mode_is64bit() path that high-maps the saved state;
 *  - finally call commpage_sched_gen_inc().
 *
 * NOTE(review): this text looks like a lossy excerpt. The embedded listing
 * numbers skip values, and `isf` and `hi_iss` are assigned without a visible
 * declaration. The return type, braces, else-branches and some statements
 * are presumably among the missing lines - confirm against a pristine copy
 * before changing any logic here.
 */
120 act_machine_switch_pcb(__unused thread_t old
, thread_t
new)
122 pcb_t pcb
= THREAD_TO_PCB(new);
123 cpu_data_t
*cdp
= current_cpu_datap();
124 struct real_descriptor
*ldtp
;
125 mach_vm_offset_t pcb_stack_top
;
127 assert(new->kernel_stack
!= 0);
128 assert(ml_get_interrupts_enabled() == FALSE
);
/*
 * NOTE(review): DIRECTION_FLAG_DEBUG is #defined unconditionally earlier in
 * this file, so this #ifdef is always taken; `#if DIRECTION_FLAG_DEBUG` may
 * have been intended - confirm.
 */
129 #ifdef DIRECTION_FLAG_DEBUG
130 if (x86_get_flags() & EFL_DF
) {
131 panic("Direction flag detected: 0x%lx", x86_get_flags());
135 #if defined(__x86_64__)
137 * Clear segment state
138 * unconditionally for DS/ES/FS but more carefully for GS whose
139 * cached state we track.
144 if (get_gs() != NULL_SEG
) {
145 swapgs(); /* switch to user's GS context */
147 swapgs(); /* and back to kernel */
149 /* record the active machine state lost */
150 cdp
->cpu_uber
.cu_user_gs_base
= 0;
153 if (is_saved_state64(pcb
->iss
)) {
155 * The test above is performed against the thread save state
156 * flavor and not task's 64-bit feature flag because of the
157 * thread/task 64-bit state divergence that can arise in
158 * task_set_64bit() x86: the task state is changed before
159 * the individual thread(s).
161 x86_saved_state64_tagged_t
*iss64
;
/* Repeats the condition of the enclosing if on pcb->iss. */
164 assert(is_saved_state64(pcb
->iss
));
166 iss64
= (x86_saved_state64_tagged_t
*) pcb
->iss
;
169 * Set pointer to PCB's interrupt stack frame in cpu data.
170 * Used by syscall and double-fault trap handlers.
172 isf
= (vm_offset_t
) &iss64
->state
.isf
;
173 cdp
->cpu_uber
.cu_isf
= isf
;
174 pcb_stack_top
= (vm_offset_t
) (iss64
+ 1);
175 /* require 16-byte alignment */
176 assert((pcb_stack_top
& 0xF) == 0);
178 /* Interrupt stack is pcb */
179 current_ktss64()->rsp0
= pcb_stack_top
;
182 * Top of temporary sysenter stack points to pcb stack.
183 * Although this is not normally used by 64-bit users,
184 * it needs to be set in case a sysenter is attempted.
186 *current_sstk64() = pcb_stack_top
;
188 cdp
->cpu_task_map
= new->map
->pmap
->pm_task_map
;
191 * Enable the 64-bit user code segment, USER64_CS.
192 * Disable the 32-bit user code segment, USER_CS.
194 ldt_desc_p(USER64_CS
)->access
|= ACC_PL_U
;
195 ldt_desc_p(USER_CS
)->access
&= ~ACC_PL_U
;
198 * Switch user's GS base if necessary
199 * by setting the Kernel's GS base MSR
200 * - this will become the user's on the swapgs when
201 * returning to user-space. Avoid this for
202 * kernel threads (no user TLS support required)
203 * and verify the memory shadow of the segment base
204 * in the event it was altered in user space.
206 if ((pcb
->cthread_self
!= 0) || (new->task
!= kernel_task
)) {
207 if ((cdp
->cpu_uber
.cu_user_gs_base
!= pcb
->cthread_self
) || (pcb
->cthread_self
!= rdmsr64(MSR_IA32_KERNEL_GS_BASE
))) {
208 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
209 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, pcb
->cthread_self
);
213 x86_saved_state_compat32_t
*iss32compat
;
216 assert(is_saved_state32(pcb
->iss
));
217 iss32compat
= (x86_saved_state_compat32_t
*) pcb
->iss
;
219 pcb_stack_top
= (uintptr_t) (iss32compat
+ 1);
220 /* require 16-byte alignment */
221 assert((pcb_stack_top
& 0xF) == 0);
224 * Set pointer to PCB's interrupt stack frame in cpu data.
225 * Used by debug trap handler.
227 isf
= (vm_offset_t
) &iss32compat
->isf64
;
228 cdp
->cpu_uber
.cu_isf
= isf
;
230 /* Top of temporary sysenter stack points to pcb stack */
231 *current_sstk64() = pcb_stack_top
;
233 /* Interrupt stack is pcb */
234 current_ktss64()->rsp0
= pcb_stack_top
;
236 cdp
->cpu_task_map
= TASK_MAP_32BIT
;
237 /* Precalculate pointers to syscall argument store, for use
238 * in the trampolines.
240 cdp
->cpu_uber_arg_store
= (vm_offset_t
)get_bsduthreadarg(new);
241 cdp
->cpu_uber_arg_store_valid
= (vm_offset_t
)&pcb
->arg_store_valid
;
242 pcb
->arg_store_valid
= 0;
248 ldt_desc_p(USER64_CS
)->access
&= ~ACC_PL_U
;
249 ldt_desc_p(USER_CS
)->access
|= ACC_PL_U
;
252 * Set the thread`s cthread (a.k.a pthread)
253 * For 32-bit user this involves setting the USER_CTHREAD
254 * descriptor in the LDT to point to the cthread data.
255 * The involves copying in the pre-initialized descriptor.
257 ldtp
= (struct real_descriptor
*)current_ldt();
258 ldtp
[sel_idx(USER_CTHREAD
)] = pcb
->cthread_desc
;
259 if (pcb
->uldt_selector
!= 0)
260 ldtp
[sel_idx(pcb
->uldt_selector
)] = pcb
->uldt_desc
;
261 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
264 * Set the thread`s LDT or LDT entry.
266 if (new->task
== TASK_NULL
|| new->task
->i386_ldt
== 0) {
270 ml_cpu_set_ldt(KERNEL_LDT
);
273 * Task has its own LDT.
279 #else /* !__x86_64__ */
281 vm_offset_t hi_pcb_stack_top
;
284 if (!cpu_mode_is64bit()) {
285 x86_saved_state32_tagged_t
*hi_iss32
;
287 * Save a pointer to the top of the "kernel" stack -
288 * actually the place in the PCB where a trap into
289 * kernel mode will push the registers.
291 hi_iss
= (vm_offset_t
)((unsigned long)
292 pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0
) |
293 ((unsigned long)pcb
->iss
& PAGE_MASK
));
295 cdp
->cpu_hi_iss
= (void *)hi_iss
;
297 pmap_high_map(pcb
->iss_pte0
, HIGH_CPU_ISS0
);
298 pmap_high_map(pcb
->iss_pte1
, HIGH_CPU_ISS1
);
300 hi_iss32
= (x86_saved_state32_tagged_t
*) hi_iss
;
301 assert(hi_iss32
->tag
== x86_SAVED_STATE32
);
303 hi_pcb_stack_top
= (int) (hi_iss32
+ 1);
306 * For fast syscall, top of interrupt stack points to pcb stack
308 *(vm_offset_t
*) current_sstk() = hi_pcb_stack_top
;
310 current_ktss()->esp0
= hi_pcb_stack_top
;
312 } else if (is_saved_state64(pcb
->iss
)) {
314 * The test above is performed against the thread save state
315 * flavor and not task's 64-bit feature flag because of the
316 * thread/task 64-bit state divergence that can arise in
317 * task_set_64bit() x86: the task state is changed before
318 * the individual thread(s).
320 x86_saved_state64_tagged_t
*iss64
;
323 assert(is_saved_state64(pcb
->iss
));
325 iss64
= (x86_saved_state64_tagged_t
*) pcb
->iss
;
328 * Set pointer to PCB's interrupt stack frame in cpu data.
329 * Used by syscall and double-fault trap handlers.
331 isf
= (vm_offset_t
) &iss64
->state
.isf
;
332 cdp
->cpu_uber
.cu_isf
= UBER64(isf
);
333 pcb_stack_top
= (vm_offset_t
) (iss64
+ 1);
334 /* require 16-byte alignment */
335 assert((pcb_stack_top
& 0xF) == 0);
336 /* Interrupt stack is pcb */
337 current_ktss64()->rsp0
= UBER64(pcb_stack_top
);
340 * Top of temporary sysenter stack points to pcb stack.
341 * Although this is not normally used by 64-bit users,
342 * it needs to be set in case a sysenter is attempted.
344 *current_sstk64() = UBER64(pcb_stack_top
);
346 cdp
->cpu_task_map
= new->map
->pmap
->pm_task_map
;
349 * Enable the 64-bit user code segment, USER64_CS.
350 * Disable the 32-bit user code segment, USER_CS.
352 ldt_desc_p(USER64_CS
)->access
|= ACC_PL_U
;
353 ldt_desc_p(USER_CS
)->access
&= ~ACC_PL_U
;
356 x86_saved_state_compat32_t
*iss32compat
;
359 assert(is_saved_state32(pcb
->iss
));
360 iss32compat
= (x86_saved_state_compat32_t
*) pcb
->iss
;
362 pcb_stack_top
= (int) (iss32compat
+ 1);
363 /* require 16-byte alignment */
364 assert((pcb_stack_top
& 0xF) == 0);
367 * Set pointer to PCB's interrupt stack frame in cpu data.
368 * Used by debug trap handler.
370 isf
= (vm_offset_t
) &iss32compat
->isf64
;
371 cdp
->cpu_uber
.cu_isf
= UBER64(isf
);
373 /* Top of temporary sysenter stack points to pcb stack */
374 *current_sstk64() = UBER64(pcb_stack_top
);
376 /* Interrupt stack is pcb */
377 current_ktss64()->rsp0
= UBER64(pcb_stack_top
);
379 cdp
->cpu_task_map
= TASK_MAP_32BIT
;
380 /* Precalculate pointers to syscall argument store, for use
381 * in the trampolines.
383 cdp
->cpu_uber_arg_store
= UBER64((vm_offset_t
)get_bsduthreadarg(new));
384 cdp
->cpu_uber_arg_store_valid
= UBER64((vm_offset_t
)&pcb
->arg_store_valid
);
385 pcb
->arg_store_valid
= 0;
391 ldt_desc_p(USER64_CS
)->access
&= ~ACC_PL_U
;
392 ldt_desc_p(USER_CS
)->access
|= ACC_PL_U
;
396 * Set the thread`s cthread (a.k.a pthread)
397 * For 32-bit user this involves setting the USER_CTHREAD
398 * descriptor in the LDT to point to the cthread data.
399 * The involves copying in the pre-initialized descriptor.
401 ldtp
= (struct real_descriptor
*)current_ldt();
402 ldtp
[sel_idx(USER_CTHREAD
)] = pcb
->cthread_desc
;
403 if (pcb
->uldt_selector
!= 0)
404 ldtp
[sel_idx(pcb
->uldt_selector
)] = pcb
->uldt_desc
;
407 * For 64-bit, we additionally set the 64-bit User GS base
408 * address. On return to 64-bit user, the GS.Base MSR will be written.
410 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
413 * Set the thread`s LDT or LDT entry.
415 if (new->task
== TASK_NULL
|| new->task
->i386_ldt
== 0) {
419 ml_cpu_set_ldt(KERNEL_LDT
);
422 * Task has its own LDT.
429 * Bump the scheduler generation count in the commpage.
430 * This can be read by user code to detect its preemption.
432 commpage_sched_gen_inc();
/*
 * thread_set_wq_state32: overwrite the 32-bit user register save area of
 * `thread` (obtained with USER_REGS32) from the x86_thread_state32_t that
 * `tstate` points to.
 *
 * Copied from the supplied state: eip, eax, ebx, ecx, edx, edi, esi, and
 * esp (stored into uesp). Forced values: ebp = 0, efl = EFL_USER_SET,
 * cs = USER_CS, ss/ds/es = USER_DS.
 *
 * pal_register_cache_state(thread, DIRTY) is called before the save area
 * is touched.
 *
 * NOTE(review): when `thread` is not the current thread the visible text
 * calls thread_unlock() at the end, but the body of the first
 * `curth != thread` test (presumably where the lock is taken) is not
 * visible in this excerpt - confirm against a pristine copy.
 */
435 thread_set_wq_state32(thread_t thread
, thread_state_t tstate
)
437 x86_thread_state32_t
*state
;
438 x86_saved_state32_t
*saved_state
;
439 thread_t curth
= current_thread();
442 pal_register_cache_state(thread
, DIRTY
);
444 saved_state
= USER_REGS32(thread
);
446 state
= (x86_thread_state32_t
*)tstate
;
448 if (curth
!= thread
) {
453 saved_state
->ebp
= 0;
454 saved_state
->eip
= state
->eip
;
455 saved_state
->eax
= state
->eax
;
456 saved_state
->ebx
= state
->ebx
;
457 saved_state
->ecx
= state
->ecx
;
458 saved_state
->edx
= state
->edx
;
459 saved_state
->edi
= state
->edi
;
460 saved_state
->esi
= state
->esi
;
461 saved_state
->uesp
= state
->esp
;
462 saved_state
->efl
= EFL_USER_SET
;
464 saved_state
->cs
= USER_CS
;
465 saved_state
->ss
= USER_DS
;
466 saved_state
->ds
= USER_DS
;
467 saved_state
->es
= USER_DS
;
469 if (curth
!= thread
) {
470 thread_unlock(thread
);
/*
 * thread_set_wq_state64: overwrite the 64-bit user register save area of
 * `thread` (obtained with USER_REGS64) from the x86_thread_state64_t that
 * `tstate` points to.
 *
 * Copied from the supplied state: rdi, rsi, rdx, rcx, r8, r9, plus rip and
 * rsp into the interrupt stack frame (isf). Forced values: rbp = 0,
 * isf.cs = USER64_CS, isf.rflags = EFL_USER_SET.
 *
 * pal_register_cache_state(thread, DIRTY) is called before the save area
 * is touched.
 *
 * NOTE(review): when `thread` is not the current thread the visible text
 * calls thread_unlock() at the end, but the body of the first
 * `curth != thread` test (presumably where the lock is taken) is not
 * visible in this excerpt - confirm against a pristine copy.
 */
477 thread_set_wq_state64(thread_t thread
, thread_state_t tstate
)
479 x86_thread_state64_t
*state
;
480 x86_saved_state64_t
*saved_state
;
481 thread_t curth
= current_thread();
484 pal_register_cache_state(thread
, DIRTY
);
486 saved_state
= USER_REGS64(thread
);
487 state
= (x86_thread_state64_t
*)tstate
;
489 if (curth
!= thread
) {
494 saved_state
->rbp
= 0;
495 saved_state
->rdi
= state
->rdi
;
496 saved_state
->rsi
= state
->rsi
;
497 saved_state
->rdx
= state
->rdx
;
498 saved_state
->rcx
= state
->rcx
;
499 saved_state
->r8
= state
->r8
;
500 saved_state
->r9
= state
->r9
;
502 saved_state
->isf
.rip
= state
->rip
;
503 saved_state
->isf
.rsp
= state
->rsp
;
504 saved_state
->isf
.cs
= USER64_CS
;
505 saved_state
->isf
.rflags
= EFL_USER_SET
;
507 if (curth
!= thread
) {
508 thread_unlock(thread
);
514 * Initialize the machine-dependent state for a new thread.
/*
 * machine_thread_create: set up the PCB save frame for a new thread and
 * return KERN_SUCCESS.
 *
 * The body uses `thread` and `task`; their parameter declarations are not
 * visible in this excerpt.
 *
 * What the visible statements do:
 *  - when NCOPY_WINDOWS > 0, invalidate the thread's copy windows and clear
 *    its physwindow_pte / physwindow_busy fields;
 *  - if pcb->sf is NULL, assert the preemption level is 0 and allocate the
 *    save frame from iss_zone;
 *  - for a task with 64-bit addresses: zero the frame as an x86_sframe64_t,
 *    tag it x86_SAVED_STATE64, and preset rflags = EFL_USER_SET,
 *    cs = USER64_CS, ss/fs/gs = USER_DS;
 *  - for the 32-bit layouts (x86_sframe_compat32_t when cpu_mode_is64bit(),
 *    x86_sframe32_t in the __i386__ path): zero the frame, tag it
 *    x86_SAVED_STATE32, and preset cs = USER_CS, ss/ds/es/fs/gs = USER_DS,
 *    efl = EFL_USER_SET; the __i386__ path also records iss_pte0/iss_pte1
 *    and seeds cthread_desc / uldt_desc from the USER_DS LDT entry;
 *  - initialise pcb->lock and clear arg_store_valid, cthread_self and
 *    uldt_selector;
 *  - if cthread_desc does not have ACC_P set, copy the USER_DS descriptor
 *    from the current LDT into it.
 *
 * NOTE(review): the else-lines and braces separating these cases are not
 * visible here, and `paddr` is used without a visible declaration, so the
 * exact branch nesting should be confirmed against a pristine copy.
 */
517 machine_thread_create(
521 pcb_t pcb
= THREAD_TO_PCB(thread
);
522 x86_saved_state_t
*iss
;
524 #if NCOPY_WINDOWS > 0
525 inval_copy_windows(thread
);
527 thread
->machine
.physwindow_pte
= 0;
528 thread
->machine
.physwindow_busy
= 0;
532 * Allocate save frame only if required.
534 if (pcb
->sf
== NULL
) {
535 assert((get_preemption_level() == 0));
536 pcb
->sf
= zalloc(iss_zone
);
541 if (task_has_64BitAddr(task
)) {
542 x86_sframe64_t
*sf64
;
544 sf64
= (x86_sframe64_t
*) pcb
->sf
;
546 bzero((char *)sf64
, sizeof(x86_sframe64_t
));
548 iss
= (x86_saved_state_t
*) &sf64
->ssf
;
549 iss
->flavor
= x86_SAVED_STATE64
;
551 * Guarantee that the bootstrapped thread will be in user
554 iss
->ss_64
.isf
.rflags
= EFL_USER_SET
;
555 iss
->ss_64
.isf
.cs
= USER64_CS
;
556 iss
->ss_64
.isf
.ss
= USER_DS
;
557 iss
->ss_64
.fs
= USER_DS
;
558 iss
->ss_64
.gs
= USER_DS
;
560 if (cpu_mode_is64bit()) {
561 x86_sframe_compat32_t
*sfc32
;
563 sfc32
= (x86_sframe_compat32_t
*)pcb
->sf
;
565 bzero((char *)sfc32
, sizeof(x86_sframe_compat32_t
));
567 iss
= (x86_saved_state_t
*) &sfc32
->ssf
.iss32
;
568 iss
->flavor
= x86_SAVED_STATE32
;
569 #if defined(__i386__)
572 sfc32
->pad_for_16byte_alignment
[0] = 0x64326432;
573 sfc32
->pad_for_16byte_alignment
[1] = 0x64326432;
577 x86_sframe32_t
*sf32
;
578 struct real_descriptor
*ldtp
;
581 sf32
= (x86_sframe32_t
*) pcb
->sf
;
583 bzero((char *)sf32
, sizeof(x86_sframe32_t
));
585 iss
= (x86_saved_state_t
*) &sf32
->ssf
;
586 iss
->flavor
= x86_SAVED_STATE32
;
588 pcb
->iss_pte0
= pte_kernel_rw(kvtophys((vm_offset_t
)iss
));
589 if (0 == (paddr
= pa_to_pte(kvtophys((vm_offset_t
)iss
+ PAGE_SIZE
))))
590 pcb
->iss_pte1
= INTEL_PTE_INVALID
;
592 pcb
->iss_pte1
= pte_kernel_rw(paddr
);
594 ldtp
= (struct real_descriptor
*)
595 pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN
);
596 pcb
->cthread_desc
= ldtp
[sel_idx(USER_DS
)];
597 pcb
->uldt_desc
= ldtp
[sel_idx(USER_DS
)];
598 #endif /* __i386__ */
601 * Guarantee that the bootstrapped thread will be in user
604 iss
->ss_32
.cs
= USER_CS
;
605 iss
->ss_32
.ss
= USER_DS
;
606 iss
->ss_32
.ds
= USER_DS
;
607 iss
->ss_32
.es
= USER_DS
;
608 iss
->ss_32
.fs
= USER_DS
;
609 iss
->ss_32
.gs
= USER_DS
;
610 iss
->ss_32
.efl
= EFL_USER_SET
;
615 simple_lock_init(&pcb
->lock
, 0);
617 pcb
->arg_store_valid
= 0;
618 pcb
->cthread_self
= 0;
619 pcb
->uldt_selector
= 0;
621 /* Ensure that the "cthread" descriptor describes a valid
624 if ((pcb
->cthread_desc
.access
& ACC_P
) == 0) {
625 struct real_descriptor
*ldtp
;
626 ldtp
= (struct real_descriptor
*)current_ldt();
627 pcb
->cthread_desc
= ldtp
[sel_idx(USER_DS
)];
630 return(KERN_SUCCESS
);
634 * Machine-dependent cleanup prior to destroying a thread
637 machine_thread_destroy(
640 register pcb_t pcb
= THREAD_TO_PCB(thread
);
645 zfree(iss_zone
, pcb
->sf
);
649 zfree(ids_zone
, pcb
->ids
);