2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
57 #include <mach_debug.h>
58 #include <mach_ldebug.h>
60 #include <sys/kdebug.h>
62 #include <mach/kern_return.h>
63 #include <mach/thread_status.h>
64 #include <mach/vm_param.h>
66 #include <kern/counters.h>
67 #include <kern/kalloc.h>
68 #include <kern/mach_param.h>
69 #include <kern/processor.h>
70 #include <kern/cpu_data.h>
71 #include <kern/cpu_number.h>
72 #include <kern/task.h>
73 #include <kern/thread.h>
74 #include <kern/sched_prim.h>
75 #include <kern/misc_protos.h>
76 #include <kern/assert.h>
78 #include <kern/machine.h>
79 #include <ipc/ipc_port.h>
80 #include <vm/vm_kern.h>
81 #include <vm/vm_map.h>
83 #include <vm/vm_protos.h>
85 #include <i386/commpage/commpage.h>
86 #include <i386/cpu_data.h>
87 #include <i386/cpu_number.h>
88 #include <i386/eflags.h>
89 #include <i386/proc_reg.h>
91 #include <i386/user_ldt.h>
93 #include <i386/mp_desc.h>
94 #include <i386/misc_protos.h>
95 #include <i386/thread.h>
97 #include <i386/machine_routines.h>
100 #include <kern/hv_support.h>
/*
 * ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(type): compile-time size check.
 * Expands to an extern declaration of a char array whose length is 1 when
 * sizeof(type) is a multiple of 16 and -1 otherwise, so a type of the wrong
 * size is rejected by the compiler (negative array length). Being only an
 * extern declaration, the expansion itself defines no storage.
 */
103 #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
104 extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
105 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
107 /* Compile-time checks for vital save area sizing: */
108 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t
);
109 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t
);
/*
 * NOTE(review): this macro is unconditionally defined here, so a later
 * `#ifdef DIRECTION_FLAG_DEBUG` test is true regardless of the value of
 * (DEBUG | DEVELOPMENT). Confirm whether `#if` was intended at the use site.
 */
111 #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
113 extern zone_t iss_zone
; /* zone for saved_state area */
114 extern zone_t ids_zone
; /* zone for debug_state area */
/*
 * act_machine_switch_pcb: install the per-CPU machine state for thread `new`.
 * The `old` argument is marked __unused.
 *
 * Preconditions asserted below: new->kernel_stack is non-zero and interrupts
 * are disabled. When DIRECTION_FLAG_DEBUG is defined, the routine also panics
 * if EFL_DF is set in the current flags.
 *
 * Visible effects, in order:
 *  - if GS is not NULL_SEG, the code brackets work with a pair of swapgs()
 *    calls and zeroes the cached cdp->cpu_uber.cu_user_gs_base;
 *  - stores the address of pcb->iss->ss_64.isf in cdp->cpu_uber.cu_isf;
 *  - sets current_ktss64()->rsp0 and *current_sstk64() to the address just
 *    past pcb->iss, asserting that address is 16-byte aligned;
 *  - when is_saved_state64(pcb->iss): takes cpu_task_map from the new
 *    thread's pmap, sets ACC_PL_U on USER64_CS and clears it on USER_CS, and
 *    rewrites MSR_IA32_KERNEL_GS_BASE plus the cached cu_user_gs_base if
 *    either differs from pcb->cthread_self (the enclosing test requires
 *    cthread_self != 0 or a task other than kernel_task);
 *  - the statements from the TASK_MAP_32BIT assignment onward flip the two
 *    GDT access bits the other way and copy pcb->cthread_desc (and
 *    pcb->uldt_desc when uldt_selector != 0) into the current LDT; these
 *    appear to be the non-64-bit path, but the branch keyword is not visible
 *    in this extract -- TODO confirm;
 *  - calls ml_cpu_set_ldt(KERNEL_LDT) when the task is TASK_NULL or has no
 *    i386_ldt;
 *  - calls commpage_sched_gen_inc().
 *
 * NOTE(review): `isf` is assigned below but its declaration, along with the
 * function's return type, braces and some statements, is not visible in this
 * extract.
 */
117 act_machine_switch_pcb(__unused thread_t old
, thread_t
new)
119 pcb_t pcb
= THREAD_TO_PCB(new);
120 cpu_data_t
*cdp
= current_cpu_datap();
121 struct real_descriptor
*ldtp
;
122 mach_vm_offset_t pcb_stack_top
;
124 assert(new->kernel_stack
!= 0);
125 assert(ml_get_interrupts_enabled() == FALSE
);
126 #ifdef DIRECTION_FLAG_DEBUG
127 if (x86_get_flags() & EFL_DF
) {
128 panic("Direction flag detected: 0x%lx", x86_get_flags());
133 * Clear segment state
134 * unconditionally for DS/ES/FS but more carefully for GS whose
135 * cached state we track.
140 if (get_gs() != NULL_SEG
) {
141 swapgs(); /* switch to user's GS context */
143 swapgs(); /* and back to kernel */
145 /* record the active machine state lost */
146 cdp
->cpu_uber
.cu_user_gs_base
= 0;
152 * Set pointer to PCB's interrupt stack frame in cpu data.
153 * Used by syscall and double-fault trap handlers.
155 isf
= (vm_offset_t
) &pcb
->iss
->ss_64
.isf
;
156 cdp
->cpu_uber
.cu_isf
= isf
;
157 pcb_stack_top
= (vm_offset_t
) (pcb
->iss
+ 1);
158 /* require 16-byte alignment */
159 assert((pcb_stack_top
& 0xF) == 0);
161 /* Interrupt stack is pcb */
162 current_ktss64()->rsp0
= pcb_stack_top
;
165 * Top of temporary sysenter stack points to pcb stack.
166 * Although this is not normally used by 64-bit users,
167 * it needs to be set in case a sysenter is attempted.
169 *current_sstk64() = pcb_stack_top
;
171 if (is_saved_state64(pcb
->iss
)) {
173 cdp
->cpu_task_map
= new->map
->pmap
->pm_task_map
;
176 * Enable the 64-bit user code segment, USER64_CS.
177 * Disable the 32-bit user code segment, USER_CS.
179 gdt_desc_p(USER64_CS
)->access
|= ACC_PL_U
;
180 gdt_desc_p(USER_CS
)->access
&= ~ACC_PL_U
;
183 * Switch user's GS base if necessary
184 * by setting the Kernel's GS base MSR
185 * - this will become the user's on the swapgs when
186 * returning to user-space. Avoid this for
187 * kernel threads (no user TLS support required)
188 * and verify the memory shadow of the segment base
189 * in the event it was altered in user space.
191 if ((pcb
->cthread_self
!= 0) || (new->task
!= kernel_task
)) {
192 if ((cdp
->cpu_uber
.cu_user_gs_base
!= pcb
->cthread_self
) ||
193 (pcb
->cthread_self
!= rdmsr64(MSR_IA32_KERNEL_GS_BASE
))) {
194 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
195 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, pcb
->cthread_self
);
201 cdp
->cpu_task_map
= TASK_MAP_32BIT
;
208 /* It's possible that writing to the GDT areas
209 * is expensive, if the processor intercepts those
210 * writes to invalidate its internal segment caches
211 * TODO: perhaps only do this if switching bitness
213 gdt_desc_p(USER64_CS
)->access
&= ~ACC_PL_U
;
214 gdt_desc_p(USER_CS
)->access
|= ACC_PL_U
;
217 * Set the thread`s cthread (a.k.a pthread)
218 * For 32-bit user this involves setting the USER_CTHREAD
219 * descriptor in the LDT to point to the cthread data.
220 * The involves copying in the pre-initialized descriptor.
222 ldtp
= (struct real_descriptor
*)current_ldt();
223 ldtp
[sel_idx(USER_CTHREAD
)] = pcb
->cthread_desc
;
224 if (pcb
->uldt_selector
!= 0)
225 ldtp
[sel_idx(pcb
->uldt_selector
)] = pcb
->uldt_desc
;
226 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
229 * Set the thread`s LDT or LDT entry.
231 if (new->task
== TASK_NULL
|| new->task
->i386_ldt
== 0) {
235 ml_cpu_set_ldt(KERNEL_LDT
);
238 * Task has its own LDT.
245 * Bump the scheduler generation count in the commpage.
246 * This can be read by user code to detect its preemption.
248 commpage_sched_gen_inc();
/*
 * thread_set_wq_state32: load a 32-bit user register set into `thread`'s
 * saved state (obtained with USER_REGS32).
 *
 * `tstate` is reinterpreted as an x86_thread_state32_t. The register cache
 * is first marked DIRTY through pal_register_cache_state(). The routine then
 * copies eip, eax, ebx, ecx, edx, edi and esi from the supplied state,
 * stores state->esp into uesp, zeroes ebp, sets efl to EFL_USER_SET, and
 * sets cs to USER_CS and ss/ds/es to USER_DS.
 *
 * When `thread` is not the current thread, thread_unlock(thread) is called
 * after the update. The matching acquisition inside the first
 * `curth != thread` test, the return type and the return statement are not
 * visible in this extract.
 */
252 thread_set_wq_state32(thread_t thread
, thread_state_t tstate
)
254 x86_thread_state32_t
*state
;
255 x86_saved_state32_t
*saved_state
;
256 thread_t curth
= current_thread();
259 pal_register_cache_state(thread
, DIRTY
);
261 saved_state
= USER_REGS32(thread
);
263 state
= (x86_thread_state32_t
*)tstate
;
265 if (curth
!= thread
) {
270 saved_state
->ebp
= 0;
271 saved_state
->eip
= state
->eip
;
272 saved_state
->eax
= state
->eax
;
273 saved_state
->ebx
= state
->ebx
;
274 saved_state
->ecx
= state
->ecx
;
275 saved_state
->edx
= state
->edx
;
276 saved_state
->edi
= state
->edi
;
277 saved_state
->esi
= state
->esi
;
278 saved_state
->uesp
= state
->esp
;
279 saved_state
->efl
= EFL_USER_SET
;
281 saved_state
->cs
= USER_CS
;
282 saved_state
->ss
= USER_DS
;
283 saved_state
->ds
= USER_DS
;
284 saved_state
->es
= USER_DS
;
286 if (curth
!= thread
) {
287 thread_unlock(thread
);
/*
 * thread_set_wq_state64: load a 64-bit user register set into `thread`'s
 * saved state (obtained with USER_REGS64).
 *
 * `tstate` is reinterpreted as an x86_thread_state64_t. state->rsp and
 * state->rip are first tested with IS_USERADDR64_CANONICAL; the action taken
 * when either test fails is not visible in this extract. The register cache
 * is then marked DIRTY through pal_register_cache_state().
 *
 * The routine copies rdi, rsi, rdx, rcx, r8 and r9 from the supplied state,
 * zeroes rbp, sets isf.rip and isf.rsp from state->rip and state->rsp, and
 * sets isf.cs to USER64_CS and isf.rflags to EFL_USER_SET.
 *
 * When `thread` is not the current thread, thread_unlock(thread) is called
 * after the update. The matching acquisition, the return type and the return
 * statements are not visible in this extract.
 */
296 thread_set_wq_state64(thread_t thread
, thread_state_t tstate
)
298 x86_thread_state64_t
*state
;
299 x86_saved_state64_t
*saved_state
;
300 thread_t curth
= current_thread();
303 saved_state
= USER_REGS64(thread
);
304 state
= (x86_thread_state64_t
*)tstate
;
306 /* Disallow setting non-canonical PC or stack */
307 if (!IS_USERADDR64_CANONICAL(state
->rsp
) ||
308 !IS_USERADDR64_CANONICAL(state
->rip
)) {
312 pal_register_cache_state(thread
, DIRTY
);
314 if (curth
!= thread
) {
319 saved_state
->rbp
= 0;
320 saved_state
->rdi
= state
->rdi
;
321 saved_state
->rsi
= state
->rsi
;
322 saved_state
->rdx
= state
->rdx
;
323 saved_state
->rcx
= state
->rcx
;
324 saved_state
->r8
= state
->r8
;
325 saved_state
->r9
= state
->r9
;
327 saved_state
->isf
.rip
= state
->rip
;
328 saved_state
->isf
.rsp
= state
->rsp
;
329 saved_state
->isf
.cs
= USER64_CS
;
330 saved_state
->isf
.rflags
= EFL_USER_SET
;
332 if (curth
!= thread
) {
333 thread_unlock(thread
);
341 * Initialize the machine-dependent state for a new thread.
/*
 * machine_thread_create: set up the PCB of a newly created thread.
 * The parameter list is not visible in this extract; the body refers to
 * `thread` and `task`.
 *
 * Visible steps:
 *  - when NCOPY_WINDOWS > 0, calls inval_copy_windows(thread) and clears
 *    thread->machine.physwindow_pte and physwindow_busy;
 *  - if pcb->iss is NULL, asserts the preemption level is 0 and allocates
 *    the save area from iss_zone (what happens when that allocation returns
 *    NULL is not visible here);
 *  - asserts that sizeof(ss_32) + sizeof(ss_64.isf) fits within
 *    sizeof(ss_64), then zeroes sizeof(x86_saved_state_t) bytes at pcb->iss;
 *  - tests task_has_64BitAddr(task): the x86_SAVED_STATE64 assignments set
 *    isf.cs = USER64_CS, isf.ss/fs/gs = USER_DS and
 *    isf.rflags = EFL_USER_SET; the x86_SAVED_STATE32 assignments set
 *    cs = USER_CS, ss/ds/es/fs/gs = USER_DS and efl = EFL_USER_SET
 *    (the `else` keyword between the two groups is not visible here);
 *  - initialises pcb->lock and clears cthread_self, uldt_selector and
 *    thread_gpu_ns;
 *  - if pcb->cthread_desc does not have ACC_P set, copies the USER_DS entry
 *    of the current LDT into it;
 *  - returns KERN_SUCCESS.
 */
344 machine_thread_create(
348 pcb_t pcb
= THREAD_TO_PCB(thread
);
350 #if NCOPY_WINDOWS > 0
351 inval_copy_windows(thread
);
353 thread
->machine
.physwindow_pte
= 0;
354 thread
->machine
.physwindow_busy
= 0;
358 * Allocate save frame only if required.
360 if (pcb
->iss
== NULL
) {
361 assert((get_preemption_level() == 0));
362 pcb
->iss
= (x86_saved_state_t
*) zalloc(iss_zone
);
363 if (pcb
->iss
== NULL
)
368 * Ensure that the synthesized 32-bit state including
369 * the 64-bit interrupt state can be acommodated in the
370 * 64-bit state we allocate for both 32-bit and 64-bit threads.
372 assert(sizeof(pcb
->iss
->ss_32
) + sizeof(pcb
->iss
->ss_64
.isf
) <=
373 sizeof(pcb
->iss
->ss_64
));
375 bzero((char *)pcb
->iss
, sizeof(x86_saved_state_t
));
377 if (task_has_64BitAddr(task
)) {
378 pcb
->iss
->flavor
= x86_SAVED_STATE64
;
380 pcb
->iss
->ss_64
.isf
.cs
= USER64_CS
;
381 pcb
->iss
->ss_64
.isf
.ss
= USER_DS
;
382 pcb
->iss
->ss_64
.fs
= USER_DS
;
383 pcb
->iss
->ss_64
.gs
= USER_DS
;
384 pcb
->iss
->ss_64
.isf
.rflags
= EFL_USER_SET
;
386 pcb
->iss
->flavor
= x86_SAVED_STATE32
;
388 pcb
->iss
->ss_32
.cs
= USER_CS
;
389 pcb
->iss
->ss_32
.ss
= USER_DS
;
390 pcb
->iss
->ss_32
.ds
= USER_DS
;
391 pcb
->iss
->ss_32
.es
= USER_DS
;
392 pcb
->iss
->ss_32
.fs
= USER_DS
;
393 pcb
->iss
->ss_32
.gs
= USER_DS
;
394 pcb
->iss
->ss_32
.efl
= EFL_USER_SET
;
397 simple_lock_init(&pcb
->lock
, 0);
399 pcb
->cthread_self
= 0;
400 pcb
->uldt_selector
= 0;
401 pcb
->thread_gpu_ns
= 0;
402 /* Ensure that the "cthread" descriptor describes a valid
405 if ((pcb
->cthread_desc
.access
& ACC_P
) == 0) {
406 struct real_descriptor
*ldtp
;
407 ldtp
= (struct real_descriptor
*)current_ldt();
408 pcb
->cthread_desc
= ldtp
[sel_idx(USER_DS
)];
411 return(KERN_SUCCESS
);
415 * Machine-dependent cleanup prior to destroying a thread
/*
 * machine_thread_destroy: release the machine-dependent resources held in a
 * thread's PCB. The parameter list is not visible in this extract; the body
 * refers to `thread`.
 *
 * Visible steps:
 *  - if thread->hv_thread_target is set, passes it to
 *    hv_callbacks.thread_destroy() and then clears the pointer;
 *  - calls fpu_free(thread, pcb->ifps);
 *  - returns pcb->iss to iss_zone and pcb->ids to ids_zone with zfree().
 *
 * NOTE(review): any NULL checks or conditional compilation guarding these
 * calls are not visible in this extract -- confirm against the full source.
 */
418 machine_thread_destroy(
421 pcb_t pcb
= THREAD_TO_PCB(thread
);
424 if (thread
->hv_thread_target
) {
425 hv_callbacks
.thread_destroy(thread
->hv_thread_target
);
426 thread
->hv_thread_target
= NULL
;
431 fpu_free(thread
, pcb
->ifps
);
433 zfree(iss_zone
, pcb
->iss
);
437 zfree(ids_zone
, pcb
->ids
);
443 machine_thread_set_tsd_base(
445 mach_vm_offset_t tsd_base
)
448 if (thread
->task
== kernel_task
) {
449 return KERN_INVALID_ARGUMENT
;
452 if (thread_is_64bit(thread
)) {
453 /* check for canonical address, set 0 otherwise */
454 if (!IS_USERADDR64_CANONICAL(tsd_base
))
457 if (tsd_base
> UINT32_MAX
)
461 pcb_t pcb
= THREAD_TO_PCB(thread
);
462 pcb
->cthread_self
= tsd_base
;
464 if (!thread_is_64bit(thread
)) {
465 /* Set up descriptor for later use */
466 struct real_descriptor desc
= {
469 .base_low
= tsd_base
& 0xffff,
470 .base_med
= (tsd_base
>> 16) & 0xff,
471 .base_high
= (tsd_base
>> 24) & 0xff,
472 .access
= ACC_P
|ACC_PL_U
|ACC_DATA_W
,
473 .granularity
= SZ_32
|SZ_G
,
476 pcb
->cthread_desc
= desc
;
477 saved_state32(pcb
->iss
)->gs
= USER_CTHREAD
;
480 /* For current thread, make the TSD base active immediately */
481 if (thread
== current_thread()) {
483 if (thread_is_64bit(thread
)) {
486 mp_disable_preemption();
487 cdp
= current_cpu_datap();
488 if ((cdp
->cpu_uber
.cu_user_gs_base
!= pcb
->cthread_self
) ||
489 (pcb
->cthread_self
!= rdmsr64(MSR_IA32_KERNEL_GS_BASE
)))
490 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, tsd_base
);
491 cdp
->cpu_uber
.cu_user_gs_base
= tsd_base
;
492 mp_enable_preemption();
495 /* assign descriptor */
496 mp_disable_preemption();
497 *ldt_desc_p(USER_CTHREAD
) = pcb
->cthread_desc
;
498 mp_enable_preemption();