2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
57 #include <mach_debug.h>
58 #include <mach_ldebug.h>
60 #include <sys/kdebug.h>
62 #include <mach/kern_return.h>
63 #include <mach/thread_status.h>
64 #include <mach/vm_param.h>
66 #include <kern/counters.h>
67 #include <kern/mach_param.h>
68 #include <kern/processor.h>
69 #include <kern/cpu_data.h>
70 #include <kern/cpu_number.h>
71 #include <kern/task.h>
72 #include <kern/thread.h>
73 #include <kern/sched_prim.h>
74 #include <kern/misc_protos.h>
75 #include <kern/assert.h>
77 #include <kern/machine.h>
78 #include <ipc/ipc_port.h>
79 #include <vm/vm_kern.h>
80 #include <vm/vm_map.h>
82 #include <vm/vm_protos.h>
84 #include <i386/commpage/commpage.h>
85 #include <i386/cpu_data.h>
86 #include <i386/cpu_number.h>
87 #include <i386/cpuid.h>
88 #include <i386/eflags.h>
89 #include <i386/proc_reg.h>
91 #include <i386/user_ldt.h>
93 #include <i386/mp_desc.h>
94 #include <i386/misc_protos.h>
95 #include <i386/thread.h>
97 #include <i386/machine_routines.h>
100 #include <kern/hv_support.h>
103 #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
104 extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
105 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
107 /* Compile-time checks for vital save area sizing: */
108 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t
);
109 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t
);
111 #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
113 extern zone_t iss_zone
; /* zone for saved_state area */
114 extern zone_t ids_zone
; /* zone for debug_state area */
115 extern int tecs_mode_supported
;
116 extern boolean_t cpuid_tsx_supported
;
118 bool lbr_need_tsx_workaround
= false;
120 int force_thread_policy_tecs
;
/*
 * NOTE(review): the next declaration reads like a member of an enclosing
 * struct definition whose other lines are not visible in this listing (an
 * array of X86_MAX_LBRS struct lbr_group entries named msr_lbrs) -- confirm
 * against the full file.
 */
130 struct lbr_group msr_lbrs
[X86_MAX_LBRS
];
/*
 * Pointer to the LBR MSR table in use; starts out NULL.  Elsewhere in this
 * file it is assigned per CPU family in i386_lbr_init and dereferenced for
 * ->lbr_count and ->msr_lbrs[].
 */
133 const struct cpu_lbrs
*cpu_lbr_setp
= NULL
;
/*
 * MSR-number tables for the last-branch-record (LBR) registers.  Each row is
 * { FROM MSR, TO MSR, INFO MSR }, as labelled by the per-entry comments.
 * The first 16 rows (entries 0-15) carry 0 in the INFO column; the following
 * 32 rows (entries 0-31) use INFO MSRs 0xdc0-0xddf.  Code elsewhere in this
 * file tests msr_lbrs[0].msr_info != 0 to decide whether INFO MSRs are
 * saved and restored.
 * NOTE(review): the second run of rows presumably initializes skl_cpu_lbrs
 * (referenced in i386_lbr_init); the lines that supply the LBR counts, close
 * the first initializer and open the second are not visible in this
 * listing -- confirm.
 */
136 const struct cpu_lbrs nhm_cpu_lbrs
= {
139 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ },
140 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ },
141 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ },
142 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ },
143 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ },
144 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ },
145 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ },
146 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ },
147 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ },
148 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ },
149 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ },
150 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ },
151 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ },
152 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ },
153 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ },
154 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ }
160 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ },
161 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ },
162 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ },
163 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ },
164 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ },
165 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ },
166 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ },
167 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ },
168 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ },
169 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ },
170 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ },
171 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ },
172 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ },
173 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ },
174 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ },
175 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ },
176 { 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ },
177 { 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ },
178 { 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ },
179 { 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ },
180 { 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ },
181 { 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ },
182 { 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ },
183 { 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ },
184 { 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ },
185 { 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ },
186 { 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ },
187 { 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ },
188 { 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ },
189 { 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ },
190 { 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ },
191 { 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ }
/*
 * i386_lbr_disable: clear the DEBUGCTL_LBR_ENA bit in MSR_IA32_DEBUGCTLMSR
 * with a read-modify-write (rdmsr64 then wrmsr64); all other bits are written
 * back as read.  Takes no arguments.
 */
196 i386_lbr_disable(void)
199 wrmsr64(MSR_IA32_DEBUGCTLMSR
, rdmsr64(MSR_IA32_DEBUGCTLMSR
) & ~DEBUGCTL_LBR_ENA
);
203 * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic
204 * because the shadow map has not been initialized when these functions are called in
/*
 * i386_lbr_enable: when last_branch_support_enabled is nonzero, set the
 * DEBUGCTL_LBR_ENA bit in MSR_IA32_DEBUGCTLMSR with a read-modify-write.
 * Does nothing otherwise.  Built with address sanitizing turned off for this
 * function.
 */
207 void __attribute__((no_sanitize("address")))
208 i386_lbr_enable(void)
210 if (last_branch_support_enabled
) {
212 wrmsr64(MSR_IA32_DEBUGCTLMSR
, rdmsr64(MSR_IA32_DEBUGCTLMSR
) | DEBUGCTL_LBR_ENA
);
/*
 * i386_lbr_init: last-branch-record (LBR) setup.
 *
 *   info_p    - CPU identification; only cpuid_cpufamily is read here.
 *   is_master - NOTE(review): not referenced by any line visible in this
 *               listing; presumably gates the one-time table selection --
 *               confirm.
 *
 * Visible steps:
 *   - test last_branch_support_enabled (the action taken when it is clear
 *     is not visible here);
 *   - record the LBR format from MSR_IA32_PERF_CAPABILITIES in cpu_lbr_type;
 *   - choose cpu_lbr_setp by CPU family: Nehalem/Westmere, Sandy Bridge/Ivy
 *     Bridge and Haswell/Broadwell use nhm_cpu_lbrs, Skylake/Kaby Lake/Ice
 *     Lake use skl_cpu_lbrs; Haswell/Broadwell also set
 *     lbr_need_tsx_workaround to the inverse of cpuid_tsx_supported;
 *   - a panic call for an unknown CPU family is present (presumably the
 *     default case);
 *   - write LBR_SELECT_CPL_EQ_0 to MSR_IA32_LBR_SELECT and set
 *     DEBUGCTL_LBR_ENA in MSR_IA32_DEBUGCTLMSR.
 * Built with address sanitizing turned off for this function.
 */
216 void __attribute__((no_sanitize("address")))
217 i386_lbr_init(i386_cpu_info_t
*info_p
, bool is_master
)
219 if (!last_branch_support_enabled
) {
225 /* All NHM+ CPUs support PERF_CAPABILITIES, so no need to check cpuid for its presence */
226 cpu_lbr_type
= PERFCAP_LBR_TYPE(rdmsr64(MSR_IA32_PERF_CAPABILITIES
));
228 switch (info_p
->cpuid_cpufamily
) {
229 case CPUFAMILY_INTEL_NEHALEM
:
230 case CPUFAMILY_INTEL_WESTMERE
:
231 /* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */
232 cpu_lbr_setp
= &nhm_cpu_lbrs
;
235 case CPUFAMILY_INTEL_SANDYBRIDGE
:
236 case CPUFAMILY_INTEL_IVYBRIDGE
:
237 /* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */
238 cpu_lbr_setp
= &nhm_cpu_lbrs
;
241 case CPUFAMILY_INTEL_HASWELL
:
242 case CPUFAMILY_INTEL_BROADWELL
:
/* Workaround flag is the logical inverse of cpuid_tsx_supported. */
243 lbr_need_tsx_workaround
= cpuid_tsx_supported
? false : true;
244 cpu_lbr_setp
= &nhm_cpu_lbrs
;
247 case CPUFAMILY_INTEL_SKYLAKE
:
248 case CPUFAMILY_INTEL_KABYLAKE
:
249 case CPUFAMILY_INTEL_ICELAKE
:
250 cpu_lbr_setp
= &skl_cpu_lbrs
;
254 panic("Unknown CPU family");
258 /* Configure LBR_SELECT for CPL > 0 records only */
259 wrmsr64(MSR_IA32_LBR_SELECT
, LBR_SELECT_CPL_EQ_0
);
262 wrmsr64(MSR_IA32_DEBUGCTLMSR
, rdmsr64(MSR_IA32_DEBUGCTLMSR
) | DEBUGCTL_LBR_ENA
);
/*
 * i386_lbr_native_state_to_mach_thread_state: convert the LBR records saved
 * in a PCB (pcb->lbrs) into the exported last_branch_state_t at *machlbrp.
 *
 *   pcb      - source; pcb->lbrs.lbr_tos and pcb->lbrs.lbrs[] are read.
 *   machlbrp - destination; lbr_count, the lbr_supported_* flags and
 *              lbrs[] are written.
 *
 * - lbr_count is copied from cpu_lbr_setp->lbr_count.
 * - Records are walked starting at the saved top-of-stack index, with i
 *   stepping downward and wrapping from 0 to lbr_count - 1 while the output
 *   index j counts up from 0.  last_entry is the slot just above the TOS
 *   (wrapping to 0).
 * - Any from/to address greater than VM_MAX_USER_PAGE_ADDRESS is replaced
 *   by LBR_SENTINEL_KERNEL_MODE.
 * - Field decoding depends on cpu_lbr_type:
 *     MISPRED:          address and mispredict bit come from from_rip;
 *                       the TSX fields are forced to 0.
 *     TSXINFO:          address and mispredict bit come from from_rip;
 *                       TSX fields are decoded only if cpuid_tsx_supported.
 *     EIP_WITH_LBRINFO: mispredict, TSX fields and cycle count come from
 *                       the separate info word.
 * - A panic for an unknown format is present under DEBUG || DEVELOPMENT.
 *
 * NOTE(review): lbr_tos is masked with X86_MAX_LBRS - 1 rather than
 * lbr_count - 1, and the for loops have no condition expression, so they
 * rely on i reaching last_entry (the exit statement itself is not visible
 * in this listing).  Confirm that a saved TOS >= lbr_count cannot occur.
 */
266 i386_lbr_native_state_to_mach_thread_state(pcb_t pcb
, last_branch_state_t
*machlbrp
)
270 uint64_t from_rip
, to_rip
;
271 #define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ )
273 machlbrp
->lbr_count
= cpu_lbr_setp
->lbr_count
;
274 lbr_tos
= pcb
->lbrs
.lbr_tos
& (X86_MAX_LBRS
- 1);
275 last_entry
= (lbr_tos
== (cpu_lbr_setp
->lbr_count
- 1)) ? 0 : (lbr_tos
+ 1);
277 switch (cpu_lbr_type
) {
278 case PERFCAP_LBR_TYPE_MISPRED
: /* NHM */
280 machlbrp
->lbr_supported_tsx
= 0;
281 machlbrp
->lbr_supported_cycle_count
= 0;
282 for (j
= 0, i
= lbr_tos
;; (i
= (i
== 0) ? (cpu_lbr_setp
->lbr_count
- 1) : (i
- 1)), j
++) {
283 to_rip
= pcb
->lbrs
.lbrs
[i
].to_rip
;
284 machlbrp
->lbrs
[j
].to_ip
= (to_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: to_rip
;
285 from_rip
= LBR_TYPE_MISPRED_FROMRIP(pcb
->lbrs
.lbrs
[i
].from_rip
);
286 machlbrp
->lbrs
[j
].from_ip
= (from_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: from_rip
;
287 machlbrp
->lbrs
[j
].mispredict
= LBR_TYPE_MISPRED_MISPREDICT(pcb
->lbrs
.lbrs
[i
].from_rip
);
288 machlbrp
->lbrs
[j
].tsx_abort
= machlbrp
->lbrs
[j
].in_tsx
= 0; /* Not Supported */
289 if (i
== last_entry
) {
/* TSXINFO format: TSX bits share the from_rip word with the address. */
295 case PERFCAP_LBR_TYPE_TSXINFO
: /* HSW/BDW */
297 machlbrp
->lbr_supported_tsx
= cpuid_tsx_supported
? 1 : 0;
298 machlbrp
->lbr_supported_cycle_count
= 0;
299 for (j
= 0, i
= lbr_tos
;; (i
= (i
== 0) ? (cpu_lbr_setp
->lbr_count
- 1) : (i
- 1)), j
++) {
300 to_rip
= pcb
->lbrs
.lbrs
[i
].to_rip
;
301 machlbrp
->lbrs
[j
].to_ip
= (to_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: to_rip
;
303 from_rip
= LBR_TYPE_TSXINFO_FROMRIP(pcb
->lbrs
.lbrs
[i
].from_rip
);
304 machlbrp
->lbrs
[j
].from_ip
= (from_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: from_rip
;
305 machlbrp
->lbrs
[j
].mispredict
= LBR_TYPE_TSXINFO_MISPREDICT(pcb
->lbrs
.lbrs
[i
].from_rip
);
306 if (cpuid_tsx_supported
) {
307 machlbrp
->lbrs
[j
].tsx_abort
= LBR_TYPE_TSXINFO_TSX_ABORT(pcb
->lbrs
.lbrs
[i
].from_rip
);
308 machlbrp
->lbrs
[j
].in_tsx
= LBR_TYPE_TSXINFO_IN_TSX(pcb
->lbrs
.lbrs
[i
].from_rip
);
310 machlbrp
->lbrs
[j
].tsx_abort
= 0;
311 machlbrp
->lbrs
[j
].in_tsx
= 0;
313 if (i
== last_entry
) {
/* LBRINFO format: flags and cycle count live in the separate info word. */
319 case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO
: /* SKL+ */
321 machlbrp
->lbr_supported_tsx
= cpuid_tsx_supported
? 1 : 0;
322 machlbrp
->lbr_supported_cycle_count
= 1;
323 for (j
= 0, i
= lbr_tos
;; (i
= (i
== 0) ? (cpu_lbr_setp
->lbr_count
- 1) : (i
- 1)), j
++) {
324 from_rip
= pcb
->lbrs
.lbrs
[i
].from_rip
;
325 machlbrp
->lbrs
[j
].from_ip
= (from_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: from_rip
;
326 to_rip
= pcb
->lbrs
.lbrs
[i
].to_rip
;
327 machlbrp
->lbrs
[j
].to_ip
= (to_rip
> VM_MAX_USER_PAGE_ADDRESS
) ? LBR_SENTINEL_KERNEL_MODE
: to_rip
;
328 machlbrp
->lbrs
[j
].mispredict
= LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb
->lbrs
.lbrs
[i
].info
);
329 machlbrp
->lbrs
[j
].tsx_abort
= LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb
->lbrs
.lbrs
[i
].info
);
330 machlbrp
->lbrs
[j
].in_tsx
= LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb
->lbrs
.lbrs
[i
].info
);
331 machlbrp
->lbrs
[j
].cycle_count
= LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb
->lbrs
.lbrs
[i
].info
);
332 if (i
== last_entry
) {
339 #if DEBUG || DEVELOPMENT
340 panic("Unknown LBR format: %d!", cpu_lbr_type
);
/*
 * i386_lbr_synch: copy the LBR MSRs into the PCB of thread thr.
 *
 * For each of the cpu_lbr_setp->lbr_count entries the FROM and TO MSRs are
 * read with rdmsr64 into thr PCB lbrs.lbrs[i].  When the active table has a
 * nonzero INFO MSR number in entry 0, the INFO MSR of every entry is read
 * as well; the second loop below reads FROM and TO only (presumably the
 * else branch -- the else line is not visible in this listing).
 * Last, MSR_IA32_LASTBRANCH_TOS is stored in lbrs.lbr_tos.
 */
351 i386_lbr_synch(thread_t thr
)
353 pcb_t old_pcb
= THREAD_TO_PCB(thr
);
356 /* First, save current LBRs to the old thread's PCB */
357 if (cpu_lbr_setp
->msr_lbrs
[0].msr_info
!= 0) {
358 for (i
= 0; i
< cpu_lbr_setp
->lbr_count
; i
++) {
359 old_pcb
->lbrs
.lbrs
[i
].from_rip
= rdmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_from
);
360 old_pcb
->lbrs
.lbrs
[i
].to_rip
= rdmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_to
);
361 old_pcb
->lbrs
.lbrs
[i
].info
= rdmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_info
);
364 for (i
= 0; i
< cpu_lbr_setp
->lbr_count
; i
++) {
365 old_pcb
->lbrs
.lbrs
[i
].from_rip
= rdmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_from
);
366 old_pcb
->lbrs
.lbrs
[i
].to_rip
= rdmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_to
);
370 /* Finally, save the TOS */
371 old_pcb
->lbrs
.lbr_tos
= rdmsr64(MSR_IA32_LASTBRANCH_TOS
);
/*
 * i386_switch_lbrs: LBR handling for a switch from thread old to thread new.
 *
 *   old - outgoing thread, may be NULL.
 *   new - incoming thread.
 *
 * save_old is true when old is non-NULL and does not belong to kernel_task;
 * restore_new is true when new does not belong to kernel_task.  The
 * statements these flags control (the early-out when both are false, and
 * the save of the old thread LBRs) are not visible in this listing --
 * NOTE(review): confirm against the full file.
 *
 * Restore path, as visible here:
 *   - tables with INFO MSRs: write FROM, TO and INFO for every entry from
 *     the new PCB;
 *   - tables without INFO MSRs and lbr_need_tsx_workaround set: write FROM
 *     and TO, ORing 0x6000000000000000 into each value that has bit 47 set;
 *   - otherwise: write FROM and TO unchanged;
 *   - finally write MSR_IA32_LASTBRANCH_TOS from the new PCB.
 * cpu_lbr_setp is asserted to be non-NULL before it is dereferenced.
 */
375 i386_switch_lbrs(thread_t old
, thread_t
new)
379 bool save_old
= (old
!= NULL
&& old
->task
!= kernel_task
);
380 bool restore_new
= (new->task
!= kernel_task
);
382 if (!save_old
&& !restore_new
) {
386 assert(cpu_lbr_setp
!= NULL
);
388 new_pcb
= THREAD_TO_PCB(new);
397 /* Now restore the new threads's LBRs */
398 if (cpu_lbr_setp
->msr_lbrs
[0].msr_info
!= 0) {
399 for (i
= 0; i
< cpu_lbr_setp
->lbr_count
; i
++) {
400 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_from
, new_pcb
->lbrs
.lbrs
[i
].from_rip
);
401 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_to
, new_pcb
->lbrs
.lbrs
[i
].to_rip
);
402 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_info
, new_pcb
->lbrs
.lbrs
[i
].info
);
405 if (lbr_need_tsx_workaround
) {
406 for (i
= 0; i
< cpu_lbr_setp
->lbr_count
; i
++) {
408 * If TSX has been disabled, the hardware expects those two bits to be sign
409 * extensions of bit 47 (even though it didn't return them that way via the rdmsr!)
411 #define BIT_47 (1ULL << 47)
412 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_from
,
413 new_pcb
->lbrs
.lbrs
[i
].from_rip
|
414 ((new_pcb
->lbrs
.lbrs
[i
].from_rip
& BIT_47
) ? 0x6000000000000000ULL
: 0));
415 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_to
,
416 new_pcb
->lbrs
.lbrs
[i
].to_rip
|
417 ((new_pcb
->lbrs
.lbrs
[i
].to_rip
& BIT_47
) ? 0x6000000000000000ULL
: 0));
420 for (i
= 0; i
< cpu_lbr_setp
->lbr_count
; i
++) {
421 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_from
, new_pcb
->lbrs
.lbrs
[i
].from_rip
);
422 wrmsr64(cpu_lbr_setp
->msr_lbrs
[i
].msr_to
, new_pcb
->lbrs
.lbrs
[i
].to_rip
);
427 /* Lastly, restore the new threads's TOS */
428 wrmsr64(MSR_IA32_LASTBRANCH_TOS
, new_pcb
->lbrs
.lbr_tos
);
/*
 * act_machine_switch_pcb: install the machine-dependent per-CPU state for
 * thread new on the current CPU (cdp = current_cpu_datap()).
 *
 *   old - outgoing thread; in the visible lines it is only passed on to
 *         i386_switch_lbrs.
 *   new - incoming thread; it must have a kernel stack and interrupts must
 *         be disabled (both asserted).
 *
 * Steps visible in this listing:
 *   - when GS is not NULL_SEG, swapgs to the user GS context and back, and
 *     zero the cached cu_user_gs_base;
 *   - publish the address of the 64-bit interrupt stack frame inside the
 *     PCB save area in cpu_uber.cu_isf; assert that the address just past
 *     pcb->iss is 16-byte aligned and store it through current_sstk64();
 *   - load rsp0 of the 64-bit TSS, and cd_estack on both cdp and its shadow
 *     copy, from cpu_desc_index.cdi_sstku;
 *   - 64-bit saved state: take cpu_task_map from the pmap of the new map,
 *     grant ACC_PL_U to USER64_CS and remove it from USER_CS, and rewrite
 *     MSR_IA32_KERNEL_GS_BASE with pcb->cthread_self when either the cached
 *     base or the MSR differs from it;
 *   - remaining lines (presumably the 32-bit branch; the else line is not
 *     visible): cpu_task_map = TASK_MAP_32BIT, the code-segment privileges
 *     are flipped the other way, the USER_CTHREAD descriptor and the
 *     optional uldt_selector descriptor are copied into the current LDT,
 *     and cu_user_gs_base is set to pcb->cthread_self;
 *   - copy mthr_do_segchk into cpu_curthread_do_segchk, call
 *     i386_switch_lbrs when last_branch_support_enabled, then select
 *     KERNEL_LDT or record that the task has its own LDT.
 */
435 act_machine_switch_pcb(thread_t old
, thread_t
new)
437 pcb_t pcb
= THREAD_TO_PCB(new);
438 cpu_data_t
*cdp
= current_cpu_datap();
439 struct real_descriptor
*ldtp
;
440 mach_vm_offset_t pcb_stack_top
;
442 assert(new->kernel_stack
!= 0);
443 assert(ml_get_interrupts_enabled() == FALSE
);
/*
 * NOTE(review): DIRECTION_FLAG_DEBUG is #defined unconditionally earlier in
 * this file as (DEBUG | DEVELOPMENT), so this #ifdef is always true and the
 * direction-flag check is compiled into every build; #if was probably
 * intended -- confirm.
 */
444 #ifdef DIRECTION_FLAG_DEBUG
445 if (x86_get_flags() & EFL_DF
) {
446 panic("Direction flag detected: 0x%lx", x86_get_flags());
451 * Clear segment state
452 * unconditionally for DS/ES/FS but more carefully for GS whose
453 * cached state we track.
459 if (get_gs() != NULL_SEG
) {
460 swapgs(); /* switch to user's GS context */
462 swapgs(); /* and back to kernel */
464 /* record the active machine state lost */
465 cdp
->cpu_uber
.cu_user_gs_base
= 0;
471 * Set pointer to PCB's interrupt stack frame in cpu data.
472 * Used by syscall and double-fault trap handlers.
474 isf
= (vm_offset_t
) &pcb
->iss
->ss_64
.isf
;
475 cdp
->cpu_uber
.cu_isf
= isf
;
476 pcb_stack_top
= (vm_offset_t
) (pcb
->iss
+ 1);
477 /* require 16-byte alignment */
478 assert((pcb_stack_top
& 0xF) == 0);
480 current_ktss64()->rsp0
= cdp
->cpu_desc_index
.cdi_sstku
;
482 * Top of temporary sysenter stack points to pcb stack.
483 * Although this is not normally used by 64-bit users,
484 * it needs to be set in case a sysenter is attempted.
486 *current_sstk64() = pcb_stack_top
;
488 cdp
->cd_estack
= cpu_shadowp(cdp
->cpu_number
)->cd_estack
= cdp
->cpu_desc_index
.cdi_sstku
;
490 if (is_saved_state64(pcb
->iss
)) {
491 cdp
->cpu_task_map
= new->map
->pmap
->pm_task_map
;
494 * Enable the 64-bit user code segment, USER64_CS.
495 * Disable the 32-bit user code segment, USER_CS.
497 gdt_desc_p(USER64_CS
)->access
|= ACC_PL_U
;
498 gdt_desc_p(USER_CS
)->access
&= ~ACC_PL_U
;
501 * Switch user's GS base if necessary
502 * by setting the Kernel's GS base MSR
503 * - this will become the user's on the swapgs when
504 * returning to user-space. Avoid this for
505 * kernel threads (no user TLS support required)
506 * and verify the memory shadow of the segment base
507 * in the event it was altered in user space.
509 if ((pcb
->cthread_self
!= 0) || (new->task
!= kernel_task
)) {
510 if ((cdp
->cpu_uber
.cu_user_gs_base
!= pcb
->cthread_self
) ||
511 (pcb
->cthread_self
!= rdmsr64(MSR_IA32_KERNEL_GS_BASE
))) {
512 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
513 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, pcb
->cthread_self
);
517 cdp
->cpu_task_map
= TASK_MAP_32BIT
;
524 /* It's possible that writing to the GDT areas
525 * is expensive, if the processor intercepts those
526 * writes to invalidate its internal segment caches
527 * TODO: perhaps only do this if switching bitness
529 gdt_desc_p(USER64_CS
)->access
&= ~ACC_PL_U
;
530 gdt_desc_p(USER_CS
)->access
|= ACC_PL_U
;
533 * Set the thread`s cthread (a.k.a pthread)
534 * For 32-bit user this involves setting the USER_CTHREAD
535 * descriptor in the LDT to point to the cthread data.
536 * The involves copying in the pre-initialized descriptor.
538 ldtp
= current_ldt();
539 ldtp
[sel_idx(USER_CTHREAD
)] = pcb
->cthread_desc
;
540 if (pcb
->uldt_selector
!= 0) {
541 ldtp
[sel_idx(pcb
->uldt_selector
)] = pcb
->uldt_desc
;
543 cdp
->cpu_uber
.cu_user_gs_base
= pcb
->cthread_self
;
546 cdp
->cpu_curthread_do_segchk
= new->machine
.mthr_do_segchk
;
548 if (last_branch_support_enabled
) {
549 i386_switch_lbrs(old
, new);
553 * Set the thread`s LDT or LDT entry.
555 if (__probable(new->task
== TASK_NULL
|| new->task
->i386_ldt
== 0)) {
559 ml_cpu_set_ldt(KERNEL_LDT
);
560 cdp
->cpu_curtask_has_ldt
= 0;
563 * Task has its own LDT.
566 cdp
->cpu_curtask_has_ldt
= 1;
/*
 * thread_set_wq_state32: load the 32-bit user register state of thread from
 * the x86_thread_state32_t passed in tstate.
 *
 * Marks the register cache DIRTY, then copies eip, eax, ebx, ecx, edx, edi,
 * esi and esp (stored as uesp) into the saved state, zeroes ebp, sets efl
 * to EFL_USER_SET, cs to USER_CS and ss/ds/es to USER_DS.
 * When thread is not the current thread the update is followed by
 * thread_unlock(thread).  NOTE(review): the matching lock acquisition and
 * the return statement are not visible in this listing -- confirm.
 */
571 thread_set_wq_state32(thread_t thread
, thread_state_t tstate
)
573 x86_thread_state32_t
*state
;
574 x86_saved_state32_t
*saved_state
;
575 thread_t curth
= current_thread();
578 pal_register_cache_state(thread
, DIRTY
);
580 saved_state
= USER_REGS32(thread
);
582 state
= (x86_thread_state32_t
*)tstate
;
584 if (curth
!= thread
) {
589 saved_state
->ebp
= 0;
590 saved_state
->eip
= state
->eip
;
591 saved_state
->eax
= state
->eax
;
592 saved_state
->ebx
= state
->ebx
;
593 saved_state
->ecx
= state
->ecx
;
594 saved_state
->edx
= state
->edx
;
595 saved_state
->edi
= state
->edi
;
596 saved_state
->esi
= state
->esi
;
597 saved_state
->uesp
= state
->esp
;
598 saved_state
->efl
= EFL_USER_SET
;
600 saved_state
->cs
= USER_CS
;
601 saved_state
->ss
= USER_DS
;
602 saved_state
->ds
= USER_DS
;
603 saved_state
->es
= USER_DS
;
605 if (curth
!= thread
) {
606 thread_unlock(thread
);
/*
 * thread_set_wq_state64: load the 64-bit user register state of thread from
 * the x86_thread_state64_t passed in tstate.
 *
 * The requested rsp and rip are checked with IS_USERADDR64_CANONICAL before
 * anything is modified; what is returned on failure is not visible in this
 * listing.  The register cache is then marked DIRTY and rdi, rsi, rdx, rcx,
 * r8 and r9 are copied, rbp is zeroed, and the interrupt stack frame gets
 * rip and rsp from the request, cs = USER64_CS and rflags = EFL_USER_SET.
 * When thread is not the current thread the update is followed by
 * thread_unlock(thread).  NOTE(review): the matching lock acquisition is
 * not visible in this listing -- confirm.
 */
615 thread_set_wq_state64(thread_t thread
, thread_state_t tstate
)
617 x86_thread_state64_t
*state
;
618 x86_saved_state64_t
*saved_state
;
619 thread_t curth
= current_thread();
622 saved_state
= USER_REGS64(thread
);
623 state
= (x86_thread_state64_t
*)tstate
;
625 /* Disallow setting non-canonical PC or stack */
626 if (!IS_USERADDR64_CANONICAL(state
->rsp
) ||
627 !IS_USERADDR64_CANONICAL(state
->rip
)) {
631 pal_register_cache_state(thread
, DIRTY
);
633 if (curth
!= thread
) {
638 saved_state
->rbp
= 0;
639 saved_state
->rdi
= state
->rdi
;
640 saved_state
->rsi
= state
->rsi
;
641 saved_state
->rdx
= state
->rdx
;
642 saved_state
->rcx
= state
->rcx
;
643 saved_state
->r8
= state
->r8
;
644 saved_state
->r9
= state
->r9
;
646 saved_state
->isf
.rip
= state
->rip
;
647 saved_state
->isf
.rsp
= state
->rsp
;
648 saved_state
->isf
.cs
= USER64_CS
;
649 saved_state
->isf
.rflags
= EFL_USER_SET
;
651 if (curth
!= thread
) {
652 thread_unlock(thread
);
660 * Initialize the machine-dependent state for a new thread.
/*
 * machine_thread_create: set up the PCB of a new thread.
 *
 * The parameter list is not visible in this listing; the body uses thread
 * and task.
 *
 *   - mthr_do_segchk is set to 1 when the task has TF_TECS or
 *     force_thread_policy_tecs is nonzero; an assignment of 0 follows
 *     (presumably the else branch).
 *   - If pcb->iss is NULL a save area is allocated from iss_zone, with the
 *     preemption level asserted to be 0; the handling of a NULL result is
 *     not visible here.
 *   - The save area and pcb->lbrs are zeroed.
 *   - Tasks with 64-bit addresses get flavor x86_SAVED_STATE64 with
 *     cs = USER64_CS, ss/fs/gs = USER_DS and rflags = EFL_USER_SET; the
 *     remaining assignments set flavor x86_SAVED_STATE32 with cs = USER_CS,
 *     ss/ds/es/fs/gs = USER_DS and efl = EFL_USER_SET.
 *   - pcb->lock is initialized; cthread_self, uldt_selector and
 *     thread_gpu_ns are zeroed.
 *   - If cthread_desc lacks ACC_P it is replaced by a copy of the USER_DS
 *     GDT descriptor.
 *   - insn_state_copyin_failure_errorcode is cleared; an existing
 *     insn_state buffer is zeroed and its insn_stream_valid_bytes set to -1.
 */
663 machine_thread_create(
667 pcb_t pcb
= THREAD_TO_PCB(thread
);
669 if ((task
->t_flags
& TF_TECS
) || __improbable(force_thread_policy_tecs
)) {
670 thread
->machine
.mthr_do_segchk
= 1;
672 thread
->machine
.mthr_do_segchk
= 0;
676 * Allocate save frame only if required.
678 if (pcb
->iss
== NULL
) {
679 assert((get_preemption_level() == 0));
680 pcb
->iss
= (x86_saved_state_t
*) zalloc(iss_zone
);
681 if (pcb
->iss
== NULL
) {
687 * Ensure that the synthesized 32-bit state including
688 * the 64-bit interrupt state can be acommodated in the
689 * 64-bit state we allocate for both 32-bit and 64-bit threads.
691 assert(sizeof(pcb
->iss
->ss_32
) + sizeof(pcb
->iss
->ss_64
.isf
) <=
692 sizeof(pcb
->iss
->ss_64
));
694 bzero((char *)pcb
->iss
, sizeof(x86_saved_state_t
));
696 bzero(&pcb
->lbrs
, sizeof(x86_lbrs_t
));
698 if (task_has_64Bit_addr(task
)) {
699 pcb
->iss
->flavor
= x86_SAVED_STATE64
;
701 pcb
->iss
->ss_64
.isf
.cs
= USER64_CS
;
702 pcb
->iss
->ss_64
.isf
.ss
= USER_DS
;
703 pcb
->iss
->ss_64
.fs
= USER_DS
;
704 pcb
->iss
->ss_64
.gs
= USER_DS
;
705 pcb
->iss
->ss_64
.isf
.rflags
= EFL_USER_SET
;
707 pcb
->iss
->flavor
= x86_SAVED_STATE32
;
709 pcb
->iss
->ss_32
.cs
= USER_CS
;
710 pcb
->iss
->ss_32
.ss
= USER_DS
;
711 pcb
->iss
->ss_32
.ds
= USER_DS
;
712 pcb
->iss
->ss_32
.es
= USER_DS
;
713 pcb
->iss
->ss_32
.fs
= USER_DS
;
714 pcb
->iss
->ss_32
.gs
= USER_DS
;
715 pcb
->iss
->ss_32
.efl
= EFL_USER_SET
;
718 simple_lock_init(&pcb
->lock
, 0);
720 pcb
->cthread_self
= 0;
721 pcb
->uldt_selector
= 0;
722 pcb
->thread_gpu_ns
= 0;
723 /* Ensure that the "cthread" descriptor describes a valid
726 if ((pcb
->cthread_desc
.access
& ACC_P
) == 0) {
727 pcb
->cthread_desc
= *gdt_desc_p(USER_DS
);
731 pcb
->insn_state_copyin_failure_errorcode
= 0;
732 if (pcb
->insn_state
!= 0) { /* Reinit for new thread */
733 bzero(pcb
->insn_state
, sizeof(x86_instruction_state_t
));
734 pcb
->insn_state
->insn_stream_valid_bytes
= -1;
741 * Machine-dependent cleanup prior to destroying a thread
/*
 * machine_thread_destroy: release the machine-dependent resources that hang
 * off the PCB of a thread.
 *
 * The parameter list is not visible in this listing; the body uses thread.
 *
 *   - If the thread has an hv_thread_target, hand it to
 *     hv_callbacks.thread_destroy and clear the pointer.
 *   - If pcb->ifps is set, release it with fpu_free.
 *   - Return pcb->iss to iss_zone and pcb->ids to ids_zone; any guards
 *     around these two calls are not visible here (NOTE(review): confirm).
 *   - If pcb->insn_state is set, kfree it.
 *   - Clear insn_state_copyin_failure_errorcode.
 */
744 machine_thread_destroy(
747 pcb_t pcb
= THREAD_TO_PCB(thread
);
750 if (thread
->hv_thread_target
) {
751 hv_callbacks
.thread_destroy(thread
->hv_thread_target
);
752 thread
->hv_thread_target
= NULL
;
756 if (pcb
->ifps
!= 0) {
757 fpu_free(thread
, pcb
->ifps
);
760 zfree(iss_zone
, pcb
->iss
);
764 zfree(ids_zone
, pcb
->ids
);
768 if (pcb
->insn_state
!= 0) {
769 kfree(pcb
->insn_state
, sizeof(x86_instruction_state_t
));
772 pcb
->insn_state_copyin_failure_errorcode
= 0;
/*
 * machine_thread_set_tsd_base: set the thread-specific-data base of a thread.
 *
 * The first parameter line is not visible in this listing; the body uses
 * thread.  tsd_base is the new base address.
 *
 *   - Threads of kernel_task are rejected with KERN_INVALID_ARGUMENT.
 *   - For 64-bit threads tsd_base is checked with IS_USERADDR64_CANONICAL
 *     (the existing comment says a non-canonical value is set to 0); a
 *     second check tests tsd_base > UINT32_MAX, presumably for 32-bit
 *     threads.  The actions taken by both checks are not visible here.
 *   - pcb->cthread_self is set to tsd_base.
 *   - For 32-bit threads a user-writable data descriptor whose base is
 *     tsd_base is stored in pcb->cthread_desc and the saved gs selector
 *     becomes USER_CTHREAD.
 *   - If thread is the current thread the change is applied at once with
 *     preemption disabled: 64-bit threads rewrite MSR_IA32_KERNEL_GS_BASE
 *     when the cached base or the MSR differs from pcb->cthread_self and
 *     then update cu_user_gs_base; the last lines (presumably the 32-bit
 *     branch) copy pcb->cthread_desc into the USER_CTHREAD LDT slot.
 */
776 machine_thread_set_tsd_base(
778 mach_vm_offset_t tsd_base
)
780 if (thread
->task
== kernel_task
) {
781 return KERN_INVALID_ARGUMENT
;
784 if (thread_is_64bit_addr(thread
)) {
785 /* check for canonical address, set 0 otherwise */
786 if (!IS_USERADDR64_CANONICAL(tsd_base
)) {
790 if (tsd_base
> UINT32_MAX
) {
795 pcb_t pcb
= THREAD_TO_PCB(thread
);
796 pcb
->cthread_self
= tsd_base
;
798 if (!thread_is_64bit_addr(thread
)) {
799 /* Set up descriptor for later use */
800 struct real_descriptor desc
= {
803 .base_low
= tsd_base
& 0xffff,
804 .base_med
= (tsd_base
>> 16) & 0xff,
805 .base_high
= (tsd_base
>> 24) & 0xff,
806 .access
= ACC_P
| ACC_PL_U
| ACC_DATA_W
,
807 .granularity
= SZ_32
| SZ_G
,
810 pcb
->cthread_desc
= desc
;
811 saved_state32(pcb
->iss
)->gs
= USER_CTHREAD
;
814 /* For current thread, make the TSD base active immediately */
815 if (thread
== current_thread()) {
816 if (thread_is_64bit_addr(thread
)) {
819 mp_disable_preemption();
820 cdp
= current_cpu_datap();
821 if ((cdp
->cpu_uber
.cu_user_gs_base
!= pcb
->cthread_self
) ||
822 (pcb
->cthread_self
!= rdmsr64(MSR_IA32_KERNEL_GS_BASE
))) {
823 wrmsr64(MSR_IA32_KERNEL_GS_BASE
, tsd_base
);
825 cdp
->cpu_uber
.cu_user_gs_base
= tsd_base
;
826 mp_enable_preemption();
828 /* assign descriptor */
829 mp_disable_preemption();
830 *ldt_desc_p(USER_CTHREAD
) = pcb
->cthread_desc
;
831 mp_enable_preemption();
/*
 * machine_tecs: when tecs_mode_supported is nonzero, set
 * thr->machine.mthr_do_segchk to 1.  Nothing is changed otherwise.
 */
839 machine_tecs(thread_t thr
)
841 if (tecs_mode_supported
) {
842 thr
->machine
.mthr_do_segchk
= 1;
847 machine_csv(cpuvn_e cve
)
851 return (cpuid_wa_required(CPU_INTEL_SEGCHK
) & CWA_ON
) != 0;