]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/i386/pcb_native.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / osfmk / i386 / pcb_native.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#include <mach_debug.h>
58#include <mach_ldebug.h>
59
60#include <sys/kdebug.h>
61
62#include <mach/kern_return.h>
63#include <mach/thread_status.h>
64#include <mach/vm_param.h>
65
66#include <kern/counters.h>
67#include <kern/mach_param.h>
68#include <kern/processor.h>
69#include <kern/cpu_data.h>
70#include <kern/cpu_number.h>
71#include <kern/task.h>
72#include <kern/thread.h>
73#include <kern/sched_prim.h>
74#include <kern/misc_protos.h>
75#include <kern/assert.h>
76#include <kern/spl.h>
77#include <kern/machine.h>
78#include <ipc/ipc_port.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_map.h>
81#include <vm/pmap.h>
82#include <vm/vm_protos.h>
83
84#include <i386/commpage/commpage.h>
85#include <i386/cpu_data.h>
86#include <i386/cpu_number.h>
87#include <i386/cpuid.h>
88#include <i386/eflags.h>
89#include <i386/proc_reg.h>
90#include <i386/tss.h>
91#include <i386/user_ldt.h>
92#include <i386/fpu.h>
93#include <i386/mp_desc.h>
94#include <i386/misc_protos.h>
95#include <i386/thread.h>
96#include <i386/seg.h>
97#include <i386/machine_routines.h>
98
99#if HYPERVISOR
100#include <kern/hv_support.h>
101#endif
102
/*
 * Compile-time size assertion: declares an extern char array whose length is
 * 1 when sizeof(_type_) is a multiple of 16 and -1 otherwise, so a mis-sized
 * type fails to compile.
 */
103#define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
104extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
105 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
106
107/* Compile-time checks for vital save area sizing: */
108ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
109ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);
110
/*
 * NOTE(review): this macro is defined unconditionally, so the
 * `#ifdef DIRECTION_FLAG_DEBUG` test in act_machine_switch_pcb() is true in
 * every build configuration. `#if DIRECTION_FLAG_DEBUG` may have been the
 * intent -- confirm before changing, since that would drop the check from
 * builds where both DEBUG and DEVELOPMENT are 0.
 */
111#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
112
113extern zone_t iss_zone; /* zone for saved_state area */
114extern zone_t ids_zone; /* zone for debug_state area */
115extern int tecs_mode_supported; /* nonzero lets machine_tecs() turn on mthr_do_segchk */
116extern boolean_t cpuid_tsx_supported; /* consulted when initializing, decoding and reporting LBRs */
117
/*
 * Set by i386_lbr_init() on Haswell/Broadwell when cpuid_tsx_supported is
 * false; consulted by i386_switch_lbrs() when reloading the FROM/TO MSRs.
 */
118bool lbr_need_tsx_workaround = false;
119
/* When nonzero, machine_thread_create() sets mthr_do_segchk on every new thread */
120int force_thread_policy_tecs;
121
/*
 * MSR numbers for one last-branch record slot. The values are handed to
 * rdmsr64()/wrmsr64() by i386_lbr_synch() and i386_switch_lbrs().
 */
122struct lbr_group {
123 uint32_t msr_from; /* MSR number of the FROM_n register */
124 uint32_t msr_to; /* MSR number of the TO_n register */
125 uint32_t msr_info; /* MSR number of the INFO_n register; 0 in tables that have no INFO MSRs */
126};
127
/*
 * Description of a CPU's LBR stack: how many slots are in use and which
 * MSRs back each slot.
 */
128struct cpu_lbrs {
129 uint32_t lbr_count; /* number of valid entries in msr_lbrs[] */
130 struct lbr_group msr_lbrs[X86_MAX_LBRS]; /* per-slot MSR numbers; only the first lbr_count are used */
131};
132
/* LBR MSR table in use; stays NULL until i386_lbr_init() picks one on the master CPU */
133const struct cpu_lbrs *cpu_lbr_setp = NULL;
/* LBR record format, extracted with PERFCAP_LBR_TYPE() from IA32_PERF_CAPABILITIES in i386_lbr_init() */
134int cpu_lbr_type;
135
136const struct cpu_lbrs nhm_cpu_lbrs = {
137 16 /* LBR count */,
138 {
139 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ },
140 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ },
141 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ },
142 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ },
143 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ },
144 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ },
145 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ },
146 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ },
147 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ },
148 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ },
149 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ },
150 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ },
151 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ },
152 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ },
153 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ },
154 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ }
155 }
156},
157 skl_cpu_lbrs = {
158 32 /* LBR count */,
159 {
160 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ },
161 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ },
162 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ },
163 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ },
164 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ },
165 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ },
166 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ },
167 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ },
168 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ },
169 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ },
170 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ },
171 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ },
172 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ },
173 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ },
174 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ },
175 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ },
176 { 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ },
177 { 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ },
178 { 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ },
179 { 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ },
180 { 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ },
181 { 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ },
182 { 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ },
183 { 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ },
184 { 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ },
185 { 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ },
186 { 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ },
187 { 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ },
188 { 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ },
189 { 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ },
190 { 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ },
191 { 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ }
192 }
193};
194
195void
196i386_lbr_disable(void)
197{
198 /* Enable LBRs */
199 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) & ~DEBUGCTL_LBR_ENA);
200}
201
202/*
203 * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic
204 * because the shadow map is not been initialized when these functions are called in
205 * early boot.
206 */
207void __attribute__((no_sanitize("address")))
208i386_lbr_enable(void)
209{
210 if (last_branch_support_enabled) {
211 /* Enable LBRs */
212 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
213 }
214}
215
216void __attribute__((no_sanitize("address")))
217i386_lbr_init(i386_cpu_info_t *info_p, bool is_master)
218{
219 if (!last_branch_support_enabled) {
220 i386_lbr_disable();
221 return;
222 }
223
224 if (is_master) {
225 /* All NHM+ CPUs support PERF_CAPABILITIES, so no need to check cpuid for its presence */
226 cpu_lbr_type = PERFCAP_LBR_TYPE(rdmsr64(MSR_IA32_PERF_CAPABILITIES));
227
228 switch (info_p->cpuid_cpufamily) {
229 case CPUFAMILY_INTEL_NEHALEM:
230 case CPUFAMILY_INTEL_WESTMERE:
231 /* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */
232 cpu_lbr_setp = &nhm_cpu_lbrs;
233 break;
234
235 case CPUFAMILY_INTEL_SANDYBRIDGE:
236 case CPUFAMILY_INTEL_IVYBRIDGE:
237 /* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */
238 cpu_lbr_setp = &nhm_cpu_lbrs;
239 break;
240
241 case CPUFAMILY_INTEL_HASWELL:
242 case CPUFAMILY_INTEL_BROADWELL:
243 lbr_need_tsx_workaround = cpuid_tsx_supported ? false : true;
244 cpu_lbr_setp = &nhm_cpu_lbrs;
245 break;
246
247 case CPUFAMILY_INTEL_SKYLAKE:
248 case CPUFAMILY_INTEL_KABYLAKE:
249 case CPUFAMILY_INTEL_ICELAKE:
250 cpu_lbr_setp = &skl_cpu_lbrs;
251 break;
252
253 default:
254 panic("Unknown CPU family");
255 }
256 }
257
258 /* Configure LBR_SELECT for CPL > 0 records only */
259 wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_EQ_0);
260
261 /* Enable LBRs */
262 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
263}
264
265int
266i386_lbr_native_state_to_mach_thread_state(pcb_t pcb, last_branch_state_t *machlbrp)
267{
268 int last_entry;
269 int i, j, lbr_tos;
270 uint64_t from_rip, to_rip;
271#define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ )
272
273 machlbrp->lbr_count = cpu_lbr_setp->lbr_count;
274 lbr_tos = pcb->lbrs.lbr_tos & (X86_MAX_LBRS - 1);
275 last_entry = (lbr_tos == (cpu_lbr_setp->lbr_count - 1)) ? 0 : (lbr_tos + 1);
276
277 switch (cpu_lbr_type) {
278 case PERFCAP_LBR_TYPE_MISPRED: /* NHM */
279
280 machlbrp->lbr_supported_tsx = 0;
281 machlbrp->lbr_supported_cycle_count = 0;
282 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
283 to_rip = pcb->lbrs.lbrs[i].to_rip;
284 machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip;
285 from_rip = LBR_TYPE_MISPRED_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
286 machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip;
287 machlbrp->lbrs[j].mispredict = LBR_TYPE_MISPRED_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
288 machlbrp->lbrs[j].tsx_abort = machlbrp->lbrs[j].in_tsx = 0; /* Not Supported */
289 if (i == last_entry) {
290 break;
291 }
292 }
293 break;
294
295 case PERFCAP_LBR_TYPE_TSXINFO: /* HSW/BDW */
296
297 machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
298 machlbrp->lbr_supported_cycle_count = 0;
299 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
300 to_rip = pcb->lbrs.lbrs[i].to_rip;
301 machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip;
302
303 from_rip = LBR_TYPE_TSXINFO_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
304 machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip;
305 machlbrp->lbrs[j].mispredict = LBR_TYPE_TSXINFO_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
306 if (cpuid_tsx_supported) {
307 machlbrp->lbrs[j].tsx_abort = LBR_TYPE_TSXINFO_TSX_ABORT(pcb->lbrs.lbrs[i].from_rip);
308 machlbrp->lbrs[j].in_tsx = LBR_TYPE_TSXINFO_IN_TSX(pcb->lbrs.lbrs[i].from_rip);
309 } else {
310 machlbrp->lbrs[j].tsx_abort = 0;
311 machlbrp->lbrs[j].in_tsx = 0;
312 }
313 if (i == last_entry) {
314 break;
315 }
316 }
317 break;
318
319 case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO: /* SKL+ */
320
321 machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
322 machlbrp->lbr_supported_cycle_count = 1;
323 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
324 from_rip = pcb->lbrs.lbrs[i].from_rip;
325 machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip;
326 to_rip = pcb->lbrs.lbrs[i].to_rip;
327 machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip;
328 machlbrp->lbrs[j].mispredict = LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb->lbrs.lbrs[i].info);
329 machlbrp->lbrs[j].tsx_abort = LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb->lbrs.lbrs[i].info);
330 machlbrp->lbrs[j].in_tsx = LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb->lbrs.lbrs[i].info);
331 machlbrp->lbrs[j].cycle_count = LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb->lbrs.lbrs[i].info);
332 if (i == last_entry) {
333 break;
334 }
335 }
336 break;
337
338 default:
339#if DEBUG || DEVELOPMENT
340 panic("Unknown LBR format: %d!", cpu_lbr_type);
341 /*NOTREACHED*/
342#else
343 return -1;
344#endif
345 }
346
347 return 0;
348}
349
350void
351i386_lbr_synch(thread_t thr)
352{
353 pcb_t old_pcb = THREAD_TO_PCB(thr);
354 int i;
355
356 /* First, save current LBRs to the old thread's PCB */
357 if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
358 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
359 old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
360 old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
361 old_pcb->lbrs.lbrs[i].info = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info);
362 }
363 } else {
364 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
365 old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
366 old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
367 }
368 }
369
370 /* Finally, save the TOS */
371 old_pcb->lbrs.lbr_tos = rdmsr64(MSR_IA32_LASTBRANCH_TOS);
372}
373
374void
375i386_switch_lbrs(thread_t old, thread_t new)
376{
377 pcb_t new_pcb;
378 int i;
379 bool save_old = (old != NULL && old->task != kernel_task);
380 bool restore_new = (new->task != kernel_task);
381
382 if (!save_old && !restore_new) {
383 return;
384 }
385
386 assert(cpu_lbr_setp != NULL);
387
388 new_pcb = THREAD_TO_PCB(new);
389
390 i386_lbr_disable();
391
392 if (save_old) {
393 i386_lbr_synch(old);
394 }
395
396 if (restore_new) {
397 /* Now restore the new threads's LBRs */
398 if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
399 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
400 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
401 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
402 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info, new_pcb->lbrs.lbrs[i].info);
403 }
404 } else {
405 if (lbr_need_tsx_workaround) {
406 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
407 /*
408 * If TSX has been disabled, the hardware expects those two bits to be sign
409 * extensions of bit 47 (even though it didn't return them that way via the rdmsr!)
410 */
411#define BIT_47 (1ULL << 47)
412 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from,
413 new_pcb->lbrs.lbrs[i].from_rip |
414 ((new_pcb->lbrs.lbrs[i].from_rip & BIT_47) ? 0x6000000000000000ULL : 0));
415 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to,
416 new_pcb->lbrs.lbrs[i].to_rip |
417 ((new_pcb->lbrs.lbrs[i].to_rip & BIT_47) ? 0x6000000000000000ULL : 0));
418 }
419 } else {
420 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
421 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
422 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
423 }
424 }
425 }
426
427 /* Lastly, restore the new threads's TOS */
428 wrmsr64(MSR_IA32_LASTBRANCH_TOS, new_pcb->lbrs.lbr_tos);
429 }
430
431 i386_lbr_enable();
432}
433
434void
435act_machine_switch_pcb(thread_t old, thread_t new)
436{
437 pcb_t pcb = THREAD_TO_PCB(new);
438 cpu_data_t *cdp = current_cpu_datap();
439 struct real_descriptor *ldtp;
440 mach_vm_offset_t pcb_stack_top;
441
442 assert(new->kernel_stack != 0);
443 assert(ml_get_interrupts_enabled() == FALSE);
444#ifdef DIRECTION_FLAG_DEBUG
445 if (x86_get_flags() & EFL_DF) {
446 panic("Direction flag detected: 0x%lx", x86_get_flags());
447 }
448#endif
449
450 /*
451 * Clear segment state
452 * unconditionally for DS/ES/FS but more carefully for GS whose
453 * cached state we track.
454 */
455 set_ds(NULL_SEG);
456 set_es(NULL_SEG);
457 set_fs(NULL_SEG);
458
459 if (get_gs() != NULL_SEG) {
460 swapgs(); /* switch to user's GS context */
461 set_gs(NULL_SEG);
462 swapgs(); /* and back to kernel */
463
464 /* record the active machine state lost */
465 cdp->cpu_uber.cu_user_gs_base = 0;
466 }
467
468 vm_offset_t isf;
469
470 /*
471 * Set pointer to PCB's interrupt stack frame in cpu data.
472 * Used by syscall and double-fault trap handlers.
473 */
474 isf = (vm_offset_t) &pcb->iss->ss_64.isf;
475 cdp->cpu_uber.cu_isf = isf;
476 pcb_stack_top = (vm_offset_t) (pcb->iss + 1);
477 /* require 16-byte alignment */
478 assert((pcb_stack_top & 0xF) == 0);
479
480 current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku;
481 /*
482 * Top of temporary sysenter stack points to pcb stack.
483 * Although this is not normally used by 64-bit users,
484 * it needs to be set in case a sysenter is attempted.
485 */
486 *current_sstk64() = pcb_stack_top;
487
488 cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku;
489
490 if (is_saved_state64(pcb->iss)) {
491 cdp->cpu_task_map = new->map->pmap->pm_task_map;
492
493 /*
494 * Enable the 64-bit user code segment, USER64_CS.
495 * Disable the 32-bit user code segment, USER_CS.
496 */
497 gdt_desc_p(USER64_CS)->access |= ACC_PL_U;
498 gdt_desc_p(USER_CS)->access &= ~ACC_PL_U;
499
500 /*
501 * Switch user's GS base if necessary
502 * by setting the Kernel's GS base MSR
503 * - this will become the user's on the swapgs when
504 * returning to user-space. Avoid this for
505 * kernel threads (no user TLS support required)
506 * and verify the memory shadow of the segment base
507 * in the event it was altered in user space.
508 */
509 if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
510 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
511 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
512 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
513 wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
514 }
515 }
516 } else {
517 cdp->cpu_task_map = TASK_MAP_32BIT;
518
519 /*
520 * Disable USER64_CS
521 * Enable USER_CS
522 */
523
524 /* It's possible that writing to the GDT areas
525 * is expensive, if the processor intercepts those
526 * writes to invalidate its internal segment caches
527 * TODO: perhaps only do this if switching bitness
528 */
529 gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
530 gdt_desc_p(USER_CS)->access |= ACC_PL_U;
531
532 /*
533 * Set the thread`s cthread (a.k.a pthread)
534 * For 32-bit user this involves setting the USER_CTHREAD
535 * descriptor in the LDT to point to the cthread data.
536 * The involves copying in the pre-initialized descriptor.
537 */
538 ldtp = current_ldt();
539 ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
540 if (pcb->uldt_selector != 0) {
541 ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
542 }
543 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
544 }
545
546 cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk;
547
548 if (last_branch_support_enabled) {
549 i386_switch_lbrs(old, new);
550 }
551
552 /*
553 * Set the thread`s LDT or LDT entry.
554 */
555 if (__probable(new->task == TASK_NULL || new->task->i386_ldt == 0)) {
556 /*
557 * Use system LDT.
558 */
559 ml_cpu_set_ldt(KERNEL_LDT);
560 cdp->cpu_curtask_has_ldt = 0;
561 } else {
562 /*
563 * Task has its own LDT.
564 */
565 user_ldt_set(new);
566 cdp->cpu_curtask_has_ldt = 1;
567 }
568}
569
570kern_return_t
571thread_set_wq_state32(thread_t thread, thread_state_t tstate)
572{
573 x86_thread_state32_t *state;
574 x86_saved_state32_t *saved_state;
575 thread_t curth = current_thread();
576 spl_t s = 0;
577
578 pal_register_cache_state(thread, DIRTY);
579
580 saved_state = USER_REGS32(thread);
581
582 state = (x86_thread_state32_t *)tstate;
583
584 if (curth != thread) {
585 s = splsched();
586 thread_lock(thread);
587 }
588
589 saved_state->ebp = 0;
590 saved_state->eip = state->eip;
591 saved_state->eax = state->eax;
592 saved_state->ebx = state->ebx;
593 saved_state->ecx = state->ecx;
594 saved_state->edx = state->edx;
595 saved_state->edi = state->edi;
596 saved_state->esi = state->esi;
597 saved_state->uesp = state->esp;
598 saved_state->efl = EFL_USER_SET;
599
600 saved_state->cs = USER_CS;
601 saved_state->ss = USER_DS;
602 saved_state->ds = USER_DS;
603 saved_state->es = USER_DS;
604
605 if (curth != thread) {
606 thread_unlock(thread);
607 splx(s);
608 }
609
610 return KERN_SUCCESS;
611}
612
613
614kern_return_t
615thread_set_wq_state64(thread_t thread, thread_state_t tstate)
616{
617 x86_thread_state64_t *state;
618 x86_saved_state64_t *saved_state;
619 thread_t curth = current_thread();
620 spl_t s = 0;
621
622 saved_state = USER_REGS64(thread);
623 state = (x86_thread_state64_t *)tstate;
624
625 /* Disallow setting non-canonical PC or stack */
626 if (!IS_USERADDR64_CANONICAL(state->rsp) ||
627 !IS_USERADDR64_CANONICAL(state->rip)) {
628 return KERN_FAILURE;
629 }
630
631 pal_register_cache_state(thread, DIRTY);
632
633 if (curth != thread) {
634 s = splsched();
635 thread_lock(thread);
636 }
637
638 saved_state->rbp = 0;
639 saved_state->rdi = state->rdi;
640 saved_state->rsi = state->rsi;
641 saved_state->rdx = state->rdx;
642 saved_state->rcx = state->rcx;
643 saved_state->r8 = state->r8;
644 saved_state->r9 = state->r9;
645
646 saved_state->isf.rip = state->rip;
647 saved_state->isf.rsp = state->rsp;
648 saved_state->isf.cs = USER64_CS;
649 saved_state->isf.rflags = EFL_USER_SET;
650
651 if (curth != thread) {
652 thread_unlock(thread);
653 splx(s);
654 }
655
656 return KERN_SUCCESS;
657}
658
659/*
660 * Initialize the machine-dependent state for a new thread.
661 */
662kern_return_t
663machine_thread_create(
664 thread_t thread,
665 task_t task)
666{
667 pcb_t pcb = THREAD_TO_PCB(thread);
668
669 if ((task->t_flags & TF_TECS) || __improbable(force_thread_policy_tecs)) {
670 thread->machine.mthr_do_segchk = 1;
671 } else {
672 thread->machine.mthr_do_segchk = 0;
673 }
674
675 /*
676 * Allocate save frame only if required.
677 */
678 if (pcb->iss == NULL) {
679 assert((get_preemption_level() == 0));
680 pcb->iss = (x86_saved_state_t *) zalloc(iss_zone);
681 if (pcb->iss == NULL) {
682 panic("iss_zone");
683 }
684 }
685
686 /*
687 * Ensure that the synthesized 32-bit state including
688 * the 64-bit interrupt state can be acommodated in the
689 * 64-bit state we allocate for both 32-bit and 64-bit threads.
690 */
691 assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <=
692 sizeof(pcb->iss->ss_64));
693
694 bzero((char *)pcb->iss, sizeof(x86_saved_state_t));
695
696 bzero(&pcb->lbrs, sizeof(x86_lbrs_t));
697
698 if (task_has_64Bit_addr(task)) {
699 pcb->iss->flavor = x86_SAVED_STATE64;
700
701 pcb->iss->ss_64.isf.cs = USER64_CS;
702 pcb->iss->ss_64.isf.ss = USER_DS;
703 pcb->iss->ss_64.fs = USER_DS;
704 pcb->iss->ss_64.gs = USER_DS;
705 pcb->iss->ss_64.isf.rflags = EFL_USER_SET;
706 } else {
707 pcb->iss->flavor = x86_SAVED_STATE32;
708
709 pcb->iss->ss_32.cs = USER_CS;
710 pcb->iss->ss_32.ss = USER_DS;
711 pcb->iss->ss_32.ds = USER_DS;
712 pcb->iss->ss_32.es = USER_DS;
713 pcb->iss->ss_32.fs = USER_DS;
714 pcb->iss->ss_32.gs = USER_DS;
715 pcb->iss->ss_32.efl = EFL_USER_SET;
716 }
717
718 simple_lock_init(&pcb->lock, 0);
719
720 pcb->cthread_self = 0;
721 pcb->uldt_selector = 0;
722 pcb->thread_gpu_ns = 0;
723 /* Ensure that the "cthread" descriptor describes a valid
724 * segment.
725 */
726 if ((pcb->cthread_desc.access & ACC_P) == 0) {
727 pcb->cthread_desc = *gdt_desc_p(USER_DS);
728 }
729
730
731 pcb->insn_state_copyin_failure_errorcode = 0;
732 if (pcb->insn_state != 0) { /* Reinit for new thread */
733 bzero(pcb->insn_state, sizeof(x86_instruction_state_t));
734 pcb->insn_state->insn_stream_valid_bytes = -1;
735 }
736
737 return KERN_SUCCESS;
738}
739
740/*
741 * Machine-dependent cleanup prior to destroying a thread
742 */
743void
744machine_thread_destroy(
745 thread_t thread)
746{
747 pcb_t pcb = THREAD_TO_PCB(thread);
748
749#if HYPERVISOR
750 if (thread->hv_thread_target) {
751 hv_callbacks.thread_destroy(thread->hv_thread_target);
752 thread->hv_thread_target = NULL;
753 }
754#endif
755
756 if (pcb->ifps != 0) {
757 fpu_free(thread, pcb->ifps);
758 }
759 if (pcb->iss != 0) {
760 zfree(iss_zone, pcb->iss);
761 pcb->iss = 0;
762 }
763 if (pcb->ids) {
764 zfree(ids_zone, pcb->ids);
765 pcb->ids = NULL;
766 }
767
768 if (pcb->insn_state != 0) {
769 kfree(pcb->insn_state, sizeof(x86_instruction_state_t));
770 pcb->insn_state = 0;
771 }
772 pcb->insn_state_copyin_failure_errorcode = 0;
773}
774
775kern_return_t
776machine_thread_set_tsd_base(
777 thread_t thread,
778 mach_vm_offset_t tsd_base)
779{
780 if (thread->task == kernel_task) {
781 return KERN_INVALID_ARGUMENT;
782 }
783
784 if (thread_is_64bit_addr(thread)) {
785 /* check for canonical address, set 0 otherwise */
786 if (!IS_USERADDR64_CANONICAL(tsd_base)) {
787 tsd_base = 0ULL;
788 }
789 } else {
790 if (tsd_base > UINT32_MAX) {
791 tsd_base = 0ULL;
792 }
793 }
794
795 pcb_t pcb = THREAD_TO_PCB(thread);
796 pcb->cthread_self = tsd_base;
797
798 if (!thread_is_64bit_addr(thread)) {
799 /* Set up descriptor for later use */
800 struct real_descriptor desc = {
801 .limit_low = 1,
802 .limit_high = 0,
803 .base_low = tsd_base & 0xffff,
804 .base_med = (tsd_base >> 16) & 0xff,
805 .base_high = (tsd_base >> 24) & 0xff,
806 .access = ACC_P | ACC_PL_U | ACC_DATA_W,
807 .granularity = SZ_32 | SZ_G,
808 };
809
810 pcb->cthread_desc = desc;
811 saved_state32(pcb->iss)->gs = USER_CTHREAD;
812 }
813
814 /* For current thread, make the TSD base active immediately */
815 if (thread == current_thread()) {
816 if (thread_is_64bit_addr(thread)) {
817 cpu_data_t *cdp;
818
819 mp_disable_preemption();
820 cdp = current_cpu_datap();
821 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
822 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
823 wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base);
824 }
825 cdp->cpu_uber.cu_user_gs_base = tsd_base;
826 mp_enable_preemption();
827 } else {
828 /* assign descriptor */
829 mp_disable_preemption();
830 *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc;
831 mp_enable_preemption();
832 }
833 }
834
835 return KERN_SUCCESS;
836}
837
838void
839machine_tecs(thread_t thr)
840{
841 if (tecs_mode_supported) {
842 thr->machine.mthr_do_segchk = 1;
843 }
844}
845
846int
847machine_csv(cpuvn_e cve)
848{
849 switch (cve) {
850 case CPUVN_CI:
851 return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0;
852
853 default:
854 break;
855 }
856
857 return 0;
858}