]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
57 | #include <mach_debug.h> | |
58 | #include <mach_ldebug.h> | |
59 | ||
60 | #include <sys/kdebug.h> | |
61 | ||
62 | #include <mach/kern_return.h> | |
63 | #include <mach/thread_status.h> | |
64 | #include <mach/vm_param.h> | |
65 | ||
66 | #include <kern/counters.h> | |
67 | #include <kern/mach_param.h> | |
68 | #include <kern/processor.h> | |
69 | #include <kern/cpu_data.h> | |
70 | #include <kern/cpu_number.h> | |
71 | #include <kern/task.h> | |
72 | #include <kern/thread.h> | |
73 | #include <kern/sched_prim.h> | |
74 | #include <kern/misc_protos.h> | |
75 | #include <kern/assert.h> | |
76 | #include <kern/spl.h> | |
77 | #include <kern/machine.h> | |
78 | #include <ipc/ipc_port.h> | |
79 | #include <vm/vm_kern.h> | |
80 | #include <vm/vm_map.h> | |
81 | #include <vm/pmap.h> | |
82 | #include <vm/vm_protos.h> | |
83 | ||
84 | #include <i386/commpage/commpage.h> | |
85 | #include <i386/cpu_data.h> | |
86 | #include <i386/cpu_number.h> | |
87 | #include <i386/cpuid.h> | |
88 | #include <i386/eflags.h> | |
89 | #include <i386/proc_reg.h> | |
90 | #include <i386/tss.h> | |
91 | #include <i386/user_ldt.h> | |
92 | #include <i386/fpu.h> | |
93 | #include <i386/mp_desc.h> | |
94 | #include <i386/misc_protos.h> | |
95 | #include <i386/thread.h> | |
96 | #include <i386/seg.h> | |
97 | #include <i386/machine_routines.h> | |
98 | ||
99 | #if HYPERVISOR | |
100 | #include <kern/hv_support.h> | |
101 | #endif | |
102 | ||
103 | #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ | |
104 | extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ | |
105 | [(sizeof(_type_) % 16) == 0 ? 1 : -1] | |
106 | ||
107 | /* Compile-time checks for vital save area sizing: */ | |
108 | ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); | |
109 | ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); | |
110 | ||
111 | #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) | |
112 | ||
113 | extern zone_t iss_zone; /* zone for saved_state area */ | |
114 | extern zone_t ids_zone; /* zone for debug_state area */ | |
115 | extern int tecs_mode_supported; | |
116 | extern boolean_t cpuid_tsx_supported; | |
117 | ||
118 | bool lbr_need_tsx_workaround = false; | |
119 | ||
120 | int force_thread_policy_tecs; | |
121 | ||
122 | struct lbr_group { | |
123 | uint32_t msr_from; | |
124 | uint32_t msr_to; | |
125 | uint32_t msr_info; | |
126 | }; | |
127 | ||
128 | struct cpu_lbrs { | |
129 | uint32_t lbr_count; | |
130 | struct lbr_group msr_lbrs[X86_MAX_LBRS]; | |
131 | }; | |
132 | ||
133 | const struct cpu_lbrs *cpu_lbr_setp = NULL; | |
134 | int cpu_lbr_type; | |
135 | ||
136 | const struct cpu_lbrs nhm_cpu_lbrs = { | |
137 | 16 /* LBR count */, | |
138 | { | |
139 | { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ }, | |
140 | { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ }, | |
141 | { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ }, | |
142 | { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ }, | |
143 | { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ }, | |
144 | { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ }, | |
145 | { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ }, | |
146 | { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ }, | |
147 | { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ }, | |
148 | { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ }, | |
149 | { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ }, | |
150 | { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ }, | |
151 | { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ }, | |
152 | { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ }, | |
153 | { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ }, | |
154 | { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ } | |
155 | } | |
156 | }, | |
157 | skl_cpu_lbrs = { | |
158 | 32 /* LBR count */, | |
159 | { | |
160 | { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ }, | |
161 | { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ }, | |
162 | { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ }, | |
163 | { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ }, | |
164 | { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ }, | |
165 | { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ }, | |
166 | { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ }, | |
167 | { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ }, | |
168 | { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ }, | |
169 | { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ }, | |
170 | { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ }, | |
171 | { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ }, | |
172 | { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ }, | |
173 | { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ }, | |
174 | { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ }, | |
175 | { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ }, | |
176 | { 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ }, | |
177 | { 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ }, | |
178 | { 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ }, | |
179 | { 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ }, | |
180 | { 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ }, | |
181 | { 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ }, | |
182 | { 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ }, | |
183 | { 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ }, | |
184 | { 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ }, | |
185 | { 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ }, | |
186 | { 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ }, | |
187 | { 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ }, | |
188 | { 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ }, | |
189 | { 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ }, | |
190 | { 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ }, | |
191 | { 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ } | |
192 | } | |
193 | }; | |
194 | ||
195 | void | |
196 | i386_lbr_disable(void) | |
197 | { | |
198 | /* Enable LBRs */ | |
199 | wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) & ~DEBUGCTL_LBR_ENA); | |
200 | } | |
201 | ||
202 | /* | |
203 | * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic | |
204 | * because the shadow map is not been initialized when these functions are called in | |
205 | * early boot. | |
206 | */ | |
207 | void __attribute__((no_sanitize("address"))) | |
208 | i386_lbr_enable(void) | |
209 | { | |
210 | if (last_branch_support_enabled) { | |
211 | /* Enable LBRs */ | |
212 | wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA); | |
213 | } | |
214 | } | |
215 | ||
216 | void __attribute__((no_sanitize("address"))) | |
217 | i386_lbr_init(i386_cpu_info_t *info_p, bool is_master) | |
218 | { | |
219 | if (!last_branch_support_enabled) { | |
220 | i386_lbr_disable(); | |
221 | return; | |
222 | } | |
223 | ||
224 | if (is_master) { | |
225 | /* All NHM+ CPUs support PERF_CAPABILITIES, so no need to check cpuid for its presence */ | |
226 | cpu_lbr_type = PERFCAP_LBR_TYPE(rdmsr64(MSR_IA32_PERF_CAPABILITIES)); | |
227 | ||
228 | switch (info_p->cpuid_cpufamily) { | |
229 | case CPUFAMILY_INTEL_NEHALEM: | |
230 | case CPUFAMILY_INTEL_WESTMERE: | |
231 | /* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */ | |
232 | cpu_lbr_setp = &nhm_cpu_lbrs; | |
233 | break; | |
234 | ||
235 | case CPUFAMILY_INTEL_SANDYBRIDGE: | |
236 | case CPUFAMILY_INTEL_IVYBRIDGE: | |
237 | /* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */ | |
238 | cpu_lbr_setp = &nhm_cpu_lbrs; | |
239 | break; | |
240 | ||
241 | case CPUFAMILY_INTEL_HASWELL: | |
242 | case CPUFAMILY_INTEL_BROADWELL: | |
243 | lbr_need_tsx_workaround = cpuid_tsx_supported ? false : true; | |
244 | cpu_lbr_setp = &nhm_cpu_lbrs; | |
245 | break; | |
246 | ||
247 | case CPUFAMILY_INTEL_SKYLAKE: | |
248 | case CPUFAMILY_INTEL_KABYLAKE: | |
249 | case CPUFAMILY_INTEL_ICELAKE: | |
250 | cpu_lbr_setp = &skl_cpu_lbrs; | |
251 | break; | |
252 | ||
253 | default: | |
254 | panic("Unknown CPU family"); | |
255 | } | |
256 | } | |
257 | ||
258 | /* Configure LBR_SELECT for CPL > 0 records only */ | |
259 | wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_EQ_0); | |
260 | ||
261 | /* Enable LBRs */ | |
262 | wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA); | |
263 | } | |
264 | ||
265 | int | |
266 | i386_lbr_native_state_to_mach_thread_state(pcb_t pcb, last_branch_state_t *machlbrp) | |
267 | { | |
268 | int last_entry; | |
269 | int i, j, lbr_tos; | |
270 | uint64_t from_rip, to_rip; | |
271 | #define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ ) | |
272 | ||
273 | machlbrp->lbr_count = cpu_lbr_setp->lbr_count; | |
274 | lbr_tos = pcb->lbrs.lbr_tos & (X86_MAX_LBRS - 1); | |
275 | last_entry = (lbr_tos == (cpu_lbr_setp->lbr_count - 1)) ? 0 : (lbr_tos + 1); | |
276 | ||
277 | switch (cpu_lbr_type) { | |
278 | case PERFCAP_LBR_TYPE_MISPRED: /* NHM */ | |
279 | ||
280 | machlbrp->lbr_supported_tsx = 0; | |
281 | machlbrp->lbr_supported_cycle_count = 0; | |
282 | for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { | |
283 | to_rip = pcb->lbrs.lbrs[i].to_rip; | |
284 | machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip; | |
285 | from_rip = LBR_TYPE_MISPRED_FROMRIP(pcb->lbrs.lbrs[i].from_rip); | |
286 | machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip; | |
287 | machlbrp->lbrs[j].mispredict = LBR_TYPE_MISPRED_MISPREDICT(pcb->lbrs.lbrs[i].from_rip); | |
288 | machlbrp->lbrs[j].tsx_abort = machlbrp->lbrs[j].in_tsx = 0; /* Not Supported */ | |
289 | if (i == last_entry) { | |
290 | break; | |
291 | } | |
292 | } | |
293 | break; | |
294 | ||
295 | case PERFCAP_LBR_TYPE_TSXINFO: /* HSW/BDW */ | |
296 | ||
297 | machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0; | |
298 | machlbrp->lbr_supported_cycle_count = 0; | |
299 | for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { | |
300 | to_rip = pcb->lbrs.lbrs[i].to_rip; | |
301 | machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip; | |
302 | ||
303 | from_rip = LBR_TYPE_TSXINFO_FROMRIP(pcb->lbrs.lbrs[i].from_rip); | |
304 | machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip; | |
305 | machlbrp->lbrs[j].mispredict = LBR_TYPE_TSXINFO_MISPREDICT(pcb->lbrs.lbrs[i].from_rip); | |
306 | if (cpuid_tsx_supported) { | |
307 | machlbrp->lbrs[j].tsx_abort = LBR_TYPE_TSXINFO_TSX_ABORT(pcb->lbrs.lbrs[i].from_rip); | |
308 | machlbrp->lbrs[j].in_tsx = LBR_TYPE_TSXINFO_IN_TSX(pcb->lbrs.lbrs[i].from_rip); | |
309 | } else { | |
310 | machlbrp->lbrs[j].tsx_abort = 0; | |
311 | machlbrp->lbrs[j].in_tsx = 0; | |
312 | } | |
313 | if (i == last_entry) { | |
314 | break; | |
315 | } | |
316 | } | |
317 | break; | |
318 | ||
319 | case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO: /* SKL+ */ | |
320 | ||
321 | machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0; | |
322 | machlbrp->lbr_supported_cycle_count = 1; | |
323 | for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { | |
324 | from_rip = pcb->lbrs.lbrs[i].from_rip; | |
325 | machlbrp->lbrs[j].from_ip = (from_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : from_rip; | |
326 | to_rip = pcb->lbrs.lbrs[i].to_rip; | |
327 | machlbrp->lbrs[j].to_ip = (to_rip > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : to_rip; | |
328 | machlbrp->lbrs[j].mispredict = LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb->lbrs.lbrs[i].info); | |
329 | machlbrp->lbrs[j].tsx_abort = LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb->lbrs.lbrs[i].info); | |
330 | machlbrp->lbrs[j].in_tsx = LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb->lbrs.lbrs[i].info); | |
331 | machlbrp->lbrs[j].cycle_count = LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb->lbrs.lbrs[i].info); | |
332 | if (i == last_entry) { | |
333 | break; | |
334 | } | |
335 | } | |
336 | break; | |
337 | ||
338 | default: | |
339 | #if DEBUG || DEVELOPMENT | |
340 | panic("Unknown LBR format: %d!", cpu_lbr_type); | |
341 | /*NOTREACHED*/ | |
342 | #else | |
343 | return -1; | |
344 | #endif | |
345 | } | |
346 | ||
347 | return 0; | |
348 | } | |
349 | ||
350 | void | |
351 | i386_lbr_synch(thread_t thr) | |
352 | { | |
353 | pcb_t old_pcb = THREAD_TO_PCB(thr); | |
354 | int i; | |
355 | ||
356 | /* First, save current LBRs to the old thread's PCB */ | |
357 | if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) { | |
358 | for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { | |
359 | old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from); | |
360 | old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to); | |
361 | old_pcb->lbrs.lbrs[i].info = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info); | |
362 | } | |
363 | } else { | |
364 | for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { | |
365 | old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from); | |
366 | old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to); | |
367 | } | |
368 | } | |
369 | ||
370 | /* Finally, save the TOS */ | |
371 | old_pcb->lbrs.lbr_tos = rdmsr64(MSR_IA32_LASTBRANCH_TOS); | |
372 | } | |
373 | ||
374 | void | |
375 | i386_switch_lbrs(thread_t old, thread_t new) | |
376 | { | |
377 | pcb_t new_pcb; | |
378 | int i; | |
379 | bool save_old = (old != NULL && old->task != kernel_task); | |
380 | bool restore_new = (new->task != kernel_task); | |
381 | ||
382 | if (!save_old && !restore_new) { | |
383 | return; | |
384 | } | |
385 | ||
386 | assert(cpu_lbr_setp != NULL); | |
387 | ||
388 | new_pcb = THREAD_TO_PCB(new); | |
389 | ||
390 | i386_lbr_disable(); | |
391 | ||
392 | if (save_old) { | |
393 | i386_lbr_synch(old); | |
394 | } | |
395 | ||
396 | if (restore_new) { | |
397 | /* Now restore the new threads's LBRs */ | |
398 | if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) { | |
399 | for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { | |
400 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip); | |
401 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip); | |
402 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info, new_pcb->lbrs.lbrs[i].info); | |
403 | } | |
404 | } else { | |
405 | if (lbr_need_tsx_workaround) { | |
406 | for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { | |
407 | /* | |
408 | * If TSX has been disabled, the hardware expects those two bits to be sign | |
409 | * extensions of bit 47 (even though it didn't return them that way via the rdmsr!) | |
410 | */ | |
411 | #define BIT_47 (1ULL << 47) | |
412 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, | |
413 | new_pcb->lbrs.lbrs[i].from_rip | | |
414 | ((new_pcb->lbrs.lbrs[i].from_rip & BIT_47) ? 0x6000000000000000ULL : 0)); | |
415 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, | |
416 | new_pcb->lbrs.lbrs[i].to_rip | | |
417 | ((new_pcb->lbrs.lbrs[i].to_rip & BIT_47) ? 0x6000000000000000ULL : 0)); | |
418 | } | |
419 | } else { | |
420 | for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { | |
421 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip); | |
422 | wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip); | |
423 | } | |
424 | } | |
425 | } | |
426 | ||
427 | /* Lastly, restore the new threads's TOS */ | |
428 | wrmsr64(MSR_IA32_LASTBRANCH_TOS, new_pcb->lbrs.lbr_tos); | |
429 | } | |
430 | ||
431 | i386_lbr_enable(); | |
432 | } | |
433 | ||
434 | void | |
435 | act_machine_switch_pcb(thread_t old, thread_t new) | |
436 | { | |
437 | pcb_t pcb = THREAD_TO_PCB(new); | |
438 | cpu_data_t *cdp = current_cpu_datap(); | |
439 | struct real_descriptor *ldtp; | |
440 | mach_vm_offset_t pcb_stack_top; | |
441 | ||
442 | assert(new->kernel_stack != 0); | |
443 | assert(ml_get_interrupts_enabled() == FALSE); | |
444 | #ifdef DIRECTION_FLAG_DEBUG | |
445 | if (x86_get_flags() & EFL_DF) { | |
446 | panic("Direction flag detected: 0x%lx", x86_get_flags()); | |
447 | } | |
448 | #endif | |
449 | ||
450 | /* | |
451 | * Clear segment state | |
452 | * unconditionally for DS/ES/FS but more carefully for GS whose | |
453 | * cached state we track. | |
454 | */ | |
455 | set_ds(NULL_SEG); | |
456 | set_es(NULL_SEG); | |
457 | set_fs(NULL_SEG); | |
458 | ||
459 | if (get_gs() != NULL_SEG) { | |
460 | swapgs(); /* switch to user's GS context */ | |
461 | set_gs(NULL_SEG); | |
462 | swapgs(); /* and back to kernel */ | |
463 | ||
464 | /* record the active machine state lost */ | |
465 | cdp->cpu_uber.cu_user_gs_base = 0; | |
466 | } | |
467 | ||
468 | vm_offset_t isf; | |
469 | ||
470 | /* | |
471 | * Set pointer to PCB's interrupt stack frame in cpu data. | |
472 | * Used by syscall and double-fault trap handlers. | |
473 | */ | |
474 | isf = (vm_offset_t) &pcb->iss->ss_64.isf; | |
475 | cdp->cpu_uber.cu_isf = isf; | |
476 | pcb_stack_top = (vm_offset_t) (pcb->iss + 1); | |
477 | /* require 16-byte alignment */ | |
478 | assert((pcb_stack_top & 0xF) == 0); | |
479 | ||
480 | current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku; | |
481 | /* | |
482 | * Top of temporary sysenter stack points to pcb stack. | |
483 | * Although this is not normally used by 64-bit users, | |
484 | * it needs to be set in case a sysenter is attempted. | |
485 | */ | |
486 | *current_sstk64() = pcb_stack_top; | |
487 | ||
488 | cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku; | |
489 | ||
490 | if (is_saved_state64(pcb->iss)) { | |
491 | cdp->cpu_task_map = new->map->pmap->pm_task_map; | |
492 | ||
493 | /* | |
494 | * Enable the 64-bit user code segment, USER64_CS. | |
495 | * Disable the 32-bit user code segment, USER_CS. | |
496 | */ | |
497 | gdt_desc_p(USER64_CS)->access |= ACC_PL_U; | |
498 | gdt_desc_p(USER_CS)->access &= ~ACC_PL_U; | |
499 | ||
500 | /* | |
501 | * Switch user's GS base if necessary | |
502 | * by setting the Kernel's GS base MSR | |
503 | * - this will become the user's on the swapgs when | |
504 | * returning to user-space. Avoid this for | |
505 | * kernel threads (no user TLS support required) | |
506 | * and verify the memory shadow of the segment base | |
507 | * in the event it was altered in user space. | |
508 | */ | |
509 | if ((pcb->cthread_self != 0) || (new->task != kernel_task)) { | |
510 | if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || | |
511 | (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { | |
512 | cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; | |
513 | wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); | |
514 | } | |
515 | } | |
516 | } else { | |
517 | cdp->cpu_task_map = TASK_MAP_32BIT; | |
518 | ||
519 | /* | |
520 | * Disable USER64_CS | |
521 | * Enable USER_CS | |
522 | */ | |
523 | ||
524 | /* It's possible that writing to the GDT areas | |
525 | * is expensive, if the processor intercepts those | |
526 | * writes to invalidate its internal segment caches | |
527 | * TODO: perhaps only do this if switching bitness | |
528 | */ | |
529 | gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U; | |
530 | gdt_desc_p(USER_CS)->access |= ACC_PL_U; | |
531 | ||
532 | /* | |
533 | * Set the thread`s cthread (a.k.a pthread) | |
534 | * For 32-bit user this involves setting the USER_CTHREAD | |
535 | * descriptor in the LDT to point to the cthread data. | |
536 | * The involves copying in the pre-initialized descriptor. | |
537 | */ | |
538 | ldtp = current_ldt(); | |
539 | ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; | |
540 | if (pcb->uldt_selector != 0) { | |
541 | ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; | |
542 | } | |
543 | cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; | |
544 | } | |
545 | ||
546 | cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk; | |
547 | ||
548 | if (last_branch_support_enabled) { | |
549 | i386_switch_lbrs(old, new); | |
550 | } | |
551 | ||
552 | /* | |
553 | * Set the thread`s LDT or LDT entry. | |
554 | */ | |
555 | if (__probable(new->task == TASK_NULL || new->task->i386_ldt == 0)) { | |
556 | /* | |
557 | * Use system LDT. | |
558 | */ | |
559 | ml_cpu_set_ldt(KERNEL_LDT); | |
560 | cdp->cpu_curtask_has_ldt = 0; | |
561 | } else { | |
562 | /* | |
563 | * Task has its own LDT. | |
564 | */ | |
565 | user_ldt_set(new); | |
566 | cdp->cpu_curtask_has_ldt = 1; | |
567 | } | |
568 | } | |
569 | ||
570 | kern_return_t | |
571 | thread_set_wq_state32(thread_t thread, thread_state_t tstate) | |
572 | { | |
573 | x86_thread_state32_t *state; | |
574 | x86_saved_state32_t *saved_state; | |
575 | thread_t curth = current_thread(); | |
576 | spl_t s = 0; | |
577 | ||
578 | pal_register_cache_state(thread, DIRTY); | |
579 | ||
580 | saved_state = USER_REGS32(thread); | |
581 | ||
582 | state = (x86_thread_state32_t *)tstate; | |
583 | ||
584 | if (curth != thread) { | |
585 | s = splsched(); | |
586 | thread_lock(thread); | |
587 | } | |
588 | ||
589 | saved_state->ebp = 0; | |
590 | saved_state->eip = state->eip; | |
591 | saved_state->eax = state->eax; | |
592 | saved_state->ebx = state->ebx; | |
593 | saved_state->ecx = state->ecx; | |
594 | saved_state->edx = state->edx; | |
595 | saved_state->edi = state->edi; | |
596 | saved_state->esi = state->esi; | |
597 | saved_state->uesp = state->esp; | |
598 | saved_state->efl = EFL_USER_SET; | |
599 | ||
600 | saved_state->cs = USER_CS; | |
601 | saved_state->ss = USER_DS; | |
602 | saved_state->ds = USER_DS; | |
603 | saved_state->es = USER_DS; | |
604 | ||
605 | if (curth != thread) { | |
606 | thread_unlock(thread); | |
607 | splx(s); | |
608 | } | |
609 | ||
610 | return KERN_SUCCESS; | |
611 | } | |
612 | ||
613 | ||
614 | kern_return_t | |
615 | thread_set_wq_state64(thread_t thread, thread_state_t tstate) | |
616 | { | |
617 | x86_thread_state64_t *state; | |
618 | x86_saved_state64_t *saved_state; | |
619 | thread_t curth = current_thread(); | |
620 | spl_t s = 0; | |
621 | ||
622 | saved_state = USER_REGS64(thread); | |
623 | state = (x86_thread_state64_t *)tstate; | |
624 | ||
625 | /* Disallow setting non-canonical PC or stack */ | |
626 | if (!IS_USERADDR64_CANONICAL(state->rsp) || | |
627 | !IS_USERADDR64_CANONICAL(state->rip)) { | |
628 | return KERN_FAILURE; | |
629 | } | |
630 | ||
631 | pal_register_cache_state(thread, DIRTY); | |
632 | ||
633 | if (curth != thread) { | |
634 | s = splsched(); | |
635 | thread_lock(thread); | |
636 | } | |
637 | ||
638 | saved_state->rbp = 0; | |
639 | saved_state->rdi = state->rdi; | |
640 | saved_state->rsi = state->rsi; | |
641 | saved_state->rdx = state->rdx; | |
642 | saved_state->rcx = state->rcx; | |
643 | saved_state->r8 = state->r8; | |
644 | saved_state->r9 = state->r9; | |
645 | ||
646 | saved_state->isf.rip = state->rip; | |
647 | saved_state->isf.rsp = state->rsp; | |
648 | saved_state->isf.cs = USER64_CS; | |
649 | saved_state->isf.rflags = EFL_USER_SET; | |
650 | ||
651 | if (curth != thread) { | |
652 | thread_unlock(thread); | |
653 | splx(s); | |
654 | } | |
655 | ||
656 | return KERN_SUCCESS; | |
657 | } | |
658 | ||
659 | /* | |
660 | * Initialize the machine-dependent state for a new thread. | |
661 | */ | |
662 | kern_return_t | |
663 | machine_thread_create( | |
664 | thread_t thread, | |
665 | task_t task) | |
666 | { | |
667 | pcb_t pcb = THREAD_TO_PCB(thread); | |
668 | ||
669 | if ((task->t_flags & TF_TECS) || __improbable(force_thread_policy_tecs)) { | |
670 | thread->machine.mthr_do_segchk = 1; | |
671 | } else { | |
672 | thread->machine.mthr_do_segchk = 0; | |
673 | } | |
674 | ||
675 | /* | |
676 | * Allocate save frame only if required. | |
677 | */ | |
678 | if (pcb->iss == NULL) { | |
679 | assert((get_preemption_level() == 0)); | |
680 | pcb->iss = (x86_saved_state_t *) zalloc(iss_zone); | |
681 | if (pcb->iss == NULL) { | |
682 | panic("iss_zone"); | |
683 | } | |
684 | } | |
685 | ||
686 | /* | |
687 | * Ensure that the synthesized 32-bit state including | |
688 | * the 64-bit interrupt state can be acommodated in the | |
689 | * 64-bit state we allocate for both 32-bit and 64-bit threads. | |
690 | */ | |
691 | assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <= | |
692 | sizeof(pcb->iss->ss_64)); | |
693 | ||
694 | bzero((char *)pcb->iss, sizeof(x86_saved_state_t)); | |
695 | ||
696 | bzero(&pcb->lbrs, sizeof(x86_lbrs_t)); | |
697 | ||
698 | if (task_has_64Bit_addr(task)) { | |
699 | pcb->iss->flavor = x86_SAVED_STATE64; | |
700 | ||
701 | pcb->iss->ss_64.isf.cs = USER64_CS; | |
702 | pcb->iss->ss_64.isf.ss = USER_DS; | |
703 | pcb->iss->ss_64.fs = USER_DS; | |
704 | pcb->iss->ss_64.gs = USER_DS; | |
705 | pcb->iss->ss_64.isf.rflags = EFL_USER_SET; | |
706 | } else { | |
707 | pcb->iss->flavor = x86_SAVED_STATE32; | |
708 | ||
709 | pcb->iss->ss_32.cs = USER_CS; | |
710 | pcb->iss->ss_32.ss = USER_DS; | |
711 | pcb->iss->ss_32.ds = USER_DS; | |
712 | pcb->iss->ss_32.es = USER_DS; | |
713 | pcb->iss->ss_32.fs = USER_DS; | |
714 | pcb->iss->ss_32.gs = USER_DS; | |
715 | pcb->iss->ss_32.efl = EFL_USER_SET; | |
716 | } | |
717 | ||
718 | simple_lock_init(&pcb->lock, 0); | |
719 | ||
720 | pcb->cthread_self = 0; | |
721 | pcb->uldt_selector = 0; | |
722 | pcb->thread_gpu_ns = 0; | |
723 | /* Ensure that the "cthread" descriptor describes a valid | |
724 | * segment. | |
725 | */ | |
726 | if ((pcb->cthread_desc.access & ACC_P) == 0) { | |
727 | pcb->cthread_desc = *gdt_desc_p(USER_DS); | |
728 | } | |
729 | ||
730 | ||
731 | pcb->insn_state_copyin_failure_errorcode = 0; | |
732 | if (pcb->insn_state != 0) { /* Reinit for new thread */ | |
733 | bzero(pcb->insn_state, sizeof(x86_instruction_state_t)); | |
734 | pcb->insn_state->insn_stream_valid_bytes = -1; | |
735 | } | |
736 | ||
737 | return KERN_SUCCESS; | |
738 | } | |
739 | ||
740 | /* | |
741 | * Machine-dependent cleanup prior to destroying a thread | |
742 | */ | |
743 | void | |
744 | machine_thread_destroy( | |
745 | thread_t thread) | |
746 | { | |
747 | pcb_t pcb = THREAD_TO_PCB(thread); | |
748 | ||
749 | #if HYPERVISOR | |
750 | if (thread->hv_thread_target) { | |
751 | hv_callbacks.thread_destroy(thread->hv_thread_target); | |
752 | thread->hv_thread_target = NULL; | |
753 | } | |
754 | #endif | |
755 | ||
756 | if (pcb->ifps != 0) { | |
757 | fpu_free(thread, pcb->ifps); | |
758 | } | |
759 | if (pcb->iss != 0) { | |
760 | zfree(iss_zone, pcb->iss); | |
761 | pcb->iss = 0; | |
762 | } | |
763 | if (pcb->ids) { | |
764 | zfree(ids_zone, pcb->ids); | |
765 | pcb->ids = NULL; | |
766 | } | |
767 | ||
768 | if (pcb->insn_state != 0) { | |
769 | kfree(pcb->insn_state, sizeof(x86_instruction_state_t)); | |
770 | pcb->insn_state = 0; | |
771 | } | |
772 | pcb->insn_state_copyin_failure_errorcode = 0; | |
773 | } | |
774 | ||
775 | kern_return_t | |
776 | machine_thread_set_tsd_base( | |
777 | thread_t thread, | |
778 | mach_vm_offset_t tsd_base) | |
779 | { | |
780 | if (thread->task == kernel_task) { | |
781 | return KERN_INVALID_ARGUMENT; | |
782 | } | |
783 | ||
784 | if (thread_is_64bit_addr(thread)) { | |
785 | /* check for canonical address, set 0 otherwise */ | |
786 | if (!IS_USERADDR64_CANONICAL(tsd_base)) { | |
787 | tsd_base = 0ULL; | |
788 | } | |
789 | } else { | |
790 | if (tsd_base > UINT32_MAX) { | |
791 | tsd_base = 0ULL; | |
792 | } | |
793 | } | |
794 | ||
795 | pcb_t pcb = THREAD_TO_PCB(thread); | |
796 | pcb->cthread_self = tsd_base; | |
797 | ||
798 | if (!thread_is_64bit_addr(thread)) { | |
799 | /* Set up descriptor for later use */ | |
800 | struct real_descriptor desc = { | |
801 | .limit_low = 1, | |
802 | .limit_high = 0, | |
803 | .base_low = tsd_base & 0xffff, | |
804 | .base_med = (tsd_base >> 16) & 0xff, | |
805 | .base_high = (tsd_base >> 24) & 0xff, | |
806 | .access = ACC_P | ACC_PL_U | ACC_DATA_W, | |
807 | .granularity = SZ_32 | SZ_G, | |
808 | }; | |
809 | ||
810 | pcb->cthread_desc = desc; | |
811 | saved_state32(pcb->iss)->gs = USER_CTHREAD; | |
812 | } | |
813 | ||
814 | /* For current thread, make the TSD base active immediately */ | |
815 | if (thread == current_thread()) { | |
816 | if (thread_is_64bit_addr(thread)) { | |
817 | cpu_data_t *cdp; | |
818 | ||
819 | mp_disable_preemption(); | |
820 | cdp = current_cpu_datap(); | |
821 | if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || | |
822 | (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { | |
823 | wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base); | |
824 | } | |
825 | cdp->cpu_uber.cu_user_gs_base = tsd_base; | |
826 | mp_enable_preemption(); | |
827 | } else { | |
828 | /* assign descriptor */ | |
829 | mp_disable_preemption(); | |
830 | *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc; | |
831 | mp_enable_preemption(); | |
832 | } | |
833 | } | |
834 | ||
835 | return KERN_SUCCESS; | |
836 | } | |
837 | ||
838 | void | |
839 | machine_tecs(thread_t thr) | |
840 | { | |
841 | if (tecs_mode_supported) { | |
842 | thr->machine.mthr_do_segchk = 1; | |
843 | } | |
844 | } | |
845 | ||
846 | int | |
847 | machine_csv(cpuvn_e cve) | |
848 | { | |
849 | switch (cve) { | |
850 | case CPUVN_CI: | |
851 | return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0; | |
852 | ||
853 | default: | |
854 | break; | |
855 | } | |
856 | ||
857 | return 0; | |
858 | } |