]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pcb_native.c
xnu-6153.11.26.tar.gz
[apple/xnu.git] / osfmk / i386 / pcb_native.c
CommitLineData
6d2010ae 1/*
0a7de745 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
6d2010ae
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
6d2010ae
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
6d2010ae
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
6d2010ae
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
6d2010ae
A
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
0a7de745 31/*
6d2010ae
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
6d2010ae
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
6d2010ae
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
6d2010ae 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
6d2010ae
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
6d2010ae
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
6d2010ae
A
57#include <mach_debug.h>
58#include <mach_ldebug.h>
59
60#include <sys/kdebug.h>
61
62#include <mach/kern_return.h>
63#include <mach/thread_status.h>
64#include <mach/vm_param.h>
65
66#include <kern/counters.h>
67#include <kern/kalloc.h>
68#include <kern/mach_param.h>
69#include <kern/processor.h>
70#include <kern/cpu_data.h>
71#include <kern/cpu_number.h>
72#include <kern/task.h>
73#include <kern/thread.h>
74#include <kern/sched_prim.h>
75#include <kern/misc_protos.h>
76#include <kern/assert.h>
77#include <kern/spl.h>
78#include <kern/machine.h>
79#include <ipc/ipc_port.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_map.h>
82#include <vm/pmap.h>
83#include <vm/vm_protos.h>
84
85#include <i386/commpage/commpage.h>
86#include <i386/cpu_data.h>
87#include <i386/cpu_number.h>
0a7de745 88#include <i386/cpuid.h>
6d2010ae
A
89#include <i386/eflags.h>
90#include <i386/proc_reg.h>
91#include <i386/tss.h>
92#include <i386/user_ldt.h>
93#include <i386/fpu.h>
94#include <i386/mp_desc.h>
95#include <i386/misc_protos.h>
96#include <i386/thread.h>
6d2010ae
A
97#include <i386/seg.h>
98#include <i386/machine_routines.h>
99
fe8ab488
A
100#if HYPERVISOR
101#include <kern/hv_support.h>
102#endif
103
0a7de745
A
104#define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
105extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
106 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
6d2010ae
A
107
108/* Compile-time checks for vital save area sizing: */
109ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
6d2010ae
A
110ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);
111
112#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
113
0a7de745
A
114extern zone_t iss_zone; /* zone for saved_state area */
115extern zone_t ids_zone; /* zone for debug_state area */
116extern int tecs_mode_supported;
117
118int force_thread_policy_tecs;
6d2010ae 119
6d2010ae
A
120void
121act_machine_switch_pcb(__unused thread_t old, thread_t new)
122{
0a7de745
A
123 pcb_t pcb = THREAD_TO_PCB(new);
124 cpu_data_t *cdp = current_cpu_datap();
125 struct real_descriptor *ldtp;
126 mach_vm_offset_t pcb_stack_top;
6d2010ae
A
127
128 assert(new->kernel_stack != 0);
129 assert(ml_get_interrupts_enabled() == FALSE);
0a7de745 130#ifdef DIRECTION_FLAG_DEBUG
6d2010ae
A
131 if (x86_get_flags() & EFL_DF) {
132 panic("Direction flag detected: 0x%lx", x86_get_flags());
133 }
134#endif
135
6d2010ae
A
136 /*
137 * Clear segment state
138 * unconditionally for DS/ES/FS but more carefully for GS whose
139 * cached state we track.
140 */
141 set_ds(NULL_SEG);
142 set_es(NULL_SEG);
143 set_fs(NULL_SEG);
5c9f4661 144
6d2010ae 145 if (get_gs() != NULL_SEG) {
0a7de745 146 swapgs(); /* switch to user's GS context */
6d2010ae 147 set_gs(NULL_SEG);
0a7de745 148 swapgs(); /* and back to kernel */
6d2010ae
A
149
150 /* record the active machine state lost */
151 cdp->cpu_uber.cu_user_gs_base = 0;
0a7de745 152 }
6d2010ae 153
0a7de745 154 vm_offset_t isf;
6d2010ae 155
39236c6e
A
156 /*
157 * Set pointer to PCB's interrupt stack frame in cpu data.
158 * Used by syscall and double-fault trap handlers.
159 */
160 isf = (vm_offset_t) &pcb->iss->ss_64.isf;
161 cdp->cpu_uber.cu_isf = isf;
162 pcb_stack_top = (vm_offset_t) (pcb->iss + 1);
163 /* require 16-byte alignment */
164 assert((pcb_stack_top & 0xF) == 0);
6d2010ae 165
5c9f4661 166 current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku;
39236c6e
A
167 /*
168 * Top of temporary sysenter stack points to pcb stack.
169 * Although this is not normally used by 64-bit users,
170 * it needs to be set in case a sysenter is attempted.
171 */
172 *current_sstk64() = pcb_stack_top;
173
5c9f4661
A
174 cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku;
175
39236c6e 176 if (is_saved_state64(pcb->iss)) {
0a7de745 177 cdp->cpu_task_map = new->map->pmap->pm_task_map;
6d2010ae
A
178
179 /*
180 * Enable the 64-bit user code segment, USER64_CS.
181 * Disable the 32-bit user code segment, USER_CS.
182 */
00867663
A
183 gdt_desc_p(USER64_CS)->access |= ACC_PL_U;
184 gdt_desc_p(USER_CS)->access &= ~ACC_PL_U;
6d2010ae
A
185
186 /*
187 * Switch user's GS base if necessary
188 * by setting the Kernel's GS base MSR
189 * - this will become the user's on the swapgs when
190 * returning to user-space. Avoid this for
191 * kernel threads (no user TLS support required)
192 * and verify the memory shadow of the segment base
193 * in the event it was altered in user space.
194 */
195 if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
00867663
A
196 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
197 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
6d2010ae
A
198 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
199 wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
200 }
201 }
39236c6e 202 } else {
6d2010ae 203 cdp->cpu_task_map = TASK_MAP_32BIT;
6d2010ae
A
204
205 /*
206 * Disable USER64_CS
207 * Enable USER_CS
208 */
00867663
A
209
210 /* It's possible that writing to the GDT areas
211 * is expensive, if the processor intercepts those
212 * writes to invalidate its internal segment caches
213 * TODO: perhaps only do this if switching bitness
214 */
215 gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
216 gdt_desc_p(USER_CS)->access |= ACC_PL_U;
6d2010ae
A
217
218 /*
219 * Set the thread`s cthread (a.k.a pthread)
220 * For 32-bit user this involves setting the USER_CTHREAD
221 * descriptor in the LDT to point to the cthread data.
222 * The involves copying in the pre-initialized descriptor.
0a7de745
A
223 */
224 ldtp = current_ldt();
6d2010ae 225 ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
0a7de745 226 if (pcb->uldt_selector != 0) {
6d2010ae 227 ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
0a7de745 228 }
6d2010ae 229 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
0a7de745 230 }
6d2010ae 231
0a7de745
A
232 cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk;
233
234 /*
235 * Set the thread`s LDT or LDT entry.
236 */
237 if (__probable(new->task == TASK_NULL || new->task->i386_ldt == 0)) {
6d2010ae 238 /*
0a7de745 239 * Use system LDT.
6d2010ae 240 */
0a7de745
A
241 ml_cpu_set_ldt(KERNEL_LDT);
242 cdp->cpu_curtask_has_ldt = 0;
243 } else {
244 /*
245 * Task has its own LDT.
246 */
247 user_ldt_set(new);
248 cdp->cpu_curtask_has_ldt = 1;
6d2010ae
A
249 }
250
6d2010ae
A
251 /*
252 * Bump the scheduler generation count in the commpage.
253 * This can be read by user code to detect its preemption.
254 */
255 commpage_sched_gen_inc();
256}
39236c6e
A
257
258kern_return_t
6d2010ae
A
259thread_set_wq_state32(thread_t thread, thread_state_t tstate)
260{
0a7de745
A
261 x86_thread_state32_t *state;
262 x86_saved_state32_t *saved_state;
6d2010ae 263 thread_t curth = current_thread();
0a7de745 264 spl_t s = 0;
6d2010ae
A
265
266 pal_register_cache_state(thread, DIRTY);
267
268 saved_state = USER_REGS32(thread);
269
270 state = (x86_thread_state32_t *)tstate;
0a7de745 271
6d2010ae
A
272 if (curth != thread) {
273 s = splsched();
0a7de745 274 thread_lock(thread);
6d2010ae
A
275 }
276
277 saved_state->ebp = 0;
278 saved_state->eip = state->eip;
279 saved_state->eax = state->eax;
280 saved_state->ebx = state->ebx;
281 saved_state->ecx = state->ecx;
282 saved_state->edx = state->edx;
283 saved_state->edi = state->edi;
284 saved_state->esi = state->esi;
285 saved_state->uesp = state->esp;
286 saved_state->efl = EFL_USER_SET;
287
288 saved_state->cs = USER_CS;
289 saved_state->ss = USER_DS;
290 saved_state->ds = USER_DS;
291 saved_state->es = USER_DS;
292
293 if (curth != thread) {
0a7de745 294 thread_unlock(thread);
6d2010ae
A
295 splx(s);
296 }
39236c6e
A
297
298 return KERN_SUCCESS;
6d2010ae
A
299}
300
301
39236c6e 302kern_return_t
6d2010ae
A
303thread_set_wq_state64(thread_t thread, thread_state_t tstate)
304{
0a7de745
A
305 x86_thread_state64_t *state;
306 x86_saved_state64_t *saved_state;
6d2010ae 307 thread_t curth = current_thread();
0a7de745 308 spl_t s = 0;
6d2010ae 309
6d2010ae
A
310 saved_state = USER_REGS64(thread);
311 state = (x86_thread_state64_t *)tstate;
0a7de745 312
39236c6e
A
313 /* Disallow setting non-canonical PC or stack */
314 if (!IS_USERADDR64_CANONICAL(state->rsp) ||
315 !IS_USERADDR64_CANONICAL(state->rip)) {
316 return KERN_FAILURE;
317 }
318
319 pal_register_cache_state(thread, DIRTY);
320
6d2010ae
A
321 if (curth != thread) {
322 s = splsched();
0a7de745 323 thread_lock(thread);
6d2010ae
A
324 }
325
326 saved_state->rbp = 0;
327 saved_state->rdi = state->rdi;
328 saved_state->rsi = state->rsi;
329 saved_state->rdx = state->rdx;
330 saved_state->rcx = state->rcx;
331 saved_state->r8 = state->r8;
332 saved_state->r9 = state->r9;
333
334 saved_state->isf.rip = state->rip;
335 saved_state->isf.rsp = state->rsp;
336 saved_state->isf.cs = USER64_CS;
337 saved_state->isf.rflags = EFL_USER_SET;
338
339 if (curth != thread) {
0a7de745 340 thread_unlock(thread);
6d2010ae
A
341 splx(s);
342 }
39236c6e
A
343
344 return KERN_SUCCESS;
6d2010ae
A
345}
346
347/*
348 * Initialize the machine-dependent state for a new thread.
349 */
350kern_return_t
351machine_thread_create(
0a7de745
A
352 thread_t thread,
353 task_t task)
6d2010ae 354{
0a7de745 355 pcb_t pcb = THREAD_TO_PCB(thread);
6d2010ae
A
356
357#if NCOPY_WINDOWS > 0
358 inval_copy_windows(thread);
359
360 thread->machine.physwindow_pte = 0;
361 thread->machine.physwindow_busy = 0;
362#endif
363
0a7de745
A
364 if (__improbable(force_thread_policy_tecs)) {
365 thread->machine.mthr_do_segchk = 1;
366 } else {
367 thread->machine.mthr_do_segchk = 0;
368 }
369
6d2010ae
A
370 /*
371 * Allocate save frame only if required.
372 */
39236c6e 373 if (pcb->iss == NULL) {
6d2010ae 374 assert((get_preemption_level() == 0));
39236c6e 375 pcb->iss = (x86_saved_state_t *) zalloc(iss_zone);
0a7de745 376 if (pcb->iss == NULL) {
6d2010ae 377 panic("iss_zone");
0a7de745 378 }
6d2010ae
A
379 }
380
39236c6e 381 /*
00867663 382 * Ensure that the synthesized 32-bit state including
0a7de745 383 * the 64-bit interrupt state can be acommodated in the
39236c6e
A
384 * 64-bit state we allocate for both 32-bit and 64-bit threads.
385 */
386 assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <=
0a7de745 387 sizeof(pcb->iss->ss_64));
6d2010ae 388
39236c6e 389 bzero((char *)pcb->iss, sizeof(x86_saved_state_t));
6d2010ae 390
0a7de745 391 if (task_has_64Bit_addr(task)) {
39236c6e 392 pcb->iss->flavor = x86_SAVED_STATE64;
6d2010ae 393
39236c6e
A
394 pcb->iss->ss_64.isf.cs = USER64_CS;
395 pcb->iss->ss_64.isf.ss = USER_DS;
396 pcb->iss->ss_64.fs = USER_DS;
397 pcb->iss->ss_64.gs = USER_DS;
398 pcb->iss->ss_64.isf.rflags = EFL_USER_SET;
6d2010ae 399 } else {
39236c6e
A
400 pcb->iss->flavor = x86_SAVED_STATE32;
401
402 pcb->iss->ss_32.cs = USER_CS;
403 pcb->iss->ss_32.ss = USER_DS;
404 pcb->iss->ss_32.ds = USER_DS;
405 pcb->iss->ss_32.es = USER_DS;
406 pcb->iss->ss_32.fs = USER_DS;
407 pcb->iss->ss_32.gs = USER_DS;
408 pcb->iss->ss_32.efl = EFL_USER_SET;
6d2010ae 409 }
6d2010ae
A
410
411 simple_lock_init(&pcb->lock, 0);
412
6d2010ae
A
413 pcb->cthread_self = 0;
414 pcb->uldt_selector = 0;
fe8ab488 415 pcb->thread_gpu_ns = 0;
6d2010ae
A
416 /* Ensure that the "cthread" descriptor describes a valid
417 * segment.
418 */
419 if ((pcb->cthread_desc.access & ACC_P) == 0) {
5c9f4661 420 pcb->cthread_desc = *gdt_desc_p(USER_DS);
6d2010ae
A
421 }
422
0a7de745
A
423
424 return KERN_SUCCESS;
6d2010ae
A
425}
426
427/*
428 * Machine-dependent cleanup prior to destroying a thread
429 */
430void
431machine_thread_destroy(
0a7de745 432 thread_t thread)
6d2010ae 433{
0a7de745 434 pcb_t pcb = THREAD_TO_PCB(thread);
6d2010ae 435
fe8ab488
A
436#if HYPERVISOR
437 if (thread->hv_thread_target) {
438 hv_callbacks.thread_destroy(thread->hv_thread_target);
439 thread->hv_thread_target = NULL;
440 }
441#endif
442
0a7de745 443 if (pcb->ifps != 0) {
5ba3f43e 444 fpu_free(thread, pcb->ifps);
0a7de745 445 }
39236c6e
A
446 if (pcb->iss != 0) {
447 zfree(iss_zone, pcb->iss);
448 pcb->iss = 0;
6d2010ae
A
449 }
450 if (pcb->ids) {
451 zfree(ids_zone, pcb->ids);
452 pcb->ids = NULL;
453 }
454}
fe8ab488
A
455
456kern_return_t
457machine_thread_set_tsd_base(
0a7de745
A
458 thread_t thread,
459 mach_vm_offset_t tsd_base)
fe8ab488 460{
fe8ab488
A
461 if (thread->task == kernel_task) {
462 return KERN_INVALID_ARGUMENT;
463 }
464
d9a64523 465 if (thread_is_64bit_addr(thread)) {
fe8ab488 466 /* check for canonical address, set 0 otherwise */
0a7de745 467 if (!IS_USERADDR64_CANONICAL(tsd_base)) {
fe8ab488 468 tsd_base = 0ULL;
0a7de745 469 }
fe8ab488 470 } else {
0a7de745 471 if (tsd_base > UINT32_MAX) {
fe8ab488 472 tsd_base = 0ULL;
0a7de745 473 }
fe8ab488
A
474 }
475
476 pcb_t pcb = THREAD_TO_PCB(thread);
477 pcb->cthread_self = tsd_base;
478
d9a64523 479 if (!thread_is_64bit_addr(thread)) {
fe8ab488
A
480 /* Set up descriptor for later use */
481 struct real_descriptor desc = {
482 .limit_low = 1,
483 .limit_high = 0,
484 .base_low = tsd_base & 0xffff,
485 .base_med = (tsd_base >> 16) & 0xff,
486 .base_high = (tsd_base >> 24) & 0xff,
0a7de745
A
487 .access = ACC_P | ACC_PL_U | ACC_DATA_W,
488 .granularity = SZ_32 | SZ_G,
fe8ab488
A
489 };
490
491 pcb->cthread_desc = desc;
492 saved_state32(pcb->iss)->gs = USER_CTHREAD;
493 }
494
495 /* For current thread, make the TSD base active immediately */
496 if (thread == current_thread()) {
d9a64523 497 if (thread_is_64bit_addr(thread)) {
fe8ab488
A
498 cpu_data_t *cdp;
499
500 mp_disable_preemption();
501 cdp = current_cpu_datap();
502 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
0a7de745 503 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
fe8ab488 504 wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base);
0a7de745 505 }
fe8ab488
A
506 cdp->cpu_uber.cu_user_gs_base = tsd_base;
507 mp_enable_preemption();
508 } else {
fe8ab488
A
509 /* assign descriptor */
510 mp_disable_preemption();
511 *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc;
512 mp_enable_preemption();
513 }
514 }
515
516 return KERN_SUCCESS;
517}
0a7de745
A
518
519void
520machine_tecs(thread_t thr)
521{
522 if (tecs_mode_supported) {
523 thr->machine.mthr_do_segchk = 1;
524 }
525}
526
527int
528machine_csv(cpuvn_e cve)
529{
530 switch (cve) {
531 case CPUVN_CI:
532 return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0;
533
534 default:
535 break;
536 }
537
538 return 0;
539}