]>
Commit | Line | Data |
---|---|---|
6d2010ae | 1 | /* |
39236c6e | 2 | * Copyright (c) 2000-2012 Apple Inc. All rights reserved. |
6d2010ae A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
57 | #include <mach_rt.h> | |
58 | #include <mach_debug.h> | |
59 | #include <mach_ldebug.h> | |
60 | ||
61 | #include <sys/kdebug.h> | |
62 | ||
63 | #include <mach/kern_return.h> | |
64 | #include <mach/thread_status.h> | |
65 | #include <mach/vm_param.h> | |
66 | ||
67 | #include <kern/counters.h> | |
68 | #include <kern/kalloc.h> | |
69 | #include <kern/mach_param.h> | |
70 | #include <kern/processor.h> | |
71 | #include <kern/cpu_data.h> | |
72 | #include <kern/cpu_number.h> | |
73 | #include <kern/task.h> | |
74 | #include <kern/thread.h> | |
75 | #include <kern/sched_prim.h> | |
76 | #include <kern/misc_protos.h> | |
77 | #include <kern/assert.h> | |
78 | #include <kern/spl.h> | |
79 | #include <kern/machine.h> | |
80 | #include <ipc/ipc_port.h> | |
81 | #include <vm/vm_kern.h> | |
82 | #include <vm/vm_map.h> | |
83 | #include <vm/pmap.h> | |
84 | #include <vm/vm_protos.h> | |
85 | ||
86 | #include <i386/commpage/commpage.h> | |
87 | #include <i386/cpu_data.h> | |
88 | #include <i386/cpu_number.h> | |
89 | #include <i386/eflags.h> | |
90 | #include <i386/proc_reg.h> | |
91 | #include <i386/tss.h> | |
92 | #include <i386/user_ldt.h> | |
93 | #include <i386/fpu.h> | |
94 | #include <i386/mp_desc.h> | |
95 | #include <i386/misc_protos.h> | |
96 | #include <i386/thread.h> | |
6d2010ae A |
97 | #include <i386/seg.h> |
98 | #include <i386/machine_routines.h> | |
99 | ||
fe8ab488 A |
100 | #if HYPERVISOR |
101 | #include <kern/hv_support.h> | |
102 | #endif | |
103 | ||
6d2010ae A |
104 | #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ |
105 | extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ | |
106 | [(sizeof(_type_) % 16) == 0 ? 1 : -1] | |
107 | ||
108 | /* Compile-time checks for vital save area sizing: */ | |
109 | ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); | |
6d2010ae A |
110 | ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); |
111 | ||
112 | #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) | |
113 | ||
114 | extern zone_t iss_zone; /* zone for saved_state area */ | |
115 | extern zone_t ids_zone; /* zone for debug_state area */ | |
116 | ||
6d2010ae A |
117 | void |
118 | act_machine_switch_pcb(__unused thread_t old, thread_t new) | |
119 | { | |
120 | pcb_t pcb = THREAD_TO_PCB(new); | |
121 | cpu_data_t *cdp = current_cpu_datap(); | |
122 | struct real_descriptor *ldtp; | |
123 | mach_vm_offset_t pcb_stack_top; | |
124 | ||
125 | assert(new->kernel_stack != 0); | |
126 | assert(ml_get_interrupts_enabled() == FALSE); | |
127 | #ifdef DIRECTION_FLAG_DEBUG | |
128 | if (x86_get_flags() & EFL_DF) { | |
129 | panic("Direction flag detected: 0x%lx", x86_get_flags()); | |
130 | } | |
131 | #endif | |
132 | ||
6d2010ae A |
133 | /* |
134 | * Clear segment state | |
135 | * unconditionally for DS/ES/FS but more carefully for GS whose | |
136 | * cached state we track. | |
137 | */ | |
138 | set_ds(NULL_SEG); | |
139 | set_es(NULL_SEG); | |
140 | set_fs(NULL_SEG); | |
141 | if (get_gs() != NULL_SEG) { | |
142 | swapgs(); /* switch to user's GS context */ | |
143 | set_gs(NULL_SEG); | |
144 | swapgs(); /* and back to kernel */ | |
145 | ||
146 | /* record the active machine state lost */ | |
147 | cdp->cpu_uber.cu_user_gs_base = 0; | |
148 | } | |
149 | ||
39236c6e | 150 | vm_offset_t isf; |
6d2010ae | 151 | |
39236c6e A |
152 | /* |
153 | * Set pointer to PCB's interrupt stack frame in cpu data. | |
154 | * Used by syscall and double-fault trap handlers. | |
155 | */ | |
156 | isf = (vm_offset_t) &pcb->iss->ss_64.isf; | |
157 | cdp->cpu_uber.cu_isf = isf; | |
158 | pcb_stack_top = (vm_offset_t) (pcb->iss + 1); | |
159 | /* require 16-byte alignment */ | |
160 | assert((pcb_stack_top & 0xF) == 0); | |
6d2010ae | 161 | |
39236c6e A |
162 | /* Interrupt stack is pcb */ |
163 | current_ktss64()->rsp0 = pcb_stack_top; | |
6d2010ae | 164 | |
39236c6e A |
165 | /* |
166 | * Top of temporary sysenter stack points to pcb stack. | |
167 | * Although this is not normally used by 64-bit users, | |
168 | * it needs to be set in case a sysenter is attempted. | |
169 | */ | |
170 | *current_sstk64() = pcb_stack_top; | |
171 | ||
172 | if (is_saved_state64(pcb->iss)) { | |
6d2010ae A |
173 | |
174 | cdp->cpu_task_map = new->map->pmap->pm_task_map; | |
175 | ||
176 | /* | |
177 | * Enable the 64-bit user code segment, USER64_CS. | |
178 | * Disable the 32-bit user code segment, USER_CS. | |
179 | */ | |
00867663 A |
180 | gdt_desc_p(USER64_CS)->access |= ACC_PL_U; |
181 | gdt_desc_p(USER_CS)->access &= ~ACC_PL_U; | |
6d2010ae A |
182 | |
183 | /* | |
184 | * Switch user's GS base if necessary | |
185 | * by setting the Kernel's GS base MSR | |
186 | * - this will become the user's on the swapgs when | |
187 | * returning to user-space. Avoid this for | |
188 | * kernel threads (no user TLS support required) | |
189 | * and verify the memory shadow of the segment base | |
190 | * in the event it was altered in user space. | |
191 | */ | |
192 | if ((pcb->cthread_self != 0) || (new->task != kernel_task)) { | |
00867663 A |
193 | if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || |
194 | (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { | |
6d2010ae A |
195 | cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; |
196 | wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); | |
197 | } | |
198 | } | |
6d2010ae | 199 | |
39236c6e | 200 | } else { |
6d2010ae A |
201 | |
202 | cdp->cpu_task_map = TASK_MAP_32BIT; | |
6d2010ae A |
203 | |
204 | /* | |
205 | * Disable USER64_CS | |
206 | * Enable USER_CS | |
207 | */ | |
00867663 A |
208 | |
209 | /* It's possible that writing to the GDT areas | |
210 | * is expensive, if the processor intercepts those | |
211 | * writes to invalidate its internal segment caches | |
212 | * TODO: perhaps only do this if switching bitness | |
213 | */ | |
214 | gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U; | |
215 | gdt_desc_p(USER_CS)->access |= ACC_PL_U; | |
6d2010ae A |
216 | |
217 | /* | |
218 | * Set the thread`s cthread (a.k.a pthread) | |
219 | * For 32-bit user this involves setting the USER_CTHREAD | |
220 | * descriptor in the LDT to point to the cthread data. | |
221 | * The involves copying in the pre-initialized descriptor. | |
222 | */ | |
223 | ldtp = (struct real_descriptor *)current_ldt(); | |
224 | ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; | |
225 | if (pcb->uldt_selector != 0) | |
226 | ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; | |
227 | cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; | |
228 | ||
229 | /* | |
230 | * Set the thread`s LDT or LDT entry. | |
231 | */ | |
232 | if (new->task == TASK_NULL || new->task->i386_ldt == 0) { | |
233 | /* | |
234 | * Use system LDT. | |
235 | */ | |
236 | ml_cpu_set_ldt(KERNEL_LDT); | |
237 | } else { | |
238 | /* | |
239 | * Task has its own LDT. | |
240 | */ | |
241 | user_ldt_set(new); | |
242 | } | |
243 | } | |
244 | ||
6d2010ae A |
245 | /* |
246 | * Bump the scheduler generation count in the commpage. | |
247 | * This can be read by user code to detect its preemption. | |
248 | */ | |
249 | commpage_sched_gen_inc(); | |
250 | } | |
39236c6e A |
251 | |
252 | kern_return_t | |
6d2010ae A |
253 | thread_set_wq_state32(thread_t thread, thread_state_t tstate) |
254 | { | |
255 | x86_thread_state32_t *state; | |
256 | x86_saved_state32_t *saved_state; | |
257 | thread_t curth = current_thread(); | |
258 | spl_t s=0; | |
259 | ||
260 | pal_register_cache_state(thread, DIRTY); | |
261 | ||
262 | saved_state = USER_REGS32(thread); | |
263 | ||
264 | state = (x86_thread_state32_t *)tstate; | |
265 | ||
266 | if (curth != thread) { | |
267 | s = splsched(); | |
268 | thread_lock(thread); | |
269 | } | |
270 | ||
271 | saved_state->ebp = 0; | |
272 | saved_state->eip = state->eip; | |
273 | saved_state->eax = state->eax; | |
274 | saved_state->ebx = state->ebx; | |
275 | saved_state->ecx = state->ecx; | |
276 | saved_state->edx = state->edx; | |
277 | saved_state->edi = state->edi; | |
278 | saved_state->esi = state->esi; | |
279 | saved_state->uesp = state->esp; | |
280 | saved_state->efl = EFL_USER_SET; | |
281 | ||
282 | saved_state->cs = USER_CS; | |
283 | saved_state->ss = USER_DS; | |
284 | saved_state->ds = USER_DS; | |
285 | saved_state->es = USER_DS; | |
286 | ||
287 | if (curth != thread) { | |
288 | thread_unlock(thread); | |
289 | splx(s); | |
290 | } | |
39236c6e A |
291 | |
292 | return KERN_SUCCESS; | |
6d2010ae A |
293 | } |
294 | ||
295 | ||
39236c6e | 296 | kern_return_t |
6d2010ae A |
297 | thread_set_wq_state64(thread_t thread, thread_state_t tstate) |
298 | { | |
299 | x86_thread_state64_t *state; | |
300 | x86_saved_state64_t *saved_state; | |
301 | thread_t curth = current_thread(); | |
302 | spl_t s=0; | |
303 | ||
6d2010ae A |
304 | saved_state = USER_REGS64(thread); |
305 | state = (x86_thread_state64_t *)tstate; | |
306 | ||
39236c6e A |
307 | /* Disallow setting non-canonical PC or stack */ |
308 | if (!IS_USERADDR64_CANONICAL(state->rsp) || | |
309 | !IS_USERADDR64_CANONICAL(state->rip)) { | |
310 | return KERN_FAILURE; | |
311 | } | |
312 | ||
313 | pal_register_cache_state(thread, DIRTY); | |
314 | ||
6d2010ae A |
315 | if (curth != thread) { |
316 | s = splsched(); | |
317 | thread_lock(thread); | |
318 | } | |
319 | ||
320 | saved_state->rbp = 0; | |
321 | saved_state->rdi = state->rdi; | |
322 | saved_state->rsi = state->rsi; | |
323 | saved_state->rdx = state->rdx; | |
324 | saved_state->rcx = state->rcx; | |
325 | saved_state->r8 = state->r8; | |
326 | saved_state->r9 = state->r9; | |
327 | ||
328 | saved_state->isf.rip = state->rip; | |
329 | saved_state->isf.rsp = state->rsp; | |
330 | saved_state->isf.cs = USER64_CS; | |
331 | saved_state->isf.rflags = EFL_USER_SET; | |
332 | ||
333 | if (curth != thread) { | |
334 | thread_unlock(thread); | |
335 | splx(s); | |
336 | } | |
39236c6e A |
337 | |
338 | return KERN_SUCCESS; | |
6d2010ae A |
339 | } |
340 | ||
341 | /* | |
342 | * Initialize the machine-dependent state for a new thread. | |
343 | */ | |
344 | kern_return_t | |
345 | machine_thread_create( | |
346 | thread_t thread, | |
347 | task_t task) | |
348 | { | |
349 | pcb_t pcb = THREAD_TO_PCB(thread); | |
6d2010ae A |
350 | |
351 | #if NCOPY_WINDOWS > 0 | |
352 | inval_copy_windows(thread); | |
353 | ||
354 | thread->machine.physwindow_pte = 0; | |
355 | thread->machine.physwindow_busy = 0; | |
356 | #endif | |
357 | ||
358 | /* | |
359 | * Allocate save frame only if required. | |
360 | */ | |
39236c6e | 361 | if (pcb->iss == NULL) { |
6d2010ae | 362 | assert((get_preemption_level() == 0)); |
39236c6e A |
363 | pcb->iss = (x86_saved_state_t *) zalloc(iss_zone); |
364 | if (pcb->iss == NULL) | |
6d2010ae A |
365 | panic("iss_zone"); |
366 | } | |
367 | ||
39236c6e | 368 | /* |
00867663 | 369 | * Ensure that the synthesized 32-bit state including |
39236c6e A |
370 | * the 64-bit interrupt state can be acommodated in the |
371 | * 64-bit state we allocate for both 32-bit and 64-bit threads. | |
372 | */ | |
373 | assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <= | |
374 | sizeof(pcb->iss->ss_64)); | |
6d2010ae | 375 | |
39236c6e | 376 | bzero((char *)pcb->iss, sizeof(x86_saved_state_t)); |
6d2010ae | 377 | |
39236c6e A |
378 | if (task_has_64BitAddr(task)) { |
379 | pcb->iss->flavor = x86_SAVED_STATE64; | |
6d2010ae | 380 | |
39236c6e A |
381 | pcb->iss->ss_64.isf.cs = USER64_CS; |
382 | pcb->iss->ss_64.isf.ss = USER_DS; | |
383 | pcb->iss->ss_64.fs = USER_DS; | |
384 | pcb->iss->ss_64.gs = USER_DS; | |
385 | pcb->iss->ss_64.isf.rflags = EFL_USER_SET; | |
6d2010ae | 386 | } else { |
39236c6e A |
387 | pcb->iss->flavor = x86_SAVED_STATE32; |
388 | ||
389 | pcb->iss->ss_32.cs = USER_CS; | |
390 | pcb->iss->ss_32.ss = USER_DS; | |
391 | pcb->iss->ss_32.ds = USER_DS; | |
392 | pcb->iss->ss_32.es = USER_DS; | |
393 | pcb->iss->ss_32.fs = USER_DS; | |
394 | pcb->iss->ss_32.gs = USER_DS; | |
395 | pcb->iss->ss_32.efl = EFL_USER_SET; | |
6d2010ae | 396 | } |
6d2010ae A |
397 | |
398 | simple_lock_init(&pcb->lock, 0); | |
399 | ||
6d2010ae A |
400 | pcb->cthread_self = 0; |
401 | pcb->uldt_selector = 0; | |
fe8ab488 | 402 | pcb->thread_gpu_ns = 0; |
6d2010ae A |
403 | /* Ensure that the "cthread" descriptor describes a valid |
404 | * segment. | |
405 | */ | |
406 | if ((pcb->cthread_desc.access & ACC_P) == 0) { | |
407 | struct real_descriptor *ldtp; | |
408 | ldtp = (struct real_descriptor *)current_ldt(); | |
409 | pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; | |
410 | } | |
411 | ||
412 | return(KERN_SUCCESS); | |
413 | } | |
414 | ||
415 | /* | |
416 | * Machine-dependent cleanup prior to destroying a thread | |
417 | */ | |
418 | void | |
419 | machine_thread_destroy( | |
420 | thread_t thread) | |
421 | { | |
39037602 | 422 | pcb_t pcb = THREAD_TO_PCB(thread); |
6d2010ae | 423 | |
fe8ab488 A |
424 | #if HYPERVISOR |
425 | if (thread->hv_thread_target) { | |
426 | hv_callbacks.thread_destroy(thread->hv_thread_target); | |
427 | thread->hv_thread_target = NULL; | |
428 | } | |
429 | #endif | |
430 | ||
6d2010ae A |
431 | if (pcb->ifps != 0) |
432 | fpu_free(pcb->ifps); | |
39236c6e A |
433 | if (pcb->iss != 0) { |
434 | zfree(iss_zone, pcb->iss); | |
435 | pcb->iss = 0; | |
6d2010ae A |
436 | } |
437 | if (pcb->ids) { | |
438 | zfree(ids_zone, pcb->ids); | |
439 | pcb->ids = NULL; | |
440 | } | |
441 | } | |
fe8ab488 A |
442 | |
443 | kern_return_t | |
444 | machine_thread_set_tsd_base( | |
445 | thread_t thread, | |
446 | mach_vm_offset_t tsd_base) | |
447 | { | |
448 | ||
449 | if (thread->task == kernel_task) { | |
450 | return KERN_INVALID_ARGUMENT; | |
451 | } | |
452 | ||
453 | if (thread_is_64bit(thread)) { | |
454 | /* check for canonical address, set 0 otherwise */ | |
455 | if (!IS_USERADDR64_CANONICAL(tsd_base)) | |
456 | tsd_base = 0ULL; | |
457 | } else { | |
458 | if (tsd_base > UINT32_MAX) | |
459 | tsd_base = 0ULL; | |
460 | } | |
461 | ||
462 | pcb_t pcb = THREAD_TO_PCB(thread); | |
463 | pcb->cthread_self = tsd_base; | |
464 | ||
465 | if (!thread_is_64bit(thread)) { | |
466 | /* Set up descriptor for later use */ | |
467 | struct real_descriptor desc = { | |
468 | .limit_low = 1, | |
469 | .limit_high = 0, | |
470 | .base_low = tsd_base & 0xffff, | |
471 | .base_med = (tsd_base >> 16) & 0xff, | |
472 | .base_high = (tsd_base >> 24) & 0xff, | |
473 | .access = ACC_P|ACC_PL_U|ACC_DATA_W, | |
474 | .granularity = SZ_32|SZ_G, | |
475 | }; | |
476 | ||
477 | pcb->cthread_desc = desc; | |
478 | saved_state32(pcb->iss)->gs = USER_CTHREAD; | |
479 | } | |
480 | ||
481 | /* For current thread, make the TSD base active immediately */ | |
482 | if (thread == current_thread()) { | |
483 | ||
484 | if (thread_is_64bit(thread)) { | |
485 | cpu_data_t *cdp; | |
486 | ||
487 | mp_disable_preemption(); | |
488 | cdp = current_cpu_datap(); | |
489 | if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || | |
490 | (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) | |
491 | wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base); | |
492 | cdp->cpu_uber.cu_user_gs_base = tsd_base; | |
493 | mp_enable_preemption(); | |
494 | } else { | |
495 | ||
496 | /* assign descriptor */ | |
497 | mp_disable_preemption(); | |
498 | *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc; | |
499 | mp_enable_preemption(); | |
500 | } | |
501 | } | |
502 | ||
503 | return KERN_SUCCESS; | |
504 | } |