X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..527f99514973766e9c0382a4d8550dfb00f54939:/osfmk/i386/pcb_native.c diff --git a/osfmk/i386/pcb_native.c b/osfmk/i386/pcb_native.c index 8ce815029..34df7c119 100644 --- a/osfmk/i386/pcb_native.c +++ b/osfmk/i386/pcb_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,7 +54,6 @@ * the rights to redistribute these changes. */ -#include #include #include @@ -94,20 +93,19 @@ #include #include #include -#if defined(__i386__) -#include -#endif #include #include +#if HYPERVISOR +#include +#endif + #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ [(sizeof(_type_) % 16) == 0 ? 1 : -1] /* Compile-time checks for vital save area sizing: */ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); -ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_sframe64_t); -ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_compat32_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) @@ -115,7 +113,6 @@ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); extern zone_t iss_zone; /* zone for saved_state area */ extern zone_t ids_zone; /* zone for debug_state area */ -extern void *get_bsduthreadarg(thread_t); void act_machine_switch_pcb(__unused thread_t old, thread_t new) { @@ -132,7 +129,6 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) } #endif -#if defined(__x86_64__) /* * Clear segment state * unconditionally for DS/ES/FS but more carefully for GS whose @@ -141,6 +137,7 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) set_ds(NULL_SEG); set_es(NULL_SEG); set_fs(NULL_SEG); + if (get_gs() != NULL_SEG) { swapgs(); /* switch to user's GS context */ set_gs(NULL_SEG); @@ -150,40 +147,29 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) cdp->cpu_uber.cu_user_gs_base = 0; } - if (is_saved_state64(pcb->iss)) { - /* - * The test above is performed against the thread save state - * flavor and not task's 64-bit feature flag because of the - * thread/task 64-bit state divergence that can arise in - * task_set_64bit() x86: the task state is changed before - * the individual thread(s). - */ - x86_saved_state64_tagged_t *iss64; - vm_offset_t isf; + vm_offset_t isf; - assert(is_saved_state64(pcb->iss)); - - iss64 = (x86_saved_state64_tagged_t *) pcb->iss; - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by syscall and double-fault trap handlers. - */ - isf = (vm_offset_t) &iss64->state.isf; - cdp->cpu_uber.cu_isf = isf; - pcb_stack_top = (vm_offset_t) (iss64 + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. + */ + isf = (vm_offset_t) &pcb->iss->ss_64.isf; + cdp->cpu_uber.cu_isf = isf; + pcb_stack_top = (vm_offset_t) (pcb->iss + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = pcb_stack_top; + current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku; + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = pcb_stack_top; - /* - * Top of temporary sysenter stack points to pcb stack. - * Although this is not normally used by 64-bit users, - * it needs to be set in case a sysenter is attempted. - */ - *current_sstk64() = pcb_stack_top; + cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku; + + if (is_saved_state64(pcb->iss)) { cdp->cpu_task_map = new->map->pmap->pm_task_map; @@ -191,8 +177,8 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) * Enable the 64-bit user code segment, USER64_CS. * Disable the 32-bit user code segment, USER_CS. */ - ldt_desc_p(USER64_CS)->access |= ACC_PL_U; - ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; + gdt_desc_p(USER64_CS)->access |= ACC_PL_U; + gdt_desc_p(USER_CS)->access &= ~ACC_PL_U; /* * Switch user's GS base if necessary @@ -204,49 +190,28 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) * in the event it was altered in user space. */ if ((pcb->cthread_self != 0) || (new->task != kernel_task)) { - if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { + if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || + (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); } } } else { - x86_saved_state_compat32_t *iss32compat; - vm_offset_t isf; - - assert(is_saved_state32(pcb->iss)); - iss32compat = (x86_saved_state_compat32_t *) pcb->iss; - - pcb_stack_top = (uintptr_t) (iss32compat + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by debug trap handler. - */ - isf = (vm_offset_t) &iss32compat->isf64; - cdp->cpu_uber.cu_isf = isf; - - /* Top of temporary sysenter stack points to pcb stack */ - *current_sstk64() = pcb_stack_top; - - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = pcb_stack_top; cdp->cpu_task_map = TASK_MAP_32BIT; - /* Precalculate pointers to syscall argument store, for use - * in the trampolines. - */ - cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new); - cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid; - pcb->arg_store_valid = 0; /* * Disable USER64_CS * Enable USER_CS */ - ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; - ldt_desc_p(USER_CS)->access |= ACC_PL_U; + + /* It's possible that writing to the GDT areas + * is expensive, if the processor intercepts those + * writes to invalidate its internal segment caches + * TODO: perhaps only do this if switching bitness + */ + gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U; + gdt_desc_p(USER_CS)->access |= ACC_PL_U; /* * Set the thread`s cthread (a.k.a pthread) @@ -276,162 +241,14 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) } } -#else /* !__x86_64__ */ - - vm_offset_t hi_pcb_stack_top; - vm_offset_t hi_iss; - - if (!cpu_mode_is64bit()) { - x86_saved_state32_tagged_t *hi_iss32; - /* - * Save a pointer to the top of the "kernel" stack - - * actually the place in the PCB where a trap into - * kernel mode will push the registers. - */ - hi_iss = (vm_offset_t)((unsigned long) - pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) | - ((unsigned long)pcb->iss & PAGE_MASK)); - - cdp->cpu_hi_iss = (void *)hi_iss; - - pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0); - pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1); - - hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss; - assert(hi_iss32->tag == x86_SAVED_STATE32); - - hi_pcb_stack_top = (int) (hi_iss32 + 1); - - /* - * For fast syscall, top of interrupt stack points to pcb stack - */ - *(vm_offset_t *) current_sstk() = hi_pcb_stack_top; - - current_ktss()->esp0 = hi_pcb_stack_top; - - } else if (is_saved_state64(pcb->iss)) { - /* - * The test above is performed against the thread save state - * flavor and not task's 64-bit feature flag because of the - * thread/task 64-bit state divergence that can arise in - * task_set_64bit() x86: the task state is changed before - * the individual thread(s). - */ - x86_saved_state64_tagged_t *iss64; - vm_offset_t isf; - - assert(is_saved_state64(pcb->iss)); - - iss64 = (x86_saved_state64_tagged_t *) pcb->iss; - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by syscall and double-fault trap handlers. - */ - isf = (vm_offset_t) &iss64->state.isf; - cdp->cpu_uber.cu_isf = UBER64(isf); - pcb_stack_top = (vm_offset_t) (iss64 + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = UBER64(pcb_stack_top); - - /* - * Top of temporary sysenter stack points to pcb stack. - * Although this is not normally used by 64-bit users, - * it needs to be set in case a sysenter is attempted. - */ - *current_sstk64() = UBER64(pcb_stack_top); - - cdp->cpu_task_map = new->map->pmap->pm_task_map; - - /* - * Enable the 64-bit user code segment, USER64_CS. - * Disable the 32-bit user code segment, USER_CS. - */ - ldt_desc_p(USER64_CS)->access |= ACC_PL_U; - ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; - - } else { - x86_saved_state_compat32_t *iss32compat; - vm_offset_t isf; - - assert(is_saved_state32(pcb->iss)); - iss32compat = (x86_saved_state_compat32_t *) pcb->iss; - - pcb_stack_top = (int) (iss32compat + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by debug trap handler. - */ - isf = (vm_offset_t) &iss32compat->isf64; - cdp->cpu_uber.cu_isf = UBER64(isf); - - /* Top of temporary sysenter stack points to pcb stack */ - *current_sstk64() = UBER64(pcb_stack_top); - - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = UBER64(pcb_stack_top); - - cdp->cpu_task_map = TASK_MAP_32BIT; - /* Precalculate pointers to syscall argument store, for use - * in the trampolines. - */ - cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new)); - cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid); - pcb->arg_store_valid = 0; - - /* - * Disable USER64_CS - * Enable USER_CS - */ - ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; - ldt_desc_p(USER_CS)->access |= ACC_PL_U; - } - - /* - * Set the thread`s cthread (a.k.a pthread) - * For 32-bit user this involves setting the USER_CTHREAD - * descriptor in the LDT to point to the cthread data. - * The involves copying in the pre-initialized descriptor. - */ - ldtp = (struct real_descriptor *)current_ldt(); - ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; - if (pcb->uldt_selector != 0) - ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; - - /* - * For 64-bit, we additionally set the 64-bit User GS base - * address. On return to 64-bit user, the GS.Base MSR will be written. - */ - cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; - - /* - * Set the thread`s LDT or LDT entry. - */ - if (new->task == TASK_NULL || new->task->i386_ldt == 0) { - /* - * Use system LDT. - */ - ml_cpu_set_ldt(KERNEL_LDT); - } else { - /* - * Task has its own LDT. - */ - user_ldt_set(new); - } -#endif - /* * Bump the scheduler generation count in the commpage. * This can be read by user code to detect its preemption. */ commpage_sched_gen_inc(); } -void + +kern_return_t thread_set_wq_state32(thread_t thread, thread_state_t tstate) { x86_thread_state32_t *state; @@ -470,10 +287,12 @@ thread_set_wq_state32(thread_t thread, thread_state_t tstate) thread_unlock(thread); splx(s); } + + return KERN_SUCCESS; } -void +kern_return_t thread_set_wq_state64(thread_t thread, thread_state_t tstate) { x86_thread_state64_t *state; @@ -481,11 +300,17 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) thread_t curth = current_thread(); spl_t s=0; - pal_register_cache_state(thread, DIRTY); - saved_state = USER_REGS64(thread); state = (x86_thread_state64_t *)tstate; + /* Disallow setting non-canonical PC or stack */ + if (!IS_USERADDR64_CANONICAL(state->rsp) || + !IS_USERADDR64_CANONICAL(state->rip)) { + return KERN_FAILURE; + } + + pal_register_cache_state(thread, DIRTY); + if (curth != thread) { s = splsched(); thread_lock(thread); @@ -508,6 +333,8 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) thread_unlock(thread); splx(s); } + + return KERN_SUCCESS; } /* @@ -519,7 +346,6 @@ machine_thread_create( task_t task) { pcb_t pcb = THREAD_TO_PCB(thread); - x86_saved_state_t *iss; #if NCOPY_WINDOWS > 0 inval_copy_windows(thread); @@ -531,100 +357,53 @@ machine_thread_create( /* * Allocate save frame only if required. */ - if (pcb->sf == NULL) { + if (pcb->iss == NULL) { assert((get_preemption_level() == 0)); - pcb->sf = zalloc(iss_zone); - if (pcb->sf == NULL) + pcb->iss = (x86_saved_state_t *) zalloc(iss_zone); + if (pcb->iss == NULL) panic("iss_zone"); } - if (task_has_64BitAddr(task)) { - x86_sframe64_t *sf64; + /* + * Ensure that the synthesized 32-bit state including + * the 64-bit interrupt state can be acommodated in the + * 64-bit state we allocate for both 32-bit and 64-bit threads. + */ + assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <= + sizeof(pcb->iss->ss_64)); - sf64 = (x86_sframe64_t *) pcb->sf; + bzero((char *)pcb->iss, sizeof(x86_saved_state_t)); - bzero((char *)sf64, sizeof(x86_sframe64_t)); + if (task_has_64BitAddr(task)) { + pcb->iss->flavor = x86_SAVED_STATE64; - iss = (x86_saved_state_t *) &sf64->ssf; - iss->flavor = x86_SAVED_STATE64; - /* - * Guarantee that the bootstrapped thread will be in user - * mode. - */ - iss->ss_64.isf.rflags = EFL_USER_SET; - iss->ss_64.isf.cs = USER64_CS; - iss->ss_64.isf.ss = USER_DS; - iss->ss_64.fs = USER_DS; - iss->ss_64.gs = USER_DS; + pcb->iss->ss_64.isf.cs = USER64_CS; + pcb->iss->ss_64.isf.ss = USER_DS; + pcb->iss->ss_64.fs = USER_DS; + pcb->iss->ss_64.gs = USER_DS; + pcb->iss->ss_64.isf.rflags = EFL_USER_SET; } else { - if (cpu_mode_is64bit()) { - x86_sframe_compat32_t *sfc32; - - sfc32 = (x86_sframe_compat32_t *)pcb->sf; - - bzero((char *)sfc32, sizeof(x86_sframe_compat32_t)); - - iss = (x86_saved_state_t *) &sfc32->ssf.iss32; - iss->flavor = x86_SAVED_STATE32; -#if defined(__i386__) -#if DEBUG - { - sfc32->pad_for_16byte_alignment[0] = 0x64326432; - sfc32->pad_for_16byte_alignment[1] = 0x64326432; - } -#endif /* DEBUG */ - } else { - x86_sframe32_t *sf32; - struct real_descriptor *ldtp; - pmap_paddr_t paddr; - - sf32 = (x86_sframe32_t *) pcb->sf; - - bzero((char *)sf32, sizeof(x86_sframe32_t)); - - iss = (x86_saved_state_t *) &sf32->ssf; - iss->flavor = x86_SAVED_STATE32; - - pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss)); - if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE)))) - pcb->iss_pte1 = INTEL_PTE_INVALID; - else - pcb->iss_pte1 = pte_kernel_rw(paddr); - - ldtp = (struct real_descriptor *) - pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); - pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; -#endif /* __i386__ */ - } - /* - * Guarantee that the bootstrapped thread will be in user - * mode. - */ - iss->ss_32.cs = USER_CS; - iss->ss_32.ss = USER_DS; - iss->ss_32.ds = USER_DS; - iss->ss_32.es = USER_DS; - iss->ss_32.fs = USER_DS; - iss->ss_32.gs = USER_DS; - iss->ss_32.efl = EFL_USER_SET; - + pcb->iss->flavor = x86_SAVED_STATE32; + + pcb->iss->ss_32.cs = USER_CS; + pcb->iss->ss_32.ss = USER_DS; + pcb->iss->ss_32.ds = USER_DS; + pcb->iss->ss_32.es = USER_DS; + pcb->iss->ss_32.fs = USER_DS; + pcb->iss->ss_32.gs = USER_DS; + pcb->iss->ss_32.efl = EFL_USER_SET; } - pcb->iss = iss; simple_lock_init(&pcb->lock, 0); - pcb->arg_store_valid = 0; pcb->cthread_self = 0; pcb->uldt_selector = 0; - + pcb->thread_gpu_ns = 0; /* Ensure that the "cthread" descriptor describes a valid * segment. */ if ((pcb->cthread_desc.access & ACC_P) == 0) { - struct real_descriptor *ldtp; - ldtp = (struct real_descriptor *)current_ldt(); - pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + pcb->cthread_desc = *gdt_desc_p(USER_DS); } return(KERN_SUCCESS); @@ -637,16 +416,86 @@ void machine_thread_destroy( thread_t thread) { - register pcb_t pcb = THREAD_TO_PCB(thread); + pcb_t pcb = THREAD_TO_PCB(thread); + +#if HYPERVISOR + if (thread->hv_thread_target) { + hv_callbacks.thread_destroy(thread->hv_thread_target); + thread->hv_thread_target = NULL; + } +#endif if (pcb->ifps != 0) - fpu_free(pcb->ifps); - if (pcb->sf != 0) { - zfree(iss_zone, pcb->sf); - pcb->sf = 0; + fpu_free(thread, pcb->ifps); + if (pcb->iss != 0) { + zfree(iss_zone, pcb->iss); + pcb->iss = 0; } if (pcb->ids) { zfree(ids_zone, pcb->ids); pcb->ids = NULL; } } + +kern_return_t +machine_thread_set_tsd_base( + thread_t thread, + mach_vm_offset_t tsd_base) +{ + + if (thread->task == kernel_task) { + return KERN_INVALID_ARGUMENT; + } + + if (thread_is_64bit(thread)) { + /* check for canonical address, set 0 otherwise */ + if (!IS_USERADDR64_CANONICAL(tsd_base)) + tsd_base = 0ULL; + } else { + if (tsd_base > UINT32_MAX) + tsd_base = 0ULL; + } + + pcb_t pcb = THREAD_TO_PCB(thread); + pcb->cthread_self = tsd_base; + + if (!thread_is_64bit(thread)) { + /* Set up descriptor for later use */ + struct real_descriptor desc = { + .limit_low = 1, + .limit_high = 0, + .base_low = tsd_base & 0xffff, + .base_med = (tsd_base >> 16) & 0xff, + .base_high = (tsd_base >> 24) & 0xff, + .access = ACC_P|ACC_PL_U|ACC_DATA_W, + .granularity = SZ_32|SZ_G, + }; + + pcb->cthread_desc = desc; + saved_state32(pcb->iss)->gs = USER_CTHREAD; + } + + /* For current thread, make the TSD base active immediately */ + if (thread == current_thread()) { + + if (thread_is_64bit(thread)) { + cpu_data_t *cdp; + + mp_disable_preemption(); + cdp = current_cpu_datap(); + if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || + (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) + wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base); + cdp->cpu_uber.cu_user_gs_base = tsd_base; + mp_enable_preemption(); + } else { + + /* assign descriptor */ + mp_disable_preemption(); + *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc; + mp_enable_preemption(); + } + } + + return KERN_SUCCESS; +}