X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/b226f5e54a60dc81db17b1260381d7dbfea3cdf1..0a7de7458d150b5d4dffc935ba399be265ef0a1a:/osfmk/x86_64/pmap_pcid.c?ds=sidebyside diff --git a/osfmk/x86_64/pmap_pcid.c b/osfmk/x86_64/pmap_pcid.c index 506d684ce..6ecc6f225 100644 --- a/osfmk/x86_64/pmap_pcid.c +++ b/osfmk/x86_64/pmap_pcid.c @@ -2,7 +2,7 @@ * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -55,34 +55,40 @@ * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10) */ -uint32_t pmap_pcid_ncpus; -boolean_t pmap_pcid_disabled = FALSE; +uint32_t pmap_pcid_ncpus; +boolean_t pmap_pcid_disabled = FALSE; +bool invpcid_enabled = false; +static uint32_t INP_MAX = 0; pcid_cdata_t pcid_data[MAX_CPUS] __attribute__((aligned(64))); -void pmap_pcid_configure(void) { +void +pmap_pcid_configure(void) +{ int ccpu = cpu_number(); uintptr_t cr4 = get_cr4(); boolean_t pcid_present = FALSE; pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu); - pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() !=0); + pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0); pmap_assert(cpu_mode_is64bit()); - if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof (pmap_pcid_disabled))) { + if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof(pmap_pcid_disabled))) { pmap_pcid_log("PMAP: PCID feature disabled\n"); printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled); kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled); } - /* no_shared_cr3+PCID is currently unsupported */ - //todo remove nscr3 -#if DEBUG - if (pmap_pcid_disabled == FALSE) + /* no_shared_cr3+PCID is currently unsupported */ + +#if DEBUG + if (pmap_pcid_disabled == FALSE) { no_shared_cr3 = FALSE; - else + } else { no_shared_cr3 = TRUE; + } #else - if (no_shared_cr3) + if (no_shared_cr3) { pmap_pcid_disabled = TRUE; + } #endif if (pmap_pcid_disabled || no_shared_cr3) { unsigned i; @@ -102,19 +108,27 @@ void pmap_pcid_configure(void) { /* DRKTODO: assert if features haven't been discovered yet. Redundant * invocation of cpu_mode_init and descendants masks this for now. */ - if ((cpuid_features() & CPUID_FEATURE_PCID)) + if ((cpuid_features() & CPUID_FEATURE_PCID)) { pcid_present = TRUE; - else { + } else { cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE; pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu); return; } - if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE|CR4_PGE)) { + if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) { cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE; pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu); return; } if (pcid_present == TRUE) { + if (ccpu == 0) { + if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_INVPCID) { + invpcid_enabled = true; + } + } +#if DEVELOPMENT || DEBUG + PE_parse_boot_argn("pmap_inp_max", &INP_MAX, sizeof(INP_MAX)); +#endif pmap_pcid_log("Pre-PCID:CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4); if (cpu_number() >= PMAP_PCID_MAX_CPUS) { @@ -126,7 +140,7 @@ void pmap_pcid_configure(void) { } set_cr4(get_cr4() | CR4_PCIDE); pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4()); - tlb_flush_global(); + pmap_tlbi_range(0, ~0ULL, true, 0); cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE; if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) { @@ -140,9 +154,11 @@ void pmap_pcid_configure(void) { } } -void pmap_pcid_initialize(pmap_t p) { +void +pmap_pcid_initialize(pmap_t p) +{ unsigned i; - unsigned nc = sizeof(p->pmap_pcid_cpus)/sizeof(pcid_t); + unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t); pmap_assert(nc >= real_ncpus); for (i = 0; i < nc; i++) { @@ -153,9 +169,11 @@ void pmap_pcid_initialize(pmap_t p) { } } -void pmap_pcid_initialize_kernel(pmap_t p) { +void +pmap_pcid_initialize_kernel(pmap_t p) +{ unsigned i; - unsigned nc = sizeof(p->pmap_pcid_cpus)/sizeof(pcid_t); + unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t); for (i = 0; i < nc; i++) { p->pmap_pcid_cpus[i] = 0; @@ -165,12 +183,14 @@ void pmap_pcid_initialize_kernel(pmap_t p) { } } -pcid_t pmap_pcid_allocate_pcid(int ccpu) { +pcid_t +pmap_pcid_allocate_pcid(int ccpu) +{ int i; - pcid_ref_t cur_min = 0xFF; - uint32_t cur_min_index = ~1; - pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0]; - pcid_ref_t old_count; + pcid_ref_t cur_min = 0xFF; + uint32_t cur_min_index = ~1; + pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0]; + pcid_ref_t old_count; if ((i = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint) != 0) { if (cpu_pcid_refcounts[i] == 0) { @@ -209,41 +229,51 @@ pcid_t pmap_pcid_allocate_pcid(int ccpu) { old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1); pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT); - return (cur_min_index); + return cur_min_index; } -void pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) { +void +pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) +{ pcid_t pcid; pmap_t lp; pcid_ref_t prior_count; pcid = tpmap->pmap_pcid_cpus[ccpu]; pmap_assert(pcid != PMAP_PCID_INVALID_PCID); - if (pcid == PMAP_PCID_INVALID_PCID) + if (pcid == PMAP_PCID_INVALID_PCID) { return; + } lp = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid]; pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID); pmap_assert(cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid] >= 1); - if (lp == tpmap) + if (lp == tpmap) { (void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID); + } if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid], 1)) == 1) { - cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = pcid; + cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = pcid; } pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT); } -void pmap_destroy_pcid_sync(pmap_t p) { +void +pmap_destroy_pcid_sync(pmap_t p) +{ int i; - pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() !=0); - for (i = 0; i < PMAP_PCID_MAX_CPUS; i++) - if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID) + pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0); + for (i = 0; i < PMAP_PCID_MAX_CPUS; i++) { + if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID) { pmap_pcid_deallocate_pcid(i, p); + } + } } -pcid_t pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu) { +pcid_t +pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu) +{ pmap_t active_pmap = cpmap; if (__improbable(cpmap->pagezero_accessible)) { @@ -260,12 +290,15 @@ int npz = 0; #define PCID_RECORD_SIZE 128 uint64_t pcid_record_array[PCID_RECORD_SIZE]; #endif +#define PMAP_UPCIDP(p) ((p ? (p + PMAP_PCID_MAX_PCID) : 0) | 1ULL << 63) -void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t copyio) { - pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu]; - pmap_t last_pmap; - boolean_t pcid_conflict = FALSE, pending_flush = FALSE; - pcid_cdata_t *pcdata = cpu_datap(ccpu)->cpu_pcid_data; +void +pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t copyio) +{ + pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu]; + pmap_t last_pmap; + boolean_t pcid_conflict = FALSE, pending_flush = FALSE; + pcid_cdata_t *pcdata = cpu_datap(ccpu)->cpu_pcid_data; pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled); if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) { @@ -273,7 +306,7 @@ void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t } pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID); -#ifdef PCID_ASSERT +#ifdef PCID_ASSERT cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid; #endif cpu_datap(ccpu)->cpu_active_pcid = new_pcid; @@ -290,15 +323,16 @@ void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t pcdata->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap; pmap_assert(new_pcid < PMAP_PCID_MAX_PCID); - pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) || + pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0))); -#if PMAP_ASSERT - pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) <<63); +#if PMAP_ASSERT + pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63); pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL); /* Diagnostic to detect pagetable anchor corruption */ - if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX]) - __asm__ volatile("int3"); -#endif /* PMAP_ASSERT */ + if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX]) { + __asm__ volatile ("int3"); + } +#endif /* PMAP_ASSERT */ pmap_paddr_t ncr3 = tpmap->pm_cr3; @@ -312,23 +346,11 @@ void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t npz++; } - uint64_t preserve = !(pending_flush || pcid_conflict); - set_cr3_composed(ncr3, new_pcid, preserve); -#if DEBUG - cpu_datap(ccpu)->cpu_pcid_last_cr3 = ncr3 | new_pcid | preserve << 63; -#endif - uint64_t spcid = (new_pcid + PMAP_PCID_MAX_PCID); - if (new_pcid == 0) { - spcid = 0; - } - uint64_t scr3 = tpmap->pm_ucr3 | spcid; - - cpu_datap(ccpu)->cpu_ucr3 = scr3; - cpu_shadowp(ccpu)->cpu_ucr3 = scr3; - - cpu_shadowp(ccpu)->cpu_task_cr3 = ncr3 | new_pcid; + set_cr3_composed(ncr3, new_pcid, 1ULL); + cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = ncr3 | new_pcid | (1ULL << 63); - if (!pending_flush) { + bool preserve = !pcid_conflict && !pending_flush; + if (preserve == true) { /* We did not previously observe a pending invalidation for this * ASID. However, the load from the coherency vector * could've been reordered ahead of the store to the @@ -341,11 +363,88 @@ void pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0); if (__improbable(pending_flush != 0)) { pmap_pcid_validate_cpu(tpmap, ccpu); - set_cr3_composed(ncr3, new_pcid, FALSE); + preserve = false; } } + + if (preserve == false) { + bool gtlbi = (invpcid_enabled == false); + pmap_tlbi_range(0, ~0ULL, gtlbi, new_pcid); + } + + uint64_t spcid = PMAP_UPCIDP(new_pcid); + uint64_t scr3 = tpmap->pm_ucr3 | spcid; + + cpu_datap(ccpu)->cpu_ucr3 = scr3; + cpu_shadowp(ccpu)->cpu_ucr3 = scr3; + cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]); -#if DEBUG +#if DEBUG + cpu_datap(ccpu)->cpu_pcid_last_cr3 = scr3; KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0); #endif } + +typedef enum { + INP_ALLG = 2, INP_ASPACE = 1, INP_SINGLE = 0, INP_ALLNG = 3 +} invpcid_type_t; +typedef struct __attribute__((packed)) { + uint64_t ipcid_and_rsvd; + uint64_t iaddr; +} invpcid_desc_t; + +static inline void +invpcid(invpcid_type_t itype, pcid_t ipcid, uint64_t iaddr) +{ + invpcid_desc_t ipcdt; + + ipcdt.ipcid_and_rsvd = ipcid; + ipcdt.iaddr = iaddr; + + uint64_t iptype = itype; //promote to workaround assembler bug + + __asm__ volatile ("invpcid %0, %1" :: "m" (ipcdt), "r" (iptype) : "memory"); +} + + +void +pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid) +{ + assert(ml_get_interrupts_enabled() == FALSE || + get_preemption_level() != 0); + + if (invpcid_enabled) { + if (global) { + invpcid(INP_ALLG, 0, 0ULL); + } else { + /* TODO: separate large page invalidation check */ + if ((endv - startv) >= INP_MAX) { + invpcid(INP_ASPACE, pcid, 0ULL); + if (pcid) { + invpcid(INP_ASPACE, (pcid + PMAP_PCID_MAX_PCID), 0ULL); + } + } else { + uint64_t cv = startv; + for (; cv < endv; cv += PAGE_SIZE) { + invpcid(INP_SINGLE, pcid, cv); + if (pcid) { + invpcid(INP_SINGLE, (pcid + PMAP_PCID_MAX_PCID), cv); + } + } + } + } + } else { + if (pmap_pcid_ncpus) { + uintptr_t cr4 = get_cr4(); + if (__improbable((cr4 & CR4_PGE) == 0)) { + set_cr4(cr4 | CR4_PGE); + } else { + set_cr4(cr4 & ~CR4_PGE); + set_cr4(cr4 | CR4_PGE); + } + } else { + set_cr3_raw(get_cr3_raw()); + } + } + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); +}