/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/proc_reg.h>
#include <i386/cpuid.h>
#include <i386/tsc.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>

/*
 * PCID (Process context identifier) aka tagged TLB support.
 * On processors with this feature, unless disabled via the -pmap_pcid_disable
 * boot-arg, the following algorithm is in effect:
 * Each processor maintains an array of tag refcounts indexed by tag.
 * Each address space maintains an array of tags indexed by CPU number.
 * Each address space maintains a coherency vector, indexed by CPU,
 * indicating that the TLB state for that address space has a pending
 * invalidation.
 * On a context switch, a refcounted tag is lazily assigned to the newly
 * dispatched (CPU, address space) tuple.
 * When an inactive address space is invalidated on a remote CPU, it is marked
 * for invalidation upon the next dispatch. Some invalidations are
 * also processed at the user/kernel boundary.
 * Provisions are made for the case where a CPU is overcommitted, i.e.
 * more active address spaces exist than the number of logical tags
 * provided for by the processor architecture (currently 4096).
 * The algorithm assumes the processor remaps the logical tags
 * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10)
 */
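
/*
 * Illustrative sketch of the bookkeeping described above (explanatory
 * only; the authoritative definitions live in pmap_pcid.h and
 * pmap_internal.h):
 *
 *   pcid_ref_t cpu_pcid_refcounts[PMAP_PCID_MAX_PCID]; // per CPU: users of each tag
 *   pcid_t     pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];     // per pmap: tag held on each CPU
 *   uint8_t    pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS]; // per pmap: nonzero = flush due
 *
 * First dispatch of pmap P onto CPU c allocates a tag t (bumping
 * cpu_pcid_refcounts[t]) and caches it in P->pmap_pcid_cpus[c]; a remote
 * invalidation merely sets P's coherency-vector entry for c, deferring
 * the flush to P's next activation on c.
 */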

uint32_t pmap_pcid_ncpus;
boolean_t pmap_pcid_disabled = FALSE;
bool invpcid_enabled = false;
static uint32_t INP_MAX = 0;
pcid_cdata_t pcid_data[MAX_CPUS] __attribute__((aligned(64)));

void
pmap_pcid_configure(void)
{
	int ccpu = cpu_number();
	uintptr_t cr4 = get_cr4();
	boolean_t pcid_present = FALSE;

	pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	pmap_assert(cpu_mode_is64bit());

	if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof(pmap_pcid_disabled))) {
		pmap_pcid_log("PMAP: PCID feature disabled\n");
		printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
		kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
	}
	/* no_shared_cr3+PCID is currently unsupported */

#if DEBUG
	if (pmap_pcid_disabled == FALSE) {
		no_shared_cr3 = FALSE;
	} else {
		no_shared_cr3 = TRUE;
	}
#else
	if (no_shared_cr3) {
		pmap_pcid_disabled = TRUE;
	}
#endif
	if (pmap_pcid_disabled || no_shared_cr3) {
		unsigned i;
		/* Reset PCID status, as we may have picked up
		 * strays if discovered prior to platform
		 * expert initialization.
		 */
		for (i = 0; i < real_ncpus; i++) {
			if (cpu_datap(i)) {
				cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
			}
			pmap_pcid_ncpus = 0;
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		return;
	}
	/* DRKTODO: assert if features haven't been discovered yet. Redundant
	 * invocation of cpu_mode_init and descendants masks this for now.
	 */
	if ((cpuid_features() & CPUID_FEATURE_PCID)) {
		pcid_present = TRUE;
	} else {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
		return;
	}
	if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
		pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
		return;
	}
	if (pcid_present == TRUE) {
		if (ccpu == 0) {
			if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_INVPCID) {
				invpcid_enabled = true;
			}
		}
#if DEVELOPMENT || DEBUG
		PE_parse_boot_argn("pmap_inp_max", &INP_MAX, sizeof(INP_MAX));
#endif
		pmap_pcid_log("Pre-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);

		if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
			panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
		}
		if ((get_cr4() & CR4_PGE) == 0) {
			set_cr4(get_cr4() | CR4_PGE);
			pmap_pcid_log("Toggled PGE ON (CPU: %d)\n", ccpu);
		}
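		/*
		 * Per the Intel SDM, MOV to CR4 raises #GP if it attempts to
		 * set CR4.PCIDE while IA32_EFER.LMA = 0 or CR3[11:0] != 0;
		 * both preconditions are met here: we run in long mode, and
		 * PCIDE has been clear until now with a kernel CR3 whose low
		 * 12 bits are zero.
		 */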
		set_cr4(get_cr4() | CR4_PCIDE);
		pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
		pmap_tlbi_range(0, ~0ULL, true, 0);
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

		if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
			pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
		    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
		    &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
		cpu_datap(ccpu)->cpu_pcid_data = &pcid_data[ccpu];
		cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0] = 1;
	}
}

void
pmap_pcid_initialize(pmap_t p)
{
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	pmap_assert(nc >= real_ncpus);
	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}

void
pmap_pcid_initialize_kernel(pmap_t p)
{
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = 0;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}

pcid_t
pmap_pcid_allocate_pcid(int ccpu)
{
	int i;
	pcid_ref_t cur_min = 0xFF;
	uint32_t cur_min_index = ~1;
	pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0];
	pcid_ref_t old_count;

	if ((i = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint) != 0) {
		if (cpu_pcid_refcounts[i] == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = 0;
			return i;
		}
	}
	/* Linear scan to discover free slot, with hint. Room for optimization
	 * but with intelligent prefetchers this should be
	 * adequately performant, as it is invoked
	 * only on first dispatch of a new address space onto
	 * a given processor. DRKTODO: use larger loads and
	 * zero byte discovery -- any pattern != ~1 should
	 * signify a free slot.
	 */
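	/*
	 * One possible shape for the DRKTODO above -- an uncompiled,
	 * illustrative sketch assuming byte-wide refcounts, not the current
	 * implementation: scan eight refcounts per load and apply the
	 * classic SWAR zero-byte test.
	 *
	 *   uint64_t w = *(const uint64_t *)&cpu_pcid_refcounts[i];
	 *   uint64_t z = (w - 0x0101010101010101ULL) & ~w & 0x8080808080808080ULL;
	 *   if (z != 0) {
	 *           i += __builtin_ctzll(z) >> 3;  // index of lowest zero byte
	 *   }
	 */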
	for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) {
		pcid_ref_t cur_refcount = cpu_pcid_refcounts[i];

		pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT);

		if (cur_refcount == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			return i;
		} else {
			if (cur_refcount < cur_min) {
				cur_min_index = i;
				cur_min = cur_refcount;
			}
		}
	}
	pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID);
	/* Consider "rebalancing" tags actively in highly oversubscribed cases,
	 * perhaps selecting tags with lower activity.
	 */

	old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1);
	pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT);
	return cur_min_index;
}

void
pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap)
{
	pcid_t pcid;
	pmap_t lp;
	pcid_ref_t prior_count;

	pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_assert(pcid != PMAP_PCID_INVALID_PCID);
	if (pcid == PMAP_PCID_INVALID_PCID) {
		return;
	}

	lp = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid];
	pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid] >= 1);

	if (lp == tpmap) {
		(void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID);
	}

	if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid], 1)) == 1) {
		cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = pcid;
	}
	pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT);
}

void
pmap_destroy_pcid_sync(pmap_t p)
{
	int i;
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	for (i = 0; i < PMAP_PCID_MAX_CPUS; i++) {
		if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID) {
			pmap_pcid_deallocate_pcid(i, p);
		}
	}
}
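
/*
 * Returns the PCID with which the thread will actually execute on ccpu:
 * pmaps with an accessible page zero run on the kernel pmap's CR3/PCID
 * except while a copyio window is open (CopyIOActive), mirroring the
 * nopagezero handling in pmap_pcid_activate() below.
 */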
pcid_t
pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu)
{
	pmap_t active_pmap = cpmap;

	if (__improbable(cpmap->pagezero_accessible)) {
		if ((cthread->machine.specFlags & CopyIOActive) == 0) {
			active_pmap = kernel_pmap;
		}
	}

	return active_pmap->pmap_pcid_cpus[ccpu];
}
int npz = 0;

#if PMAP_ASSERT
#define PCID_RECORD_SIZE 128
uint64_t pcid_record_array[PCID_RECORD_SIZE];
#endif
#define PMAP_UPCIDP(p) ((p ? (p + PMAP_PCID_MAX_PCID) : 0) | 1ULL << 63)
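/*
 * Worked example (illustrative; see pmap_pcid.h for the actual
 * PMAP_PCID_MAX_PCID value): the user-mode companion of kernel-managed
 * PCID p sits PMAP_PCID_MAX_PCID slots above it, with bit 63 (the CR3
 * no-flush bit) set. If PMAP_PCID_MAX_PCID were 2048, PMAP_UPCIDP(5)
 * would yield (1ULL << 63) | 2053; PMAP_UPCIDP(0) leaves the PCID field
 * zero so the kernel's PCID 0 is shared.
 */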

void
pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t copyio)
{
	pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t last_pmap;
	boolean_t pcid_conflict = FALSE, pending_flush = FALSE;
	pcid_cdata_t *pcdata = cpu_datap(ccpu)->cpu_pcid_data;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}

	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		last_pmap = pcdata->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	pcdata->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) ||
	    ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX]) {
		__asm__ volatile ("int3");
	}
#endif /* PMAP_ASSERT */

	pmap_paddr_t ncr3 = tpmap->pm_cr3;

	if (__improbable(nopagezero)) {
		pending_flush = TRUE;
		if (copyio == FALSE) {
			new_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
			ncr3 = kernel_pmap->pm_cr3;
		}
		cpu_datap(ccpu)->cpu_kernel_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
		npz++;
	}

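	/*
	 * Bit 63 of CR3 is architecturally the "no-flush" bit when
	 * CR4.PCIDE is set: the processor preserves TLB entries tagged with
	 * the incoming PCID across the write (hence the 1ULL preserve
	 * argument below); any invalidation still owed is performed
	 * explicitly afterwards.
	 */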
	set_cr3_composed(ncr3, new_pcid, 1ULL);
	cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = ncr3 | new_pcid | (1ULL << 63);

	bool preserve = !pcid_conflict && !pending_flush;
	if (preserve == true) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			pmap_pcid_validate_cpu(tpmap, ccpu);
			preserve = false;
		}
	}

	if (preserve == false) {
		bool gtlbi = (invpcid_enabled == false);
		pmap_tlbi_range(0, ~0ULL, gtlbi, new_pcid);
	}

	uint64_t spcid = PMAP_UPCIDP(new_pcid);
	uint64_t scr3 = tpmap->pm_ucr3 | spcid;

	cpu_datap(ccpu)->cpu_ucr3 = scr3;
	cpu_shadowp(ccpu)->cpu_ucr3 = scr3;

	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
	cpu_datap(ccpu)->cpu_pcid_last_cr3 = scr3;
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}

typedef enum {
	INP_ALLG = 2, INP_ASPACE = 1, INP_SINGLE = 0, INP_ALLNG = 3
} invpcid_type_t;
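/*
 * These values encode the INVPCID instruction types specified in the Intel
 * SDM: type 0 invalidates a single linear address for one PCID, type 1 a
 * single PCID's non-global translations, type 2 all translations including
 * globals, and type 3 all translations except globals.
 */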
typedef struct __attribute__((packed)) {
	uint64_t ipcid_and_rsvd;
	uint64_t iaddr;
} invpcid_desc_t;

static inline void
invpcid(invpcid_type_t itype, pcid_t ipcid, uint64_t iaddr)
{
	invpcid_desc_t ipcdt;

	ipcdt.ipcid_and_rsvd = ipcid;
	ipcdt.iaddr = iaddr;

	uint64_t iptype = itype; /* promote to work around assembler bug */

	__asm__ volatile ("invpcid %0, %1" :: "m" (ipcdt), "r" (iptype) : "memory");
}
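
/*
 * Illustrative call patterns (mirroring pmap_tlbi_range() below):
 *   invpcid(INP_SINGLE, pcid, va);   // one linear address for one PCID
 *   invpcid(INP_ASPACE, pcid, 0ULL); // all non-global entries for pcid
 *   invpcid(INP_ALLG, 0, 0ULL);      // everything, globals included
 */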


void
pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid)
{
	assert(ml_get_interrupts_enabled() == FALSE ||
	    get_preemption_level() != 0);

	if (invpcid_enabled) {
		if (global) {
			invpcid(INP_ALLG, 0, 0ULL);
		} else {
			/* TODO: separate large page invalidation check */
			if ((endv - startv) >= INP_MAX) {
				invpcid(INP_ASPACE, pcid, 0ULL);
				if (pcid) {
					invpcid(INP_ASPACE, (pcid + PMAP_PCID_MAX_PCID), 0ULL);
				}
			} else {
				uint64_t cv = startv;
				for (; cv < endv; cv += PAGE_SIZE) {
					invpcid(INP_SINGLE, pcid, cv);
					if (pcid) {
						invpcid(INP_SINGLE, (pcid + PMAP_PCID_MAX_PCID), cv);
					}
				}
			}
		}
	} else {
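		/*
		 * No INVPCID support. Architecturally, toggling CR4.PGE
		 * flushes the entire TLB, global translations included,
		 * across all PCIDs; with PCID inactive, rewriting CR3
		 * flushes all non-global entries. Ranged and per-PCID
		 * requests therefore degrade to these coarser flushes.
		 */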
		if (pmap_pcid_ncpus) {
			uintptr_t cr4 = get_cr4();
			if (__improbable((cr4 & CR4_PGE) == 0)) {
				set_cr4(cr4 | CR4_PGE);
			} else {
				set_cr4(cr4 & ~CR4_PGE);
				set_cr4(cr4 | CR4_PGE);
			}
		} else {
			set_cr3_raw(get_cr3_raw());
		}
	}
	__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
}