]>
Commit | Line | Data |
---|---|---|
6d2010ae A |
1 | /* |
2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
0a7de745 | 5 | * |
6d2010ae A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
6d2010ae A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
6d2010ae A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
6d2010ae A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ | |
28 | ||
29 | #include <i386/proc_reg.h> | |
30 | #include <i386/cpuid.h> | |
31 | #include <i386/tsc.h> | |
32 | #include <vm/pmap.h> | |
33 | #include <vm/vm_map.h> | |
34 | #include <i386/pmap_internal.h> | |
35 | #include <i386/pmap_pcid.h> | |
6d2010ae A |
36 | |
37 | /* | |
38 | * PCID (Process context identifier) aka tagged TLB support. | |
39 | * On processors with this feature, unless disabled via the -pmap_pcid_disable | |
40 | * boot-arg, the following algorithm is in effect: | |
41 | * Each processor maintains an array of tag refcounts indexed by tag. | |
42 | * Each address space maintains an array of tags indexed by CPU number. | |
43 | * Each address space maintains a coherency vector, indexed by CPU | |
44 | * indicating that the TLB state for that address space has a pending | |
45 | * invalidation. | |
46 | * On a context switch, a refcounted tag is lazily assigned to the newly | |
47 | * dispatched (CPU, address space) tuple. | |
48 | * When an inactive address space is invalidated on a remote CPU, it is marked | |
49 | * for invalidation upon the next dispatch. Some invalidations are | |
50 | * also processed at the user/kernel boundary. | |
 51 | * Provisions are made for the case where a CPU is overcommitted, i.e. | |
52 | * more active address spaces exist than the number of logical tags | |
53 | * provided for by the processor architecture (currently 4096). | |
54 | * The algorithm assumes the processor remaps the logical tags | |
55 | * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10) | |
56 | */ | |
57 | ||
/* Number of CPUs that have enabled PCID; nonzero implies PCID mode is live. */
uint32_t pmap_pcid_ncpus;
/* Set via the "-pmap_pcid_disable" boot-arg to force-disable PCID support. */
boolean_t pmap_pcid_disabled = FALSE;
/* True when the INVPCID instruction was detected (CPUID leaf 7, boot CPU). */
bool invpcid_enabled = false;
/* Byte-range threshold at or above which pmap_tlbi_range() switches from
 * per-page INVPCIDs to a whole-address-space invalidation; tunable via the
 * "pmap_inp_max" boot-arg on DEVELOPMENT/DEBUG kernels.
 */
static uint32_t INP_MAX = 0;
/* Per-CPU PCID bookkeeping, cacheline-aligned to avoid false sharing. */
pcid_cdata_t pcid_data[MAX_CPUS] __attribute__((aligned(64)));
6d2010ae | 63 | |
0a7de745 A |
/*
 * Discover and enable PCID support on the calling CPU.
 * Invoked per-CPU during bring-up.  Honors the "-pmap_pcid_disable"
 * boot-arg and the no_shared_cr3 mode (mutually exclusive with PCID).
 * On success: sets CR4.PGE and CR4.PCIDE, performs a full TLB
 * invalidation, and publishes this CPU's PCID bookkeeping pointers.
 */
void
pmap_pcid_configure(void)
{
	int ccpu = cpu_number();
	uintptr_t cr4 = get_cr4();
	boolean_t pcid_present = FALSE;

	pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
	/* Must not be preemptible while manipulating per-CPU control state. */
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	pmap_assert(cpu_mode_is64bit());

	if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof(pmap_pcid_disabled))) {
		pmap_pcid_log("PMAP: PCID feature disabled\n");
		printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
		kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
	}
	/* no_shared_cr3+PCID is currently unsupported */

#if DEBUG
	if (pmap_pcid_disabled == FALSE) {
		no_shared_cr3 = FALSE;
	} else {
		no_shared_cr3 = TRUE;
	}
#else
	if (no_shared_cr3) {
		pmap_pcid_disabled = TRUE;
	}
#endif
	if (pmap_pcid_disabled || no_shared_cr3) {
		unsigned i;
		/* Reset PCID status, as we may have picked up
		 * strays if discovered prior to platform
		 * expert initialization.
		 */
		for (i = 0; i < real_ncpus; i++) {
			if (cpu_datap(i)) {
				cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
			}
			pmap_pcid_ncpus = 0;
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		return;
	}
	/* DRKTODO: assert if features haven't been discovered yet. Redundant
	 * invocation of cpu_mode_init and descendants masks this for now.
	 */
	if ((cpuid_features() & CPUID_FEATURE_PCID)) {
		pcid_present = TRUE;
	} else {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
		return;
	}
	/* Both bits already set (e.g. warm re-entry): nothing further to do. */
	if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
		pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
		return;
	}
	if (pcid_present == TRUE) {
		/* INVPCID availability is a global property; probe once on the boot CPU. */
		if (ccpu == 0) {
			if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_INVPCID) {
				invpcid_enabled = true;
			}
		}
#if DEVELOPMENT || DEBUG
		PE_parse_boot_argn("pmap_inp_max", &INP_MAX, sizeof(INP_MAX));
#endif
		pmap_pcid_log("Pre-PCID:CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);

		if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
			panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
		}
		/* Ensure global pages are enabled before turning on PCIDE. */
		if ((get_cr4() & CR4_PGE) == 0) {
			set_cr4(get_cr4() | CR4_PGE);
			pmap_pcid_log("Toggled PGE ON (CPU: %d\n", ccpu);
		}
		set_cr4(get_cr4() | CR4_PCIDE);
		pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
		/* Full global TLB invalidation now that PCID tagging is live. */
		pmap_tlbi_range(0, ~0ULL, true, 0);
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

		/* NOTE(review): OSIncrementAtomic returns the PRE-increment value,
		 * so this compares the old count against max_cpus; the "all enabled"
		 * log would only fire if the counter exceeded max_cpus.  Logging-only
		 * quirk -- confirm intent against machine_info semantics.
		 */
		if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
			pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
		    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
		    &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
		/* PCID 0 is permanently held by the kernel pmap on every CPU. */
		cpu_datap(ccpu)->cpu_pcid_data = &pcid_data[ccpu];
		cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0] = 1;
	}
}
156 | ||
0a7de745 A |
157 | void |
158 | pmap_pcid_initialize(pmap_t p) | |
159 | { | |
6d2010ae | 160 | unsigned i; |
0a7de745 | 161 | unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t); |
6d2010ae A |
162 | |
163 | pmap_assert(nc >= real_ncpus); | |
164 | for (i = 0; i < nc; i++) { | |
165 | p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID; | |
166 | /* We assume here that the coherency vector is zeroed by | |
167 | * pmap_create | |
168 | */ | |
169 | } | |
170 | } | |
171 | ||
0a7de745 A |
172 | void |
173 | pmap_pcid_initialize_kernel(pmap_t p) | |
174 | { | |
6d2010ae | 175 | unsigned i; |
0a7de745 | 176 | unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t); |
6d2010ae A |
177 | |
178 | for (i = 0; i < nc; i++) { | |
179 | p->pmap_pcid_cpus[i] = 0; | |
180 | /* We assume here that the coherency vector is zeroed by | |
181 | * pmap_create | |
182 | */ | |
183 | } | |
184 | } | |
185 | ||
0a7de745 A |
186 | pcid_t |
187 | pmap_pcid_allocate_pcid(int ccpu) | |
188 | { | |
6d2010ae | 189 | int i; |
0a7de745 A |
190 | pcid_ref_t cur_min = 0xFF; |
191 | uint32_t cur_min_index = ~1; | |
192 | pcid_ref_t *cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[0]; | |
193 | pcid_ref_t old_count; | |
6d2010ae | 194 | |
5c9f4661 | 195 | if ((i = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint) != 0) { |
6d2010ae A |
196 | if (cpu_pcid_refcounts[i] == 0) { |
197 | (void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1); | |
5c9f4661 | 198 | cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = 0; |
6d2010ae A |
199 | return i; |
200 | } | |
201 | } | |
202 | /* Linear scan to discover free slot, with hint. Room for optimization | |
203 | * but with intelligent prefetchers this should be | |
204 | * adequately performant, as it is invoked | |
205 | * only on first dispatch of a new address space onto | |
206 | * a given processor. DRKTODO: use larger loads and | |
207 | * zero byte discovery -- any pattern != ~1 should | |
208 | * signify a free slot. | |
209 | */ | |
210 | for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) { | |
211 | pcid_ref_t cur_refcount = cpu_pcid_refcounts[i]; | |
212 | ||
213 | pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT); | |
214 | ||
215 | if (cur_refcount == 0) { | |
216 | (void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1); | |
217 | return i; | |
5c9f4661 | 218 | } else { |
6d2010ae A |
219 | if (cur_refcount < cur_min) { |
220 | cur_min_index = i; | |
221 | cur_min = cur_refcount; | |
222 | } | |
223 | } | |
224 | } | |
225 | pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID); | |
226 | /* Consider "rebalancing" tags actively in highly oversubscribed cases | |
227 | * perhaps selecting tags with lower activity. | |
228 | */ | |
229 | ||
230 | old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1); | |
231 | pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT); | |
0a7de745 | 232 | return cur_min_index; |
6d2010ae A |
233 | } |
234 | ||
0a7de745 A |
235 | void |
236 | pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) | |
237 | { | |
6d2010ae A |
238 | pcid_t pcid; |
239 | pmap_t lp; | |
240 | pcid_ref_t prior_count; | |
241 | ||
242 | pcid = tpmap->pmap_pcid_cpus[ccpu]; | |
243 | pmap_assert(pcid != PMAP_PCID_INVALID_PCID); | |
0a7de745 | 244 | if (pcid == PMAP_PCID_INVALID_PCID) { |
6d2010ae | 245 | return; |
0a7de745 | 246 | } |
6d2010ae | 247 | |
5c9f4661 | 248 | lp = cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid]; |
6d2010ae | 249 | pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID); |
5c9f4661 | 250 | pmap_assert(cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid] >= 1); |
6d2010ae | 251 | |
0a7de745 | 252 | if (lp == tpmap) { |
5c9f4661 | 253 | (void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID); |
0a7de745 | 254 | } |
6d2010ae | 255 | |
5c9f4661 | 256 | if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_refcounts[pcid], 1)) == 1) { |
0a7de745 | 257 | cpu_datap(ccpu)->cpu_pcid_data->cpu_pcid_free_hint = pcid; |
6d2010ae A |
258 | } |
259 | pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT); | |
260 | } | |
261 | ||
0a7de745 A |
262 | void |
263 | pmap_destroy_pcid_sync(pmap_t p) | |
264 | { | |
6d2010ae | 265 | int i; |
0a7de745 A |
266 | pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0); |
267 | for (i = 0; i < PMAP_PCID_MAX_CPUS; i++) { | |
268 | if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID) { | |
6d2010ae | 269 | pmap_pcid_deallocate_pcid(i, p); |
0a7de745 A |
270 | } |
271 | } | |
6d2010ae A |
272 | } |
273 | ||
0a7de745 A |
274 | pcid_t |
275 | pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu) | |
276 | { | |
39037602 A |
277 | pmap_t active_pmap = cpmap; |
278 | ||
279 | if (__improbable(cpmap->pagezero_accessible)) { | |
280 | if ((cthread->machine.specFlags & CopyIOActive) == 0) { | |
281 | active_pmap = kernel_pmap; | |
282 | } | |
283 | } | |
284 | ||
285 | return active_pmap->pmap_pcid_cpus[ccpu]; | |
6d2010ae | 286 | } |
/* Count of dispatches onto page-zero-accessible pmaps (telemetry). */
int npz = 0;

#if PMAP_ASSERT
#define PCID_RECORD_SIZE 128
/* Ring of recent CR3|PCID compositions, indexed by CPU, for debugging. */
uint64_t pcid_record_array[PCID_RECORD_SIZE];
#endif
/* Compose the user-space variant of PCID `p`: user tags live
 * PMAP_PCID_MAX_PCID above their kernel counterparts (PCID 0 stays 0),
 * with CR3 bit 63 (the no-flush bit) set.
 * Fix: argument fully parenthesized -- the previous definition expanded
 * `p` bare, which is unsafe for non-trivial argument expressions.
 */
#define PMAP_UPCIDP(p) ((((p) ? ((p) + PMAP_PCID_MAX_PCID) : 0)) | (1ULL << 63))
6d2010ae | 294 | |
0a7de745 A |
/*
 * Activate pmap `tpmap` on CPU `ccpu` at context-switch time.
 * Lazily allocates a PCID for the (CPU, pmap) tuple, resolves pending
 * invalidations and tag-sharing conflicts, loads CR3 with the composed
 * (base, PCID, preserve-bit) value, and publishes the user CR3 shadow.
 * `nopagezero`/`copyio` select the kernel pmap substitution for
 * page-zero-accessible address spaces (see pcid_for_pmap_cpu_tuple).
 */
void
pmap_pcid_activate(pmap_t tpmap, int ccpu, boolean_t nopagezero, boolean_t copyio)
{
	pcid_t new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t last_pmap;
	boolean_t pcid_conflict = FALSE, pending_flush = FALSE;
	pcid_cdata_t *pcdata = cpu_datap(ccpu)->cpu_pcid_data;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	/* First dispatch of this pmap on this CPU: allocate a tag lazily. */
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}

	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	/* A remote CPU may have flagged this address space for invalidation. */
	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		/* Tag reuse conflict: another pmap last used this tag here. */
		last_pmap = pcdata->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	pcdata->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) ||
	    ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX]) {
		__asm__ volatile ("int3");
	}
#endif /* PMAP_ASSERT */

	pmap_paddr_t ncr3 = tpmap->pm_cr3;

	/* Page-zero-accessible pmap: run on the kernel pmap's CR3/PCID unless
	 * a copyio window requires the user mapping to stay resident.
	 */
	if (__improbable(nopagezero)) {
		pending_flush = TRUE;
		if (copyio == FALSE) {
			new_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
			ncr3 = kernel_pmap->pm_cr3;
		}
		cpu_datap(ccpu)->cpu_kernel_pcid = kernel_pmap->pmap_pcid_cpus[ccpu];
		npz++;
	}

	/* Load CR3 with the preserve bit set; shadow copy for the trampoline. */
	set_cr3_composed(ncr3, new_pcid, 1ULL);
	cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = ncr3 | new_pcid | (1ULL << 63);

	bool preserve = !pcid_conflict && !pending_flush;
	if (preserve == true) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			pmap_pcid_validate_cpu(tpmap, ccpu);
			preserve = false;
		}
	}

	/* TLB state for this tag cannot be preserved: invalidate it.  Without
	 * INVPCID the fallback is a global invalidation. */
	if (preserve == false) {
		bool gtlbi = (invpcid_enabled == false);
		pmap_tlbi_range(0, ~0ULL, gtlbi, new_pcid);
	}

	/* Publish the user-space CR3 (user PCID variant + no-flush bit). */
	uint64_t spcid = PMAP_UPCIDP(new_pcid);
	uint64_t scr3 = tpmap->pm_ucr3 | spcid;

	cpu_datap(ccpu)->cpu_ucr3 = scr3;
	cpu_shadowp(ccpu)->cpu_ucr3 = scr3;

	/* Future remote invalidations for this pmap target this CPU's slot. */
	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
	cpu_datap(ccpu)->cpu_pcid_last_cr3 = scr3;
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}
0a7de745 A |
387 | |
388 | typedef enum { | |
389 | INP_ALLG = 2, INP_ASPACE = 1, INP_SINGLE = 0, INP_ALLNG = 3 | |
390 | } invpcid_type_t; | |
391 | typedef struct __attribute__((packed)) { | |
392 | uint64_t ipcid_and_rsvd; | |
393 | uint64_t iaddr; | |
394 | } invpcid_desc_t; | |
395 | ||
396 | static inline void | |
397 | invpcid(invpcid_type_t itype, pcid_t ipcid, uint64_t iaddr) | |
398 | { | |
399 | invpcid_desc_t ipcdt; | |
400 | ||
401 | ipcdt.ipcid_and_rsvd = ipcid; | |
402 | ipcdt.iaddr = iaddr; | |
403 | ||
404 | uint64_t iptype = itype; //promote to workaround assembler bug | |
405 | ||
406 | __asm__ volatile ("invpcid %0, %1" :: "m" (ipcdt), "r" (iptype) : "memory"); | |
407 | } | |
408 | ||
409 | ||
/*
 * Invalidate TLB entries for virtual range [startv, endv) tagged with
 * `pcid` (and its user-space twin, pcid + PMAP_PCID_MAX_PCID), or all
 * entries when `global` is set.  Uses INVPCID when available; otherwise
 * falls back to a CR4.PGE toggle (PCID mode) or a CR3 reload.
 * Caller must be non-preemptible or have interrupts disabled.
 */
void
pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid)
{
	assert(ml_get_interrupts_enabled() == FALSE ||
	    get_preemption_level() != 0);

	if (invpcid_enabled) {
		if (global) {
			invpcid(INP_ALLG, 0, 0ULL);
		} else {
			/* TODO: separate large page invalidation check */
			if ((endv - startv) >= INP_MAX) {
				/* Large range: cheaper to drop the whole
				 * address-space context than iterate per page. */
				invpcid(INP_ASPACE, pcid, 0ULL);
				if (pcid) {
					invpcid(INP_ASPACE, (pcid + PMAP_PCID_MAX_PCID), 0ULL);
				}
			} else {
				uint64_t cv = startv;
				for (; cv < endv; cv += PAGE_SIZE) {
					invpcid(INP_SINGLE, pcid, cv);
					if (pcid) {
						invpcid(INP_SINGLE, (pcid + PMAP_PCID_MAX_PCID), cv);
					}
				}
			}
		}
	} else {
		if (pmap_pcid_ncpus) {
			/* PCID active but no INVPCID: toggling CR4.PGE
			 * invalidates TLB entries across all PCIDs.
			 * NOTE(review): when PGE is unexpectedly clear only a
			 * single set is performed -- presumably that write
			 * alone suffices to flush; confirm against the SDM's
			 * CR4.PGE flush semantics.
			 */
			uintptr_t cr4 = get_cr4();
			if (__improbable((cr4 & CR4_PGE) == 0)) {
				set_cr4(cr4 | CR4_PGE);
			} else {
				set_cr4(cr4 & ~CR4_PGE);
				set_cr4(cr4 | CR4_PGE);
			}
		} else {
			/* No PCIDs in use: a CR3 reload flushes non-global entries. */
			set_cr3_raw(get_cr3_raw());
		}
	}
	/* Order the invalidation with respect to subsequent accesses. */
	__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
}