/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/proc_reg.h>
#include <i386/cpuid.h>
#include <i386/tsc.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>
#include <mach/branch_predicates.h>

/*
 * PCID (Process-context identifier), aka tagged TLB, support.
 * On processors with this feature, unless disabled via the -pmap_pcid_disable
 * boot-arg, the following algorithm is in effect:
 * Each processor maintains an array of tag refcounts, indexed by tag.
 * Each address space maintains an array of tags, indexed by CPU number.
 * Each address space maintains a coherency vector, indexed by CPU,
 * indicating that the TLB state for that address space has a pending
 * invalidation.
 * On a context switch, a refcounted tag is lazily assigned to the newly
 * dispatched (CPU, address space) tuple.
 * When an inactive address space is invalidated on a remote CPU, it is marked
 * for invalidation upon the next dispatch. Some invalidations are
 * also processed at the user/kernel boundary.
 * Provisions are made for the case where a CPU is overcommitted, i.e.
 * more active address spaces exist than the number of logical tags
 * provided for by the processor architecture (currently 4096).
 * The algorithm assumes the processor remaps the logical tags
 * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10)
 */
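
/*
 * In terms of the fields referenced below, that state maps roughly to:
 *	per-CPU:	cpu_pcid_refcounts[tag]            - live references per tag
 *			cpu_pcid_last_pmap_dispatched[tag] - last pmap run with that tag
 *	per-pmap:	pmap_pcid_cpus[cpu]                - tag assigned on that CPU
 *			pmap_pcid_coherency_vector[cpu]    - nonzero => invalidation pending
 * (The authoritative declarations live in the cpu_data and pmap structures,
 * not in this file.)
 */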

uint32_t	pmap_pcid_ncpus;
boolean_t	pmap_pcid_disabled = FALSE;

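/*
 * Probe for PCID support and, if present and permitted, enable it on the
 * invoking CPU. Honors the -pmap_pcid_disable boot-arg; PCID and
 * no_shared_cr3 are treated as mutually exclusive below.
 */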
void pmap_pcid_configure(void) {
	int ccpu = cpu_number();
	uintptr_t cr4 = get_cr4();
	boolean_t pcid_present = FALSE;

	pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	pmap_assert(cpu_mode_is64bit());

	if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof (pmap_pcid_disabled))) {
		pmap_pcid_log("PMAP: PCID feature disabled\n");
		printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
		kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
	}
	/* no_shared_cr3+PCID is currently unsupported */
#if DEBUG
	if (pmap_pcid_disabled == FALSE)
		no_shared_cr3 = FALSE;
	else
		no_shared_cr3 = TRUE;
#else
	if (no_shared_cr3)
		pmap_pcid_disabled = TRUE;
#endif
	if (pmap_pcid_disabled || no_shared_cr3) {
		unsigned i;
		/* Reset PCID status, as we may have picked up
		 * strays if discovered prior to platform
		 * expert initialization.
		 */
		for (i = 0; i < real_ncpus; i++) {
			if (cpu_datap(i)) {
				cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
			}
			pmap_pcid_ncpus = 0;
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		return;
	}
	/* DRKTODO: assert if features haven't been discovered yet. Redundant
	 * invocation of cpu_mode_init and descendants masks this for now.
	 */
	if ((cpuid_features() & CPUID_FEATURE_PCID))
		pcid_present = TRUE;
	else {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
		pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
		return;
	}
	if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE | CR4_PGE)) {
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
		pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
		return;
	}
	if (pcid_present == TRUE) {
		pmap_pcid_log("Pre-PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);

		if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
			panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
		}
		if ((get_cr4() & CR4_PGE) == 0) {
			set_cr4(get_cr4() | CR4_PGE);
			pmap_pcid_log("Toggled PGE ON (CPU %d)\n", ccpu);
		}
		set_cr4(get_cr4() | CR4_PCIDE);
		pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
		tlb_flush_global();
		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;

		if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
			pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
		}
		cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
		    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
		    &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
		cpu_datap(ccpu)->cpu_pcid_refcounts[0] = 1;	/* tag 0 is reserved for the kernel pmap */
	}
}

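/*
 * Mark every per-CPU tag slot of a newly created user pmap as unassigned;
 * a tag is allocated lazily on first dispatch (see pmap_pcid_activate).
 */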
void pmap_pcid_initialize(pmap_t p) {
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	pmap_assert(nc >= real_ncpus);
	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}

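/*
 * The kernel pmap uses the reserved tag 0 on every CPU; its refcount is
 * seeded in pmap_pcid_configure.
 */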
void pmap_pcid_initialize_kernel(pmap_t p) {
	unsigned i;
	unsigned nc = sizeof(p->pmap_pcid_cpus) / sizeof(pcid_t);

	for (i = 0; i < nc; i++) {
		p->pmap_pcid_cpus[i] = 0;
		/* We assume here that the coherency vector is zeroed by
		 * pmap_create
		 */
	}
}

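/*
 * Allocate a tag for the current (CPU, address space) pairing: try the
 * per-CPU free hint first, then scan for an unreferenced tag; if every tag
 * is in use (the oversubscribed case), share the least-referenced one,
 * which pmap_pcid_activate later detects as a conflict.
 */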
pcid_t pmap_pcid_allocate_pcid(int ccpu) {
	int		i;
	pcid_ref_t	cur_min = 0xFF;
	uint32_t	cur_min_index = ~1;
	pcid_ref_t	*cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_refcounts[0];
	pcid_ref_t	old_count;

	if ((i = cpu_datap(ccpu)->cpu_pcid_free_hint) != 0) {
		if (cpu_pcid_refcounts[i] == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			cpu_datap(ccpu)->cpu_pcid_free_hint = 0;
			return i;
		}
	}
	/* Linear scan to discover free slot, with hint. Room for optimization
	 * but with intelligent prefetchers this should be
	 * adequately performant, as it is invoked
	 * only on first dispatch of a new address space onto
	 * a given processor. DRKTODO: use larger loads and
	 * zero byte discovery -- any pattern != ~1 should
	 * signify a free slot.
	 */
	for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) {
		pcid_ref_t cur_refcount = cpu_pcid_refcounts[i];

		pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT);

		if (cur_refcount == 0) {
			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
			return i;
		}
		else {
			if (cur_refcount < cur_min) {
				cur_min_index = i;
				cur_min = cur_refcount;
			}
		}
	}
	pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID);
	/* Consider "rebalancing" tags actively in highly oversubscribed cases
	 * perhaps selecting tags with lower activity.
	 */

	old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1);
	pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT);
	return cur_min_index;
}

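/*
 * Drop this pmap's reference on its tag for the given CPU, clearing the
 * last-dispatched hint if it still names this pmap and recording the tag
 * as a free hint once its refcount reaches zero.
 */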
void pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) {
	pcid_t		pcid;
	pmap_t		lp;
	pcid_ref_t	prior_count;

	pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_assert(pcid != PMAP_PCID_INVALID_PCID);
	if (pcid == PMAP_PCID_INVALID_PCID)
		return;

	lp = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid];
	pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(cpu_datap(ccpu)->cpu_pcid_refcounts[pcid] >= 1);

	if (lp == tpmap)
		(void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID);

	if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_refcounts[pcid], 1)) == 1) {
		cpu_datap(ccpu)->cpu_pcid_free_hint = pcid;
	}
	pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT);
}

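/*
 * Release any tags this pmap holds on any CPU; callers must have interrupts
 * disabled or preemption disabled, per the assertion below.
 */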
void pmap_destroy_pcid_sync(pmap_t p) {
	int i;
	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
	for (i = 0; i < PMAP_PCID_MAX_CPUS; i++)
		if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID)
			pmap_pcid_deallocate_pcid(i, p);
}

pcid_t pcid_for_pmap_cpu_tuple(pmap_t pmap, int ccpu) {
	return pmap->pmap_pcid_cpus[ccpu];
}
#if PMAP_ASSERT
#define PCID_RECORD_SIZE 128
uint64_t pcid_record_array[PCID_RECORD_SIZE];
#endif

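/*
 * Activate the given pmap on the current CPU (typically from the context
 * switch path, with preemption disabled): allocate a tag lazily if needed,
 * detect pending invalidations and tag conflicts, and compose CR3 with the
 * TLB-preserving semantic only when neither condition holds.
 */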
void pmap_pcid_activate(pmap_t tpmap, int ccpu) {
	pcid_t		new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t		last_pmap;
	boolean_t	pcid_conflict = FALSE, pending_flush = FALSE;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}
	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
		__asm__ volatile("int3");
#endif	/* PMAP_ASSERT */
	set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));

	if (!pending_flush) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			pmap_pcid_validate_cpu(tpmap, ccpu);
			set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
		}
	}
	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}