]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2019 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | /* | |
57 | */ | |
58 | ||
59 | /* | |
60 | * File: pmap.h | |
61 | * | |
62 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
63 | * Date: 1985 | |
64 | * | |
65 | * Machine-dependent structures for the physical map module. | |
66 | */ | |
67 | #ifdef KERNEL_PRIVATE | |
68 | #ifndef _PMAP_MACHINE_ | |
69 | #define _PMAP_MACHINE_ 1 | |
70 | ||
71 | #ifndef ASSEMBLER | |
72 | ||
73 | #include <mach/kern_return.h> | |
74 | #include <mach/machine/vm_types.h> | |
75 | #include <mach/vm_prot.h> | |
76 | #include <mach/vm_statistics.h> | |
77 | #include <mach/machine/vm_param.h> | |
78 | #include <kern/kern_types.h> | |
79 | #include <kern/thread.h> | |
80 | #include <kern/simple_lock.h> | |
81 | ||
82 | #include <i386/mp.h> | |
83 | #include <i386/proc_reg.h> | |
84 | ||
85 | #include <i386/pal_routines.h> | |
86 | ||
/*
 *	Define the generic in terms of the specific.
 *
 *	Every INTEL_xxx / intel_xxx() name below is a pure alias that expands
 *	to the I386_xxx / i386_xxx() name on its right.  Those targets are not
 *	defined in this file (presumably they come from one of the headers
 *	included above -- confirm).
 */

#define INTEL_PGBYTES           I386_PGBYTES
#define INTEL_PGSHIFT           I386_PGSHIFT
#define intel_btop(x)           i386_btop(x)
#define intel_ptob(x)           i386_ptob(x)
#define intel_round_page(x)     i386_round_page(x)
#define intel_trunc_page(x)     i386_trunc_page(x)
97 | ||
98 | /* | |
99 | * i386/i486/i860 Page Table Entry | |
100 | */ | |
101 | ||
102 | #endif /* ASSEMBLER */ | |
103 | ||
/*
 * Page-table geometry.  The ULL suffixes keep any shift or mask arithmetic
 * built from these constants in 64 bits.
 */
#define NPGPTD          4ULL            /* page count that NBPTD converts to bytes */
#define PDESHIFT        21ULL           /* shift of the page-directory index (see KVADDR, pdenum) */
#define PTEMASK         0x1ffULL        /* 9-bit index mask applied by ptenum() */
#define PTEINDX         3ULL            /* unused in this file; presumably log2(sizeof(pt_entry_t)) -- TODO confirm */

#define PTESHIFT        12ULL           /* shift of the page-table index (see KVADDR, ptenum) */

/* Keeps only the low 32 bits of an address; used by ID_MAP_VTOP(). */
#define LOW_4GB_MASK    ((vm_offset_t)0x00000000FFFFFFFFUL)

#define PDESIZE         sizeof(pd_entry_t) /* for assembly files */
#define PTESIZE         sizeof(pt_entry_t) /* for assembly files */

#define INTEL_OFFMASK   (I386_PGBYTES - 1)      /* byte offset within a base page */
#define INTEL_LOFFMASK  (I386_LPGBYTES - 1)     /* same, for I386_LPGBYTES-sized (large) pages */
#define PG_FRAME        0x000FFFFFFFFFF000ULL   /* bits 12..51: the frame address field (INTEL_PTE_PFN) */
#define NPTEPG          (PAGE_SIZE/(sizeof (pt_entry_t)))       /* pt_entry_t slots per page */
#define NPTDPG          (PAGE_SIZE/(sizeof (pd_entry_t)))       /* pd_entry_t slots per page */

#define NBPTD           (NPGPTD << PAGE_SHIFT)                  /* bytes occupied by NPGPTD pages */
#define NPDEPTD         (NBPTD / (sizeof (pd_entry_t)))         /* pd_entry_t slots in NBPTD bytes */
#define NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))       /* same value as NPTDPG */
#define NBPDE           (1ULL << PDESHIFT)                      /* bytes mapped by one PDE: 2^21 */
#define PDEMASK         (NBPDE - 1)                             /* byte-offset mask inside one PDE's span */

#define PTE_PER_PAGE    512 /* number of PTE's per page on any level */
129 | ||
/*
 * cleanly define parameters for all the page table levels
 *
 * Each level gets the same family of names:
 *   <lvl>_entry_t     64-bit entry type
 *   N<LVL>PG          entries that fit in one PAGE_SIZE page
 *   <LVL>SHIFT        bit position of this level's index in a virtual address
 *   <LVL>PGSHIFT      9 at every level; presumably log2 of the entries per
 *                     page (2^9 == PTE_PER_PAGE) -- TODO confirm
 *   NB<LVL>           bytes of address space covered by one entry
 *   <LVL>MASK         byte-offset mask within that span (NB<LVL> - 1)
 *   <LVL>_ENTRY_NULL  typed null pointer
 */
typedef uint64_t        pml4_entry_t;
#define NPML4PG         (PAGE_SIZE/(sizeof (pml4_entry_t)))
#define PML4SHIFT       39
#define PML4PGSHIFT     9
#define NBPML4          (1ULL << PML4SHIFT)     /* 2^39 bytes (512 GiB) per PML4 entry */
#define PML4MASK        (NBPML4-1)
#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)

typedef uint64_t        pdpt_entry_t;
#define NPDPTPG         (PAGE_SIZE/(sizeof (pdpt_entry_t)))
#define PDPTSHIFT       30
#define PDPTPGSHIFT     9
#define NBPDPT          (1ULL << PDPTSHIFT)     /* 2^30 bytes (1 GiB) per PDPT entry */
#define PDPTMASK        (NBPDPT-1)
#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)

typedef uint64_t        pd_entry_t;
#define NPDPG           (PAGE_SIZE/(sizeof (pd_entry_t)))
#define PDSHIFT         21
#define PDPGSHIFT       9
#define NBPD            (1ULL << PDSHIFT)       /* 2^21 bytes (2 MiB) per page-directory entry */
#define PDMASK          (NBPD-1)
#define PD_ENTRY_NULL   ((pd_entry_t *) 0)

typedef uint64_t        pt_entry_t;
#define NPTPG           (PAGE_SIZE/(sizeof (pt_entry_t)))
#define PTSHIFT         12
#define PTPGSHIFT       9
#define NBPT            (1ULL << PTSHIFT)       /* 2^12 bytes (4 KiB) per page-table entry */
#define PTMASK          (NBPT-1)
#define PT_ENTRY_NULL   ((pt_entry_t *) 0)

/* Physical address as handled by the pmap layer (64 bits wide). */
typedef uint64_t  pmap_paddr_t;
164 | ||
/*
 * pmap-specific assertions.
 *
 * PMAP_ASSERT is defined only on DEVELOPMENT or DEBUG builds.  When it is not
 * defined, both macros below expand to nothing, so their arguments are not
 * evaluated at all.
 *
 * Even when compiled in, every check is gated at run time by
 * pmap_asserts_enabled.
 */
#if     DEVELOPMENT || DEBUG
#define PMAP_ASSERT 1
extern int pmap_asserts_enabled;        /* run-time switch: non-zero makes the checks live */
extern int pmap_asserts_traced;         /* non-zero: pmap_assert2 logs a trace event instead of panicking */
#endif

#if PMAP_ASSERT
/* Expression form: calls Assert(file, line, "ex") when ex is false. */
#define pmap_assert(ex) (pmap_asserts_enabled ? ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) : (void)0)

/*
 * Statement form with a caller-supplied printf-style suffix (fmt, args...).
 * On failure:
 *   - if pmap_asserts_traced is set, emit a 0xDEAD1000 kdebug event carrying
 *     the return address and line number, then clear kdebug_enable;
 *   - otherwise kprintf() the message and panic() with the same text.
 */
#define pmap_assert2(ex, fmt, args...)                                  \
	do {                                                            \
	        if (__improbable(pmap_asserts_enabled && !(ex))) {      \
	                if (pmap_asserts_traced) {                      \
	                        KERNEL_DEBUG_CONSTANT(0xDEAD1000, __builtin_return_address(0), __LINE__, 0, 0, 0); \
	                        kdebug_enable = 0;                      \
	                } else {                                        \
	                                kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
	                                panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
	                }                                               \
	        }                                                       \
	} while(0)
#else
#define pmap_assert(ex)
#define pmap_assert2(ex, fmt, args...)
#endif
190 | ||
/*
 * superpages
 *
 * Number of base pages that make up one superpage.  The base page size is
 * defined outside this file; with 4 KiB pages this would be 2 MiB, matching
 * NBPD -- TODO confirm.
 */
#define SUPERPAGE_NBASEPAGES 512
193 | ||
194 | /* | |
195 | * Atomic 64-bit store of a page table entry. | |
196 | */ | |
197 | static inline void | |
198 | pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) | |
199 | { | |
200 | /* | |
201 | * In the 32-bit kernel a compare-and-exchange loop was | |
202 | * required to provide atomicity. For K64, life is easier: | |
203 | */ | |
204 | *entryp = value; | |
205 | } | |
206 | ||
/*
 * in 64 bit spaces, the number of each type of page in the page tables
 *
 * Each level multiplies the count of the level above by the number of
 * entries one page holds.  The leading 1ULL forces the whole chain into
 * 64-bit arithmetic.
 */
#define NPML4PGS        (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
#define NPDPTPGS        (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
#define NPDEPGS         (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
#define NPTEPGS         (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))
212 | ||
/*
 * Layout of the top-level (PML4) slots used by the kernel.
 *
 * The physmap slot index and count are run-time variables, not constants;
 * they are only declared here.
 */
extern int      kernPhysPML4Index;
extern int      kernPhysPML4EntryCount;

#define KERNEL_PML4_INDEX               511
#define KERNEL_KEXTS_INDEX              (KERNEL_PML4_INDEX - 1)         /* 510: Home of KEXTs - the basement */
#define KERNEL_PHYSMAP_PML4_INDEX       (kernPhysPML4Index)             /* 50X: virtual to physical map */
#define KERNEL_PHYSMAP_PML4_COUNT       (kernPhysPML4EntryCount)
#define KERNEL_PHYSMAP_PML4_COUNT_MAX   (16 - 2)        /* 1 for KERNEL, 1 for BASEMENT */
/* 2 PML4s for KASAN to cover a maximum of 16 PML4s {PHYSMAP + BASEMENT + KVA} */
#define KERNEL_KASAN_PML4_LAST          (495) /* 511 - 16 */
#define KERNEL_KASAN_PML4_FIRST         (494) /* 511 - 17 */
#define KERNEL_DBLMAP_PML4_INDEX        (KERNEL_KASAN_PML4_FIRST - 1)   /* 493: slot just below KASAN */
#define KERNEL_PML4_COUNT               1
/* Top KERNEL_PML4_COUNT slots of the address space: 0 - 2^39 = 0xFFFFFF8000000000. */
#define KERNEL_BASE                     (0ULL - (NBPML4 * KERNEL_PML4_COUNT))
#define KERNEL_BASEMENT                 (KERNEL_BASE - NBPML4)          /* Basement uses one PML4 entry */
228 | ||
/*
 *	Pte related macros
 *
 *	KVADDR(pmi, pdpi, pdi, pti) builds a virtual address from one index
 *	per paging level: PML4, PDPT, page directory, page table.  Bits 47..63
 *	are always set, so the result lies in the upper half of the 64-bit
 *	address space.  The indices are not masked, so a value wider than its
 *	field spills into the next field up.
 */
#define KVADDR(pmi, pdpi, pdi, pti)               \
	 ((vm_offset_t)                   \
	        ((uint64_t) -1    << 47)        | \
	        ((uint64_t)(pmi)  << PML4SHIFT) | \
	        ((uint64_t)(pdpi) << PDPTSHIFT) | \
	        ((uint64_t)(pdi)  << PDESHIFT)  | \
	        ((uint64_t)(pti)  << PTESHIFT))
239 | ||
240 | ||
/* Default only: a definition of NKPT seen before this point takes precedence. */
#ifndef NKPT
#define NKPT            500     /* actual number of bootstrap kernel page tables */
#endif
244 | ||
245 | ||
246 | ||
/*
 *	Convert address offset to page descriptor index
 *
 *	The 'pmap' parameter is accepted but unused by every macro here.
 *
 *	NOTE(review): pdptnum() and pdenum() mask the shifted address with
 *	PDPTMASK / PDEMASK, which are byte-offset masks (30 and 21 bits wide),
 *	not 9-bit per-table index masks.  The result therefore still carries
 *	higher-level index bits -- confirm that callers rely on this.
 */
#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
#define PMAP_INVALID_PDPTNUM (~0ULL)    /* all-ones sentinel */

/*
 * Flat indices over the low 48 address bits: the address shifted down by the
 * level's shift, keeping (48 - shift) bits.  'a' is not cast, so it should be
 * an unsigned 64-bit quantity.
 */
#define pdeidx(pmap, a)         (((a) >> PDSHIFT)   & ((1ULL<<(48 - PDSHIFT)) -1))
#define pdptidx(pmap, a)        (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
#define pml4idx(pmap, a)        (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
257 | ||
258 | ||
/*
 *	Convert page descriptor index to user virtual address
 *
 *	Plain left shift by PDESHIFT; no sign extension is applied.
 */
#define pdetova(a)      ((vm_offset_t)(a) << PDESHIFT)

/*
 *	Convert address offset to page table index
 *
 *	Yields the 9-bit index (0..511) selected by address bits 12..20.
 */
#define ptenum(a)       (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
268 | ||
/*
 *	Hardware pte bit definitions (to be used directly on the ptes
 *	without using the bit fields).
 *
 *	All values are 64-bit (ULL) so they can be combined with pt_entry_t
 *	without truncation.
 */

#define INTEL_PTE_VALID         0x00000001ULL   /* bit 0 */

#define INTEL_PTE_WRITE         0x00000002ULL   /* bit 1 */
#define INTEL_PTE_RW            0x00000002ULL   /* same bit as INTEL_PTE_WRITE */

#define INTEL_PTE_USER          0x00000004ULL   /* bit 2 */

#define INTEL_PTE_WTHRU         0x00000008ULL   /* bit 3 */
#define INTEL_PTE_NCACHE        0x00000010ULL   /* bit 4 */

#define INTEL_PTE_REF           0x00000020ULL   /* bit 5 */
#define INTEL_PTE_MOD           0x00000040ULL   /* bit 6 */

/*
 * PS and PAT share bit 7.  Per the Intel SDM the bit means "page size" in
 * directory-level entries and "PAT" in 4 KiB page-table entries, so which
 * name applies depends on the level of the entry being examined.
 */
#define INTEL_PTE_PS            0x00000080ULL
#define INTEL_PTE_PAT           0x00000080ULL

#define INTEL_PTE_GLOBAL        0x00000100ULL   /* bit 8 */

/* These markers use software available bits ignored by the
 * processor's 4-level and EPT pagetable walkers.
 * N.B.: WIRED was originally bit 10, but that conflicts with
 * execute permissions for EPT entries iff mode-based execute controls
 * are enabled.
 */
#define INTEL_PTE_SWLOCK        (0x1ULL << 52)
#define INTEL_PDPTE_NESTED      (0x1ULL << 53)
#define INTEL_PTE_WIRED         (0x1ULL << 54)
/* TODO: Compressed markers, potential conflict with protection keys? */
#define INTEL_PTE_COMPRESSED_ALT (1ULL << 61) /* compressed but with "alternate accounting" */
#define INTEL_PTE_COMPRESSED    (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/

/* Frame-address field of an entry: bits 12..51 (see PG_FRAME). */
#define INTEL_PTE_PFN           PG_FRAME
/* TODO: these should be internal definitions */
#define INTEL_PTE_NX            (1ULL << 63)    /* no-execute; set by pte_remove_ex() for non-EPT entries */

#define INTEL_PTE_INVALID       0
/* This is conservative, but suffices */
#define INTEL_PTE_RSVD          ((1ULL << 10) | (1ULL << 11))
312 | ||
313 | ||
/* The only bits a well-formed "compressed" marker entry may have set. */
#define INTEL_PTE_COMPRESSED_MASK (INTEL_PTE_COMPRESSED | \
	                           INTEL_PTE_COMPRESSED_ALT | INTEL_PTE_SWLOCK)
/*
 * Non-zero when PTE value 'x' is a compressed-page marker.
 *
 * 'x' must be an lvalue: it is evaluated several times and its address is
 * taken.  If 'x' has the compressed marker but also carries bits outside
 * INTEL_PTE_COMPRESSED_MASK, the decision is delegated to
 * pmap_compressed_pte_corruption_repair(), which receives the value, a
 * pointer to it, and (ptep, pmap, vaddr).  That routine is declared
 * elsewhere; presumably it may rewrite 'x' and the entry -- confirm.
 */
#define PTE_IS_COMPRESSED(x, ptep, pmap, vaddr)                            \
	((((x) & INTEL_PTE_VALID) == 0) && /* PTE is not valid... */       \
	 ((x) & INTEL_PTE_COMPRESSED) && /* ...has "compressed" marker */ \
	 ((!((x) & ~INTEL_PTE_COMPRESSED_MASK)) || /* ...no other bits */  \
	  pmap_compressed_pte_corruption_repair((x), &(x), (ptep), (pmap), (vaddr))))
321 | ||
/*
 * Conversions between physical addresses and entry values.  Both directions
 * are the same operation: keep only the frame-address bits (INTEL_PTE_PFN).
 */
#define pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
/* Advances lvalue 'p' in place by one base page (INTEL_OFFMASK + 1 bytes). */
#define pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)

/*
 * Ready-made entry values for physical address 'p': the frame bits plus
 * VALID, optionally RW (writable) and/or USER.
 */
#define pte_kernel_rw(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
330 | ||
/* Presumably the INVEPT "single-context" invalidation type -- confirm against the Intel SDM. */
#define PMAP_INVEPT_SINGLE_CONTEXT      1


/* Bit in the EPT pointer itself (not in an EPT entry). */
#define INTEL_EPTP_AD           0x00000040ULL

/*
 * EPT entry bits.  Unlike regular entries, EPT entries express execute
 * permission positively (INTEL_EPT_EX) and keep referenced/modified at
 * bits 8 and 9 rather than 5 and 6.
 */
#define INTEL_EPT_READ          0x00000001ULL
#define INTEL_EPT_WRITE         0x00000002ULL
#define INTEL_EPT_EX            0x00000004ULL
#define INTEL_EPT_IPAT          0x00000040ULL
#define INTEL_EPT_PS            0x00000080ULL
#define INTEL_EPT_REF           0x00000100ULL
#define INTEL_EPT_MOD           0x00000200ULL

/*
 * Memory-type field, bits 3..5 of an EPT entry.  INTEL_EPT_NCACHE is the
 * all-zero encoding, so it cannot be detected with a bitwise AND; compare
 * (entry & INTEL_EPT_CACHE_MASK) against it instead.
 */
#define INTEL_EPT_CACHE_MASK    0x00000038ULL
#define INTEL_EPT_NCACHE        0x00000000ULL
#define INTEL_EPT_WC            0x00000008ULL
#define INTEL_EPT_WTHRU         0x00000020ULL
#define INTEL_EPT_WP            0x00000028ULL
#define INTEL_EPT_WB            0x00000030ULL
350 | ||
351 | /* | |
352 | * Routines to filter correct bits depending on the pmap type | |
353 | */ | |
354 | ||
355 | static inline pt_entry_t | |
356 | pte_remove_ex(pt_entry_t pte, boolean_t is_ept) | |
357 | { | |
358 | if (__probable(!is_ept)) { | |
359 | return pte | INTEL_PTE_NX; | |
360 | } | |
361 | ||
362 | return pte & (~INTEL_EPT_EX); | |
363 | } | |
364 | ||
365 | static inline pt_entry_t | |
366 | pte_set_ex(pt_entry_t pte, boolean_t is_ept) | |
367 | { | |
368 | if (__probable(!is_ept)) { | |
369 | return pte & (~INTEL_PTE_NX); | |
370 | } | |
371 | ||
372 | return pte | INTEL_EPT_EX; | |
373 | } | |
374 | ||
375 | static inline pt_entry_t | |
376 | physmap_refmod_to_ept(pt_entry_t physmap_pte) | |
377 | { | |
378 | pt_entry_t ept_pte = 0; | |
379 | ||
380 | if (physmap_pte & INTEL_PTE_MOD) { | |
381 | ept_pte |= INTEL_EPT_MOD; | |
382 | } | |
383 | ||
384 | if (physmap_pte & INTEL_PTE_REF) { | |
385 | ept_pte |= INTEL_EPT_REF; | |
386 | } | |
387 | ||
388 | return ept_pte; | |
389 | } | |
390 | ||
391 | static inline pt_entry_t | |
392 | ept_refmod_to_physmap(pt_entry_t ept_pte) | |
393 | { | |
394 | pt_entry_t physmap_pte = 0; | |
395 | ||
396 | assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0); | |
397 | ||
398 | if (ept_pte & INTEL_EPT_REF) { | |
399 | physmap_pte |= INTEL_PTE_REF; | |
400 | } | |
401 | ||
402 | if (ept_pte & INTEL_EPT_MOD) { | |
403 | physmap_pte |= INTEL_PTE_MOD; | |
404 | } | |
405 | ||
406 | return physmap_pte; | |
407 | } | |
408 | ||
/*
 * Note: Not all Intel processors support EPT referenced access and dirty bits.
 * During pmap_init() we check the VMX capability for the current hardware
 * and update this variable accordingly.
 */
extern boolean_t pmap_ept_support_ad;

/*
 * Encoding-neutral accessors.  Macros that take 'is_ept' yield the EPT bit
 * when it is true and the regular-PTE bit otherwise; those without a
 * parameter have the same value in both encodings.
 *
 * PTE_VALID_MASK: an EPT entry has no single "valid" bit, so the mask is the
 *                 union of its read/write/execute permission bits.
 * PTE_NCACHE:     for EPT this is INTEL_EPT_NCACHE, which is 0, so ANDing an
 *                 entry with PTE_NCACHE(TRUE) always gives 0.
 */
#define PTE_VALID_MASK(is_ept)  ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
#define PTE_READ(is_ept)        ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
#define PTE_WRITE(is_ept)       ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
#define PTE_IS_EXECUTABLE(is_ept, pte)  ((is_ept) ? (((pte) & INTEL_EPT_EX) != 0) : (((pte) & INTEL_PTE_NX) == 0))
#define PTE_PS                  INTEL_PTE_PS
#define PTE_COMPRESSED          INTEL_PTE_COMPRESSED
#define PTE_COMPRESSED_ALT      INTEL_PTE_COMPRESSED_ALT
#define PTE_NCACHE(is_ept)      ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
#define PTE_WTHRU(is_ept)       ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
#define PTE_REF(is_ept)         ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
#define PTE_MOD(is_ept)         ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
#define PTE_WIRED               INTEL_PTE_WIRED
428 | ||
429 | ||
/*
 * Cache-mode selectors.  The non-zero values are distinct single bits, so
 * they can be OR-ed together.  Their consumers are outside this file.
 */
#define PMAP_DEFAULT_CACHE      0
#define PMAP_INHIBIT_CACHE      1
#define PMAP_GUARDED_CACHE      2
#define PMAP_ACTIVATE_CACHE     4
#define PMAP_NO_GUARD_CACHE     8

/*
 * Per-pmap ledger operations
 *
 * Thin wrappers that forward entry 'e' and amount 'a' to ledger_debit() /
 * ledger_credit() on the pmap's own ledger (p->ledger).
 */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
439 | ||
440 | #ifndef ASSEMBLER | |
441 | ||
442 | #include <sys/queue.h> | |
443 | ||
/*
 *	Address of current and alternate address space page table maps
 *	and directories.
 *
 *	Everything here is a declaration only; the objects are defined in
 *	other translation units.
 */

extern pt_entry_t       *PTmap;
extern pdpt_entry_t     *IdlePDPT;
extern pml4_entry_t     *IdlePML4;
extern boolean_t        no_shared_cr3;  /* debugging mode; consulted by set_dirbase() */
extern pd_entry_t       *IdlePTD;       /* physical addr of "Idle" state PTD */

/* Counters and settings whose meaning is defined by their owners -- not visible here. */
extern uint64_t         pmap_pv_hashlist_walks;
extern uint64_t         pmap_pv_hashlist_cnts;
extern uint32_t         pmap_pv_hashlist_max;
extern uint32_t         pmap_kernel_text_ps;

/* Strip an address to its low 32 bits and return it as a pointer. */
#define ID_MAP_VTOP(x)  ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))

/* Bounds of the physmap window; PHYSMAP_PTOV_check() validates against them. */
extern  uint64_t physmap_base, physmap_max;

/* Size of the physmap window in whole GB (rounded down), but never less than 4. */
#define NPHYSMAP (MAX(((physmap_max - physmap_base) / GB), 4))
465 | ||
466 | static inline boolean_t | |
467 | physmap_enclosed(addr64_t a) | |
468 | { | |
469 | return a < (NPHYSMAP * GB); | |
470 | } | |
471 | ||
472 | static inline void * | |
473 | PHYSMAP_PTOV_check(void *paddr) | |
474 | { | |
475 | uint64_t pvaddr = (uint64_t)paddr + physmap_base; | |
476 | ||
477 | if (__improbable(pvaddr >= physmap_max)) { | |
478 | panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx", | |
479 | pvaddr, physmap_base, physmap_max); | |
480 | } | |
481 | ||
482 | return (void *)pvaddr; | |
483 | } | |
484 | ||
/* Bounds-checked physical-to-physmap-virtual conversion; yields a void *. */
#define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
/* Same conversion, with the result as an integer (vm_offset_t). */
#define phystokv(x) ((vm_offset_t)(PHYSMAP_PTOV(x)))
487 | #if MACH_KERNEL_PRIVATE | |
/* Bounds of the double-mapped window and the offset added to reach it. */
extern uint64_t dblmap_base, dblmap_max, dblmap_dist;

/*
 * Return the double-map alias of address 'x' (x + dblmap_dist), panicking
 * when the alias is outside [dblmap_base, dblmap_max).
 *
 * x: address to translate.
 */
static inline uint64_t
DBLMAP_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + dblmap_dist;
	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("DBLMAP bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
/*
 * Convenience wrapper accepting any pointer or integer expression.  The
 * argument is parenthesized so that the cast applies to the whole expression
 * rather than only to its first operand.
 */
#define DBLMAP(x) (DBLMAP_CHECK((uint64_t)(x)))
501 | extern uint64_t ldt_alias_offset; | |
502 | static inline uint64_t | |
503 | LDTALIAS_CHECK(uintptr_t x) | |
504 | { | |
505 | uint64_t dbladdr = (uint64_t)x + ldt_alias_offset; | |
506 | if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) { | |
507 | panic("LDTALIAS: bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx", | |
508 | (uint64_t)x, dbladdr, dblmap_base, dblmap_max); | |
509 | } | |
510 | return dbladdr; | |
511 | } | |
512 | #define LDTALIAS(x) (LDTALIAS_CHECK((uint64_t) x)) | |
513 | #endif | |
514 | ||
/*
 * For KASLR, we alias the master processor's IDT and GDT at fixed
 * virtual addresses to defeat SIDT/SGDT address leakage.
 * And non-boot processor's GDT aliases likewise (skipping LOWGLOBAL_ALIAS)
 * The low global vector page is mapped at a fixed alias also.
 *
 * LOWGLOBAL_ALIAS is that fixed alias: 0x2000 bytes above
 * VM_MIN_KERNEL_ADDRESS.
 */
#define LOWGLOBAL_ALIAS         (VM_MIN_KERNEL_ADDRESS + 0x2000)

/*
 * This indicates (roughly) where there is free space for the VM
 * to use for the heap; this does not need to be precise.
 */
#define KERNEL_PMAP_HEAP_RANGE_START VM_MIN_KERNEL_AND_KEXT_ADDRESS
528 | ||
529 | #if MACH_KERNEL_PRIVATE | |
/*
 * Declaration only; defined elsewhere.  Judging by the name and parameters
 * this invalidates TLB entries for the virtual range [startv, endv),
 * optionally including global entries, for the given PCID -- confirm against
 * the implementation.
 */
extern void
pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid);
532 | ||
533 | #include <vm/vm_page.h> | |
534 | ||
535 | /* | |
536 | * For each vm_page_t, there is a list of all currently | |
537 | * valid virtual mappings of that page. An entry is | |
538 | * a pv_entry_t; the list is the pv_table. | |
539 | */ | |
540 | ||
/*
 * Per-address-space physical map.
 *
 * A pmap is either a regular pmap (pm_cr3 != 0, pm_eptp == 0) or an EPT pmap
 * (pm_cr3 == 0, pm_eptp != 0); is_ept_pmap() tells them apart and asserts
 * that exactly one of the two is set.
 *
 * The first two members are each aligned to 64 bytes.
 */
struct pmap {
	lck_rw_t        pmap_rwl __attribute((aligned(64)));
	pmap_paddr_t    pm_cr3 __attribute((aligned(64))); /* Kernel+user shared PML4 physical*/
	pmap_paddr_t    pm_ucr3;        /* Mirrored user PML4 physical */
	pml4_entry_t    *pm_pml4;       /* VKA of top level */
	pml4_entry_t    *pm_upml4;      /* Shadow VKA of top level */
	pmap_paddr_t    pm_eptp;        /* EPTP */

	task_map_t      pm_task_map;    /* copied into the per-CPU data by set_dirbase() */
	boolean_t       pagezero_accessible;    /* read by set_dirbase() to pick the CR3 switch policy */
	boolean_t       pm_vm_map_cs_enforced; /* is vm_map cs_enforced? */
#define PMAP_PCID_MAX_CPUS      MAX_CPUS        /* Must be a multiple of 8 */
	pcid_t          pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];     /* one slot per CPU */
	volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];        /* one byte per CPU */
	boolean_t       pm_shared;
	os_refcnt_t     ref_count;
	pdpt_entry_t    *pm_pdpt;       /* KVA of 3rd level page */
	vm_object_t     pm_obj;         /* object to hold pde's */
	vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
	vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
#if     DEVELOPMENT || DEBUG
	int             nx_enabled;
#endif
	ledger_t        ledger;         /* ledger tracking phys mappings */
	struct pmap_statistics  stats;  /* map statistics */
	uint64_t        corrected_compressed_ptes_count;
#if MACH_ASSERT
	boolean_t       pmap_stats_assert;
	int             pmap_pid;
	char            pmap_procname[17];      /* fixed 17-byte buffer */
#endif /* MACH_ASSERT */
};
573 | ||
574 | static inline boolean_t | |
575 | is_ept_pmap(pmap_t p) | |
576 | { | |
577 | if (__probable(p->pm_cr3 != 0)) { | |
578 | assert(p->pm_eptp == 0); | |
579 | return FALSE; | |
580 | } | |
581 | ||
582 | assert(p->pm_eptp != 0); | |
583 | ||
584 | return TRUE; | |
585 | } | |
586 | ||
/*
 * Declaration only.  Both parameters are out-pointers; presumably they
 * receive the newly created EPT pmap and its EPT pointer -- confirm against
 * the definition.
 */
void hv_ept_pmap_create(void **ept_pmap, void **eptp);
588 | ||
/*
 * Descriptor for one range of physical pages, with the bookkeeping used
 * while pages are "stolen" from either end of the range.  All page fields
 * are page numbers (ppnum_t), not byte addresses.
 */
typedef struct pmap_memory_regions {
	ppnum_t base;            /* first page of this region */
	ppnum_t alloc_up;        /* pages below this one have been "stolen" */
	ppnum_t alloc_down;      /* pages above this one have been "stolen" */
	ppnum_t alloc_frag_up;   /* low page of fragment after large page alloc */
	ppnum_t alloc_frag_down; /* high page of fragment after large page alloc */
	ppnum_t end;             /* last page of this region */
	uint32_t type;           /* region type code; values are defined by the producer of the table */
	uint64_t attribute;      /* region attribute bits; likewise defined elsewhere */
} pmap_memory_region_t;

extern unsigned pmap_memory_region_count;
extern unsigned pmap_memory_region_current;

/* Presumably the capacity of pmap_memory_regions[], whose size is not given here -- confirm. */
#define PMAP_MEMORY_REGIONS_SIZE 128

extern pmap_memory_region_t pmap_memory_regions[];
606 | #include <i386/pmap_pcid.h> | |
607 | ||
/*
 * Make 'tpmap' the address space in effect on CPU 'my_cpu'.
 *
 * The pmap's CR3 values and task map are first recorded in the per-CPU data
 * and in its shadow copy; then CR3 is reloaded (or a PCID activated), but
 * only when the CR3 currently loaded is not already the right one.
 *
 * tpmap:  pmap being switched to.
 * thread: thread being run; only its machine.specFlags CopyIOActive bit is
 *         consulted.
 * my_cpu: index of the executing CPU; asserted equal to cpu_number().
 *
 * The caller must have preemption disabled or interrupts off (asserted).
 */
static inline void
set_dirbase(pmap_t tpmap, thread_t thread, int my_cpu)
{
	int ccpu = my_cpu;
	uint64_t pcr3 = tpmap->pm_cr3, ucr3 = tpmap->pm_ucr3;
	/* Record the kernel+user CR3 in both the per-CPU data and its shadow. */
	cpu_datap(ccpu)->cpu_task_cr3 = pcr3;
	cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = pcr3;

	/* Likewise for the user-only CR3. */
	cpu_datap(ccpu)->cpu_ucr3 = ucr3;
	cpu_shadowp(ccpu)->cpu_ucr3 = ucr3;

	cpu_datap(ccpu)->cpu_task_map = cpu_shadowp(ccpu)->cpu_task_map =
	    tpmap->pm_task_map;

	assert((get_preemption_level() > 0) || (ml_get_interrupts_enabled() == FALSE));
	assert(ccpu == cpu_number());
	/*
	 * Switch cr3 if necessary
	 * - unless running with no_shared_cr3 debugging mode
	 * and we're not on the kernel's cr3 (after pre-empted copyio)
	 */
	/*
	 * NOTE(review): despite its name, 'nopagezero' is TRUE when the pmap
	 * reports pagezero_accessible.  'priorpagezero' is the flag left on
	 * this CPU by the previous switch, read before it is overwritten.
	 */
	boolean_t nopagezero = tpmap->pagezero_accessible;
	boolean_t priorpagezero = cpu_datap(ccpu)->cpu_pagezero_mapped;
	cpu_datap(ccpu)->cpu_pagezero_mapped = nopagezero;

	if (__probable(!no_shared_cr3)) {
		if (__improbable(nopagezero)) {
			/*
			 * Page-zero-accessible pmap: the task's CR3 is loaded
			 * only while a copyio is in progress; otherwise the
			 * CPU runs on its kernel CR3.
			 */
			boolean_t copyio_active = ((thread->machine.specFlags & CopyIOActive) != 0);
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, TRUE, copyio_active);
			} else {
				if (copyio_active) {
					if (get_cr3_base() != tpmap->pm_cr3) {
						set_cr3_raw(tpmap->pm_cr3);
					}
				} else if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
					set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
				}
			}
		} else if ((get_cr3_base() != tpmap->pm_cr3) || priorpagezero) {
			/*
			 * Ordinary pmap: switch when CR3 differs, and also
			 * whenever the previous pmap on this CPU had the
			 * page-zero flag set, even if CR3 already matches.
			 */
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, FALSE, FALSE);
			} else {
				set_cr3_raw(tpmap->pm_cr3);
			}
		}
	} else {
		/* no_shared_cr3 debugging mode: make sure the kernel CR3 is loaded. */
		if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
			set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
		}
	}
}
660 | ||
/*
 *	External declarations for PMAP_ACTIVATE.
 *
 *	Everything in this section is a prototype only; the definitions and
 *	their exact contracts live in other files.  Descriptions marked
 *	"presumably" are inferred from names and signatures and should be
 *	confirmed against the implementation.
 */

/* Called by MARK_CPU_ACTIVE() to process a pending update on this CPU. */
extern void             pmap_update_interrupt(void);

/*
 * The function name is parenthesized, which keeps a function-like macro of
 * the same name (if one exists) from expanding in this declaration.
 */
extern addr64_t(kvtophys)(
	vm_offset_t             addr);

extern kern_return_t    pmap_expand(
	pmap_t          pmap,
	vm_map_offset_t addr,
	unsigned int options);
extern vm_offset_t      pmap_map(
	vm_offset_t     virt,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       prot,
	unsigned int    flags);

extern vm_offset_t      pmap_map_bd(
	vm_offset_t     virt,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       prot,
	unsigned int    flags);
extern void             pmap_bootstrap(
	vm_offset_t     load_start,
	boolean_t       IA32e);

extern boolean_t        pmap_valid_page(
	ppnum_t pn);

extern int              pmap_list_resident_pages(
	struct pmap     *pmap,
	vm_offset_t     *listp,
	int             space);
extern void             x86_filter_TLB_coherency_interrupts(boolean_t);

extern void
pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX,
    boolean_t ro);

/*
 * Get cache attributes (as pagetable bits) for the specified phys page
 */
extern  unsigned        pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);

extern kern_return_t    pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	unsigned int flags);

extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
/* Presumably virtual-to-physical lookups in 'map': full address vs. page number -- confirm. */
extern pmap_paddr_t pmap_find_pa(pmap_t map, addr64_t va);
extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va);
extern ppnum_t pmap_find_phys_nofault(pmap_t pmap, addr64_t va);

/* Result is delivered through *protp; the return value is a status code. */
extern kern_return_t pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp);

extern void pmap_cpu_init(void);
extern void pmap_disable_NX(pmap_t pmap);

/* Takes a printf-like callback (format string at argument 1, varargs from 2). */
extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1, 2));

extern void x86_64_protect_data_const(void);

extern uint64_t pmap_commpage_size_min(pmap_t pmap);
734 | ||
735 | /* | |
736 | * Macros for speed. | |
737 | */ | |
738 | ||
739 | ||
740 | #include <kern/spl.h> | |
741 | ||
742 | ||
/*
 * Activate the pmap behind 'map' on CPU 'my_cpu' for 'thread': looks up the
 * pmap with vm_map_pmap() and hands it to set_dirbase().
 *
 * Expands to a bare { } block, not do { } while (0); the semicolon a caller
 * writes after the invocation becomes an empty statement, so do not use it
 * as the unbraced body of an if that has an else.
 */
#define PMAP_ACTIVATE_MAP(map, thread, my_cpu)  {                               \
	pmap_t          tpmap;                                  \
                                                                        \
	tpmap = vm_map_pmap(map);                                       \
	set_dirbase(tpmap, thread, my_cpu);                                     \
}
749 | ||
/*
 * Deactivation hook, invoked with three arguments by PMAP_SWITCH_USER().
 *
 * On x86_64 it performs no state change.  When PCIDs are in use
 * (pmap_pcid_ncpus != 0) it asserts, via pmap_assert2, that the PCID computed
 * for (map->pmap, thread, ccpu) equals the low 12 bits of the current CR3,
 * and reports the relevant state on failure.  The expansion already ends in
 * a semicolon.
 *
 * On other configurations it expands to nothing.  That stub takes the same
 * three parameters as the x86_64 form, so the three-argument invocation in
 * PMAP_SWITCH_USER() is valid on both branches.
 */
#if defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)                          \
	pmap_assert2((pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE),"PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x", pmap_pcid_ncpus, pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu), get_cr3_raw(), map->pmap->pm_cr3, cpu_datap(ccpu)->cpu_kernel_cr3, kernel_pmap->pm_cr3, cpu_datap(ccpu)->cpu_active_pcid, cpu_datap(ccpu)->cpu_kernel_pcid, thread->machine.specFlags, map->pmap->pagezero_accessible);
#else
#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)
#endif
756 | ||
/*
 * Replace thread 'th's map with 'new_map' and activate it on CPU 'my_cpu'.
 *
 * The old map is deactivated, th->map is reassigned, and the new map is
 * activated, all between splhigh() and splx().  'th' is expanded several
 * times without parentheses, so pass a simple identifier.
 *
 * Expands to a bare { } block (see PMAP_ACTIVATE_MAP).
 */
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
	spl_t		spl;                                            \
                                                                        \
	spl = splhigh();                                                \
	PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);                       \
	th->map = new_map;                                              \
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);                         \
	splx(spl);                                                      \
}
766 | ||
767 | /* | |
768 | * Marking the current cpu's cr3 inactive is achieved by setting its lsb. | |
769 | * Marking the current cpu's cr3 active once more involves clearing this bit. | |
770 | * Note that valid page tables are page-aligned and so the bottom 12 bits | |
771 | * are normally zero, modulo PCID. | |
772 | * We can only mark the current cpu active/inactive but we can test any cpu. | |
773 | */ | |
/*
 * The two MARK macros modify the executing CPU's cpu_active_cr3 and expand
 * to an unparenthesized assignment expression, so use them only as
 * stand-alone statements.  The remaining macros read any CPU's state.
 */
/* Set bit 0 of this CPU's cpu_active_cr3. */
#define CPU_CR3_MARK_INACTIVE()                                         \
	current_cpu_datap()->cpu_active_cr3 |= 1

/* Clear bit 0 of this CPU's cpu_active_cr3. */
#define CPU_CR3_MARK_ACTIVE()                                           \
	current_cpu_datap()->cpu_active_cr3 &= ~1

/* True when bit 0 of 'cpu's cpu_active_cr3 is clear. */
#define CPU_CR3_IS_ACTIVE(cpu)                                          \
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

/* 'cpu's cpu_active_cr3 with the inactive marker (bit 0) masked off. */
#define CPU_GET_ACTIVE_CR3(cpu)                                         \
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

/* 'cpu's cpu_task_cr3, as recorded by set_dirbase(). */
#define CPU_GET_TASK_CR3(cpu)                                           \
	(cpu_datap(cpu)->cpu_task_cr3)
788 | ||
/*
 *	Mark this cpu idle, and remove it from the active set,
 *	since it is not actively using any pmap.  Signal_cpus
 *	will notice that it is idle, and avoid signaling it,
 *	but will queue the update request for when the cpu
 *	becomes active.
 *
 *	Must be invoked with interrupts disabled (asserted).  'my_cpu' is
 *	accepted but not used: the macro always acts on the executing CPU.
 *	The mfence() follows the store that sets the inactive marker.
 */
#define MARK_CPU_IDLE(my_cpu)   {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	CPU_CR3_MARK_INACTIVE();                                        \
	mfence();                                                                       \
}
801 | ||
/*
 * Counterpart of MARK_CPU_IDLE(): clear the inactive marker, fence, then
 * call pmap_update_interrupt() unconditionally.
 *
 * Must be invoked with interrupts disabled (asserted).  'my_cpu' is accepted
 * but not used: the macro always acts on the executing CPU.
 */
#define MARK_CPU_ACTIVE(my_cpu) {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	/* \
	 *	If a kernel_pmap update was requested while this cpu \
	 *	was idle, process it as if we got the interrupt. \
	 *	Before doing so, remove this cpu from the idle set. \
	 *	Since we do not grab any pmap locks while we flush \
	 *	our TLB, another cpu may start an update operation \
	 *	before we finish.  Removing this cpu from the idle \
	 *	set assures that we will receive another update \
	 *	interrupt if this happens. \
	 */                                                             \
	CPU_CR3_MARK_ACTIVE();                                          \
	mfence();                                                                       \
	pmap_update_interrupt();                                        \
}
818 | ||
/* No-op in this header: expands to nothing. */
#define PMAP_CONTEXT(pmap, thread)

/*
 * True when VA lies within [vm_min_kernel_address, vm_max_kernel_address];
 * both bounds are inclusive.  VA is evaluated twice.
 */
#define pmap_kernel_va(VA)      \
	((((vm_offset_t) (VA)) >= vm_min_kernel_address) &&     \
	 (((vm_offset_t) (VA)) <= vm_max_kernel_address))


/* Direct accessors for fields of the pmap's 'stats' member. */
#define pmap_compressed(pmap)           ((pmap)->stats.compressed)
#define pmap_resident_count(pmap)       ((pmap)->stats.resident_count)
#define pmap_resident_max(pmap)         ((pmap)->stats.resident_max)
/* No-op in this header: expands to nothing, so the arguments are not evaluated. */
#define pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
/* Not provided here: both always evaluate to KERN_INVALID_ADDRESS and ignore their arguments. */
#define pmap_attribute(pmap, addr, size, attr, value) \
	                                (KERN_INVALID_ADDRESS)
#define pmap_attribute_cache_sync(addr, size, attr, value) \
	                                (KERN_INVALID_ADDRESS)
834 | ||
/*
 * Feature flag; presumably tells machine-independent code that this
 * platform supplies pmap_is_empty() -- confirm at the point of use.
 */
#define MACHINE_PMAP_IS_EMPTY   1
extern boolean_t pmap_is_empty(pmap_t           pmap,
    vm_map_offset_t start,
    vm_map_offset_t end);

#define MACHINE_BOOTSTRAPPTD    1       /* Static bootstrap page-tables */

/* Declaration only; parameters are unnamed here and the contract is defined elsewhere. */
kern_return_t
pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);

/* Test hook, declared only on DEVELOPMENT or DEBUG builds. */
#if DEVELOPMENT || DEBUG
extern kern_return_t pmap_test_text_corruption(pmap_paddr_t);
#endif /* DEVELOPMENT || DEBUG */
848 | ||
/*
 * Statistics assertions, compiled in only when MACH_ASSERT is set and gated
 * at run time by the global pmap_stats_assert.  (struct pmap also has a
 * member with this name; the macro tests the global.)
 *
 * 'args' must be a complete parenthesized argument list for assertf(), e.g.
 * PMAP_STATS_ASSERTF((cond, "fmt", ...)).  Without MACH_ASSERT the macro
 * expands to nothing and 'args' is not evaluated.
 */
#if MACH_ASSERT
extern int pmap_stats_assert;
#define PMAP_STATS_ASSERTF(args)                \
	MACRO_BEGIN                             \
	if (pmap_stats_assert) assertf args;    \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(args)
#endif /* MACH_ASSERT */
858 | #endif /* MACH_KERNEL_PRIVATE */ | |
859 | #endif /* ASSEMBLER */ | |
860 | #endif /* _PMAP_MACHINE_ */ | |
861 | #endif /* KERNEL_PRIVATE */ |