]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/pmap.h
xnu-4903.270.47.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap.h
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * File: pmap.h
61 *
62 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Machine-dependent structures for the physical map module.
66 */
67 #ifdef KERNEL_PRIVATE
68 #ifndef _PMAP_MACHINE_
69 #define _PMAP_MACHINE_ 1
70
71 #ifndef ASSEMBLER
72
73 #include <mach/kern_return.h>
74 #include <mach/machine/vm_types.h>
75 #include <mach/vm_prot.h>
76 #include <mach/vm_statistics.h>
77 #include <mach/machine/vm_param.h>
78 #include <kern/kern_types.h>
79 #include <kern/thread.h>
80 #include <kern/simple_lock.h>
81
82 #include <i386/mp.h>
83 #include <i386/proc_reg.h>
84
85 #include <i386/pal_routines.h>
86
/*
 * Define the generic (INTEL_* / intel_*) page-size names in terms of the
 * specific ones: each generic name expands directly to its I386_* / i386_*
 * counterpart.
 */

#define INTEL_PGBYTES           I386_PGBYTES
#define INTEL_PGSHIFT           I386_PGSHIFT
#define intel_btop(x)           i386_btop(x)
#define intel_ptob(x)           i386_ptob(x)
#define intel_round_page(x)     i386_round_page(x)
#define intel_trunc_page(x)     i386_trunc_page(x)
97
98 /*
99 * i386/i486/i860 Page Table Entry
100 */
101
102 #endif /* ASSEMBLER */
103
/*
 * Basic page-table geometry.  These definitions sit outside the
 * "#ifndef ASSEMBLER" region above, so they are visible to assembly
 * sources as well as C.
 */
#define NPGPTD          4ULL
#define PDESHIFT        21ULL           /* log2 of NBPDE below (2^21 bytes per PDE) */
#define PTEMASK         0x1ffULL        /* 9-bit index mask, see ptenum() */
#define PTEINDX         3ULL

#define PTESHIFT        12ULL           /* shift applied by ptenum() / KVADDR() */

/* Keeps only the low 32 bits of an address (used by ID_MAP_VTOP()). */
#define LOW_4GB_MASK    ((vm_offset_t)0x00000000FFFFFFFFUL)

#define PDESIZE         sizeof(pd_entry_t) /* for assembly files */
#define PTESIZE         sizeof(pt_entry_t) /* for assembly files */

#define INTEL_OFFMASK   (I386_PGBYTES - 1)      /* byte offset within a page */
#define INTEL_LOFFMASK  (I386_LPGBYTES - 1)     /* same, using I386_LPGBYTES */
#define PG_FRAME        0x000FFFFFFFFFF000ULL   /* bits 12..51 of an entry */
#define NPTEPG          (PAGE_SIZE/(sizeof (pt_entry_t)))
#define NPTDPG          (PAGE_SIZE/(sizeof (pd_entry_t)))

#define NBPTD           (NPGPTD << PAGE_SHIFT)
#define NPDEPTD         (NBPTD / (sizeof (pd_entry_t)))
#define NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NBPDE           (1ULL << PDESHIFT)      /* bytes covered by one PDE */
#define PDEMASK         (NBPDE - 1)             /* byte offset within one PDE's span */
127
#define PTE_PER_PAGE            512 /* number of PTE's per page on any level */

/*
 * cleanly define parameters for all the page table levels
 *
 * The same naming pattern is repeated for each level <L>:
 *   N<L>PG       entries of that type per PAGE_SIZE page
 *   <L>SHIFT     log2 of the bytes spanned by one entry
 *   NB<L>        bytes spanned by one entry (1 << <L>SHIFT)
 *   <L>MASK      byte-offset mask within one entry's span (NB<L> - 1)
 *   <L>PGSHIFT   9 at every level (presumably log2 of entries per page)
 *   <L>_ENTRY_NULL  typed null pointer
 */
typedef uint64_t        pml4_entry_t;
#define NPML4PG         (PAGE_SIZE/(sizeof (pml4_entry_t)))
#define PML4SHIFT       39
#define PML4PGSHIFT     9
#define NBPML4          (1ULL << PML4SHIFT)
#define PML4MASK        (NBPML4-1)
#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)

typedef uint64_t        pdpt_entry_t;
#define NPDPTPG         (PAGE_SIZE/(sizeof (pdpt_entry_t)))
#define PDPTSHIFT       30
#define PDPTPGSHIFT     9
#define NBPDPT          (1ULL << PDPTSHIFT)
#define PDPTMASK        (NBPDPT-1)
#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)

typedef uint64_t        pd_entry_t;
#define NPDPG           (PAGE_SIZE/(sizeof (pd_entry_t)))
#define PDSHIFT         21
#define PDPGSHIFT       9
#define NBPD            (1ULL << PDSHIFT)
#define PDMASK          (NBPD-1)
#define PD_ENTRY_NULL   ((pd_entry_t *) 0)

typedef uint64_t        pt_entry_t;
#define NPTPG           (PAGE_SIZE/(sizeof (pt_entry_t)))
#define PTSHIFT         12
#define PTPGSHIFT       9
#define NBPT            (1ULL << PTSHIFT)
#define PTMASK          (NBPT-1)
#define PT_ENTRY_NULL   ((pt_entry_t *) 0)

/* Physical address as used by the pmap layer. */
typedef uint64_t        pmap_paddr_t;
164
/*
 * pmap assertions are compiled in only on DEVELOPMENT or DEBUG kernels, and
 * even then are gated at run time by pmap_asserts_enabled.
 */
#if DEVELOPMENT || DEBUG
#define PMAP_ASSERT 1
extern int pmap_asserts_enabled;
extern int pmap_asserts_traced;
#endif

#if PMAP_ASSERT
/*
 * pmap_assert(ex): when pmap_asserts_enabled is non-zero and `ex` is false,
 * call Assert() with the file, line and stringified expression; otherwise
 * evaluates to (void)0.
 */
#define pmap_assert(ex) (pmap_asserts_enabled ? ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) : (void)0)

/*
 * pmap_assert2(ex, fmt, args...): like pmap_assert() but with a caller
 * supplied printf-style message.  On failure:
 *  - if pmap_asserts_traced is set, emit a kdebug event (0xDEAD1000) carrying
 *    the return address and line number, then clear kdebug_enable;
 *  - otherwise kprintf() the message and panic() with the same text.
 */
#define pmap_assert2(ex, fmt, args...)                                  \
	do {                                                            \
	        if (__improbable(pmap_asserts_enabled && !(ex))) {      \
	                if (pmap_asserts_traced) {                      \
	                        KERNEL_DEBUG_CONSTANT(0xDEAD1000, __builtin_return_address(0), __LINE__, 0, 0, 0); \
	                        kdebug_enable = 0;                      \
	                } else {                                        \
	                        kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
	                        panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
	                }                                               \
	        }                                                       \
	} while(0)
#else
/* Assertions compiled out: both macros expand to nothing. */
#define pmap_assert(ex)
#define pmap_assert2(ex, fmt, args...)
#endif

/* superpages */
#define SUPERPAGE_NBASEPAGES 512
193
194 /*
195 * Atomic 64-bit store of a page table entry.
196 */
197 static inline void
198 pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
199 {
200 /*
201 * In the 32-bit kernel a compare-and-exchange loop was
202 * required to provide atomicity. For K64, life is easier:
203 */
204 *entryp = value;
205 }
206
/*
 * in 64 bit spaces, the number of each type of page in the page tables
 * (each count is the previous level's count times the entries per page)
 */
#define NPML4PGS        (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
#define NPDPTPGS        (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
#define NPDEPGS         (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
#define NPTEPGS         (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))

/* First PML4 slot and slot count of the physmap; defined elsewhere. */
extern int      kernPhysPML4Index;
extern int      kernPhysPML4EntryCount;

/*
 * Kernel PML4 slot assignments, counted down from the last slot (511).
 */
#define KERNEL_PML4_INDEX               511
#define KERNEL_KEXTS_INDEX              (KERNEL_PML4_INDEX - 1)         /* 510: Home of KEXTs - the basement */
#define KERNEL_PHYSMAP_PML4_INDEX       (kernPhysPML4Index)             /* 50X: virtual to physical map */
#define KERNEL_PHYSMAP_PML4_COUNT       (kernPhysPML4EntryCount)
#define KERNEL_PHYSMAP_PML4_COUNT_MAX   (16 - 2)        /* 1 for KERNEL, 1 for BASEMENT */
/* 2 PML4s for KASAN to cover a maximum of 16 PML4s {PHYSMAP + BASEMENT + KVA} */
#define KERNEL_KASAN_PML4_LAST          (495)           /* 511 - 16 */
#define KERNEL_KASAN_PML4_FIRST         (494)           /* 511 - 17 */
#define KERNEL_DBLMAP_PML4_INDEX        (KERNEL_KASAN_PML4_FIRST - 1)   /* 493 */
#define KERNEL_PML4_COUNT               1
/* Top KERNEL_PML4_COUNT * NBPML4 bytes of the 64-bit address space. */
#define KERNEL_BASE                     (0ULL - (NBPML4 * KERNEL_PML4_COUNT))
#define KERNEL_BASEMENT                 (KERNEL_BASE - NBPML4)          /* Basement uses one PML4 entry */
228
/*
 * VM_WIMG_* cache-attribute names, expressed as combinations of the
 * VM_MEM_* flags.
 */
#define VM_WIMG_COPYBACK        VM_MEM_COHERENT
#define VM_WIMG_COPYBACKLW      VM_WIMG_COPYBACK
#define VM_WIMG_DEFAULT         VM_MEM_COHERENT
/* ?? intel ?? */
#define VM_WIMG_IO              (VM_MEM_COHERENT |      \
	                        VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
#define VM_WIMG_POSTED          VM_WIMG_IO
#define VM_WIMG_WTHRU           (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
/* write combining mode, aka store gather */
#define VM_WIMG_WCOMB           (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
#define VM_WIMG_INNERWBACK      VM_MEM_COHERENT
/*
 * Pte related macros
 */
/*
 * KVADDR(pmi, pdpi, pdi, pti): build a virtual address with bits 47..63 all
 * set, OR-ed with the four table indices shifted into place (PML4, PDPT,
 * PD, PT).  The indices are not masked, so callers must pass in-range values.
 */
#define KVADDR(pmi, pdpi, pdi, pti)             \
	        ((vm_offset_t)                  \
	                ((uint64_t) -1    << 47)        | \
	                ((uint64_t)(pmi)  << PML4SHIFT) | \
	                ((uint64_t)(pdpi) << PDPTSHIFT) | \
	                ((uint64_t)(pdi)  << PDESHIFT)  | \
	                ((uint64_t)(pti)  << PTESHIFT))


#ifndef NKPT
#define NKPT            500     /* actual number of bootstrap kernel page tables */
#endif
255
256
257
/*
 * Convert address offset to page descriptor index
 *
 * Note: the `pmap` argument of these macros is accepted but never used.
 * pdptnum()/pdenum() mask the shifted address with PDPTMASK/PDEMASK
 * (the byte-offset masks), not with a 9-bit per-table index mask.
 */
#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
#define PMAP_INVALID_PDPTNUM (~0ULL)

/*
 * Linear index of the entry covering `a` at each level, counted across the
 * whole 48-bit address space (the mask keeps 48 - shift bits).
 */
#define pdeidx(pmap, a)  (((a) >> PDSHIFT)   & ((1ULL<<(48 - PDSHIFT)) -1))
#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))


/*
 * Convert page descriptor index to user virtual address
 */
#define pdetova(a)      ((vm_offset_t)(a) << PDESHIFT)

/*
 * Convert address offset to page table index
 * (9-bit index: PTEMASK is 0x1ff)
 */
#define ptenum(a)       (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
279
/*
 * Hardware pte bit definitions (to be used directly on the ptes
 * without using the bit fields).
 */

#define INTEL_PTE_VALID         0x00000001ULL   /* bit 0 */

#define INTEL_PTE_WRITE         0x00000002ULL   /* bit 1 */
#define INTEL_PTE_RW            0x00000002ULL   /* same bit as INTEL_PTE_WRITE */

#define INTEL_PTE_USER          0x00000004ULL   /* bit 2 */

#define INTEL_PTE_WTHRU         0x00000008ULL   /* bit 3 */
#define INTEL_PTE_NCACHE        0x00000010ULL   /* bit 4 */

#define INTEL_PTE_REF           0x00000020ULL   /* bit 5 */
#define INTEL_PTE_MOD           0x00000040ULL   /* bit 6 */

#define INTEL_PTE_PS            0x00000080ULL   /* bit 7 */
#define INTEL_PTE_PAT           0x00000080ULL   /* bit 7 as well: shares the PS encoding */

#define INTEL_PTE_GLOBAL        0x00000100ULL   /* bit 8 */

/* These markers use software available bits ignored by the
 * processor's 4-level and EPT pagetable walkers.
 * N.B.: WIRED was originally bit 10, but that conflicts with
 * execute permissions for EPT entries iff mode-based execute controls
 * are enabled.
 */
#define INTEL_PTE_SWLOCK        (0x1ULL << 52)
#define INTEL_PDPTE_NESTED      (0x1ULL << 53)
#define INTEL_PTE_WIRED         (0x1ULL << 54)
/* TODO: Compressed markers, potential conflict with protection keys? */
#define INTEL_PTE_COMPRESSED_ALT (1ULL << 61) /* compressed but with "alternate accounting" */
#define INTEL_PTE_COMPRESSED    (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/

/* Physical frame field of an entry (same mask as PG_FRAME). */
#define INTEL_PTE_PFN           PG_FRAME
/* TODO: these should be internal definitions */
#define INTEL_PTE_NX            (1ULL << 63)

#define INTEL_PTE_INVALID       0
/* This is conservative, but suffices */
#define INTEL_PTE_RSVD          ((1ULL << 10) | (1ULL << 11))
323
324
/* Bits that a "compressed" marker PTE is allowed to carry. */
#define INTEL_PTE_COMPRESSED_MASK (INTEL_PTE_COMPRESSED | \
	                           INTEL_PTE_COMPRESSED_ALT | INTEL_PTE_SWLOCK)
/*
 * PTE_IS_COMPRESSED(x, ptep): true when the PTE value `x` is an invalid
 * entry carrying the INTEL_PTE_COMPRESSED marker and no bits outside
 * INTEL_PTE_COMPRESSED_MASK.  If the marker is present together with other
 * bits, panic_compressed_pte_corrupt() is called instead.
 * `x` must be an lvalue (its address is taken) and is evaluated several times.
 */
#define PTE_IS_COMPRESSED(x, ptep)                                      \
	((((x) & INTEL_PTE_VALID) == 0) && /* PTE is not valid... */    \
	 ((x) & INTEL_PTE_COMPRESSED) && /* ...has "compressed" marker */ \
	 ((!((x) & ~INTEL_PTE_COMPRESSED_MASK)) || /* ...no other bits */ \
	  (panic_compressed_pte_corrupt((x), &(x), (ptep)), FALSE)))
332
333 static inline void
334 panic_compressed_pte_corrupt(uint64_t pte, uint64_t *pte_addr, uint64_t *ptep)
335 {
336 uint64_t *adj_pteps[2];
337 int pteidx = ((uintptr_t)ptep & INTEL_OFFMASK) / sizeof(pt_entry_t);
338 /*
339 * Grab pointers to PTEs on either side of the PTE in question, unless we're at the start of
340 * a PT (grab pointers to the next and next-next PTEs) or the end of a PT (grab the previous
341 * 2 PTEs).
342 */
343 if (pteidx == 0) {
344 adj_pteps[0] = ptep + 1;
345 adj_pteps[1] = ptep + 2;
346 } else if (pteidx == (NPTPG - 1)) {
347 adj_pteps[0] = ptep - 2;
348 adj_pteps[1] = ptep - 1;
349 } else {
350 adj_pteps[0] = ptep - 1;
351 adj_pteps[1] = ptep + 1;
352 }
353
354 panic("compressed PTE %p 0x%llx has extra bits 0x%llx: corrupted? Adjacent PTEs: 0x%llx@%p, 0x%llx@%p",
355 pte_addr, pte, pte & ~INTEL_PTE_COMPRESSED_MASK, *adj_pteps[0], adj_pteps[0], *adj_pteps[1], adj_pteps[1]);
356 /*NOTREACHED*/
357 }
358
/*
 * Physical address <-> PTE helpers.  Both conversions simply mask with
 * INTEL_PTE_PFN; pte_increment_pa() advances a PTE lvalue by one page
 * (INTEL_OFFMASK + 1 bytes).
 */
#define pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
#define pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)

/* Build a valid PTE for physical address p with the named permissions. */
#define pte_kernel_rw(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))

#define PMAP_INVEPT_SINGLE_CONTEXT      1


#define INTEL_EPTP_AD           0x00000040ULL

/* EPT entry bits. */
#define INTEL_EPT_READ          0x00000001ULL
#define INTEL_EPT_WRITE         0x00000002ULL
#define INTEL_EPT_EX            0x00000004ULL
#define INTEL_EPT_IPAT          0x00000040ULL
#define INTEL_EPT_PS            0x00000080ULL
#define INTEL_EPT_REF           0x00000100ULL
#define INTEL_EPT_MOD           0x00000200ULL

/* EPT cache-type field (bits 3..5) and its encodings. */
#define INTEL_EPT_CACHE_MASK    0x00000038ULL
#define INTEL_EPT_NCACHE        0x00000000ULL
#define INTEL_EPT_WC            0x00000008ULL
#define INTEL_EPT_WTHRU         0x00000020ULL
#define INTEL_EPT_WP            0x00000028ULL
#define INTEL_EPT_WB            0x00000030ULL
387
388 /*
389 * Routines to filter correct bits depending on the pmap type
390 */
391
392 static inline pt_entry_t
393 pte_remove_ex(pt_entry_t pte, boolean_t is_ept)
394 {
395 if (__probable(!is_ept)) {
396 return pte | INTEL_PTE_NX;
397 }
398
399 return pte & (~INTEL_EPT_EX);
400 }
401
402 static inline pt_entry_t
403 pte_set_ex(pt_entry_t pte, boolean_t is_ept)
404 {
405 if (__probable(!is_ept)) {
406 return pte & (~INTEL_PTE_NX);
407 }
408
409 return pte | INTEL_EPT_EX;
410 }
411
412 static inline pt_entry_t
413 physmap_refmod_to_ept(pt_entry_t physmap_pte)
414 {
415 pt_entry_t ept_pte = 0;
416
417 if (physmap_pte & INTEL_PTE_MOD) {
418 ept_pte |= INTEL_EPT_MOD;
419 }
420
421 if (physmap_pte & INTEL_PTE_REF) {
422 ept_pte |= INTEL_EPT_REF;
423 }
424
425 return ept_pte;
426 }
427
428 static inline pt_entry_t
429 ept_refmod_to_physmap(pt_entry_t ept_pte)
430 {
431 pt_entry_t physmap_pte = 0;
432
433 assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0);
434
435 if (ept_pte & INTEL_EPT_REF) {
436 physmap_pte |= INTEL_PTE_REF;
437 }
438
439 if (ept_pte & INTEL_EPT_MOD) {
440 physmap_pte |= INTEL_PTE_MOD;
441 }
442
443 return physmap_pte;
444 }
445
/*
 * Note: Not all Intel processors support EPT referenced access and dirty bits.
 * During pmap_init() we check the VMX capability for the current hardware
 * and update this variable accordingly.
 */
extern boolean_t pmap_ept_support_ad;

/*
 * Pagetable-format-neutral bit selectors.  Macros taking `is_ept` yield the
 * EPT bit when it is true and the regular PTE bit otherwise; the remaining
 * ones are the same for both formats.  Note that PTE_NCACHE(TRUE) is
 * INTEL_EPT_NCACHE, which is 0.
 */
#define PTE_VALID_MASK(is_ept)  ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
#define PTE_READ(is_ept)        ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
#define PTE_WRITE(is_ept)       ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
#define PTE_PS                  INTEL_PTE_PS
#define PTE_COMPRESSED          INTEL_PTE_COMPRESSED
#define PTE_COMPRESSED_ALT      INTEL_PTE_COMPRESSED_ALT
#define PTE_NCACHE(is_ept)      ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
#define PTE_WTHRU(is_ept)       ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
#define PTE_REF(is_ept)         ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
#define PTE_MOD(is_ept)         ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
#define PTE_WIRED               INTEL_PTE_WIRED


/* Cache-control flag values (0 plus individual bits 1, 2, 4, 8). */
#define PMAP_DEFAULT_CACHE      0
#define PMAP_INHIBIT_CACHE      1
#define PMAP_GUARDED_CACHE      2
#define PMAP_ACTIVATE_CACHE     4
#define PMAP_NO_GUARD_CACHE     8

/* Per-pmap ledger operations: forward to the ledger attached to pmap `p`. */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
475
476 #ifndef ASSEMBLER
477
478 #include <sys/queue.h>
479
/*
 * Address of current and alternate address space page table maps
 * and directories.
 */

extern pt_entry_t       *PTmap;
extern pdpt_entry_t     *IdlePDPT;
extern pml4_entry_t     *IdlePML4;
extern boolean_t        no_shared_cr3;  /* debugging mode, see set_dirbase() */
extern pd_entry_t       *IdlePTD;       /* physical addr of "Idle" state PTD */

extern uint64_t         pmap_pv_hashlist_walks;
extern uint64_t         pmap_pv_hashlist_cnts;
extern uint32_t         pmap_pv_hashlist_max;
extern uint32_t         pmap_kernel_text_ps;

/* Keep only the low 32 bits of x and return them as a pointer. */
#define ID_MAP_VTOP(x)  ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))

/* Virtual bounds of the physmap; PHYSMAP_PTOV() adds physmap_base. */
extern  uint64_t physmap_base, physmap_max;

/* Size of the physmap window in GB units, never less than 4. */
#define NPHYSMAP (MAX(((physmap_max - physmap_base) / GB), 4))
501
502 static inline boolean_t
503 physmap_enclosed(addr64_t a)
504 {
505 return a < (NPHYSMAP * GB);
506 }
507
508 static inline void *
509 PHYSMAP_PTOV_check(void *paddr)
510 {
511 uint64_t pvaddr = (uint64_t)paddr + physmap_base;
512
513 if (__improbable(pvaddr >= physmap_max)) {
514 panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
515 pvaddr, physmap_base, physmap_max);
516 }
517
518 return (void *)pvaddr;
519 }
520
521 #define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
522 #if MACH_KERNEL_PRIVATE
/* Bounds of the double-mapped region and the offset added to reach it. */
extern uint64_t dblmap_base, dblmap_max, dblmap_dist;

/*
 * DBLMAP_CHECK: translate address `x` into the double-mapped region by
 * adding dblmap_dist, and panic unless the result lies within
 * [dblmap_base, dblmap_max).
 */
static inline uint64_t
DBLMAP_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + dblmap_dist;
	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("DBLMAP bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
/*
 * The argument is parenthesized before the cast so that compound
 * expressions (a + b, c ? a : b, ...) are converted as a whole rather than
 * having the cast bind to their first operand only.
 */
#define DBLMAP(x) (DBLMAP_CHECK((uint64_t)(x)))
536 extern uint64_t ldt_alias_offset;
537 static inline uint64_t
538 LDTALIAS_CHECK(uintptr_t x)
539 {
540 uint64_t dbladdr = (uint64_t)x + ldt_alias_offset;
541 if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
542 panic("LDTALIAS: bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
543 (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
544 }
545 return dbladdr;
546 }
547 #define LDTALIAS(x) (LDTALIAS_CHECK((uint64_t) x))
548 #endif
549
/*
 * For KASLR, we alias the master processor's IDT and GDT at fixed
 * virtual addresses to defeat SIDT/SGDT address leakage.
 * The GDTs of non-boot processors are aliased likewise (skipping
 * LOWGLOBAL_ALIAS).
 * The low global vector page is also mapped at a fixed alias.
 */
#define LOWGLOBAL_ALIAS         (VM_MIN_KERNEL_ADDRESS + 0x2000)

/*
 * This indicates (roughly) where there is free space for the VM
 * to use for the heap; this does not need to be precise.
 */
#define KERNEL_PMAP_HEAP_RANGE_START VM_MIN_KERNEL_AND_KEXT_ADDRESS
563
564 #if MACH_KERNEL_PRIVATE
/*
 * Defined elsewhere.  Judging by the name and parameters this invalidates
 * TLB entries for [startv, endv) -- TODO confirm exact semantics of `global`
 * and `pcid` against the implementation.
 */
extern void
pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid);
567
568 #include <vm/vm_page.h>
569
570 /*
571 * For each vm_page_t, there is a list of all currently
572 * valid virtual mappings of that page. An entry is
573 * a pv_entry_t; the list is the pv_table.
574 */
575
/*
 * struct pmap: machine-dependent physical map.
 *
 * A pmap is either a regular one (pm_cr3 != 0, pm_eptp == 0) or an EPT one
 * (pm_cr3 == 0, pm_eptp != 0); see is_ept_pmap().
 * pmap_rwl and pm_cr3 are each aligned to 64 bytes.
 */
struct pmap {
	lck_rw_t        pmap_rwl __attribute((aligned(64)));   /* read/write lock (presumably guards this pmap) */
	pmap_paddr_t    pm_cr3 __attribute((aligned(64))); /* Kernel+user shared PML4 physical*/
	pmap_paddr_t    pm_ucr3;        /* Mirrored user PML4 physical */
	pml4_entry_t    *pm_pml4;       /* VKA of top level */
	pml4_entry_t    *pm_upml4;      /* Shadow VKA of top level */
	pmap_paddr_t    pm_eptp;        /* EPTP */
	task_map_t      pm_task_map;    /* copied to the per-CPU data by set_dirbase() */
	boolean_t       pagezero_accessible;    /* consulted by set_dirbase() when choosing CR3 */
#define PMAP_PCID_MAX_CPUS      MAX_CPUS        /* Must be a multiple of 8 */
	pcid_t          pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];     /* one PCID slot per CPU */
	volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];       /* one byte per CPU */
	boolean_t       pm_shared;
	vm_object_t     pm_obj;         /* object to hold pde's */
	vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
	vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
#if     DEVELOPMENT || DEBUG
	int             nx_enabled;
#endif
	int             ref_count;
	ledger_t        ledger;         /* ledger tracking phys mappings */
	struct pmap_statistics  stats;  /* map statistics */
#if     MACH_ASSERT
	boolean_t       pmap_stats_assert;
	int             pmap_pid;
	char            pmap_procname[17];
#endif /* MACH_ASSERT */
};
604
605 static inline boolean_t
606 is_ept_pmap(pmap_t p)
607 {
608 if (__probable(p->pm_cr3 != 0)) {
609 assert(p->pm_eptp == 0);
610 return FALSE;
611 }
612
613 assert(p->pm_eptp != 0);
614
615 return TRUE;
616 }
617
/* Defined elsewhere; both results are returned through the out-pointers. */
void hv_ept_pmap_create(void **ept_pmap, void **eptp);

/*
 * Per-CPU mapping windows, only present when NCOPY_WINDOWS > 0.
 * Window slots are laid out consecutively: PDPT windows, then PDE windows,
 * then PTE windows, followed by PMAP_WINDOW_SIZE further slots starting at
 * PMAP_NWINDOWS_FIRSTFREE.
 */
#if NCOPY_WINDOWS > 0
#define PMAP_PDPT_FIRST_WINDOW 0
#define PMAP_PDPT_NWINDOWS 4
#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS)
#define PMAP_PDE_NWINDOWS 4
#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
#define PMAP_PTE_NWINDOWS 4

#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
#define PMAP_WINDOW_SIZE 8
#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)

/* One mapping window: a PTE pointer paired with an address. */
typedef struct {
	pt_entry_t      *prv_CMAP;
	caddr_t         prv_CADDR;
} mapwindow_t;

/* Per-CPU window state: one index per table level plus the window array. */
typedef struct cpu_pmap {
	int                     pdpt_window_index;
	int                     pde_window_index;
	int                     pte_window_index;
	mapwindow_t             mapwindow[PMAP_NWINDOWS];
} cpu_pmap_t;


extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
extern void         pmap_put_mapwindow(mapwindow_t *map);
#endif
648
/*
 * Description of one region of physical memory, in page numbers.
 */
typedef struct pmap_memory_regions {
	ppnum_t base;            /* first page of this region */
	ppnum_t alloc_up;        /* pages below this one have been "stolen" */
	ppnum_t alloc_down;      /* pages above this one have been "stolen" */
	ppnum_t end;             /* last page of this region */
	uint32_t type;
	uint64_t attribute;
} pmap_memory_region_t;

extern unsigned pmap_memory_region_count;
extern unsigned pmap_memory_region_current;

#define PMAP_MEMORY_REGIONS_SIZE 128

/* Defined elsewhere; presumably PMAP_MEMORY_REGIONS_SIZE entries -- TODO confirm. */
extern pmap_memory_region_t pmap_memory_regions[];
664 #include <i386/pmap_pcid.h>
665
/*
 * set_dirbase: make `tpmap` the task pmap of CPU `my_cpu` on behalf of
 * `thread`, switching CR3 where required.
 *
 * Must be called on `my_cpu` itself, with preemption disabled or interrupts
 * off (both conditions are asserted).
 *
 * The per-CPU data and its shadow copy are first updated with the pmap's
 * CR3 values and task map; then CR3 is (re)loaded depending on
 * no_shared_cr3, the pmap's pagezero_accessible flag, whether the thread has
 * CopyIOActive set, and whether PCIDs are in use (pmap_pcid_ncpus != 0).
 */
static inline void
set_dirbase(pmap_t tpmap, thread_t thread, int my_cpu)
{
	int ccpu = my_cpu;
	uint64_t pcr3 = tpmap->pm_cr3, ucr3 = tpmap->pm_ucr3;
	/* Record the new task CR3 values in the per-CPU data and its shadow. */
	cpu_datap(ccpu)->cpu_task_cr3 = pcr3;
	cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = pcr3;

	cpu_datap(ccpu)->cpu_ucr3 = ucr3;
	cpu_shadowp(ccpu)->cpu_ucr3 = ucr3;

	cpu_datap(ccpu)->cpu_task_map = cpu_shadowp(ccpu)->cpu_task_map =
	    tpmap->pm_task_map;

	assert((get_preemption_level() > 0) || (ml_get_interrupts_enabled() == FALSE));
	assert(ccpu == cpu_number());
	/*
	 * Switch cr3 if necessary
	 * - unless running with no_shared_cr3 debugging mode
	 * and we're not on the kernel's cr3 (after pre-empted copyio)
	 */
	/*
	 * Despite its name, `nopagezero` simply holds the new pmap's
	 * pagezero_accessible flag; `priorpagezero` is the value this CPU
	 * recorded for the previously activated pmap.
	 */
	boolean_t nopagezero = tpmap->pagezero_accessible;
	boolean_t priorpagezero = cpu_datap(ccpu)->cpu_pagezero_mapped;
	cpu_datap(ccpu)->cpu_pagezero_mapped = nopagezero;

	if (__probable(!no_shared_cr3)) {
		if (__improbable(nopagezero)) {
			/*
			 * Flag set: run on the pmap's CR3 only while a copyio
			 * is active, otherwise on this CPU's kernel CR3.
			 */
			boolean_t copyio_active = ((thread->machine.specFlags & CopyIOActive) != 0);
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, TRUE, copyio_active);
			} else {
				if (copyio_active) {
					if (get_cr3_base() != tpmap->pm_cr3) {
						set_cr3_raw(tpmap->pm_cr3);
					}
				} else if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
					set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
				}
			}
		} else if ((get_cr3_base() != tpmap->pm_cr3) || priorpagezero) {
			/*
			 * Common case: load the pmap's CR3 if it differs from
			 * the current one, or if the previous pmap had the
			 * pagezero flag set.
			 */
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, FALSE, FALSE);
			} else {
				set_cr3_raw(tpmap->pm_cr3);
			}
		}
	} else {
		/* no_shared_cr3 debugging mode: stay on the kernel's CR3. */
		if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
			set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
		}
	}
}
718
/*
 * External declarations for PMAP_ACTIVATE.
 *
 * Everything below is defined in other translation units; only the
 * prototypes are visible here.
 */

/* Invoked by MARK_CPU_ACTIVE() to process updates queued while idle. */
extern void             pmap_update_interrupt(void);

/* Name is parenthesized so a function-like macro of the same name is not expanded. */
extern addr64_t(kvtophys)(
	vm_offset_t     addr);

extern kern_return_t    pmap_expand(
	pmap_t          pmap,
	vm_map_offset_t addr,
	unsigned int    options);
extern vm_offset_t      pmap_map(
	vm_offset_t     virt,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       prot,
	unsigned int    flags);

extern vm_offset_t      pmap_map_bd(
	vm_offset_t     virt,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       prot,
	unsigned int    flags);
extern void             pmap_bootstrap(
	vm_offset_t     load_start,
	boolean_t       IA32e);

extern boolean_t        pmap_valid_page(
	ppnum_t pn);

extern int              pmap_list_resident_pages(
	struct pmap     *pmap,
	vm_offset_t     *listp,
	int             space);
extern void             x86_filter_TLB_coherency_interrupts(boolean_t);
/*
 * Get cache attributes (as pagetable bits) for the specified phys page
 */
extern  unsigned        pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
#if NCOPY_WINDOWS > 0
extern struct cpu_pmap  *pmap_cpu_alloc(
	boolean_t       is_boot_cpu);
extern void             pmap_cpu_free(
	struct cpu_pmap *cp);
#endif

extern kern_return_t    pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	unsigned int flags);

extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
extern ppnum_t          pmap_find_phys(pmap_t map, addr64_t va);

extern void pmap_cpu_init(void);
extern void pmap_disable_NX(pmap_t pmap);

extern void pt_fake_zone_init(int);
extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
    uint64_t *, int *, int *, int *);
/* Takes a printf-like logging callback. */
extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1, 2));
788
789 /*
790 * Macros for speed.
791 */
792
793
794 #include <kern/spl.h>
795
796
/*
 * PMAP_ACTIVATE_MAP(map, thread, my_cpu): look up the pmap behind `map`
 * and install it on `my_cpu` via set_dirbase().
 * Expands to a braced block, not an expression.
 */
#define PMAP_ACTIVATE_MAP(map, thread, my_cpu)  {                               \
	pmap_t          tpmap;                                  \
                                                                        \
	tpmap = vm_map_pmap(map);                                       \
	set_dirbase(tpmap, thread, my_cpu);                                     \
}

/*
 * PMAP_DEACTIVATE_MAP: on x86_64 this only performs a consistency check
 * (through pmap_assert2): when PCIDs are in use, the PCID expected for
 * (map->pmap, thread, ccpu) must equal the low 12 bits of CR3.
 * On other architectures it expands to nothing and takes two arguments.
 * Note: the x86_64 expansion already ends in a semicolon, and `map` and
 * `thread` are used unparenthesized, so pass simple expressions.
 */
#if defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)                          \
	pmap_assert2((pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE),"PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x", pmap_pcid_ncpus, pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu), get_cr3_raw(), map->pmap->pm_cr3, cpu_datap(ccpu)->cpu_kernel_cr3, kernel_pmap->pm_cr3, cpu_datap(ccpu)->cpu_active_pcid, cpu_datap(ccpu)->cpu_kernel_pcid, thread->machine.specFlags, map->pmap->pagezero_accessible);
#else
#define PMAP_DEACTIVATE_MAP(map, thread)
#endif
810
/*
 * PMAP_SWITCH_USER(th, new_map, my_cpu): switch thread `th` to `new_map`.
 *
 * With interrupts raised to splhigh, the thread's current map is
 * deactivated, th->map is replaced by new_map, and the new map is activated
 * on `my_cpu`.  The NCOPY_WINDOWS > 0 variant additionally invalidates the
 * thread's copy windows once the previous spl has been restored.
 *
 * PMAP_ACTIVATE_MAP() takes three arguments in every configuration, so both
 * variants pass `my_cpu` to it.
 * NOTE(review): the NCOPY_WINDOWS > 0 variant still uses the two-argument
 * form of PMAP_DEACTIVATE_MAP(), which only exists when __x86_64__ is not
 * defined.
 */
#if NCOPY_WINDOWS > 0
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
	spl_t           spl;                                            \
                                                                        \
	spl = splhigh();                                                \
	PMAP_DEACTIVATE_MAP(th->map, th);                               \
	th->map = new_map;                                              \
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);                         \
	splx(spl);                                                      \
	inval_copy_windows(th);                                         \
}
#else
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
	spl_t           spl;                                            \
                                                                        \
	spl = splhigh();                                                \
	PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);                       \
	th->map = new_map;                                              \
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);                         \
	splx(spl);                                                      \
}
#endif
833
/*
 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
 * Note that valid page tables are page-aligned and so the bottom 12 bits
 * are normally zero, modulo PCID.
 * We can only mark the current cpu active/inactive but we can test any cpu.
 */
#define CPU_CR3_MARK_INACTIVE()                                         \
	current_cpu_datap()->cpu_active_cr3 |= 1

#define CPU_CR3_MARK_ACTIVE()                                           \
	current_cpu_datap()->cpu_active_cr3 &= ~1

/* True when the given cpu's active-cr3 word has its lsb clear. */
#define CPU_CR3_IS_ACTIVE(cpu)                                          \
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

/* The given cpu's active cr3 with the inactive marker bit masked off. */
#define CPU_GET_ACTIVE_CR3(cpu)                                         \
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

/* The task cr3 recorded for the given cpu (written by set_dirbase()). */
#define CPU_GET_TASK_CR3(cpu)                                           \
	(cpu_datap(cpu)->cpu_task_cr3)
855
/*
 *	Mark this cpu idle, and remove it from the active set,
 *	since it is not actively using any pmap.  Signal_cpus
 *	will notice that it is idle, and avoid signaling it,
 *	but will queue the update request for when the cpu
 *	becomes active.
 *
 *	Both macros require interrupts to be disabled (asserted), act on the
 *	current cpu regardless of the `my_cpu` argument (which is unused), and
 *	expand to a braced block rather than an expression.
 */
#define MARK_CPU_IDLE(my_cpu)   {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	CPU_CR3_MARK_INACTIVE();                                        \
	mfence();                                                                       \
}

#define MARK_CPU_ACTIVE(my_cpu) {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	/* \
	 *	If a kernel_pmap update was requested while this cpu \
	 *	was idle, process it as if we got the interrupt. \
	 *	Before doing so, remove this cpu from the idle set. \
	 *	Since we do not grab any pmap locks while we flush \
	 *	our TLB, another cpu may start an update operation \
	 *	before we finish.  Removing this cpu from the idle \
	 *	set assures that we will receive another update \
	 *	interrupt if this happens. \
	 */                                                             \
	CPU_CR3_MARK_ACTIVE();                                          \
	mfence();                                                       \
	pmap_update_interrupt();                                        \
}
885
/* No per-thread pmap context on this architecture: expands to nothing. */
#define PMAP_CONTEXT(pmap, thread)

/* True when VA lies within [vm_min_kernel_address, vm_max_kernel_address]. */
#define pmap_kernel_va(VA)      \
	((((vm_offset_t) (VA)) >= vm_min_kernel_address) &&     \
	 (((vm_offset_t) (VA)) <= vm_max_kernel_address))


/* Accessors for the pmap's statistics fields. */
#define pmap_compressed(pmap)           ((pmap)->stats.compressed)
#define pmap_resident_count(pmap)       ((pmap)->stats.resident_count)
#define pmap_resident_max(pmap)         ((pmap)->stats.resident_max)
/* pmap_copy() is a no-op here; the attribute calls always fail. */
#define pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
#define pmap_attribute(pmap, addr, size, attr, value) \
	                                (KERN_INVALID_ADDRESS)
#define pmap_attribute_cache_sync(addr, size, attr, value) \
	                                (KERN_INVALID_ADDRESS)

#define MACHINE_PMAP_IS_EMPTY   1
extern boolean_t pmap_is_empty(pmap_t           pmap,
    vm_map_offset_t  start,
    vm_map_offset_t  end);

#define MACHINE_BOOTSTRAPPTD    1       /* Static bootstrap page-tables */

/* Defined elsewhere. */
kern_return_t
pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
911
/*
 * PMAP_STATS_ASSERTF(args): on MACH_ASSERT kernels, invoke assertf with the
 * parenthesized argument list `args` whenever the global pmap_stats_assert
 * is non-zero; compiled out otherwise.  `args` must carry its own
 * parentheses, e.g. PMAP_STATS_ASSERTF((cond, "fmt", ...)).
 */
#if MACH_ASSERT
extern int pmap_stats_assert;
#define PMAP_STATS_ASSERTF(args)                \
	MACRO_BEGIN                             \
	if (pmap_stats_assert) assertf args;    \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(args)
#endif /* MACH_ASSERT */
921 #endif /* MACH_KERNEL_PRIVATE */
922 #endif /* ASSEMBLER */
923 #endif /* _PMAP_MACHINE_ */
924 #endif /* KERNEL_PRIVATE */