]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pmap.h
xnu-6153.11.26.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap.h
CommitLineData
1c79356b 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.h
61 *
62 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Machine-dependent structures for the physical map module.
66 */
0c530ab8 67#ifdef KERNEL_PRIVATE
0a7de745
A
68#ifndef _PMAP_MACHINE_
69#define _PMAP_MACHINE_ 1
1c79356b 70
0a7de745 71#ifndef ASSEMBLER
1c79356b 72
1c79356b
A
73#include <mach/kern_return.h>
74#include <mach/machine/vm_types.h>
75#include <mach/vm_prot.h>
76#include <mach/vm_statistics.h>
77#include <mach/machine/vm_param.h>
78#include <kern/kern_types.h>
91447636 79#include <kern/thread.h>
fe8ab488 80#include <kern/simple_lock.h>
0c530ab8
A
81
82#include <i386/mp.h>
83#include <i386/proc_reg.h>
1c79356b 84
6d2010ae
A
85#include <i386/pal_routines.h>
86
1c79356b
A
87/*
88 * Define the generic in terms of the specific
89 */
90
/* Generic "intel" page helpers, each a direct alias of its i386 counterpart. */
#define INTEL_PGBYTES           I386_PGBYTES
#define INTEL_PGSHIFT           I386_PGSHIFT
#define intel_btop(x)           i386_btop(x)
#define intel_ptob(x)           i386_ptob(x)
#define intel_round_page(x)     i386_round_page(x)
#define intel_trunc_page(x)     i386_trunc_page(x)
1c79356b
A
97
98/*
99 * i386/i486/i860 Page Table Entry
100 */
101
0a7de745 102#endif /* ASSEMBLER */
1c79356b 103
/*
 * Page-table geometry.  These are plain preprocessor constants so that
 * they remain visible outside the !ASSEMBLER region.
 */
#define NPGPTD          4ULL
#define PDESHIFT        21ULL           /* log2 of bytes mapped by one PDE */
#define PTEMASK         0x1ffULL        /* 9-bit index within a page table */
#define PTEINDX         3ULL            /* log2 of sizeof(pt_entry_t) */

#define PTESHIFT        12ULL           /* log2 of bytes mapped by one PTE */

#define LOW_4GB_MASK    ((vm_offset_t)0x00000000FFFFFFFFUL)

#define PDESIZE         sizeof(pd_entry_t) /* for assembly files */
#define PTESIZE         sizeof(pt_entry_t) /* for assembly files */

#define INTEL_OFFMASK   (I386_PGBYTES - 1)
#define INTEL_LOFFMASK  (I386_LPGBYTES - 1)
#define PG_FRAME        0x000FFFFFFFFFF000ULL   /* bits 12..51 of an entry */
#define NPTEPG          (PAGE_SIZE/(sizeof (pt_entry_t)))
#define NPTDPG          (PAGE_SIZE/(sizeof (pd_entry_t)))

#define NBPTD           (NPGPTD << PAGE_SHIFT)
#define NPDEPTD         (NBPTD / (sizeof (pd_entry_t)))
#define NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NBPDE           (1ULL << PDESHIFT)      /* bytes mapped by one PDE */
#define PDEMASK         (NBPDE - 1)             /* byte offset within a PDE span */

#define PTE_PER_PAGE    512 /* number of PTE's per page on any level */
b0d623f7 129
/*
 * Cleanly define parameters for all the page table levels.
 *
 * For each level X: NXPG is the number of entries per page, XSHIFT the
 * log2 of the bytes one entry maps, XPGSHIFT the log2 of entries per
 * table, NBX the bytes one entry maps, XMASK the byte-offset mask within
 * that span, and X_ENTRY_NULL the null entry pointer.
 */
typedef uint64_t        pml4_entry_t;
#define NPML4PG         (PAGE_SIZE/(sizeof (pml4_entry_t)))
#define PML4SHIFT       39
#define PML4PGSHIFT     9
#define NBPML4          (1ULL << PML4SHIFT)
#define PML4MASK        (NBPML4-1)
#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)

typedef uint64_t        pdpt_entry_t;
#define NPDPTPG         (PAGE_SIZE/(sizeof (pdpt_entry_t)))
#define PDPTSHIFT       30
#define PDPTPGSHIFT     9
#define NBPDPT          (1ULL << PDPTSHIFT)
#define PDPTMASK        (NBPDPT-1)
#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)

typedef uint64_t        pd_entry_t;
#define NPDPG           (PAGE_SIZE/(sizeof (pd_entry_t)))
#define PDSHIFT         21
#define PDPGSHIFT       9
#define NBPD            (1ULL << PDSHIFT)
#define PDMASK          (NBPD-1)
#define PD_ENTRY_NULL   ((pd_entry_t *) 0)

typedef uint64_t        pt_entry_t;
#define NPTPG           (PAGE_SIZE/(sizeof (pt_entry_t)))
#define PTSHIFT         12
#define PTPGSHIFT       9
#define NBPT            (1ULL << PTSHIFT)
#define PTMASK          (NBPT-1)
#define PT_ENTRY_NULL   ((pt_entry_t *) 0)

/* Physical address as seen by the pmap layer. */
typedef uint64_t        pmap_paddr_t;
164
/*
 * pmap assertions.
 *
 * On DEVELOPMENT/DEBUG builds PMAP_ASSERT is defined and the checks are
 * additionally gated at run time by pmap_asserts_enabled.  pmap_assert2()
 * takes a printf-style message; when pmap_asserts_traced is set it emits
 * a kdebug record and clears kdebug_enable instead of panicking.
 * On other builds both macros expand to nothing, so their arguments are
 * never evaluated.
 */
#if     DEVELOPMENT || DEBUG
#define PMAP_ASSERT 1
extern int pmap_asserts_enabled;
extern int pmap_asserts_traced;
#endif

#if PMAP_ASSERT
#define pmap_assert(ex) (pmap_asserts_enabled ? ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) : (void)0)

#define pmap_assert2(ex, fmt, args...)                                  \
	do {                                                            \
	        if (__improbable(pmap_asserts_enabled && !(ex))) {      \
	                if (pmap_asserts_traced) {                      \
	                        KERNEL_DEBUG_CONSTANT(0xDEAD1000, __builtin_return_address(0), __LINE__, 0, 0, 0); \
	                        kdebug_enable = 0;                      \
	                } else {                                        \
	                        kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
	                        panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
	                }                                               \
	        }                                                       \
	} while(0)
#else
#define pmap_assert(ex)
#define pmap_assert2(ex, fmt, args...)
#endif
190
/* superpages: number of base pages making up one superpage */
#define SUPERPAGE_NBASEPAGES 512
b0d623f7 193
0c530ab8
A
194/*
195 * Atomic 64-bit store of a page table entry.
196 */
197static inline void
198pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
199{
b0d623f7
A
200 /*
201 * In the 32-bit kernel a compare-and-exchange loop was
202 * required to provide atomicity. For K64, life is easier:
203 */
204 *entryp = value;
0c530ab8
A
205}
206
/* in 64 bit spaces, the number of each type of page in the page tables */
#define NPML4PGS        (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
#define NPDPTPGS        (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
#define NPDEPGS         (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
#define NPTEPGS         (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))

/* PML4 slot and slot count of the physmap; defined elsewhere. */
extern int      kernPhysPML4Index;
extern int      kernPhysPML4EntryCount;

/*
 * Assignment of the top PML4 slots, counting down from 511.
 */
#define KERNEL_PML4_INDEX               511
#define KERNEL_KEXTS_INDEX              (KERNEL_PML4_INDEX - 1)         /* 510: Home of KEXTs - the basement */
#define KERNEL_PHYSMAP_PML4_INDEX       (kernPhysPML4Index)             /* 50X: virtual to physical map */
#define KERNEL_PHYSMAP_PML4_COUNT       (kernPhysPML4EntryCount)
#define KERNEL_PHYSMAP_PML4_COUNT_MAX   (16 - 2)        /* 1 for KERNEL, 1 for BASEMENT */
/* 2 PML4s for KASAN to cover a maximum of 16 PML4s {PHYSMAP + BASEMENT + KVA} */
#define KERNEL_KASAN_PML4_LAST          (495)           /* 511 - 16 */
#define KERNEL_KASAN_PML4_FIRST         (494)           /* 511 - 17 */
#define KERNEL_DBLMAP_PML4_INDEX        (KERNEL_KASAN_PML4_FIRST - 1)
#define KERNEL_PML4_COUNT               1
#define KERNEL_BASE                     (0ULL - (NBPML4 * KERNEL_PML4_COUNT))
#define KERNEL_BASEMENT                 (KERNEL_BASE - NBPML4)  /* Basement uses one PML4 entry */
228
/*
 * Pte related macros
 */

/*
 * Build a kernel virtual address from its four table indices
 * (PML4, PDPT, PD, PT).  Bits 47..63 are forced to one.
 * Note that the vm_offset_t cast binds only to the first term.
 */
#define KVADDR(pmi, pdpi, pdi, pti)               \
	 ((vm_offset_t)                 \
	        ((uint64_t) -1    << 47)        |       \
	        ((uint64_t)(pmi)  << PML4SHIFT) |       \
	        ((uint64_t)(pdpi) << PDPTSHIFT) |       \
	        ((uint64_t)(pdi)  << PDESHIFT)  |       \
	        ((uint64_t)(pti)  << PTESHIFT))


#ifndef NKPT
#define NKPT            500     /* actual number of bootstrap kernel page tables */
#endif
244
0c530ab8 245
0c530ab8 246
/*
 *	Convert address offset to page descriptor index
 *
 * NOTE(review): pdptnum()/pdenum() mask with PDPTMASK/PDEMASK, which are
 * defined as (NBPDPT - 1)/(NBPDE - 1), i.e. byte-offset masks rather than
 * 9-bit index masks.  Behavior is kept as is; confirm against callers.
 * The pmap argument is unused by every macro in this group.
 */
#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
#define PMAP_INVALID_PDPTNUM (~0ULL)

/* Linear index of the entry at each level, over a 48-bit address. */
#define pdeidx(pmap, a)    (((a) >> PDSHIFT)   & ((1ULL<<(48 - PDSHIFT)) -1))
#define pdptidx(pmap, a)   (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
#define pml4idx(pmap, a)   (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))


/*
 *	Convert page descriptor index to user virtual address
 */
#define pdetova(a)      ((vm_offset_t)(a) << PDESHIFT)

/*
 *	Convert address offset to page table index
 */
#define ptenum(a)       (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
1c79356b 268
1c79356b
A
269/*
270 * Hardware pte bit definitions (to be used directly on the ptes
271 * without using the bit fields).
272 */
273
#define INTEL_PTE_VALID         0x00000001ULL

#define INTEL_PTE_WRITE         0x00000002ULL
#define INTEL_PTE_RW            0x00000002ULL

#define INTEL_PTE_USER          0x00000004ULL

#define INTEL_PTE_WTHRU         0x00000008ULL
#define INTEL_PTE_NCACHE        0x00000010ULL

#define INTEL_PTE_REF           0x00000020ULL
#define INTEL_PTE_MOD           0x00000040ULL

/* PS and PAT share bit 7. */
#define INTEL_PTE_PS            0x00000080ULL
#define INTEL_PTE_PAT           0x00000080ULL

#define INTEL_PTE_GLOBAL        0x00000100ULL

/* These markers use software available bits ignored by the
 * processor's 4-level and EPT pagetable walkers.
 * N.B.: WIRED was originally bit 10, but that conflicts with
 * execute permissions for EPT entries iff mode-based execute controls
 * are enabled.
 */
#define INTEL_PTE_SWLOCK        (0x1ULL << 52)
#define INTEL_PDPTE_NESTED      (0x1ULL << 53)
#define INTEL_PTE_WIRED         (0x1ULL << 54)
/* TODO: Compressed markers, potential conflict with protection keys? */
#define INTEL_PTE_COMPRESSED_ALT (1ULL << 61) /* compressed but with "alternate accounting" */
#define INTEL_PTE_COMPRESSED    (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/

#define INTEL_PTE_PFN           PG_FRAME
/* TODO: these should be internal definitions */
#define INTEL_PTE_NX            (1ULL << 63)

#define INTEL_PTE_INVALID       0
/* This is conservative, but suffices */
#define INTEL_PTE_RSVD          ((1ULL << 10) | (1ULL << 11))
6d2010ae 312
/* All bits a "compressed" marker PTE may legitimately carry. */
#define INTEL_PTE_COMPRESSED_MASK (INTEL_PTE_COMPRESSED | \
	                           INTEL_PTE_COMPRESSED_ALT | INTEL_PTE_SWLOCK)
/*
 * True when x is a compressed-marker PTE.  x must be an lvalue: if other
 * bits are set, its address is handed to
 * pmap_compressed_pte_corruption_repair(), whose result then decides.
 */
#define PTE_IS_COMPRESSED(x, ptep, pmap, vaddr)                            \
	((((x) & INTEL_PTE_VALID) == 0) && /* PTE is not valid... */       \
	 ((x) & INTEL_PTE_COMPRESSED) && /* ...has "compressed" marker */  \
	 ((!((x) & ~INTEL_PTE_COMPRESSED_MASK)) || /* ...no other bits */  \
	  pmap_compressed_pte_corruption_repair((x), &(x), (ptep), (pmap), (vaddr))))

/* Physical address <-> PTE frame field; both simply mask with INTEL_PTE_PFN. */
#define pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
/* Advance the lvalue p by one page (INTEL_OFFMASK + 1 bytes). */
#define pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)

/* Valid PTEs for physical address p with the named access. */
#define pte_kernel_rw(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p)        ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
330
#define PMAP_INVEPT_SINGLE_CONTEXT      1


#define INTEL_EPTP_AD           0x00000040ULL

/* EPT entry permission and status bits. */
#define INTEL_EPT_READ          0x00000001ULL
#define INTEL_EPT_WRITE         0x00000002ULL
#define INTEL_EPT_EX            0x00000004ULL
#define INTEL_EPT_IPAT          0x00000040ULL
#define INTEL_EPT_PS            0x00000080ULL
#define INTEL_EPT_REF           0x00000100ULL
#define INTEL_EPT_MOD           0x00000200ULL

/* EPT memory-type field (INTEL_EPT_CACHE_MASK) and its encodings. */
#define INTEL_EPT_CACHE_MASK    0x00000038ULL
#define INTEL_EPT_NCACHE        0x00000000ULL
#define INTEL_EPT_WC            0x00000008ULL
#define INTEL_EPT_WTHRU         0x00000020ULL
#define INTEL_EPT_WP            0x00000028ULL
#define INTEL_EPT_WB            0x00000030ULL
3e170ce0
A
350
351/*
352 * Routines to filter correct bits depending on the pmap type
353 */
354
355static inline pt_entry_t
356pte_remove_ex(pt_entry_t pte, boolean_t is_ept)
357{
358 if (__probable(!is_ept)) {
0a7de745 359 return pte | INTEL_PTE_NX;
3e170ce0
A
360 }
361
0a7de745 362 return pte & (~INTEL_EPT_EX);
3e170ce0
A
363}
364
365static inline pt_entry_t
366pte_set_ex(pt_entry_t pte, boolean_t is_ept)
367{
368 if (__probable(!is_ept)) {
0a7de745 369 return pte & (~INTEL_PTE_NX);
3e170ce0
A
370 }
371
0a7de745 372 return pte | INTEL_EPT_EX;
3e170ce0
A
373}
374
375static inline pt_entry_t
376physmap_refmod_to_ept(pt_entry_t physmap_pte)
377{
378 pt_entry_t ept_pte = 0;
379
380 if (physmap_pte & INTEL_PTE_MOD) {
381 ept_pte |= INTEL_EPT_MOD;
382 }
383
384 if (physmap_pte & INTEL_PTE_REF) {
385 ept_pte |= INTEL_EPT_REF;
386 }
387
388 return ept_pte;
389}
390
391static inline pt_entry_t
392ept_refmod_to_physmap(pt_entry_t ept_pte)
393{
394 pt_entry_t physmap_pte = 0;
395
396 assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0);
397
398 if (ept_pte & INTEL_EPT_REF) {
399 physmap_pte |= INTEL_PTE_REF;
400 }
401
402 if (ept_pte & INTEL_EPT_MOD) {
403 physmap_pte |= INTEL_PTE_MOD;
404 }
405
406 return physmap_pte;
407}
408
409/*
410 * Note: Not all Intel processors support EPT referenced access and dirty bits.
411 * During pmap_init() we check the VMX capability for the current hardware
412 * and update this variable accordingly.
413 */
414extern boolean_t pmap_ept_support_ad;
415
0a7de745
A
416#define PTE_VALID_MASK(is_ept) ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
417#define PTE_READ(is_ept) ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
418#define PTE_WRITE(is_ept) ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
419#define PTE_PS INTEL_PTE_PS
420#define PTE_COMPRESSED INTEL_PTE_COMPRESSED
421#define PTE_COMPRESSED_ALT INTEL_PTE_COMPRESSED_ALT
422#define PTE_NCACHE(is_ept) ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
423#define PTE_WTHRU(is_ept) ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
424#define PTE_REF(is_ept) ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
425#define PTE_MOD(is_ept) ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
426#define PTE_WIRED INTEL_PTE_WIRED
427
428
/* Cache mode flags. */
#define PMAP_DEFAULT_CACHE      0
#define PMAP_INHIBIT_CACHE      1
#define PMAP_GUARDED_CACHE      2
#define PMAP_ACTIVATE_CACHE     4
#define PMAP_NO_GUARD_CACHE     8

/* Per-pmap ledger operations */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
d9a64523 438
0a7de745 439#ifndef ASSEMBLER
91447636
A
440
441#include <sys/queue.h>
442
1c79356b 443/*
91447636
A
444 * Address of current and alternate address space page table maps
445 * and directories.
1c79356b 446 */
1c79356b 447
0a7de745
A
448extern pt_entry_t *PTmap;
449extern pdpt_entry_t *IdlePDPT;
450extern pml4_entry_t *IdlePML4;
451extern boolean_t no_shared_cr3;
452extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */
b0d623f7 453
0a7de745
A
454extern uint64_t pmap_pv_hashlist_walks;
455extern uint64_t pmap_pv_hashlist_cnts;
456extern uint32_t pmap_pv_hashlist_max;
457extern uint32_t pmap_kernel_text_ps;
b0d623f7 458
0a7de745 459#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
91447636 460
0a7de745 461extern uint64_t physmap_base, physmap_max;
316670eb 462
0a7de745 463#define NPHYSMAP (MAX(((physmap_max - physmap_base) / GB), 4))
7ddcb079 464
0a7de745
A
465static inline boolean_t
466physmap_enclosed(addr64_t a)
467{
468 return a < (NPHYSMAP * GB);
7ddcb079 469}
316670eb 470
0a7de745
A
471static inline void *
472PHYSMAP_PTOV_check(void *paddr)
473{
316670eb
A
474 uint64_t pvaddr = (uint64_t)paddr + physmap_base;
475
0a7de745 476 if (__improbable(pvaddr >= physmap_max)) {
316670eb 477 panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
0a7de745
A
478 pvaddr, physmap_base, physmap_max);
479 }
316670eb
A
480
481 return (void *)pvaddr;
482}
483
0a7de745 484#define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
cb323159 485#define phystokv(x) ((vm_offset_t)(PHYSMAP_PTOV(x)))
#if MACH_KERNEL_PRIVATE
extern uint64_t dblmap_base, dblmap_max, dblmap_dist;

/*
 * Return x + dblmap_dist, panicking unless the result lies in
 * [dblmap_base, dblmap_max).
 */
static inline uint64_t
DBLMAP_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + dblmap_dist;
	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("DBLMAP bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
/* The argument is parenthesized so the cast covers a whole expression. */
#define DBLMAP(x) (DBLMAP_CHECK((uint64_t) (x)))
extern uint64_t ldt_alias_offset;
/*
 * Return x + ldt_alias_offset, panicking unless the result lies in
 * [dblmap_base, dblmap_max).
 */
static inline uint64_t
LDTALIAS_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + ldt_alias_offset;
	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("LDTALIAS: bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
#define LDTALIAS(x) (LDTALIAS_CHECK((uint64_t) (x)))
#endif

/*
 * For KASLR, we alias the master processor's IDT and GDT at fixed
 * virtual addresses to defeat SIDT/SGDT address leakage.
 * And non-boot processor's GDT aliases likewise (skipping LOWGLOBAL_ALIAS)
 * The low global vector page is mapped at a fixed alias also.
 */
#define LOWGLOBAL_ALIAS         (VM_MIN_KERNEL_ADDRESS + 0x2000)

/*
 * This indicates (roughly) where there is free space for the VM
 * to use for the heap; this does not need to be precise.
 */
#define KERNEL_PMAP_HEAP_RANGE_START VM_MIN_KERNEL_AND_KEXT_ADDRESS
527
0a7de745
A
528#if MACH_KERNEL_PRIVATE
529extern void
530pmap_tlbi_range(uint64_t startv, uint64_t endv, bool global, uint16_t pcid);
531
91447636
A
532#include <vm/vm_page.h>
533
534/*
535 * For each vm_page_t, there is a list of all currently
536 * valid virtual mappings of that page. An entry is
537 * a pv_entry_t; the list is the pv_table.
538 */
1c79356b
A
539
540struct pmap {
0a7de745
A
541 lck_rw_t pmap_rwl __attribute((aligned(64)));
542 pmap_paddr_t pm_cr3 __attribute((aligned(64))); /* Kernel+user shared PML4 physical*/
543 pmap_paddr_t pm_ucr3; /* Mirrored user PML4 physical */
39037602 544 pml4_entry_t *pm_pml4; /* VKA of top level */
5c9f4661 545 pml4_entry_t *pm_upml4; /* Shadow VKA of top level */
0a7de745 546 pmap_paddr_t pm_eptp; /* EPTP */
cb323159 547
0a7de745
A
548 task_map_t pm_task_map;
549 boolean_t pagezero_accessible;
550#define PMAP_PCID_MAX_CPUS MAX_CPUS /* Must be a multiple of 8 */
551 pcid_t pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
552 volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
553 boolean_t pm_shared;
cb323159
A
554 os_refcnt_t ref_count;
555 pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */
39037602
A
556 vm_object_t pm_obj; /* object to hold pde's */
557 vm_object_t pm_obj_pdpt; /* holds pdpt pages */
558 vm_object_t pm_obj_pml4; /* holds pml4 pages */
0a7de745
A
559#if DEVELOPMENT || DEBUG
560 int nx_enabled;
561#endif
0a7de745
A
562 ledger_t ledger; /* ledger tracking phys mappings */
563 struct pmap_statistics stats; /* map statistics */
cb323159 564 uint64_t corrected_compressed_ptes_count;
39037602 565#if MACH_ASSERT
0a7de745
A
566 boolean_t pmap_stats_assert;
567 int pmap_pid;
568 char pmap_procname[17];
39037602 569#endif /* MACH_ASSERT */
1c79356b
A
570};
571
3e170ce0
A
572static inline boolean_t
573is_ept_pmap(pmap_t p)
574{
575 if (__probable(p->pm_cr3 != 0)) {
576 assert(p->pm_eptp == 0);
577 return FALSE;
578 }
579
580 assert(p->pm_eptp != 0);
581
582 return TRUE;
583}
584
585void hv_ept_pmap_create(void **ept_pmap, void **eptp);
0c530ab8 586
#if NCOPY_WINDOWS > 0
/*
 * Per-cpu mapping windows: three groups of four (PDPT, PDE, PTE)
 * followed by PMAP_WINDOW_SIZE further slots.
 */
#define PMAP_PDPT_FIRST_WINDOW 0
#define PMAP_PDPT_NWINDOWS 4
#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS)
#define PMAP_PDE_NWINDOWS 4
#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
#define PMAP_PTE_NWINDOWS 4

#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
#define PMAP_WINDOW_SIZE 8
#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)

/* One mapping window. */
typedef struct {
	pt_entry_t      *prv_CMAP;
	caddr_t         prv_CADDR;
} mapwindow_t;

/* Per-cpu window state. */
typedef struct cpu_pmap {
	int                     pdpt_window_index;
	int                     pde_window_index;
	int                     pte_window_index;
	mapwindow_t             mapwindow[PMAP_NWINDOWS];
} cpu_pmap_t;


extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
extern void         pmap_put_mapwindow(mapwindow_t *map);
#endif
91447636
A
615
616typedef struct pmap_memory_regions {
cb323159
A
617 ppnum_t base; /* first page of this region */
618 ppnum_t alloc_up; /* pages below this one have been "stolen" */
619 ppnum_t alloc_down; /* pages above this one have been "stolen" */
620 ppnum_t alloc_frag_up; /* low page of fragment after large page alloc */
621 ppnum_t alloc_frag_down; /* high page of fragment after large page alloc */
622 ppnum_t end; /* last page of this region */
7ddcb079
A
623 uint32_t type;
624 uint64_t attribute;
91447636
A
625} pmap_memory_region_t;
626
b0d623f7
A
627extern unsigned pmap_memory_region_count;
628extern unsigned pmap_memory_region_current;
91447636 629
0c530ab8 630#define PMAP_MEMORY_REGIONS_SIZE 128
91447636
A
631
632extern pmap_memory_region_t pmap_memory_regions[];
6d2010ae 633#include <i386/pmap_pcid.h>
91447636 634
b0d623f7 635static inline void
0a7de745
A
636set_dirbase(pmap_t tpmap, thread_t thread, int my_cpu)
637{
fe8ab488 638 int ccpu = my_cpu;
5c9f4661
A
639 uint64_t pcr3 = tpmap->pm_cr3, ucr3 = tpmap->pm_ucr3;
640 cpu_datap(ccpu)->cpu_task_cr3 = pcr3;
0a7de745 641 cpu_shadowp(ccpu)->cpu_shadowtask_cr3 = pcr3;
5c9f4661
A
642
643 cpu_datap(ccpu)->cpu_ucr3 = ucr3;
644 cpu_shadowp(ccpu)->cpu_ucr3 = ucr3;
645
d9a64523
A
646 cpu_datap(ccpu)->cpu_task_map = cpu_shadowp(ccpu)->cpu_task_map =
647 tpmap->pm_task_map;
39037602
A
648
649 assert((get_preemption_level() > 0) || (ml_get_interrupts_enabled() == FALSE));
650 assert(ccpu == cpu_number());
b0d623f7
A
651 /*
652 * Switch cr3 if necessary
653 * - unless running with no_shared_cr3 debugging mode
654 * and we're not on the kernel's cr3 (after pre-empted copyio)
655 */
39037602
A
656 boolean_t nopagezero = tpmap->pagezero_accessible;
657 boolean_t priorpagezero = cpu_datap(ccpu)->cpu_pagezero_mapped;
658 cpu_datap(ccpu)->cpu_pagezero_mapped = nopagezero;
659
6d2010ae 660 if (__probable(!no_shared_cr3)) {
39037602
A
661 if (__improbable(nopagezero)) {
662 boolean_t copyio_active = ((thread->machine.specFlags & CopyIOActive) != 0);
6d2010ae 663 if (pmap_pcid_ncpus) {
39037602
A
664 pmap_pcid_activate(tpmap, ccpu, TRUE, copyio_active);
665 } else {
666 if (copyio_active) {
667 if (get_cr3_base() != tpmap->pm_cr3) {
668 set_cr3_raw(tpmap->pm_cr3);
669 }
670 } else if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
671 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
672 }
6d2010ae 673 }
39037602
A
674 } else if ((get_cr3_base() != tpmap->pm_cr3) || priorpagezero) {
675 if (pmap_pcid_ncpus) {
676 pmap_pcid_activate(tpmap, ccpu, FALSE, FALSE);
677 } else {
6d2010ae 678 set_cr3_raw(tpmap->pm_cr3);
39037602 679 }
6d2010ae 680 }
b0d623f7 681 } else {
0a7de745 682 if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
6d2010ae 683 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
0a7de745 684 }
b0d623f7 685 }
1c79356b
A
686}
687
1c79356b
A
688/*
689 * External declarations for PMAP_ACTIVATE.
690 */
691
0a7de745
A
692extern void pmap_update_interrupt(void);
693
694extern addr64_t(kvtophys)(
695 vm_offset_t addr);
696
697extern kern_return_t pmap_expand(
698 pmap_t pmap,
699 vm_map_offset_t addr,
700 unsigned int options);
701extern vm_offset_t pmap_map(
702 vm_offset_t virt,
703 vm_map_offset_t start,
704 vm_map_offset_t end,
705 vm_prot_t prot,
706 unsigned int flags);
707
708extern vm_offset_t pmap_map_bd(
709 vm_offset_t virt,
710 vm_map_offset_t start,
711 vm_map_offset_t end,
712 vm_prot_t prot,
713 unsigned int flags);
714extern void pmap_bootstrap(
715 vm_offset_t load_start,
716 boolean_t IA32e);
717
718extern boolean_t pmap_valid_page(
719 ppnum_t pn);
720
721extern int pmap_list_resident_pages(
722 struct pmap *pmap,
723 vm_offset_t *listp,
724 int space);
725extern void x86_filter_TLB_coherency_interrupts(boolean_t);
6d2010ae
A
726/*
727 * Get cache attributes (as pagetable bits) for the specified phys page
728 */
0a7de745 729extern unsigned pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
b0d623f7 730#if NCOPY_WINDOWS > 0
0a7de745
A
731extern struct cpu_pmap *pmap_cpu_alloc(
732 boolean_t is_boot_cpu);
733extern void pmap_cpu_free(
734 struct cpu_pmap *cp);
b0d623f7 735#endif
0c530ab8 736
0a7de745
A
737extern kern_return_t pmap_map_block(
738 pmap_t pmap,
739 addr64_t va,
740 ppnum_t pa,
741 uint32_t size,
742 vm_prot_t prot,
743 int attr,
744 unsigned int flags);
5ba3f43e 745
1c79356b
A
746extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
747extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
55e303ae 748extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va);
1c79356b 749
2d21ac55 750extern void pmap_cpu_init(void);
0c530ab8 751extern void pmap_disable_NX(pmap_t pmap);
0c530ab8 752
6d2010ae 753extern void pt_fake_zone_init(int);
5ba3f43e 754extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
0a7de745
A
755 uint64_t *, int *, int *, int *);
756extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1, 2));
0c530ab8 757
cb323159 758extern void x86_64_protect_data_const(void);
1c79356b
A
759/*
760 * Macros for speed.
761 */
762
1c79356b
A
763
764#include <kern/spl.h>
765
5ba3f43e 766
/*
 * Activate the pmap behind map for thread on cpu my_cpu.
 * Expands to a brace block, not a do/while, so it is a full statement.
 */
#define PMAP_ACTIVATE_MAP(map, thread, my_cpu)  {                               \
	pmap_t          tpmap;                                  \
                                                                        \
	tpmap = vm_map_pmap(map);                                       \
	set_dirbase(tpmap, thread, my_cpu);                                     \
}
773
/*
 * On x86_64 "deactivating" a map only asserts (via pmap_assert2) that,
 * when PCIDs are in use, the PCID in CR3 matches the one recorded for
 * this pmap/cpu.  The expansion keeps its trailing semicolon for
 * compatibility with existing callers.  Macro arguments are
 * parenthesized so that expression arguments expand safely.
 */
#if     defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)                          \
	pmap_assert2((pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple((map)->pmap, (thread), (ccpu)) == (get_cr3_raw() & 0xFFF)) : TRUE), \
	    "PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, " \
	    "kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, " \
	    "CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x",   \
	    pmap_pcid_ncpus,                                            \
	    pcid_for_pmap_cpu_tuple((map)->pmap, (thread), (ccpu)),     \
	    get_cr3_raw(),                                              \
	    (map)->pmap->pm_cr3,                                        \
	    cpu_datap(ccpu)->cpu_kernel_cr3,                            \
	    kernel_pmap->pm_cr3,                                        \
	    cpu_datap(ccpu)->cpu_active_pcid,                           \
	    cpu_datap(ccpu)->cpu_kernel_pcid,                           \
	    (thread)->machine.specFlags,                                \
	    (map)->pmap->pagezero_accessible);
#else
#define PMAP_DEACTIVATE_MAP(map, thread)
#endif
1c79356b 780
/*
 * Switch thread th to new_map on cpu my_cpu, at splhigh.
 *
 * In the copy-window variant PMAP_ACTIVATE_MAP now receives my_cpu: the
 * macro takes three parameters, so the earlier two-argument call could
 * not expand.  PMAP_DEACTIVATE_MAP keeps the two-argument form there.
 */
#if NCOPY_WINDOWS > 0
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
	spl_t           spl;                                            \
                                                                        \
	spl = splhigh();                                                \
	PMAP_DEACTIVATE_MAP((th)->map, (th));                           \
	(th)->map = (new_map);                                          \
	PMAP_ACTIVATE_MAP((th)->map, (th), (my_cpu));                   \
	splx(spl);                                                      \
	inval_copy_windows(th);                                         \
}
#else
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
	spl_t           spl;                                            \
                                                                        \
	spl = splhigh();                                                \
	PMAP_DEACTIVATE_MAP((th)->map, (th), (my_cpu));                 \
	(th)->map = (new_map);                                          \
	PMAP_ACTIVATE_MAP((th)->map, (th), (my_cpu));                   \
	splx(spl);                                                      \
}
#endif
1c79356b 803
/*
 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
 * Note that valid page tables are page-aligned and so the bottom 12 bits
 * are normally zero, modulo PCID.
 * We can only mark the current cpu active/inactive but we can test any cpu.
 *
 * Each expansion is fully parenthesized so it stays a single expression
 * wherever it is used.
 */
#define CPU_CR3_MARK_INACTIVE()                                         \
	(current_cpu_datap()->cpu_active_cr3 |= 1)

#define CPU_CR3_MARK_ACTIVE()                                           \
	(current_cpu_datap()->cpu_active_cr3 &= ~1)

#define CPU_CR3_IS_ACTIVE(cpu)                                          \
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

#define CPU_GET_ACTIVE_CR3(cpu)                                         \
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

#define CPU_GET_TASK_CR3(cpu)                                           \
	(cpu_datap(cpu)->cpu_task_cr3)
825
/*
 *	Mark this cpu idle, and remove it from the active set,
 *	since it is not actively using any pmap.  Signal_cpus
 *	will notice that it is idle, and avoid signaling it,
 *	but will queue the update request for when the cpu
 *	becomes active.
 *
 *	Both macros require interrupts to be disabled (asserted) and act on
 *	the current cpu; the my_cpu argument is not used.
 */
#define MARK_CPU_IDLE(my_cpu)   {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	CPU_CR3_MARK_INACTIVE();                                        \
	mfence();                                                       \
}

#define MARK_CPU_ACTIVE(my_cpu) {                                       \
	assert(ml_get_interrupts_enabled() == FALSE);                   \
	/*                                                              \
	 *	If a kernel_pmap update was requested while this cpu    \
	 *	was idle, process it as if we got the interrupt.        \
	 *	Before doing so, remove this cpu from the idle set.     \
	 *	Since we do not grab any pmap locks while we flush      \
	 *	our TLB, another cpu may start an update operation      \
	 *	before we finish.  Removing this cpu from the idle      \
	 *	set assures that we will receive another update         \
	 *	interrupt if this happens.                              \
	 */                                                             \
	CPU_CR3_MARK_ACTIVE();                                          \
	mfence();                                                       \
	pmap_update_interrupt();                                        \
}
855
1c79356b
A
856#define PMAP_CONTEXT(pmap, thread)
857
0a7de745
A
858#define pmap_kernel_va(VA) \
859 ((((vm_offset_t) (VA)) >= vm_min_kernel_address) && \
0c530ab8
A
860 (((vm_offset_t) (VA)) <= vm_max_kernel_address))
861
1c79356b 862
0a7de745
A
863#define pmap_compressed(pmap) ((pmap)->stats.compressed)
864#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
865#define pmap_resident_max(pmap) ((pmap)->stats.resident_max)
866#define pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
867#define pmap_attribute(pmap, addr, size, attr, value) \
868 (KERN_INVALID_ADDRESS)
869#define pmap_attribute_cache_sync(addr, size, attr, value) \
870 (KERN_INVALID_ADDRESS)
765c9de3 871
0a7de745
A
872#define MACHINE_PMAP_IS_EMPTY 1
873extern boolean_t pmap_is_empty(pmap_t pmap,
874 vm_map_offset_t start,
875 vm_map_offset_t end);
2d21ac55 876
0a7de745 877#define MACHINE_BOOTSTRAPPTD 1 /* Static bootstrap page-tables */
6d2010ae 878
316670eb 879kern_return_t
0a7de745 880 pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
b0d623f7 881
/*
 * PMAP_STATS_ASSERTF(args): on MACH_ASSERT builds, run assertf with the
 * parenthesized argument list args when pmap_stats_assert is non-zero;
 * otherwise expands to nothing.
 */
#if MACH_ASSERT
extern int pmap_stats_assert;
#define PMAP_STATS_ASSERTF(args)                \
	MACRO_BEGIN                             \
	if (pmap_stats_assert) assertf args;    \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(args)
#endif /* MACH_ASSERT */
0a7de745
A
891#endif /* MACH_KERNEL_PRIVATE */
892#endif /* ASSEMBLER */
893#endif /* _PMAP_MACHINE_ */
0c530ab8 894#endif /* KERNEL_PRIVATE */