]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pmap.h
xnu-4570.20.62.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap.h
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
8f6c56a5 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.h
61 *
62 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Machine-dependent structures for the physical map module.
66 */
0c530ab8 67#ifdef KERNEL_PRIVATE
1c79356b
A
68#ifndef _PMAP_MACHINE_
69#define _PMAP_MACHINE_ 1
70
71#ifndef ASSEMBLER
72
1c79356b
A
73
74#include <mach/kern_return.h>
75#include <mach/machine/vm_types.h>
76#include <mach/vm_prot.h>
77#include <mach/vm_statistics.h>
78#include <mach/machine/vm_param.h>
79#include <kern/kern_types.h>
91447636 80#include <kern/thread.h>
fe8ab488 81#include <kern/simple_lock.h>
6d2010ae 82#include <mach/branch_predicates.h>
0c530ab8
A
83
84#include <i386/mp.h>
85#include <i386/proc_reg.h>
1c79356b 86
6d2010ae
A
87#include <i386/pal_routines.h>
88
1c79356b
A
89/*
90 * Define the generic in terms of the specific
91 */
92
93#define INTEL_PGBYTES I386_PGBYTES
94#define INTEL_PGSHIFT I386_PGSHIFT
95#define intel_btop(x) i386_btop(x)
96#define intel_ptob(x) i386_ptob(x)
97#define intel_round_page(x) i386_round_page(x)
98#define intel_trunc_page(x) i386_trunc_page(x)
1c79356b
A
99
100/*
101 * i386/i486/i860 Page Table Entry
102 */
103
1c79356b
A
104#endif /* ASSEMBLER */
105
316670eb
A
106#define NPGPTD 4ULL
107#define PDESHIFT 21ULL
108#define PTEMASK 0x1ffULL
109#define PTEINDX 3ULL
91447636 110
316670eb 111#define PTESHIFT 12ULL
b0d623f7 112
b0d623f7
A
113
114#ifdef __x86_64__
115#define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL)
116#endif
117
91447636
A
118#define PDESIZE sizeof(pd_entry_t) /* for assembly files */
119#define PTESIZE sizeof(pt_entry_t) /* for assembly files */
120
121#define INTEL_OFFMASK (I386_PGBYTES - 1)
b0d623f7 122#define INTEL_LOFFMASK (I386_LPGBYTES - 1)
0c530ab8 123#define PG_FRAME 0x000FFFFFFFFFF000ULL
91447636 124#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
0c530ab8 125#define NPTDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
1c79356b 126
91447636
A
127#define NBPTD (NPGPTD << PAGE_SHIFT)
128#define NPDEPTD (NBPTD / (sizeof (pd_entry_t)))
129#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
316670eb 130#define NBPDE (1ULL << PDESHIFT)
91447636 131#define PDEMASK (NBPDE - 1)
9bccf70c 132
b0d623f7
A
133#define PTE_PER_PAGE 512 /* number of PTE's per page on any level */
134
0c530ab8
A
135 /* cleanly define parameters for all the page table levels */
136typedef uint64_t pml4_entry_t;
137#define NPML4PG (PAGE_SIZE/(sizeof (pml4_entry_t)))
138#define PML4SHIFT 39
139#define PML4PGSHIFT 9
140#define NBPML4 (1ULL << PML4SHIFT)
141#define PML4MASK (NBPML4-1)
142#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)
143
144typedef uint64_t pdpt_entry_t;
145#define NPDPTPG (PAGE_SIZE/(sizeof (pdpt_entry_t)))
146#define PDPTSHIFT 30
147#define PDPTPGSHIFT 9
316670eb 148#define NBPDPT (1ULL << PDPTSHIFT)
0c530ab8
A
149#define PDPTMASK (NBPDPT-1)
150#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
151
152typedef uint64_t pd_entry_t;
153#define NPDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
154#define PDSHIFT 21
155#define PDPGSHIFT 9
316670eb 156#define NBPD (1ULL << PDSHIFT)
0c530ab8
A
157#define PDMASK (NBPD-1)
158#define PD_ENTRY_NULL ((pd_entry_t *) 0)
159
160typedef uint64_t pt_entry_t;
161#define NPTPG (PAGE_SIZE/(sizeof (pt_entry_t)))
162#define PTSHIFT 12
163#define PTPGSHIFT 9
316670eb 164#define NBPT (1ULL << PTSHIFT)
0c530ab8
A
165#define PTMASK (NBPT-1)
166#define PT_ENTRY_NULL ((pt_entry_t *) 0)
167
168typedef uint64_t pmap_paddr_t;
169
39037602 170#if DEVELOPMENT || DEBUG
6d2010ae 171#define PMAP_ASSERT 1
39037602
A
172extern int pmap_asserts_enabled;
173extern int pmap_asserts_traced;
6d2010ae 174#endif
39037602 175
6d2010ae 176#if PMAP_ASSERT
39037602 177#define pmap_assert(ex) (pmap_asserts_enabled ? ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) : (void)0)
6d2010ae
A
178
179#define pmap_assert2(ex, fmt, args...) \
180 do { \
39037602
A
181 if (__improbable(pmap_asserts_enabled && !(ex))) { \
182 if (pmap_asserts_traced) { \
183 KERNEL_DEBUG_CONSTANT(0xDEAD1000, __builtin_return_address(0), __LINE__, 0, 0, 0); \
184 kdebug_enable = 0; \
185 } else { \
186 kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
187 panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
188 } \
6d2010ae
A
189 } \
190 } while(0)
191#else
192#define pmap_assert(ex)
193#define pmap_assert2(ex, fmt, args...)
194#endif
195
b0d623f7
A
196/* superpages */
197#ifdef __x86_64__
198#define SUPERPAGE_NBASEPAGES 512
199#else
200#define SUPERPAGE_NBASEPAGES 1 /* we don't support superpages on i386 */
201#endif
202
0c530ab8
A
203/*
204 * Atomic 64-bit store of a page table entry.
205 */
206static inline void
207pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
208{
b0d623f7
A
209 /*
210 * In the 32-bit kernel a compare-and-exchange loop was
211 * required to provide atomicity. For K64, life is easier:
212 */
213 *entryp = value;
0c530ab8
A
214}
215
0c530ab8
A
216/* in 64 bit spaces, the number of each type of page in the page tables */
217#define NPML4PGS (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
218#define NPDPTPGS (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
219#define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
220#define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))
221
316670eb 222#define KERNEL_PML4_INDEX 511
b0d623f7 223#define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */
316670eb 224#define KERNEL_PHYSMAP_PML4_INDEX 509 /* virtual to physical map */
5ba3f43e
A
225#define KERNEL_KASAN_PML4_INDEX0 508
226#define KERNEL_KASAN_PML4_INDEX1 507
b0d623f7
A
227#define KERNEL_BASE (0ULL - NBPML4)
228#define KERNEL_BASEMENT (KERNEL_BASE - NBPML4)
0c530ab8 229
55e303ae 230#define VM_WIMG_COPYBACK VM_MEM_COHERENT
316670eb 231#define VM_WIMG_COPYBACKLW VM_WIMG_COPYBACK
9bccf70c 232#define VM_WIMG_DEFAULT VM_MEM_COHERENT
55e303ae
A
233/* ?? intel ?? */
234#define VM_WIMG_IO (VM_MEM_COHERENT | \
235 VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
5ba3f43e 236#define VM_WIMG_POSTED VM_WIMG_IO
55e303ae
A
237#define VM_WIMG_WTHRU (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
238/* write combining mode, aka store gather */
239#define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
316670eb 240#define VM_WIMG_INNERWBACK VM_MEM_COHERENT
0c530ab8
A
241/*
242 * Pte related macros
243 */
b0d623f7
A
244#define KVADDR(pmi, pdpi, pdi, pti) \
245 ((vm_offset_t) \
246 ((uint64_t) -1 << 47) | \
247 ((uint64_t)(pmi) << PML4SHIFT) | \
248 ((uint64_t)(pdpi) << PDPTSHIFT) | \
249 ((uint64_t)(pdi) << PDESHIFT) | \
250 ((uint64_t)(pti) << PTESHIFT))
0c530ab8 251
1c79356b 252/*
91447636
A
253 * Size of Kernel address space. This is the number of page table pages
254 * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
255 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
1c79356b 256 */
91447636 257#ifndef KVA_PAGES
0c530ab8 258#define KVA_PAGES 1024
91447636 259#endif
1c79356b 260
91447636 261#ifndef NKPT
91447636 262#define NKPT 500 /* actual number of kernel page tables */
91447636
A
263#endif
264#ifndef NKPDE
265#define NKPDE (KVA_PAGES - 1) /* addressable number of page tables/pde's */
266#endif
267
0c530ab8 268
0c530ab8 269
1c79356b
A
270/*
271 * Convert address offset to page descriptor index
272 */
b0d623f7
A
273#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
274#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
275#define PMAP_INVALID_PDPTNUM (~0ULL)
91447636 276
0c530ab8
A
277#define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1))
278#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
279#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
6d2010ae 280
1c79356b
A
281
282/*
283 * Convert page descriptor index to user virtual address
284 */
285#define pdetova(a) ((vm_offset_t)(a) << PDESHIFT)
286
287/*
288 * Convert address offset to page table index
289 */
0c530ab8 290#define ptenum(a) (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
1c79356b 291
1c79356b
A
292/*
293 * Hardware pte bit definitions (to be used directly on the ptes
294 * without using the bit fields).
295 */
296
316670eb
A
297#define INTEL_PTE_VALID 0x00000001ULL
298#define INTEL_PTE_WRITE 0x00000002ULL
299#define INTEL_PTE_RW 0x00000002ULL
300#define INTEL_PTE_USER 0x00000004ULL
301#define INTEL_PTE_WTHRU 0x00000008ULL
302#define INTEL_PTE_NCACHE 0x00000010ULL
303#define INTEL_PTE_REF 0x00000020ULL
304#define INTEL_PTE_MOD 0x00000040ULL
305#define INTEL_PTE_PS 0x00000080ULL
306#define INTEL_PTE_PTA 0x00000080ULL
307#define INTEL_PTE_GLOBAL 0x00000100ULL
3e170ce0
A
308#define INTEL_PTE_WIRED 0x00000400ULL
309#define INTEL_PDPTE_NESTED 0x00000800ULL
0c530ab8 310#define INTEL_PTE_PFN PG_FRAME
1c79356b 311
0c530ab8
A
312#define INTEL_PTE_NX (1ULL << 63)
313
314#define INTEL_PTE_INVALID 0
b7266188 315/* This is conservative, but suffices */
6d2010ae
A
316#define INTEL_PTE_RSVD ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
317
39037602
A
318#define INTEL_PTE_COMPRESSED (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/
319#define INTEL_PTE_COMPRESSED_ALT (1ULL << 61) /* compressed but with "alternate accounting" */
320
321#define INTEL_PTE_COMPRESSED_MASK (INTEL_PTE_COMPRESSED | \
322 INTEL_PTE_COMPRESSED_ALT)
/*
 * PTE_IS_COMPRESSED(x): true when 'x' has the valid bit clear and the
 * "compressed" marker set.  Any bit outside INTEL_PTE_COMPRESSED_MASK in
 * such an entry leads to a panic.  'x' must be an lvalue (its address is
 * reported in the panic message) and is evaluated more than once.
 */
#define PTE_IS_COMPRESSED(x)						\
	(!((x) & INTEL_PTE_VALID) &&	/* entry is not valid... */	\
	 (((x) & INTEL_PTE_COMPRESSED) != 0) && /* ...carries the marker... */ \
	 ((((x) & ~INTEL_PTE_COMPRESSED_MASK) == 0) || /* ...and nothing else */ \
	  (panic("compressed PTE %p 0x%llx has extra bits 0x%llx: corrupted?", \
		 &(x), (x), (x) & ~INTEL_PTE_COMPRESSED_MASK), FALSE)))
39236c6e 329
91447636
A
330#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */
331#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */
1c79356b
A
332#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
333
0c530ab8
A
334#define pte_kernel_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
335#define pte_kernel_ro(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
3e170ce0 336#define pte_user_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
0c530ab8
A
337#define pte_user_ro(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
338
3e170ce0
A
339#define PMAP_INVEPT_SINGLE_CONTEXT 1
340
341
342#define INTEL_EPTP_AD 0x00000040ULL
343
344#define INTEL_EPT_READ 0x00000001ULL
345#define INTEL_EPT_WRITE 0x00000002ULL
346#define INTEL_EPT_EX 0x00000004ULL
347#define INTEL_EPT_IPTA 0x00000040ULL
348#define INTEL_EPT_PS 0x00000080ULL
349#define INTEL_EPT_REF 0x00000100ULL
350#define INTEL_EPT_MOD 0x00000200ULL
351
352#define INTEL_EPT_CACHE_MASK 0x00000038ULL
353#define INTEL_EPT_NCACHE 0x00000000ULL
354#define INTEL_EPT_WC 0x00000008ULL
355#define INTEL_EPT_WTHRU 0x00000020ULL
356#define INTEL_EPT_WP 0x00000028ULL
357#define INTEL_EPT_WB 0x00000030ULL
358
359/*
360 * Routines to filter correct bits depending on the pmap type
361 */
362
363static inline pt_entry_t
364pte_remove_ex(pt_entry_t pte, boolean_t is_ept)
365{
366 if (__probable(!is_ept)) {
367 return (pte | INTEL_PTE_NX);
368 }
369
370 return (pte & (~INTEL_EPT_EX));
371}
372
373static inline pt_entry_t
374pte_set_ex(pt_entry_t pte, boolean_t is_ept)
375{
376 if (__probable(!is_ept)) {
377 return (pte & (~INTEL_PTE_NX));
378 }
379
380 return (pte | INTEL_EPT_EX);
381}
382
383static inline pt_entry_t
384physmap_refmod_to_ept(pt_entry_t physmap_pte)
385{
386 pt_entry_t ept_pte = 0;
387
388 if (physmap_pte & INTEL_PTE_MOD) {
389 ept_pte |= INTEL_EPT_MOD;
390 }
391
392 if (physmap_pte & INTEL_PTE_REF) {
393 ept_pte |= INTEL_EPT_REF;
394 }
395
396 return ept_pte;
397}
398
399static inline pt_entry_t
400ept_refmod_to_physmap(pt_entry_t ept_pte)
401{
402 pt_entry_t physmap_pte = 0;
403
404 assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0);
405
406 if (ept_pte & INTEL_EPT_REF) {
407 physmap_pte |= INTEL_PTE_REF;
408 }
409
410 if (ept_pte & INTEL_EPT_MOD) {
411 physmap_pte |= INTEL_PTE_MOD;
412 }
413
414 return physmap_pte;
415}
416
417/*
418 * Note: Not all Intel processors support EPT referenced access and dirty bits.
419 * During pmap_init() we check the VMX capability for the current hardware
420 * and update this variable accordingly.
421 */
422extern boolean_t pmap_ept_support_ad;
423
424#define PTE_VALID_MASK(is_ept) ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
425#define PTE_READ(is_ept) ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
426#define PTE_WRITE(is_ept) ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
427#define PTE_PS INTEL_PTE_PS
39037602
A
428#define PTE_COMPRESSED INTEL_PTE_COMPRESSED
429#define PTE_COMPRESSED_ALT INTEL_PTE_COMPRESSED_ALT
3e170ce0
A
430#define PTE_NCACHE(is_ept) ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
431#define PTE_WTHRU(is_ept) ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
432#define PTE_REF(is_ept) ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
433#define PTE_MOD(is_ept) ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
434#define PTE_WIRED INTEL_PTE_WIRED
435
436
9bccf70c
A
437#define PMAP_DEFAULT_CACHE 0
438#define PMAP_INHIBIT_CACHE 1
439#define PMAP_GUARDED_CACHE 2
440#define PMAP_ACTIVATE_CACHE 4
441#define PMAP_NO_GUARD_CACHE 8
442
91447636
A
443#ifndef ASSEMBLER
444
445#include <sys/queue.h>
446
1c79356b 447/*
91447636
A
448 * Address of current and alternate address space page table maps
449 * and directories.
1c79356b 450 */
1c79356b 451
b0d623f7 452extern pt_entry_t *PTmap;
316670eb
A
453extern pdpt_entry_t *IdlePDPT;
454extern pml4_entry_t *IdlePML4;
b0d623f7 455extern boolean_t no_shared_cr3;
b0d623f7 456extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */
b0d623f7
A
457
458extern uint64_t pmap_pv_hashlist_walks;
459extern uint64_t pmap_pv_hashlist_cnts;
460extern uint32_t pmap_pv_hashlist_max;
461extern uint32_t pmap_kernel_text_ps;
462
b0d623f7 463
316670eb 464
b0d623f7
A
465#ifdef __x86_64__
466#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
91447636 467
316670eb
A
468extern uint64_t physmap_base, physmap_max;
469
7ddcb079 470#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
7ddcb079
A
471
472static inline boolean_t physmap_enclosed(addr64_t a) {
473 return (a < (NPHYSMAP * GB));
474}
316670eb
A
475
476static inline void * PHYSMAP_PTOV_check(void *paddr) {
477 uint64_t pvaddr = (uint64_t)paddr + physmap_base;
478
479 if (__improbable(pvaddr >= physmap_max))
480 panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
481 pvaddr, physmap_base, physmap_max);
482
483 return (void *)pvaddr;
484}
485
486#define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
487
488/*
489 * For KASLR, we alias the master processor's IDT and GDT at fixed
490 * virtual addresses to defeat SIDT/SGDT address leakage.
143464d5
A
491 * And non-boot processor's GDT aliases likewise (skipping LOWGLOBAL_ALIAS)
492 * The low global vector page is mapped at a fixed alias also.
316670eb
A
493 */
494#define MASTER_IDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x0000)
495#define MASTER_GDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x1000)
316670eb 496#define LOWGLOBAL_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x2000)
143464d5 497#define CPU_GDT_ALIAS(_cpu) (LOWGLOBAL_ALIAS + (0x1000*(_cpu)))
316670eb 498
39037602
A
499/*
500 * This indicates (roughly) where there is free space for the VM
501 * to use for the heap; this does not need to be precise.
502 */
503#define KERNEL_PMAP_HEAP_RANGE_START VM_MIN_KERNEL_AND_KEXT_ADDRESS
504
316670eb 505#endif /*__x86_64__ */
91447636 506
91447636
A
507#include <vm/vm_page.h>
508
509/*
510 * For each vm_page_t, there is a list of all currently
511 * valid virtual mappings of that page. An entry is
512 * a pv_entry_t; the list is the pv_table.
513 */
1c79356b
A
514
515struct pmap {
6d2010ae
A
516 decl_simple_lock_data(,lock) /* lock on map */
517 pmap_paddr_t pm_cr3; /* physical addr */
2d21ac55 518 task_map_t pm_task_map;
39037602
A
519 boolean_t pm_shared;
520 boolean_t pagezero_accessible;
3e170ce0 521#define PMAP_PCID_MAX_CPUS MAX_CPUS /* Must be a multiple of 8 */
6d2010ae
A
522 pcid_t pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
523 volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
524 struct pmap_statistics stats; /* map statistics */
525 int ref_count; /* reference count */
526 int nx_enabled;
39037602
A
527 pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */
528 pml4_entry_t *pm_pml4; /* VKA of top level */
529 vm_object_t pm_obj; /* object to hold pde's */
530 vm_object_t pm_obj_pdpt; /* holds pdpt pages */
531 vm_object_t pm_obj_pml4; /* holds pml4 pages */
532 pmap_paddr_t pm_eptp; /* EPTP */
533 pd_entry_t *dirbase; /* page directory pointer */
316670eb 534 ledger_t ledger; /* ledger tracking phys mappings */
39037602
A
535#if MACH_ASSERT
536 int pmap_pid;
537 char pmap_procname[17];
538#endif /* MACH_ASSERT */
1c79356b
A
539};
540
3e170ce0
A
541static inline boolean_t
542is_ept_pmap(pmap_t p)
543{
544 if (__probable(p->pm_cr3 != 0)) {
545 assert(p->pm_eptp == 0);
546 return FALSE;
547 }
548
549 assert(p->pm_eptp != 0);
550
551 return TRUE;
552}
553
554void hv_ept_pmap_create(void **ept_pmap, void **eptp);
0c530ab8 555
b0d623f7 556#if NCOPY_WINDOWS > 0
0c530ab8
A
557#define PMAP_PDPT_FIRST_WINDOW 0
558#define PMAP_PDPT_NWINDOWS 4
559#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS)
560#define PMAP_PDE_NWINDOWS 4
561#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
562#define PMAP_PTE_NWINDOWS 4
563
564#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
565#define PMAP_WINDOW_SIZE 8
566#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)
567
91447636
A
568typedef struct {
569 pt_entry_t *prv_CMAP;
570 caddr_t prv_CADDR;
571} mapwindow_t;
572
573typedef struct cpu_pmap {
0c530ab8
A
574 int pdpt_window_index;
575 int pde_window_index;
576 int pte_window_index;
91447636 577 mapwindow_t mapwindow[PMAP_NWINDOWS];
91447636
A
578} cpu_pmap_t;
579
0c530ab8
A
580
581extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
2d21ac55 582extern void pmap_put_mapwindow(mapwindow_t *map);
b0d623f7 583#endif
91447636
A
584
585typedef struct pmap_memory_regions {
39236c6e
A
586 ppnum_t base; /* first page of this region */
587 ppnum_t alloc_up; /* pages below this one have been "stolen" */
588 ppnum_t alloc_down; /* pages above this one have been "stolen" */
589 ppnum_t end; /* last page of this region */
7ddcb079
A
590 uint32_t type;
591 uint64_t attribute;
91447636
A
592} pmap_memory_region_t;
593
b0d623f7
A
594extern unsigned pmap_memory_region_count;
595extern unsigned pmap_memory_region_current;
91447636 596
0c530ab8 597#define PMAP_MEMORY_REGIONS_SIZE 128
91447636
A
598
599extern pmap_memory_region_t pmap_memory_regions[];
6d2010ae 600#include <i386/pmap_pcid.h>
91447636 601
b0d623f7 602static inline void
39037602 603set_dirbase(pmap_t tpmap, thread_t thread, int my_cpu) {
fe8ab488 604 int ccpu = my_cpu;
6d2010ae
A
605 cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
606 cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
39037602
A
607
608 assert((get_preemption_level() > 0) || (ml_get_interrupts_enabled() == FALSE));
609 assert(ccpu == cpu_number());
b0d623f7
A
610 /*
611 * Switch cr3 if necessary
612 * - unless running with no_shared_cr3 debugging mode
613 * and we're not on the kernel's cr3 (after pre-empted copyio)
614 */
39037602
A
615 boolean_t nopagezero = tpmap->pagezero_accessible;
616 boolean_t priorpagezero = cpu_datap(ccpu)->cpu_pagezero_mapped;
617 cpu_datap(ccpu)->cpu_pagezero_mapped = nopagezero;
618
6d2010ae 619 if (__probable(!no_shared_cr3)) {
39037602
A
620 if (__improbable(nopagezero)) {
621 boolean_t copyio_active = ((thread->machine.specFlags & CopyIOActive) != 0);
6d2010ae 622 if (pmap_pcid_ncpus) {
39037602
A
623 pmap_pcid_activate(tpmap, ccpu, TRUE, copyio_active);
624 } else {
625 if (copyio_active) {
626 if (get_cr3_base() != tpmap->pm_cr3) {
627 set_cr3_raw(tpmap->pm_cr3);
628 }
629 } else if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
630 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
631 }
6d2010ae 632 }
39037602
A
633 } else if ((get_cr3_base() != tpmap->pm_cr3) || priorpagezero) {
634 if (pmap_pcid_ncpus) {
635 pmap_pcid_activate(tpmap, ccpu, FALSE, FALSE);
636 } else {
6d2010ae 637 set_cr3_raw(tpmap->pm_cr3);
39037602 638 }
6d2010ae 639 }
b0d623f7 640 } else {
6d2010ae
A
641 if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
642 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
b0d623f7 643 }
1c79356b
A
644}
645
1c79356b
A
646/*
647 * External declarations for PMAP_ACTIVATE.
648 */
649
0c530ab8 650extern void process_pmap_updates(void);
1c79356b 651extern void pmap_update_interrupt(void);
1c79356b
A
652
653/*
654 * Machine dependent routines that are used only for i386/i486/i860.
655 */
1c79356b 656
0c530ab8 657extern addr64_t (kvtophys)(
1c79356b
A
658 vm_offset_t addr);
659
316670eb 660extern kern_return_t pmap_expand(
2d21ac55 661 pmap_t pmap,
316670eb
A
662 vm_map_offset_t addr,
663 unsigned int options);
6d2010ae 664#if !defined(__x86_64__)
1c79356b
A
665extern pt_entry_t *pmap_pte(
666 struct pmap *pmap,
0c530ab8
A
667 vm_map_offset_t addr);
668
669extern pd_entry_t *pmap_pde(
670 struct pmap *pmap,
671 vm_map_offset_t addr);
672
673extern pd_entry_t *pmap64_pde(
674 struct pmap *pmap,
675 vm_map_offset_t addr);
676
677extern pdpt_entry_t *pmap64_pdpt(
678 struct pmap *pmap,
679 vm_map_offset_t addr);
6d2010ae 680#endif
1c79356b
A
681extern vm_offset_t pmap_map(
682 vm_offset_t virt,
0c530ab8
A
683 vm_map_offset_t start,
684 vm_map_offset_t end,
685 vm_prot_t prot,
686 unsigned int flags);
1c79356b
A
687
688extern vm_offset_t pmap_map_bd(
689 vm_offset_t virt,
0c530ab8
A
690 vm_map_offset_t start,
691 vm_map_offset_t end,
692 vm_prot_t prot,
693 unsigned int flags);
1c79356b
A
694
695extern void pmap_bootstrap(
0c530ab8
A
696 vm_offset_t load_start,
697 boolean_t IA32e);
1c79356b
A
698
699extern boolean_t pmap_valid_page(
91447636 700 ppnum_t pn);
1c79356b
A
701
702extern int pmap_list_resident_pages(
703 struct pmap *pmap,
704 vm_offset_t *listp,
705 int space);
060df5ea 706extern void x86_filter_TLB_coherency_interrupts(boolean_t);
6d2010ae
A
707/*
708 * Get cache attributes (as pagetable bits) for the specified phys page
709 */
3e170ce0 710extern unsigned pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
b0d623f7 711#if NCOPY_WINDOWS > 0
91447636
A
712extern struct cpu_pmap *pmap_cpu_alloc(
713 boolean_t is_boot_cpu);
714extern void pmap_cpu_free(
715 struct cpu_pmap *cp);
b0d623f7 716#endif
0c530ab8 717
5ba3f43e
A
718extern kern_return_t pmap_map_block(
719 pmap_t pmap,
0c530ab8
A
720 addr64_t va,
721 ppnum_t pa,
722 uint32_t size,
723 vm_prot_t prot,
724 int attr,
725 unsigned int flags);
5ba3f43e 726
1c79356b
A
727extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
728extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
55e303ae 729extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va);
1c79356b 730
2d21ac55 731extern void pmap_cpu_init(void);
0c530ab8 732extern void pmap_disable_NX(pmap_t pmap);
0c530ab8 733
6d2010ae 734extern void pt_fake_zone_init(int);
5ba3f43e 735extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
6d2010ae 736 uint64_t *, int *, int *, int *);
b7266188 737extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
0c530ab8 738
1c79356b
A
739/*
740 * Macros for speed.
741 */
742
1c79356b
A
743
744#include <kern/spl.h>
745
5ba3f43e 746
fe8ab488 747#define PMAP_ACTIVATE_MAP(map, thread, my_cpu) { \
39037602 748 pmap_t tpmap; \
0c530ab8
A
749 \
750 tpmap = vm_map_pmap(map); \
fe8ab488 751 set_dirbase(tpmap, thread, my_cpu); \
1c79356b
A
752}
753
39236c6e 754#if defined(__x86_64__)
fe8ab488 755#define PMAP_DEACTIVATE_MAP(map, thread, ccpu) \
39037602 756 pmap_assert2((pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE),"PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x", pmap_pcid_ncpus, pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu), get_cr3_raw(), map->pmap->pm_cr3, cpu_datap(ccpu)->cpu_kernel_cr3, kernel_pmap->pm_cr3, cpu_datap(ccpu)->cpu_active_pcid, cpu_datap(ccpu)->cpu_kernel_pcid, thread->machine.specFlags, map->pmap->pagezero_accessible);
b0d623f7 757#else
6d2010ae 758#define PMAP_DEACTIVATE_MAP(map, thread)
b0d623f7 759#endif
1c79356b 760
/*
 * PMAP_SWITCH_USER(th, new_map, my_cpu)
 *	Replace thread 'th's map with 'new_map' at splhigh: deactivate the
 *	old map, store the new one in th->map, activate it on 'my_cpu',
 *	then restore the previous spl.  The NCOPY_WINDOWS variant also
 *	invalidates the thread's copy windows afterwards.
 *
 *	PMAP_ACTIVATE_MAP takes (map, thread, my_cpu) in every
 *	configuration, so both variants pass 'my_cpu' through to it.
 *
 *	NOTE(review): the two-argument PMAP_DEACTIVATE_MAP invocation in the
 *	NCOPY_WINDOWS variant matches only the non-x86_64 definition of that
 *	macro; confirm before enabling copy windows on x86_64.
 */
#if NCOPY_WINDOWS > 0
#define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th);				\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);				\
	splx(spl);							\
	inval_copy_windows(th);						\
}
#else
#define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);			\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);				\
	splx(spl);							\
}
#endif
1c79356b 783
0c530ab8
A
/*
 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
 * Note that valid page tables are page-aligned and so the bottom 12 bits
 * are normally zero, modulo PCID.
 * We can only mark the current cpu active/inactive but we can test any cpu.
 *
 * Each expansion is fully parenthesized so that it behaves as a single
 * expression wherever it is used.
 */
#define CPU_CR3_MARK_INACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 |= 1)

#define CPU_CR3_MARK_ACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 &= ~1)

/* True when the given cpu's cpu_active_cr3 has its lsb clear. */
#define CPU_CR3_IS_ACTIVE(cpu)						\
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

/* The given cpu's cpu_active_cr3 with the inactive marker masked off. */
#define CPU_GET_ACTIVE_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

/* The cpu_task_cr3 recorded for the given cpu. */
#define CPU_GET_TASK_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_task_cr3)
805
806/*
807 * Mark this cpu idle, and remove it from the active set,
808 * since it is not actively using any pmap. Signal_cpus
809 * will notice that it is idle, and avoid signaling it,
810 * but will queue the update request for when the cpu
811 * becomes active.
812 */
b0d623f7 813#define MARK_CPU_IDLE(my_cpu) { \
6d2010ae 814 assert(ml_get_interrupts_enabled() == FALSE); \
b0d623f7 815 CPU_CR3_MARK_INACTIVE(); \
39236c6e 816 mfence(); \
1c79356b
A
817}
818
0c530ab8 819#define MARK_CPU_ACTIVE(my_cpu) { \
6d2010ae 820 assert(ml_get_interrupts_enabled() == FALSE); \
1c79356b
A
821 /* \
822 * If a kernel_pmap update was requested while this cpu \
823 * was idle, process it as if we got the interrupt. \
824 * Before doing so, remove this cpu from the idle set. \
825 * Since we do not grab any pmap locks while we flush \
826 * our TLB, another cpu may start an update operation \
827 * before we finish. Removing this cpu from the idle \
828 * set assures that we will receive another update \
829 * interrupt if this happens. \
830 */ \
0c530ab8 831 CPU_CR3_MARK_ACTIVE(); \
39236c6e 832 mfence(); \
55e303ae 833 \
0c530ab8
A
834 if (current_cpu_datap()->cpu_tlb_invalid) \
835 process_pmap_updates(); \
1c79356b
A
836}
837
1c79356b
A
838#define PMAP_CONTEXT(pmap, thread)
839
840#define pmap_kernel_va(VA) \
0c530ab8
A
841 ((((vm_offset_t) (VA)) >= vm_min_kernel_address) && \
842 (((vm_offset_t) (VA)) <= vm_max_kernel_address))
843
1c79356b 844
fe8ab488 845#define pmap_compressed(pmap) ((pmap)->stats.compressed)
1c79356b 846#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
2d21ac55 847#define pmap_resident_max(pmap) ((pmap)->stats.resident_max)
1c79356b
A
848#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
849#define pmap_attribute(pmap,addr,size,attr,value) \
850 (KERN_INVALID_ADDRESS)
9bccf70c
A
851#define pmap_attribute_cache_sync(addr,size,attr,value) \
852 (KERN_INVALID_ADDRESS)
765c9de3 853
6d2010ae 854#define MACHINE_PMAP_IS_EMPTY 1
2d21ac55
A
855extern boolean_t pmap_is_empty(pmap_t pmap,
856 vm_map_offset_t start,
857 vm_map_offset_t end);
858
6d2010ae
A
859#define MACHINE_BOOTSTRAPPTD 1 /* Static bootstrap page-tables */
860
316670eb
A
861kern_return_t
862pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
b0d623f7 863
39037602
A
864#if MACH_ASSERT
865extern int pmap_stats_assert;
866#define PMAP_STATS_ASSERTF(args) \
867 MACRO_BEGIN \
868 if (pmap_stats_assert) assertf args; \
869 MACRO_END
870#else /* MACH_ASSERT */
871#define PMAP_STATS_ASSERTF(args)
872#endif /* MACH_ASSERT */
873
1c79356b
A
874#endif /* ASSEMBLER */
875
0c530ab8 876
1c79356b 877#endif /* _PMAP_MACHINE_ */
0c530ab8
A
878
879
880#endif /* KERNEL_PRIVATE */