]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pmap.h
xnu-2050.18.24.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap.h
CommitLineData
1c79356b 1/*
0c530ab8 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
8f6c56a5 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.h
61 *
62 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Machine-dependent structures for the physical map module.
66 */
0c530ab8 67#ifdef KERNEL_PRIVATE
1c79356b
A
68#ifndef _PMAP_MACHINE_
69#define _PMAP_MACHINE_ 1
70
71#ifndef ASSEMBLER
72
73#include <platforms.h>
1c79356b
A
74
75#include <mach/kern_return.h>
76#include <mach/machine/vm_types.h>
77#include <mach/vm_prot.h>
78#include <mach/vm_statistics.h>
79#include <mach/machine/vm_param.h>
80#include <kern/kern_types.h>
91447636 81#include <kern/thread.h>
1c79356b 82#include <kern/lock.h>
6d2010ae 83#include <mach/branch_predicates.h>
0c530ab8
A
84
85#include <i386/mp.h>
86#include <i386/proc_reg.h>
1c79356b 87
6d2010ae
A
88#include <i386/pal_routines.h>
89
1c79356b
A
90/*
91 * Define the generic in terms of the specific
92 */
93
94#define INTEL_PGBYTES I386_PGBYTES
95#define INTEL_PGSHIFT I386_PGSHIFT
96#define intel_btop(x) i386_btop(x)
97#define intel_ptob(x) i386_ptob(x)
98#define intel_round_page(x) i386_round_page(x)
99#define intel_trunc_page(x) i386_trunc_page(x)
100#define trunc_intel_to_vm(x) trunc_i386_to_vm(x)
101#define round_intel_to_vm(x) round_i386_to_vm(x)
102#define vm_to_intel(x) vm_to_i386(x)
103
104/*
105 * i386/i486/i860 Page Table Entry
106 */
107
1c79356b
A
108#endif /* ASSEMBLER */
109
316670eb
A
110#define NPGPTD 4ULL
111#define PDESHIFT 21ULL
112#define PTEMASK 0x1ffULL
113#define PTEINDX 3ULL
91447636 114
316670eb 115#define PTESHIFT 12ULL
b0d623f7 116
316670eb 117#ifdef __i386__
b0d623f7 118#define INITPT_SEG_BASE 0x100000
316670eb 119#endif
b0d623f7
A
120
121#ifdef __x86_64__
122#define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL)
123#endif
124
91447636
A
125#define PDESIZE sizeof(pd_entry_t) /* for assembly files */
126#define PTESIZE sizeof(pt_entry_t) /* for assembly files */
127
128#define INTEL_OFFMASK (I386_PGBYTES - 1)
b0d623f7 129#define INTEL_LOFFMASK (I386_LPGBYTES - 1)
0c530ab8 130#define PG_FRAME 0x000FFFFFFFFFF000ULL
91447636 131#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
0c530ab8 132#define NPTDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
1c79356b 133
91447636
A
134#define NBPTD (NPGPTD << PAGE_SHIFT)
135#define NPDEPTD (NBPTD / (sizeof (pd_entry_t)))
136#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
316670eb 137#define NBPDE (1ULL << PDESHIFT)
91447636 138#define PDEMASK (NBPDE - 1)
9bccf70c 139
b0d623f7
A
140#define PTE_PER_PAGE 512 /* number of PTE's per page on any level */
141
0c530ab8
A
142 /* cleanly define parameters for all the page table levels */
143typedef uint64_t pml4_entry_t;
144#define NPML4PG (PAGE_SIZE/(sizeof (pml4_entry_t)))
145#define PML4SHIFT 39
146#define PML4PGSHIFT 9
147#define NBPML4 (1ULL << PML4SHIFT)
148#define PML4MASK (NBPML4-1)
149#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)
150
151typedef uint64_t pdpt_entry_t;
152#define NPDPTPG (PAGE_SIZE/(sizeof (pdpt_entry_t)))
153#define PDPTSHIFT 30
154#define PDPTPGSHIFT 9
316670eb 155#define NBPDPT (1ULL << PDPTSHIFT)
0c530ab8
A
156#define PDPTMASK (NBPDPT-1)
157#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
158
159typedef uint64_t pd_entry_t;
160#define NPDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
161#define PDSHIFT 21
162#define PDPGSHIFT 9
316670eb 163#define NBPD (1ULL << PDSHIFT)
0c530ab8
A
164#define PDMASK (NBPD-1)
165#define PD_ENTRY_NULL ((pd_entry_t *) 0)
166
167typedef uint64_t pt_entry_t;
168#define NPTPG (PAGE_SIZE/(sizeof (pt_entry_t)))
169#define PTSHIFT 12
170#define PTPGSHIFT 9
316670eb 171#define NBPT (1ULL << PTSHIFT)
0c530ab8
A
172#define PTMASK (NBPT-1)
173#define PT_ENTRY_NULL ((pt_entry_t *) 0)
174
175typedef uint64_t pmap_paddr_t;
176
6d2010ae
A
177#if DEBUG
178#define PMAP_ASSERT 1
179#endif
180#if PMAP_ASSERT
181#define pmap_assert(ex) ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))
182
183#define pmap_assert2(ex, fmt, args...) \
184 do { \
185 if (!(ex)) { \
186 kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
187 panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
188 } \
189 } while(0)
190#else
191#define pmap_assert(ex)
192#define pmap_assert2(ex, fmt, args...)
193#endif
194
b0d623f7
A
195/* superpages */
196#ifdef __x86_64__
197#define SUPERPAGE_NBASEPAGES 512
198#else
199#define SUPERPAGE_NBASEPAGES 1 /* we don't support superpages on i386 */
200#endif
201
0c530ab8
A
202/*
203 * Atomic 64-bit store of a page table entry.
204 */
/*
 * pmap_store_pte: store `value` into the page table entry at `entryp`
 * as a single 64-bit update.
 *
 * i386 build: the store is performed with a cmpxchg8b retry loop.
 *   - entryp is passed in %edi ("D" constraint),
 *   - the low/high 32 bits of `value` are passed in %ebx/%ecx,
 *   - %edx:%eax is primed with the current contents of *entryp and the
 *     instruction is retried (jnz 1b) until the compare succeeds.
 *   eax, edx and memory are declared as clobbered.
 * Other builds: a plain 64-bit assignment is used.
 */
205static inline void
206pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
207{
b0d623f7 208#ifdef __i386__
0c530ab8
A
209 /*
210 * Load the new value into %ecx:%ebx
211 * Load the old value into %edx:%eax
212 * Compare-exchange-8bytes at address entryp (loaded in %edi)
213 * If the compare succeeds, the new value will have been stored.
214 * Otherwise, the old value changed and reloaded, so try again.
215 */
/* No asm outputs are declared; the "memory" clobber covers the write to *entryp. */
2d21ac55 216 __asm__ volatile(
0c530ab8
A
217 " movl (%0), %%eax \n\t"
218 " movl 4(%0), %%edx \n\t"
219 "1: \n\t"
220 " cmpxchg8b (%0) \n\t"
221 " jnz 1b"
222 :
223 : "D" (entryp),
224 "b" ((uint32_t)value),
225 "c" ((uint32_t)(value >> 32))
226 : "eax", "edx", "memory");
b0d623f7
A
227#else
228 /*
229 * In the 32-bit kernel a compare-and-exchange loop was
230 * required to provide atomicity. For K64, life is easier:
231 */
232 *entryp = value;
233#endif
0c530ab8
A
234}
235
0c530ab8
A
236/* in 64 bit spaces, the number of each type of page in the page tables */
237#define NPML4PGS (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
238#define NPDPTPGS (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
239#define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
240#define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))
241
b0d623f7 242#ifdef __i386__
0c530ab8
A
243/*
244 * The 64-bit kernel is remapped in uber-space which is at the base of
245 * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is,
246 * 512GB from the top of virtual space (or zero).
247 */
248#define KERNEL_UBER_PML4_INDEX 511
249#define KERNEL_UBER_BASE (0ULL - NBPML4)
250#define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32))
b0d623f7 251#else
316670eb 252#define KERNEL_PML4_INDEX 511
b0d623f7 253#define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */
316670eb 254#define KERNEL_PHYSMAP_PML4_INDEX 509 /* virtual to physical map */
b0d623f7
A
255#define KERNEL_BASE (0ULL - NBPML4)
256#define KERNEL_BASEMENT (KERNEL_BASE - NBPML4)
257#endif
0c530ab8 258
55e303ae 259#define VM_WIMG_COPYBACK VM_MEM_COHERENT
316670eb 260#define VM_WIMG_COPYBACKLW VM_WIMG_COPYBACK
9bccf70c 261#define VM_WIMG_DEFAULT VM_MEM_COHERENT
55e303ae
A
262/* ?? intel ?? */
263#define VM_WIMG_IO (VM_MEM_COHERENT | \
264 VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
265#define VM_WIMG_WTHRU (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
266/* write combining mode, aka store gather */
267#define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
316670eb 268#define VM_WIMG_INNERWBACK VM_MEM_COHERENT
0c530ab8
A
269/*
270 * Pte related macros
271 */
b0d623f7 272#ifdef __i386__
0c530ab8
A
/*
 * Compose a virtual address from page-table indices (i386 build).
 *   VADDR(pdi, pti)        - page-directory index and page-table index.
 *   VADDR64(pmi, pdi, pti) - PML4 index, page-directory index and
 *                            page-table index.
 * The PML4 index is widened to 64 bits before shifting because PML4SHIFT
 * exceeds the width of a 32-bit integer.
 * NOTE(review): the result is still cast to vm_offset_t; if that type is
 * 32 bits wide on this build the PML4 contribution is truncated - confirm
 * the intended result type with callers.
 */
#define VADDR(pdi, pti)		((vm_offset_t)(((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
#define VADDR64(pmi, pdi, pti)	((vm_offset_t)(((uint64_t)(pmi)<<PML4SHIFT)|((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
b0d623f7
A
275#else
276#define KVADDR(pmi, pdpi, pdi, pti) \
277 ((vm_offset_t) \
278 ((uint64_t) -1 << 47) | \
279 ((uint64_t)(pmi) << PML4SHIFT) | \
280 ((uint64_t)(pdpi) << PDPTSHIFT) | \
281 ((uint64_t)(pdi) << PDESHIFT) | \
282 ((uint64_t)(pti) << PTESHIFT))
283#endif
0c530ab8 284
1c79356b 285/*
91447636
A
286 * Size of Kernel address space. This is the number of page table pages
287 * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
288 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
1c79356b 289 */
91447636 290#ifndef KVA_PAGES
0c530ab8 291#define KVA_PAGES 1024
91447636 292#endif
1c79356b 293
91447636 294#ifndef NKPT
91447636 295#define NKPT 500 /* actual number of kernel page tables */
91447636
A
296#endif
297#ifndef NKPDE
298#define NKPDE (KVA_PAGES - 1) /* addressable number of page tables/pde's */
299#endif
300
0c530ab8 301
b0d623f7 302#ifdef __i386__
0c530ab8
A
303enum high_cpu_types {
304 HIGH_CPU_ISS0,
305 HIGH_CPU_ISS1,
306 HIGH_CPU_DESC,
307 HIGH_CPU_LDT_BEGIN,
308 HIGH_CPU_LDT_END = HIGH_CPU_LDT_BEGIN + (LDTSZ / 512) - 1,
309 HIGH_CPU_END
310};
311
312enum high_fixed_addresses {
313 HIGH_FIXED_TRAMPS, /* must be first */
314 HIGH_FIXED_TRAMPS_END,
315 HIGH_FIXED_GDT,
316 HIGH_FIXED_IDT,
317 HIGH_FIXED_LDT_BEGIN,
318 HIGH_FIXED_LDT_END = HIGH_FIXED_LDT_BEGIN + (LDTSZ / 512) - 1,
319 HIGH_FIXED_KTSS,
320 HIGH_FIXED_DFTSS,
321 HIGH_FIXED_DBTSS,
322 HIGH_FIXED_CPUS_BEGIN,
323 HIGH_FIXED_CPUS_END = HIGH_FIXED_CPUS_BEGIN + (HIGH_CPU_END * MAX_CPUS) - 1,
324};
325
326
327/* XXX64 below PTDI values need cleanup */
91447636
A
328/*
329 * The *PTDI values control the layout of virtual memory
330 *
331 */
0c530ab8 332#define KPTDI (0x000)/* start of kernel virtual pde's */
91447636
A
333#define PTDPTDI (0x7F4) /* ptd entry that points to ptd! */
334#define APTDPTDI (0x7F8) /* alt ptd entry that points to APTD */
0c530ab8 335#define UMAXPTDI (0x7F8) /* ptd entry for user space end */
6601e61a 336#define UMAXPTEOFF (NPTEPG) /* pte entry for user space end */
91447636
A
337
338#define KERNBASE VADDR(KPTDI,0)
1c79356b 339
0c530ab8
A
340/*
341 * Convert address offset to directory address
342 * containing the page table pointer - legacy
343 */
344/*#define pmap_pde(m,v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/
345
346#define HIGH_MEM_BASE ((uint32_t)( -NBPDE) ) /* shared gdt etc seg addr */ /* XXX64 ?? */
347#define pmap_index_to_virt(x) (HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT))
b0d623f7 348#endif
0c530ab8 349
1c79356b
A
350/*
351 * Convert address offset to page descriptor index
352 */
b0d623f7
A
353#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
354#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
355#define PMAP_INVALID_PDPTNUM (~0ULL)
91447636 356
0c530ab8
A
357#define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1))
358#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
359#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
6d2010ae 360
1c79356b
A
361
362/*
363 * Convert page descriptor index to user virtual address
364 */
365#define pdetova(a) ((vm_offset_t)(a) << PDESHIFT)
366
367/*
368 * Convert address offset to page table index
369 */
0c530ab8 370#define ptenum(a) (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
1c79356b 371
1c79356b
A
372/*
373 * Hardware pte bit definitions (to be used directly on the ptes
374 * without using the bit fields).
375 */
376
316670eb
A
377#define INTEL_PTE_VALID 0x00000001ULL
378#define INTEL_PTE_WRITE 0x00000002ULL
379#define INTEL_PTE_RW 0x00000002ULL
380#define INTEL_PTE_USER 0x00000004ULL
381#define INTEL_PTE_WTHRU 0x00000008ULL
382#define INTEL_PTE_NCACHE 0x00000010ULL
383#define INTEL_PTE_REF 0x00000020ULL
384#define INTEL_PTE_MOD 0x00000040ULL
385#define INTEL_PTE_PS 0x00000080ULL
386#define INTEL_PTE_PTA 0x00000080ULL
387#define INTEL_PTE_GLOBAL 0x00000100ULL
388#define INTEL_PTE_WIRED 0x00000200ULL
389#define INTEL_PDPTE_NESTED 0x00000400ULL
0c530ab8 390#define INTEL_PTE_PFN PG_FRAME
1c79356b 391
0c530ab8
A
392#define INTEL_PTE_NX (1ULL << 63)
393
394#define INTEL_PTE_INVALID 0
b7266188 395/* This is conservative, but suffices */
6d2010ae
A
396#define INTEL_PTE_RSVD ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
397
91447636
A
398#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */
399#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */
1c79356b
A
400#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
401
0c530ab8
A
/*
 * Build a valid page table entry for physical address `p`.
 * The frame bits come from pa_to_pte(p); the variants differ only in
 * which permission bits are OR-ed in:
 *   pte_kernel_rw - VALID | RW
 *   pte_kernel_ro - VALID
 *   pte_user_rw   - VALID | USER | RW
 *   pte_user_ro   - VALID | USER
 */
#define pte_kernel_rw(p)	((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p)	((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)		((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p)		((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
406
9bccf70c
A
407#define PMAP_DEFAULT_CACHE 0
408#define PMAP_INHIBIT_CACHE 1
409#define PMAP_GUARDED_CACHE 2
410#define PMAP_ACTIVATE_CACHE 4
411#define PMAP_NO_GUARD_CACHE 8
412
413
91447636
A
414#ifndef ASSEMBLER
415
416#include <sys/queue.h>
417
1c79356b 418/*
91447636
A
419 * Address of current and alternate address space page table maps
420 * and directories.
1c79356b 421 */
1c79356b 422
b0d623f7
A
423#ifdef __i386__
424extern pt_entry_t PTmap[], APTmap[], Upte;
425extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde;
426extern pmap_paddr_t lo_kernel_cr3;
427extern pdpt_entry_t *IdlePDPT64;
316670eb
A
428extern pdpt_entry_t IdlePDPT[];
429extern pml4_entry_t IdlePML4[];
b0d623f7
A
430#else
431extern pt_entry_t *PTmap;
316670eb
A
432extern pdpt_entry_t *IdlePDPT;
433extern pml4_entry_t *IdlePML4;
b0d623f7
A
434#endif
435extern boolean_t no_shared_cr3;
436extern addr64_t kernel64_cr3;
437extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */
b0d623f7
A
438
439extern uint64_t pmap_pv_hashlist_walks;
440extern uint64_t pmap_pv_hashlist_cnts;
441extern uint32_t pmap_pv_hashlist_max;
442extern uint32_t pmap_kernel_text_ps;
443
444#ifdef __i386__
91447636 445/*
b0d623f7 446 * ** i386 **
91447636
A
447 * virtual address to page table entry and
448 * to physical address. Likewise for alternate address space.
449 * Note: these work recursively, thus vtopte of a pte will give
450 * the corresponding pde that in turn maps it.
451 */
b0d623f7 452
0c530ab8 453#define vtopte(va) (PTmap + i386_btop((vm_offset_t)va))
b0d623f7
A
454#endif
455
316670eb 456
b0d623f7
A
457#ifdef __x86_64__
458#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
91447636 459
316670eb
A
460extern uint64_t physmap_base, physmap_max;
461
7ddcb079 462#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
7ddcb079
A
463
464static inline boolean_t physmap_enclosed(addr64_t a) {
465 return (a < (NPHYSMAP * GB));
466}
316670eb
A
467
468static inline void * PHYSMAP_PTOV_check(void *paddr) {
469 uint64_t pvaddr = (uint64_t)paddr + physmap_base;
470
471 if (__improbable(pvaddr >= physmap_max))
472 panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
473 pvaddr, physmap_base, physmap_max);
474
475 return (void *)pvaddr;
476}
477
478#define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
479
480/*
481 * For KASLR, we alias the master processor's IDT and GDT at fixed
482 * virtual addresses to defeat SIDT/SGDT address leakage.
483 */
484#define MASTER_IDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x0000)
485#define MASTER_GDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x1000)
486
487/*
488 * The low global vector page is mapped at a fixed alias also.
489 */
490#define LOWGLOBAL_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x2000)
491
492#endif /*__x86_64__ */
91447636 493
1c79356b
A
494typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
495 /* changed by other processors */
91447636
A
496#include <vm/vm_page.h>
497
498/*
499 * For each vm_page_t, there is a list of all currently
500 * valid virtual mappings of that page. An entry is
501 * a pv_entry_t; the list is the pv_table.
502 */
1c79356b
A
503
504struct pmap {
6d2010ae
A
505 decl_simple_lock_data(,lock) /* lock on map */
506 pmap_paddr_t pm_cr3; /* physical addr */
507 boolean_t pm_shared;
0c530ab8 508 pd_entry_t *dirbase; /* page directory pointer */
b0d623f7 509#ifdef __i386__
0c530ab8 510 pmap_paddr_t pdirbase; /* phys. address of dirbase */
6d2010ae 511 vm_offset_t pm_hold; /* true pdpt zalloc addr */
b0d623f7 512#endif
0c530ab8 513 vm_object_t pm_obj; /* object to hold pde's */
2d21ac55 514 task_map_t pm_task_map;
0c530ab8
A
515 pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */
516 pml4_entry_t *pm_pml4; /* VKA of top level */
517 vm_object_t pm_obj_pdpt; /* holds pdpt pages */
518 vm_object_t pm_obj_pml4; /* holds pml4 pages */
6d2010ae
A
519#define PMAP_PCID_MAX_CPUS (48) /* Must be a multiple of 8 */
520 pcid_t pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
521 volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
522 struct pmap_statistics stats; /* map statistics */
523 int ref_count; /* reference count */
524 int nx_enabled;
316670eb 525 ledger_t ledger; /* ledger tracking phys mappings */
1c79356b
A
526};
527
0c530ab8 528
b0d623f7 529#if NCOPY_WINDOWS > 0
0c530ab8
A
530#define PMAP_PDPT_FIRST_WINDOW 0
531#define PMAP_PDPT_NWINDOWS 4
532#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS)
533#define PMAP_PDE_NWINDOWS 4
534#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
535#define PMAP_PTE_NWINDOWS 4
536
537#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
538#define PMAP_WINDOW_SIZE 8
539#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)
540
91447636
A
541typedef struct {
542 pt_entry_t *prv_CMAP;
543 caddr_t prv_CADDR;
544} mapwindow_t;
545
546typedef struct cpu_pmap {
0c530ab8
A
547 int pdpt_window_index;
548 int pde_window_index;
549 int pte_window_index;
91447636 550 mapwindow_t mapwindow[PMAP_NWINDOWS];
91447636
A
551} cpu_pmap_t;
552
0c530ab8
A
553
554extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
2d21ac55 555extern void pmap_put_mapwindow(mapwindow_t *map);
b0d623f7 556#endif
91447636
A
557
558typedef struct pmap_memory_regions {
7ddcb079
A
559 ppnum_t base;
560 ppnum_t end;
561 ppnum_t alloc;
562 uint32_t type;
563 uint64_t attribute;
91447636
A
564} pmap_memory_region_t;
565
b0d623f7
A
566extern unsigned pmap_memory_region_count;
567extern unsigned pmap_memory_region_current;
91447636 568
0c530ab8 569#define PMAP_MEMORY_REGIONS_SIZE 128
91447636
A
570
571extern pmap_memory_region_t pmap_memory_regions[];
6d2010ae 572#include <i386/pmap_pcid.h>
91447636 573
b0d623f7
A
574static inline void
575set_dirbase(pmap_t tpmap, __unused thread_t thread) {
6d2010ae
A
576 int ccpu = cpu_number();
577 cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
578 cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
b0d623f7
A
579#ifndef __i386__
580 /*
581 * Switch cr3 if necessary
582 * - unless running with no_shared_cr3 debugging mode
583 * and we're not on the kernel's cr3 (after pre-empted copyio)
584 */
6d2010ae
A
585 if (__probable(!no_shared_cr3)) {
586 if (get_cr3_base() != tpmap->pm_cr3) {
587 if (pmap_pcid_ncpus) {
588 pmap_pcid_activate(tpmap, ccpu);
589 }
590 else
591 set_cr3_raw(tpmap->pm_cr3);
592 }
b0d623f7 593 } else {
6d2010ae
A
594 if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
595 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
b0d623f7
A
596 }
597#endif
1c79356b
A
598}
599
1c79356b
A
600/*
601 * External declarations for PMAP_ACTIVATE.
602 */
603
0c530ab8 604extern void process_pmap_updates(void);
1c79356b 605extern void pmap_update_interrupt(void);
1c79356b
A
606
607/*
608 * Machine dependent routines that are used only for i386/i486/i860.
609 */
1c79356b 610
0c530ab8 611extern addr64_t (kvtophys)(
1c79356b
A
612 vm_offset_t addr);
613
316670eb 614extern kern_return_t pmap_expand(
2d21ac55 615 pmap_t pmap,
316670eb
A
616 vm_map_offset_t addr,
617 unsigned int options);
6d2010ae 618#if !defined(__x86_64__)
1c79356b
A
619extern pt_entry_t *pmap_pte(
620 struct pmap *pmap,
0c530ab8
A
621 vm_map_offset_t addr);
622
623extern pd_entry_t *pmap_pde(
624 struct pmap *pmap,
625 vm_map_offset_t addr);
626
627extern pd_entry_t *pmap64_pde(
628 struct pmap *pmap,
629 vm_map_offset_t addr);
630
631extern pdpt_entry_t *pmap64_pdpt(
632 struct pmap *pmap,
633 vm_map_offset_t addr);
6d2010ae 634#endif
1c79356b
A
635extern vm_offset_t pmap_map(
636 vm_offset_t virt,
0c530ab8
A
637 vm_map_offset_t start,
638 vm_map_offset_t end,
639 vm_prot_t prot,
640 unsigned int flags);
1c79356b
A
641
642extern vm_offset_t pmap_map_bd(
643 vm_offset_t virt,
0c530ab8
A
644 vm_map_offset_t start,
645 vm_map_offset_t end,
646 vm_prot_t prot,
647 unsigned int flags);
1c79356b
A
648
649extern void pmap_bootstrap(
0c530ab8
A
650 vm_offset_t load_start,
651 boolean_t IA32e);
1c79356b
A
652
653extern boolean_t pmap_valid_page(
91447636 654 ppnum_t pn);
1c79356b
A
655
656extern int pmap_list_resident_pages(
657 struct pmap *pmap,
658 vm_offset_t *listp,
659 int space);
060df5ea 660extern void x86_filter_TLB_coherency_interrupts(boolean_t);
b0d623f7 661#ifdef __i386__
0c530ab8 662extern void pmap_commpage32_init(
91447636
A
663 vm_offset_t kernel,
664 vm_offset_t user,
665 int count);
0c530ab8
A
666extern void pmap_commpage64_init(
667 vm_offset_t kernel,
668 vm_map_offset_t user,
669 int count);
670
b0d623f7 671#endif
6d2010ae
A
672/*
673 * Get cache attributes (as pagetable bits) for the specified phys page
674 */
675extern unsigned pmap_get_cache_attributes(ppnum_t);
b0d623f7 676#if NCOPY_WINDOWS > 0
91447636
A
677extern struct cpu_pmap *pmap_cpu_alloc(
678 boolean_t is_boot_cpu);
679extern void pmap_cpu_free(
680 struct cpu_pmap *cp);
b0d623f7 681#endif
0c530ab8
A
682
683extern void pmap_map_block(
684 pmap_t pmap,
685 addr64_t va,
686 ppnum_t pa,
687 uint32_t size,
688 vm_prot_t prot,
689 int attr,
690 unsigned int flags);
91447636 691
1c79356b
A
692extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
693extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
55e303ae 694extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va);
1c79356b 695
2d21ac55 696extern void pmap_cpu_init(void);
0c530ab8 697extern void pmap_disable_NX(pmap_t pmap);
b0d623f7 698#ifdef __i386__
0c530ab8
A
699extern void pmap_set_4GB_pagezero(pmap_t pmap);
700extern void pmap_clear_4GB_pagezero(pmap_t pmap);
701extern void pmap_load_kernel_cr3(void);
702extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types);
703extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types);
704extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types);
705extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int);
706extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
b0d623f7 707#endif
0c530ab8 708
6d2010ae
A
709extern void pt_fake_zone_init(int);
710extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
711 uint64_t *, int *, int *, int *);
b7266188 712extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
0c530ab8 713
1c79356b
A
714/*
715 * Macros for speed.
716 */
717
1c79356b
A
718
719#include <kern/spl.h>
720
b0d623f7
A
721
/*
 * PMAP_ACTIVATE_MAP(map, thread): look up the pmap behind `map` with
 * vm_map_pmap() and hand it, together with `thread`, to set_dirbase().
 */
#define PMAP_ACTIVATE_MAP(map, thread)					\
{									\
	register pmap_t	tpmap = vm_map_pmap(map);			\
									\
	set_dirbase(tpmap, thread);					\
}
728
b0d623f7
A
/*
 * PMAP_DEACTIVATE_MAP(map, thread)
 *
 *   i386:   reload the kernel cr3 when the map's pmap has task map type
 *           TASK_MAP_64BIT_SHARED.
 *   x86_64: no state change; when PCIDs are in use (pmap_pcid_ncpus != 0),
 *           assert that the low 12 bits of cr3 match the PCID recorded for
 *           this pmap on the current cpu.
 *   other:  no-op.
 *
 * Every variant expands to a single do { } while (0) statement so the macro
 * behaves like a function call inside unbraced if/else bodies; invoke it
 * with a trailing semicolon.  `map` is parenthesized so that arbitrary
 * expressions (e.g. &some_map) may be passed.
 */
#ifdef __i386__
#define PMAP_DEACTIVATE_MAP(map, thread)				\
	do {								\
		if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \
			pmap_load_kernel_cr3();				\
	} while (0)
#elif defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread)				\
	do {								\
		pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple((map)->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE); \
	} while (0)
#else
#define PMAP_DEACTIVATE_MAP(map, thread)	do { } while (0)
#endif
1c79356b 739
b0d623f7 740#if defined(__i386__)
0c530ab8 741
1c79356b
A
742#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \
743 spl_t spl; \
0c530ab8
A
744 pt_entry_t *kpdp; \
745 pt_entry_t *updp; \
746 int i; \
747 int need_flush; \
748 \
749 need_flush = 0; \
750 spl = splhigh(); \
b0d623f7
A
751 if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) { \
752 PMAP_DEACTIVATE_MAP(old_th->map, old_th); \
753 PMAP_ACTIVATE_MAP(new_th->map, new_th); \
1c79356b 754 } \
0c530ab8
A
755 kpdp = current_cpu_datap()->cpu_copywindow_pdp; \
756 for (i = 0; i < NCOPY_WINDOWS; i++) { \
757 if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) { \
758 updp = pmap_pde(new_th->map->pmap, \
759 new_th->machine.copy_window[i].user_base);\
2d21ac55 760 pmap_store_pte(kpdp, updp ? *updp : 0); \
0c530ab8
A
761 } \
762 kpdp++; \
763 } \
764 splx(spl); \
765 if (new_th->machine.copyio_state == WINDOWS_OPENED) \
766 need_flush = 1; \
767 else \
768 new_th->machine.copyio_state = WINDOWS_DIRTY; \
769 if (new_th->machine.physwindow_pte) { \
2d21ac55
A
770 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), \
771 new_th->machine.physwindow_pte); \
0c530ab8
A
772 if (need_flush == 0) \
773 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\
774 } \
775 if (need_flush) \
776 flush_tlb(); \
1c79356b
A
777}
778
b0d623f7
A
779#else /* __x86_64__ */
780#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \
b0d623f7 781 \
6d2010ae 782 pmap_assert(ml_get_interrupts_enabled() == FALSE); \
b0d623f7
A
783 if (old_th->map != new_th->map) { \
784 PMAP_DEACTIVATE_MAP(old_th->map, old_th); \
785 PMAP_ACTIVATE_MAP(new_th->map, new_th); \
786 } \
b0d623f7
A
787}
788#endif /* __i386__ */
789
6d2010ae 790#if NCOPY_WINDOWS > 0
1c79356b
A
791#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
792 spl_t spl; \
793 \
0c530ab8 794 spl = splhigh(); \
b0d623f7 795 PMAP_DEACTIVATE_MAP(th->map, th); \
1c79356b 796 th->map = new_map; \
b0d623f7 797 PMAP_ACTIVATE_MAP(th->map, th); \
1c79356b 798 splx(spl); \
6d2010ae 799 inval_copy_windows(th); \
1c79356b 800}
b0d623f7
A
801#else
802#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
803 spl_t spl; \
804 \
805 spl = splhigh(); \
806 PMAP_DEACTIVATE_MAP(th->map, th); \
807 th->map = new_map; \
808 PMAP_ACTIVATE_MAP(th->map, th); \
809 splx(spl); \
810}
811#endif
1c79356b 812
0c530ab8
A
813/*
814 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
815 * Marking the current cpu's cr3 active once more involves clearing this bit.
816 * Note that valid page tables are page-aligned and so the bottom 12 bits
6d2010ae 817 * are normally zero, modulo PCID.
0c530ab8
A
818 * We can only mark the current cpu active/inactive but we can test any cpu.
819 */
/*
 * Accessors for the per-cpu cpu_active_cr3 and cpu_task_cr3 fields.
 *
 * Bit 0 of cpu_active_cr3 is used as an "inactive" flag:
 *   CPU_CR3_MARK_INACTIVE()  - set the flag on the current cpu
 *   CPU_CR3_MARK_ACTIVE()    - clear the flag on the current cpu
 *   CPU_CR3_IS_ACTIVE(cpu)   - true when the flag is clear on `cpu`
 *   CPU_GET_ACTIVE_CR3(cpu)  - cpu_active_cr3 of `cpu` with the flag masked off
 *   CPU_GET_TASK_CR3(cpu)    - cpu_task_cr3 of `cpu`
 *
 * Every macro expands to one fully parenthesized expression, so it can be
 * used as a statement or embedded in a larger expression without
 * operator-precedence surprises.
 */
#define CPU_CR3_MARK_INACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 |= 1)

#define CPU_CR3_MARK_ACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 &= ~1)

#define CPU_CR3_IS_ACTIVE(cpu)						\
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

#define CPU_GET_ACTIVE_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

#define CPU_GET_TASK_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_task_cr3)
834
835/*
836 * Mark this cpu idle, and remove it from the active set,
837 * since it is not actively using any pmap. Signal_cpus
838 * will notice that it is idle, and avoid signaling it,
839 * but will queue the update request for when the cpu
840 * becomes active.
841 */
842#if defined(__x86_64__)
843#define MARK_CPU_IDLE(my_cpu) { \
6d2010ae 844 assert(ml_get_interrupts_enabled() == FALSE); \
b0d623f7
A
845 CPU_CR3_MARK_INACTIVE(); \
846 __asm__ volatile("mfence"); \
b0d623f7
A
847}
848#else /* __i386__ native */
1c79356b 849#define MARK_CPU_IDLE(my_cpu) { \
6d2010ae 850 assert(ml_get_interrupts_enabled() == FALSE); \
1c79356b
A
851 /* \
852 * Mark this cpu idle, and remove it from the active set, \
853 * since it is not actively using any pmap. Signal_cpus \
854 * will notice that it is idle, and avoid signaling it, \
855 * but will queue the update request for when the cpu \
856 * becomes active. \
857 */ \
0c530ab8
A
858 if (!cpu_mode_is64bit() || no_shared_cr3) \
859 process_pmap_updates(); \
860 else \
861 pmap_load_kernel_cr3(); \
862 CPU_CR3_MARK_INACTIVE(); \
863 __asm__ volatile("mfence"); \
1c79356b 864}
b0d623f7 865#endif /* __i386__ */
1c79356b 866
0c530ab8 867#define MARK_CPU_ACTIVE(my_cpu) { \
6d2010ae 868 assert(ml_get_interrupts_enabled() == FALSE); \
1c79356b
A
869 /* \
870 * If a kernel_pmap update was requested while this cpu \
871 * was idle, process it as if we got the interrupt. \
872 * Before doing so, remove this cpu from the idle set. \
873 * Since we do not grab any pmap locks while we flush \
874 * our TLB, another cpu may start an update operation \
875 * before we finish. Removing this cpu from the idle \
876 * set assures that we will receive another update \
877 * interrupt if this happens. \
878 */ \
0c530ab8
A
879 CPU_CR3_MARK_ACTIVE(); \
880 __asm__ volatile("mfence"); \
55e303ae 881 \
0c530ab8
A
882 if (current_cpu_datap()->cpu_tlb_invalid) \
883 process_pmap_updates(); \
1c79356b
A
884}
885
1c79356b
A
886#define PMAP_CONTEXT(pmap, thread)
887
888#define pmap_kernel_va(VA) \
0c530ab8
A
889 ((((vm_offset_t) (VA)) >= vm_min_kernel_address) && \
890 (((vm_offset_t) (VA)) <= vm_max_kernel_address))
891
1c79356b
A
892
893#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
2d21ac55 894#define pmap_resident_max(pmap) ((pmap)->stats.resident_max)
1c79356b
A
895#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
896#define pmap_attribute(pmap,addr,size,attr,value) \
897 (KERN_INVALID_ADDRESS)
9bccf70c
A
898#define pmap_attribute_cache_sync(addr,size,attr,value) \
899 (KERN_INVALID_ADDRESS)
765c9de3 900
6d2010ae 901#define MACHINE_PMAP_IS_EMPTY 1
2d21ac55
A
902extern boolean_t pmap_is_empty(pmap_t pmap,
903 vm_map_offset_t start,
904 vm_map_offset_t end);
905
6d2010ae
A
906#define MACHINE_BOOTSTRAPPTD 1 /* Static bootstrap page-tables */
907
316670eb
A
908kern_return_t
909pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
b0d623f7 910
1c79356b
A
911#endif /* ASSEMBLER */
912
0c530ab8 913
1c79356b 914#endif /* _PMAP_MACHINE_ */
0c530ab8
A
915
916
917#endif /* KERNEL_PRIVATE */