]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pmap.h
xnu-1699.26.8.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap.h
CommitLineData
1c79356b 1/*
0c530ab8 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
8f6c56a5 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.h
61 *
62 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Machine-dependent structures for the physical map module.
66 */
0c530ab8 67#ifdef KERNEL_PRIVATE
1c79356b
A
68#ifndef _PMAP_MACHINE_
69#define _PMAP_MACHINE_ 1
70
71#ifndef ASSEMBLER
72
73#include <platforms.h>
1c79356b
A
74
75#include <mach/kern_return.h>
76#include <mach/machine/vm_types.h>
77#include <mach/vm_prot.h>
78#include <mach/vm_statistics.h>
79#include <mach/machine/vm_param.h>
80#include <kern/kern_types.h>
91447636 81#include <kern/thread.h>
1c79356b 82#include <kern/lock.h>
6d2010ae 83#include <mach/branch_predicates.h>
0c530ab8
A
84
85#include <i386/mp.h>
86#include <i386/proc_reg.h>
1c79356b 87
6d2010ae
A
88#include <i386/pal_routines.h>
89
1c79356b
A
90/*
91 * Define the generic in terms of the specific
92 */
93
94#define INTEL_PGBYTES I386_PGBYTES
95#define INTEL_PGSHIFT I386_PGSHIFT
96#define intel_btop(x) i386_btop(x)
97#define intel_ptob(x) i386_ptob(x)
98#define intel_round_page(x) i386_round_page(x)
99#define intel_trunc_page(x) i386_trunc_page(x)
100#define trunc_intel_to_vm(x) trunc_i386_to_vm(x)
101#define round_intel_to_vm(x) round_i386_to_vm(x)
102#define vm_to_intel(x) vm_to_i386(x)
103
104/*
105 * i386/i486/i860 Page Table Entry
106 */
107
1c79356b
A
108#endif /* ASSEMBLER */
109
91447636
A
110#define NPGPTD 4
111#define PDESHIFT 21
112#define PTEMASK 0x1ff
113#define PTEINDX 3
0c530ab8 114
91447636
A
115#define PTESHIFT 12
116
b0d623f7
A
117
118#define INITPT_SEG_BASE 0x100000
119#define INITGDT_SEG_BASE 0x106000
120#define SLEEP_SEG_BASE 0x107000
121
122#ifdef __x86_64__
123#define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL)
124#endif
125
91447636
A
126#define PDESIZE sizeof(pd_entry_t) /* for assembly files */
127#define PTESIZE sizeof(pt_entry_t) /* for assembly files */
128
129#define INTEL_OFFMASK (I386_PGBYTES - 1)
b0d623f7 130#define INTEL_LOFFMASK (I386_LPGBYTES - 1)
0c530ab8 131#define PG_FRAME 0x000FFFFFFFFFF000ULL
91447636 132#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
0c530ab8 133#define NPTDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
1c79356b 134
91447636
A
135#define NBPTD (NPGPTD << PAGE_SHIFT)
136#define NPDEPTD (NBPTD / (sizeof (pd_entry_t)))
137#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
138#define NBPDE (1 << PDESHIFT)
139#define PDEMASK (NBPDE - 1)
9bccf70c 140
b0d623f7
A
141#define PTE_PER_PAGE 512 /* number of PTE's per page on any level */
142
0c530ab8
A
143 /* cleanly define parameters for all the page table levels */
144typedef uint64_t pml4_entry_t;
145#define NPML4PG (PAGE_SIZE/(sizeof (pml4_entry_t)))
146#define PML4SHIFT 39
147#define PML4PGSHIFT 9
148#define NBPML4 (1ULL << PML4SHIFT)
149#define PML4MASK (NBPML4-1)
150#define PML4_ENTRY_NULL ((pml4_entry_t *) 0)
151
152typedef uint64_t pdpt_entry_t;
153#define NPDPTPG (PAGE_SIZE/(sizeof (pdpt_entry_t)))
154#define PDPTSHIFT 30
155#define PDPTPGSHIFT 9
156#define NBPDPT (1 << PDPTSHIFT)
157#define PDPTMASK (NBPDPT-1)
158#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
159
160typedef uint64_t pd_entry_t;
161#define NPDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
162#define PDSHIFT 21
163#define PDPGSHIFT 9
164#define NBPD (1 << PDSHIFT)
165#define PDMASK (NBPD-1)
166#define PD_ENTRY_NULL ((pd_entry_t *) 0)
167
168typedef uint64_t pt_entry_t;
169#define NPTPG (PAGE_SIZE/(sizeof (pt_entry_t)))
170#define PTSHIFT 12
171#define PTPGSHIFT 9
172#define NBPT (1 << PTSHIFT)
173#define PTMASK (NBPT-1)
174#define PT_ENTRY_NULL ((pt_entry_t *) 0)
175
176typedef uint64_t pmap_paddr_t;
177
6d2010ae
A
178#if DEBUG
179#define PMAP_ASSERT 1
180#endif
181#if PMAP_ASSERT
182#define pmap_assert(ex) ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))
183
184#define pmap_assert2(ex, fmt, args...) \
185 do { \
186 if (!(ex)) { \
187 kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
188 panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
189 } \
190 } while(0)
191#else
192#define pmap_assert(ex)
193#define pmap_assert2(ex, fmt, args...)
194#endif
195
b0d623f7
A
196/* superpages */
197#ifdef __x86_64__
198#define SUPERPAGE_NBASEPAGES 512
199#else
200#define SUPERPAGE_NBASEPAGES 1 /* we don't support superpages on i386 */
201#endif
202
0c530ab8
A
203/*
204 * Atomic 64-bit store of a page table entry.
205 */
206static inline void
207pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
208{
b0d623f7 209#ifdef __i386__
0c530ab8
A
210 /*
211 * Load the new value into %ecx:%ebx
212 * Load the old value into %edx:%eax
213 * Compare-exchange-8bytes at address entryp (loaded in %edi)
214 * If the compare succeeds, the new value will have been stored.
215 * Otherwise, the old value changed and reloaded, so try again.
216 */
2d21ac55 217 __asm__ volatile(
0c530ab8
A
218 " movl (%0), %%eax \n\t"
219 " movl 4(%0), %%edx \n\t"
220 "1: \n\t"
221 " cmpxchg8b (%0) \n\t"
222 " jnz 1b"
223 :
224 : "D" (entryp),
225 "b" ((uint32_t)value),
226 "c" ((uint32_t)(value >> 32))
227 : "eax", "edx", "memory");
b0d623f7
A
228#else
229 /*
230 * In the 32-bit kernel a compare-and-exchange loop was
231 * required to provide atomicity. For K64, life is easier:
232 */
233 *entryp = value;
234#endif
0c530ab8
A
235}
236
237/*
238 * Atomic 64-bit compare and exchange of a page table entry.
239 */
240static inline boolean_t
241pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
242{
243 boolean_t ret;
244
b0d623f7 245#ifdef __i386__
0c530ab8
A
246 /*
247 * Load the old value into %edx:%eax
248 * Load the new value into %ecx:%ebx
249 * Compare-exchange-8bytes at address entryp (loaded in %edi)
250 * If the compare succeeds, the new value is stored, return TRUE.
251 * Otherwise, no swap is made, return FALSE.
252 */
253 asm volatile(
254 " lock; cmpxchg8b (%1) \n\t"
255 " setz %%al \n\t"
256 " movzbl %%al,%0"
257 : "=a" (ret)
258 : "D" (entryp),
259 "a" ((uint32_t)old),
260 "d" ((uint32_t)(old >> 32)),
261 "b" ((uint32_t)new),
262 "c" ((uint32_t)(new >> 32))
263 : "memory");
b0d623f7
A
264#else
265 /*
266 * Load the old value into %rax
267 * Load the new value into another register
268 * Compare-exchange-quad at address entryp
269 * If the compare succeeds, the new value is stored, return TRUE.
270 * Otherwise, no swap is made, return FALSE.
271 */
272 asm volatile(
273 " lock; cmpxchgq %2,(%3) \n\t"
274 " setz %%al \n\t"
275 " movzbl %%al,%0"
276 : "=a" (ret)
277 : "a" (old),
278 "r" (new),
279 "r" (entryp)
280 : "memory");
281#endif
0c530ab8
A
282 return ret;
283}
284
285#define pmap_update_pte(entryp, old, new) \
286 while (!pmap_cmpx_pte((entryp), (old), (new)))
287
2d21ac55 288
0c530ab8
A
289/* in 64 bit spaces, the number of each type of page in the page tables */
290#define NPML4PGS (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
291#define NPDPTPGS (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
292#define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
293#define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))
294
b0d623f7 295#ifdef __i386__
0c530ab8
A
296/*
297 * The 64-bit kernel is remapped in uber-space which is at the base
298 * of the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is,
299 * 512GB from the top of virtual space (or zero).
300 */
301#define KERNEL_UBER_PML4_INDEX 511
302#define KERNEL_UBER_BASE (0ULL - NBPML4)
303#define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32))
b0d623f7
A
304#else
305#define KERNEL_PML4_INDEX 511
306#define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */
307#define KERNEL_PHYSMAP_INDEX 509 /* virtual to physical map */
308#define KERNEL_BASE (0ULL - NBPML4)
309#define KERNEL_BASEMENT (KERNEL_BASE - NBPML4)
310#endif
0c530ab8 311
55e303ae 312#define VM_WIMG_COPYBACK VM_MEM_COHERENT
9bccf70c 313#define VM_WIMG_DEFAULT VM_MEM_COHERENT
55e303ae
A
314/* ?? intel ?? */
315#define VM_WIMG_IO (VM_MEM_COHERENT | \
316 VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
317#define VM_WIMG_WTHRU (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
318/* write combining mode, aka store gather */
319#define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
9bccf70c 320
0c530ab8
A
/*
 * Pte related macros
 *
 * Each macro assembles a virtual address from per-level page table
 * indices by shifting every index into place and OR-ing the results.
 */
#ifdef __i386__
/* VADDR: address built from page-directory index pdi and page-table index pti. */
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
/*
 * VADDR64: address built from PML4 index pmi, page-directory index pdi
 * and page-table index pti.  Every index is widened to 64 bits before it
 * is shifted, because PML4SHIFT is larger than the width of an int.
 * NOTE(review): the result is left as uint64_t so the PML4 bits survive
 * even if vm_offset_t is narrower on this build - confirm the intended
 * result type.
 */
#define VADDR64(pmi, pdi, pti) \
	((uint64_t) \
	 (((uint64_t)(pmi) << PML4SHIFT) | \
	  ((uint64_t)(pdi) << PDESHIFT) | \
	  ((uint64_t)(pti) << PTESHIFT)))
#else
/*
 * KVADDR: address built from PML4 (pmi), PDPT (pdpi), page-directory (pdi)
 * and page-table (pti) indices, with bits 47 and above forced to one.
 */
#define KVADDR(pmi, pdpi, pdi, pti) \
	((vm_offset_t) \
	 ((uint64_t) -1 << 47) | \
	 ((uint64_t)(pmi) << PML4SHIFT) | \
	 ((uint64_t)(pdpi) << PDPTSHIFT) | \
	 ((uint64_t)(pdi) << PDESHIFT) | \
	 ((uint64_t)(pti) << PTESHIFT))
#endif
0c530ab8 336
1c79356b 337/*
91447636
A
338 * Size of Kernel address space. This is the number of page table pages
339 * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
340 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
1c79356b 341 */
91447636 342#ifndef KVA_PAGES
0c530ab8 343#define KVA_PAGES 1024
91447636 344#endif
1c79356b 345
91447636 346#ifndef NKPT
91447636 347#define NKPT 500 /* actual number of kernel page tables */
91447636
A
348#endif
349#ifndef NKPDE
350#define NKPDE (KVA_PAGES - 1) /* addressable number of page tables/pde's */
351#endif
352
0c530ab8 353
b0d623f7 354#ifdef __i386__
0c530ab8
A
355enum high_cpu_types {
356 HIGH_CPU_ISS0,
357 HIGH_CPU_ISS1,
358 HIGH_CPU_DESC,
359 HIGH_CPU_LDT_BEGIN,
360 HIGH_CPU_LDT_END = HIGH_CPU_LDT_BEGIN + (LDTSZ / 512) - 1,
361 HIGH_CPU_END
362};
363
364enum high_fixed_addresses {
365 HIGH_FIXED_TRAMPS, /* must be first */
366 HIGH_FIXED_TRAMPS_END,
367 HIGH_FIXED_GDT,
368 HIGH_FIXED_IDT,
369 HIGH_FIXED_LDT_BEGIN,
370 HIGH_FIXED_LDT_END = HIGH_FIXED_LDT_BEGIN + (LDTSZ / 512) - 1,
371 HIGH_FIXED_KTSS,
372 HIGH_FIXED_DFTSS,
373 HIGH_FIXED_DBTSS,
374 HIGH_FIXED_CPUS_BEGIN,
375 HIGH_FIXED_CPUS_END = HIGH_FIXED_CPUS_BEGIN + (HIGH_CPU_END * MAX_CPUS) - 1,
376};
377
378
379/* XXX64 below PTDI values need cleanup */
91447636
A
380/*
381 * The *PTDI values control the layout of virtual memory
382 *
383 */
0c530ab8 384#define KPTDI (0x000)/* start of kernel virtual pde's */
91447636
A
385#define PTDPTDI (0x7F4) /* ptd entry that points to ptd! */
386#define APTDPTDI (0x7F8) /* alt ptd entry that points to APTD */
0c530ab8 387#define UMAXPTDI (0x7F8) /* ptd entry for user space end */
6601e61a 388#define UMAXPTEOFF (NPTEPG) /* pte entry for user space end */
91447636
A
389
390#define KERNBASE VADDR(KPTDI,0)
1c79356b 391
0c530ab8
A
392/*
393 * Convert address offset to directory address
394 * containing the page table pointer - legacy
395 */
396/*#define pmap_pde(m,v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/
397
398#define HIGH_MEM_BASE ((uint32_t)( -NBPDE) ) /* shared gdt etc seg addr */ /* XXX64 ?? */
399#define pmap_index_to_virt(x) (HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT))
b0d623f7 400#endif
0c530ab8 401
1c79356b
A
402/*
403 * Convert address offset to page descriptor index
404 */
b0d623f7
A
405#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
406#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
407#define PMAP_INVALID_PDPTNUM (~0ULL)
91447636 408
0c530ab8
A
409#define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1))
410#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
411#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
6d2010ae 412
1c79356b
A
413
414/*
415 * Convert page descriptor index to user virtual address
416 */
417#define pdetova(a) ((vm_offset_t)(a) << PDESHIFT)
418
419/*
420 * Convert address offset to page table index
421 */
0c530ab8 422#define ptenum(a) (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
1c79356b 423
1c79356b
A
424/*
425 * Hardware pte bit definitions (to be used directly on the ptes
426 * without using the bit fields).
427 */
428
429#define INTEL_PTE_VALID 0x00000001
430#define INTEL_PTE_WRITE 0x00000002
91447636 431#define INTEL_PTE_RW 0x00000002
1c79356b
A
432#define INTEL_PTE_USER 0x00000004
433#define INTEL_PTE_WTHRU 0x00000008
434#define INTEL_PTE_NCACHE 0x00000010
435#define INTEL_PTE_REF 0x00000020
436#define INTEL_PTE_MOD 0x00000040
b0d623f7
A
437#define INTEL_PTE_PS 0x00000080
438#define INTEL_PTE_PTA 0x00000080
439#define INTEL_PTE_GLOBAL 0x00000100
1c79356b 440#define INTEL_PTE_WIRED 0x00000200
b0d623f7 441#define INTEL_PDPTE_NESTED 0x00000400
0c530ab8 442#define INTEL_PTE_PFN PG_FRAME
1c79356b 443
0c530ab8
A
444#define INTEL_PTE_NX (1ULL << 63)
445
446#define INTEL_PTE_INVALID 0
b7266188 447/* This is conservative, but suffices */
6d2010ae
A
448#define INTEL_PTE_RSVD ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
449
91447636
A
/*
 * Conversions between physical addresses and page table entries.
 * pa_to_pte/pte_to_pa keep only the bits selected by INTEL_PTE_PFN.
 */
#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */
/* Advance p in place by INTEL_OFFMASK+1 bytes; p must be an lvalue. */
#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)

/*
 * Build a valid pte for physical address p:
 *   pte_kernel_rw  VALID | RW
 *   pte_kernel_ro  VALID
 *   pte_user_rw    VALID | USER | RW
 *   pte_user_ro    VALID | USER
 */
#define pte_kernel_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
458
9bccf70c
A
459#define PMAP_DEFAULT_CACHE 0
460#define PMAP_INHIBIT_CACHE 1
461#define PMAP_GUARDED_CACHE 2
462#define PMAP_ACTIVATE_CACHE 4
463#define PMAP_NO_GUARD_CACHE 8
464
465
91447636
A
466#ifndef ASSEMBLER
467
468#include <sys/queue.h>
469
1c79356b 470/*
91447636
A
471 * Address of current and alternate address space page table maps
472 * and directories.
1c79356b 473 */
1c79356b 474
b0d623f7
A
475#ifdef __i386__
476extern pt_entry_t PTmap[], APTmap[], Upte;
477extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde;
478extern pmap_paddr_t lo_kernel_cr3;
479extern pdpt_entry_t *IdlePDPT64;
480#else
481extern pt_entry_t *PTmap;
482#endif
483extern boolean_t no_shared_cr3;
484extern addr64_t kernel64_cr3;
485extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */
486extern pdpt_entry_t IdlePDPT[];
487extern pml4_entry_t IdlePML4[];
488
489extern uint64_t pmap_pv_hashlist_walks;
490extern uint64_t pmap_pv_hashlist_cnts;
491extern uint32_t pmap_pv_hashlist_max;
492extern uint32_t pmap_kernel_text_ps;
493
494#ifdef __i386__
91447636 495/*
b0d623f7 496 * ** i386 **
91447636
A
497 * virtual address to page table entry and
498 * to physical address. Likewise for alternate address space.
499 * Note: these work recursively, thus vtopte of a pte will give
500 * the corresponding pde that in turn maps it.
501 */
b0d623f7 502
0c530ab8 503#define vtopte(va) (PTmap + i386_btop((vm_offset_t)va))
b0d623f7
A
504#endif
505
506#ifdef __x86_64__
507#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
91447636 508
b0d623f7 509#define PHYSMAP_BASE KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0)
7ddcb079 510#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
b0d623f7 511#define PHYSMAP_PTOV(x) ((void *)(((uint64_t)(x)) + PHYSMAP_BASE))
7ddcb079
A
512
513static inline boolean_t physmap_enclosed(addr64_t a) {
514 return (a < (NPHYSMAP * GB));
515}
b0d623f7 516#endif
91447636 517
1c79356b
A
518typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
519 /* changed by other processors */
91447636
A
520struct md_page {
521 int pv_list_count;
522 TAILQ_HEAD(,pv_entry) pv_list;
523};
524
525#include <vm/vm_page.h>
526
527/*
528 * For each vm_page_t, there is a list of all currently
529 * valid virtual mappings of that page. An entry is
530 * a pv_entry_t; the list is the pv_table.
531 */
1c79356b
A
532
533struct pmap {
6d2010ae
A
534 decl_simple_lock_data(,lock) /* lock on map */
535 pmap_paddr_t pm_cr3; /* physical addr */
536 boolean_t pm_shared;
0c530ab8 537 pd_entry_t *dirbase; /* page directory pointer */
b0d623f7 538#ifdef __i386__
0c530ab8 539 pmap_paddr_t pdirbase; /* phys. address of dirbase */
6d2010ae 540 vm_offset_t pm_hold; /* true pdpt zalloc addr */
b0d623f7 541#endif
0c530ab8 542 vm_object_t pm_obj; /* object to hold pde's */
2d21ac55 543 task_map_t pm_task_map;
0c530ab8
A
544 pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */
545 pml4_entry_t *pm_pml4; /* VKA of top level */
546 vm_object_t pm_obj_pdpt; /* holds pdpt pages */
547 vm_object_t pm_obj_pml4; /* holds pml4 pages */
6d2010ae
A
548#define PMAP_PCID_MAX_CPUS (48) /* Must be a multiple of 8 */
549 pcid_t pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
550 volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
551 struct pmap_statistics stats; /* map statistics */
552 int ref_count; /* reference count */
553 int nx_enabled;
1c79356b
A
554};
555
0c530ab8 556
b0d623f7 557#if NCOPY_WINDOWS > 0
0c530ab8
A
558#define PMAP_PDPT_FIRST_WINDOW 0
559#define PMAP_PDPT_NWINDOWS 4
560#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS)
561#define PMAP_PDE_NWINDOWS 4
562#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
563#define PMAP_PTE_NWINDOWS 4
564
565#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
566#define PMAP_WINDOW_SIZE 8
567#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)
568
91447636
A
569typedef struct {
570 pt_entry_t *prv_CMAP;
571 caddr_t prv_CADDR;
572} mapwindow_t;
573
574typedef struct cpu_pmap {
0c530ab8
A
575 int pdpt_window_index;
576 int pde_window_index;
577 int pte_window_index;
91447636 578 mapwindow_t mapwindow[PMAP_NWINDOWS];
91447636
A
579} cpu_pmap_t;
580
0c530ab8
A
581
582extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
2d21ac55 583extern void pmap_put_mapwindow(mapwindow_t *map);
b0d623f7 584#endif
91447636
A
585
586typedef struct pmap_memory_regions {
7ddcb079
A
587 ppnum_t base;
588 ppnum_t end;
589 ppnum_t alloc;
590 uint32_t type;
591 uint64_t attribute;
91447636
A
592} pmap_memory_region_t;
593
b0d623f7
A
594extern unsigned pmap_memory_region_count;
595extern unsigned pmap_memory_region_current;
91447636 596
0c530ab8 597#define PMAP_MEMORY_REGIONS_SIZE 128
91447636
A
598
599extern pmap_memory_region_t pmap_memory_regions[];
6d2010ae 600#include <i386/pmap_pcid.h>
91447636 601
b0d623f7
A
602static inline void
603set_dirbase(pmap_t tpmap, __unused thread_t thread) {
6d2010ae
A
604 int ccpu = cpu_number();
605 cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
606 cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
b0d623f7
A
607#ifndef __i386__
608 /*
609 * Switch cr3 if necessary
610 * - unless running with no_shared_cr3 debugging mode
611 * and we're not on the kernel's cr3 (after pre-empted copyio)
612 */
6d2010ae
A
613 if (__probable(!no_shared_cr3)) {
614 if (get_cr3_base() != tpmap->pm_cr3) {
615 if (pmap_pcid_ncpus) {
616 pmap_pcid_activate(tpmap, ccpu);
617 }
618 else
619 set_cr3_raw(tpmap->pm_cr3);
620 }
b0d623f7 621 } else {
6d2010ae
A
622 if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
623 set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
b0d623f7
A
624 }
625#endif
1c79356b
A
626}
627
1c79356b
A
628/*
629 * External declarations for PMAP_ACTIVATE.
630 */
631
0c530ab8 632extern void process_pmap_updates(void);
1c79356b 633extern void pmap_update_interrupt(void);
1c79356b
A
634
635/*
636 * Machine dependent routines that are used only for i386/i486/i860.
637 */
1c79356b 638
0c530ab8 639extern addr64_t (kvtophys)(
1c79356b
A
640 vm_offset_t addr);
641
2d21ac55
A
642extern void pmap_expand(
643 pmap_t pmap,
644 vm_map_offset_t addr);
6d2010ae 645#if !defined(__x86_64__)
1c79356b
A
646extern pt_entry_t *pmap_pte(
647 struct pmap *pmap,
0c530ab8
A
648 vm_map_offset_t addr);
649
650extern pd_entry_t *pmap_pde(
651 struct pmap *pmap,
652 vm_map_offset_t addr);
653
654extern pd_entry_t *pmap64_pde(
655 struct pmap *pmap,
656 vm_map_offset_t addr);
657
658extern pdpt_entry_t *pmap64_pdpt(
659 struct pmap *pmap,
660 vm_map_offset_t addr);
6d2010ae 661#endif
1c79356b
A
662extern vm_offset_t pmap_map(
663 vm_offset_t virt,
0c530ab8
A
664 vm_map_offset_t start,
665 vm_map_offset_t end,
666 vm_prot_t prot,
667 unsigned int flags);
1c79356b
A
668
669extern vm_offset_t pmap_map_bd(
670 vm_offset_t virt,
0c530ab8
A
671 vm_map_offset_t start,
672 vm_map_offset_t end,
673 vm_prot_t prot,
674 unsigned int flags);
1c79356b
A
675
676extern void pmap_bootstrap(
0c530ab8
A
677 vm_offset_t load_start,
678 boolean_t IA32e);
1c79356b
A
679
680extern boolean_t pmap_valid_page(
91447636 681 ppnum_t pn);
1c79356b
A
682
683extern int pmap_list_resident_pages(
684 struct pmap *pmap,
685 vm_offset_t *listp,
686 int space);
060df5ea 687extern void x86_filter_TLB_coherency_interrupts(boolean_t);
b0d623f7 688#ifdef __i386__
0c530ab8 689extern void pmap_commpage32_init(
91447636
A
690 vm_offset_t kernel,
691 vm_offset_t user,
692 int count);
0c530ab8
A
693extern void pmap_commpage64_init(
694 vm_offset_t kernel,
695 vm_map_offset_t user,
696 int count);
697
b0d623f7 698#endif
6d2010ae
A
699/*
700 * Get cache attributes (as pagetable bits) for the specified phys page
701 */
702extern unsigned pmap_get_cache_attributes(ppnum_t);
b0d623f7 703#if NCOPY_WINDOWS > 0
91447636
A
704extern struct cpu_pmap *pmap_cpu_alloc(
705 boolean_t is_boot_cpu);
706extern void pmap_cpu_free(
707 struct cpu_pmap *cp);
b0d623f7 708#endif
0c530ab8
A
709
710extern void pmap_map_block(
711 pmap_t pmap,
712 addr64_t va,
713 ppnum_t pa,
714 uint32_t size,
715 vm_prot_t prot,
716 int attr,
717 unsigned int flags);
91447636 718
1c79356b
A
719extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
720extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
55e303ae 721extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va);
1c79356b 722
2d21ac55 723extern void pmap_cpu_init(void);
0c530ab8 724extern void pmap_disable_NX(pmap_t pmap);
b0d623f7 725#ifdef __i386__
0c530ab8
A
726extern void pmap_set_4GB_pagezero(pmap_t pmap);
727extern void pmap_clear_4GB_pagezero(pmap_t pmap);
728extern void pmap_load_kernel_cr3(void);
729extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types);
730extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types);
731extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types);
732extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int);
733extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
b0d623f7 734#endif
0c530ab8 735
6d2010ae
A
736extern void pt_fake_zone_init(int);
737extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
738 uint64_t *, int *, int *, int *);
b7266188 739extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
0c530ab8 740
1c79356b
A
741/*
742 * Macros for speed.
743 */
744
1c79356b
A
745
746#include <kern/spl.h>
747
b0d623f7
A
748
749#define PMAP_ACTIVATE_MAP(map, thread) { \
55e303ae 750 register pmap_t tpmap; \
0c530ab8
A
751 \
752 tpmap = vm_map_pmap(map); \
b0d623f7 753 set_dirbase(tpmap, thread); \
1c79356b
A
754}
755
b0d623f7
A
756#ifdef __i386__
757#define PMAP_DEACTIVATE_MAP(map, thread) \
2d21ac55
A
758 if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \
759 pmap_load_kernel_cr3();
6d2010ae
A
760#elif defined(__x86_64__)
761#define PMAP_DEACTIVATE_MAP(map, thread) \
762 pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE);
b0d623f7 763#else
6d2010ae 764#define PMAP_DEACTIVATE_MAP(map, thread)
b0d623f7 765#endif
1c79356b 766
b0d623f7 767#if defined(__i386__)
0c530ab8 768
1c79356b
A
769#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \
770 spl_t spl; \
0c530ab8
A
771 pt_entry_t *kpdp; \
772 pt_entry_t *updp; \
773 int i; \
774 int need_flush; \
775 \
776 need_flush = 0; \
777 spl = splhigh(); \
b0d623f7
A
778 if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) { \
779 PMAP_DEACTIVATE_MAP(old_th->map, old_th); \
780 PMAP_ACTIVATE_MAP(new_th->map, new_th); \
1c79356b 781 } \
0c530ab8
A
782 kpdp = current_cpu_datap()->cpu_copywindow_pdp; \
783 for (i = 0; i < NCOPY_WINDOWS; i++) { \
784 if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) { \
785 updp = pmap_pde(new_th->map->pmap, \
786 new_th->machine.copy_window[i].user_base);\
2d21ac55 787 pmap_store_pte(kpdp, updp ? *updp : 0); \
0c530ab8
A
788 } \
789 kpdp++; \
790 } \
791 splx(spl); \
792 if (new_th->machine.copyio_state == WINDOWS_OPENED) \
793 need_flush = 1; \
794 else \
795 new_th->machine.copyio_state = WINDOWS_DIRTY; \
796 if (new_th->machine.physwindow_pte) { \
2d21ac55
A
797 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), \
798 new_th->machine.physwindow_pte); \
0c530ab8
A
799 if (need_flush == 0) \
800 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\
801 } \
802 if (need_flush) \
803 flush_tlb(); \
1c79356b
A
804}
805
b0d623f7
A
806#else /* __x86_64__ */
807#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \
b0d623f7 808 \
6d2010ae 809 pmap_assert(ml_get_interrupts_enabled() == FALSE); \
b0d623f7
A
810 if (old_th->map != new_th->map) { \
811 PMAP_DEACTIVATE_MAP(old_th->map, old_th); \
812 PMAP_ACTIVATE_MAP(new_th->map, new_th); \
813 } \
b0d623f7
A
814}
815#endif /* __i386__ */
816
6d2010ae 817#if NCOPY_WINDOWS > 0
1c79356b
A
818#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
819 spl_t spl; \
820 \
0c530ab8 821 spl = splhigh(); \
b0d623f7 822 PMAP_DEACTIVATE_MAP(th->map, th); \
1c79356b 823 th->map = new_map; \
b0d623f7 824 PMAP_ACTIVATE_MAP(th->map, th); \
1c79356b 825 splx(spl); \
6d2010ae 826 inval_copy_windows(th); \
1c79356b 827}
b0d623f7
A
828#else
829#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
830 spl_t spl; \
831 \
832 spl = splhigh(); \
833 PMAP_DEACTIVATE_MAP(th->map, th); \
834 th->map = new_map; \
835 PMAP_ACTIVATE_MAP(th->map, th); \
836 splx(spl); \
837}
838#endif
1c79356b 839
0c530ab8
A
840/*
841 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
842 * Marking the current cpu's cr3 active once more involves clearing this bit.
843 * Note that valid page tables are page-aligned and so the bottom 12 bits
6d2010ae 844 * are normally zero, modulo PCID.
0c530ab8
A
845 * We can only mark the current cpu active/inactive but we can test any cpu.
846 */
847#define CPU_CR3_MARK_INACTIVE() \
848 current_cpu_datap()->cpu_active_cr3 |= 1
849
850#define CPU_CR3_MARK_ACTIVE() \
851 current_cpu_datap()->cpu_active_cr3 &= ~1
852
853#define CPU_CR3_IS_ACTIVE(cpu) \
854 ((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)
855
2d21ac55
A
856#define CPU_GET_ACTIVE_CR3(cpu) \
857 (cpu_datap(cpu)->cpu_active_cr3 & ~1)
0c530ab8 858
b0d623f7
A
859#define CPU_GET_TASK_CR3(cpu) \
860 (cpu_datap(cpu)->cpu_task_cr3)
861
862/*
863 * Mark this cpu idle, and remove it from the active set,
864 * since it is not actively using any pmap. Signal_cpus
865 * will notice that it is idle, and avoid signaling it,
866 * but will queue the update request for when the cpu
867 * becomes active.
868 */
869#if defined(__x86_64__)
870#define MARK_CPU_IDLE(my_cpu) { \
6d2010ae 871 assert(ml_get_interrupts_enabled() == FALSE); \
b0d623f7
A
872 CPU_CR3_MARK_INACTIVE(); \
873 __asm__ volatile("mfence"); \
b0d623f7
A
874}
875#else /* __i386__ native */
1c79356b 876#define MARK_CPU_IDLE(my_cpu) { \
6d2010ae 877 assert(ml_get_interrupts_enabled() == FALSE); \
1c79356b
A
878 /* \
879 * Mark this cpu idle, and remove it from the active set, \
880 * since it is not actively using any pmap. Signal_cpus \
881 * will notice that it is idle, and avoid signaling it, \
882 * but will queue the update request for when the cpu \
883 * becomes active. \
884 */ \
0c530ab8
A
885 if (!cpu_mode_is64bit() || no_shared_cr3) \
886 process_pmap_updates(); \
887 else \
888 pmap_load_kernel_cr3(); \
889 CPU_CR3_MARK_INACTIVE(); \
890 __asm__ volatile("mfence"); \
1c79356b 891}
b0d623f7 892#endif /* __i386__ */
1c79356b 893
0c530ab8 894#define MARK_CPU_ACTIVE(my_cpu) { \
6d2010ae 895 assert(ml_get_interrupts_enabled() == FALSE); \
1c79356b
A
896 /* \
897 * If a kernel_pmap update was requested while this cpu \
898 * was idle, process it as if we got the interrupt. \
899 * Before doing so, remove this cpu from the idle set. \
900 * Since we do not grab any pmap locks while we flush \
901 * our TLB, another cpu may start an update operation \
902 * before we finish. Removing this cpu from the idle \
903 * set assures that we will receive another update \
904 * interrupt if this happens. \
905 */ \
0c530ab8
A
906 CPU_CR3_MARK_ACTIVE(); \
907 __asm__ volatile("mfence"); \
55e303ae 908 \
0c530ab8
A
909 if (current_cpu_datap()->cpu_tlb_invalid) \
910 process_pmap_updates(); \
1c79356b
A
911}
912
1c79356b
A
913#define PMAP_CONTEXT(pmap, thread)
914
915#define pmap_kernel_va(VA) \
0c530ab8
A
916 ((((vm_offset_t) (VA)) >= vm_min_kernel_address) && \
917 (((vm_offset_t) (VA)) <= vm_max_kernel_address))
918
1c79356b
A
919
920#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
2d21ac55 921#define pmap_resident_max(pmap) ((pmap)->stats.resident_max)
1c79356b
A
922#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
923#define pmap_attribute(pmap,addr,size,attr,value) \
924 (KERN_INVALID_ADDRESS)
9bccf70c
A
925#define pmap_attribute_cache_sync(addr,size,attr,value) \
926 (KERN_INVALID_ADDRESS)
765c9de3 927
6d2010ae 928#define MACHINE_PMAP_IS_EMPTY 1
2d21ac55
A
929extern boolean_t pmap_is_empty(pmap_t pmap,
930 vm_map_offset_t start,
931 vm_map_offset_t end);
932
6d2010ae
A
933#define MACHINE_BOOTSTRAPPTD 1 /* Static bootstrap page-tables */
934
b0d623f7 935
1c79356b
A
936#endif /* ASSEMBLER */
937
0c530ab8 938
1c79356b 939#endif /* _PMAP_MACHINE_ */
0c530ab8
A
940
941
942#endif /* KERNEL_PRIVATE */