/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _I386_PMAP_INTERNAL_
#define _I386_PMAP_INTERNAL_
#ifdef MACH_KERNEL_PRIVATE

#include <vm/pmap.h>
#include <sys/kdebug.h>
#include <kern/ledger.h>
#include <kern/simple_lock.h>
#include <i386/bit_routines.h>

/*
 * pmap locking
 */

#define PMAP_LOCK(pmap) {		\
    simple_lock(&(pmap)->lock);		\
}

#define PMAP_UNLOCK(pmap) {		\
    simple_unlock(&(pmap)->lock);	\
}

#define PMAP_UPDATE_TLBS(pmap, s, e)	\
    pmap_flush_tlbs(pmap, s, e, 0, NULL)

#define PMAP_DELAY_TLB_FLUSH		0x01

#define PMAP_UPDATE_TLBS_DELAYED(pmap, s, e, c)	\
    pmap_flush_tlbs(pmap, s, e, PMAP_DELAY_TLB_FLUSH, c)

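/*
 * Hedged usage sketch (modeled on how the delayed variant is used elsewhere
 * in xnu, e.g. by VM pageout paths; the exact caller code differs): batch
 * several PTE updates and issue one combined TLB shootdown at the end.
 *
 *	pmap_flush_context flush_ctx;
 *	pmap_flush_context_init(&flush_ctx);
 *	for (each mapping being updated)
 *		PMAP_UPDATE_TLBS_DELAYED(pmap, start, end, &flush_ctx);
 *	pmap_flush(&flush_ctx);		// one shootdown round for all deferred ranges
 */
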
#define iswired(pte)	((pte) & INTEL_PTE_WIRED)

#ifdef PMAP_TRACES
extern boolean_t pmap_trace;
#define PMAP_TRACE(...)			\
    if (pmap_trace) {			\
	KDBG_RELEASE(__VA_ARGS__);	\
    }
#else
#define PMAP_TRACE(...)			KDBG_DEBUG(__VA_ARGS__)
#endif /* PMAP_TRACES */

#define PMAP_TRACE_CONSTANT(...)	KDBG_RELEASE(__VA_ARGS__)

kern_return_t	pmap_expand_pml4(
			pmap_t		map,
			vm_map_offset_t	v,
			unsigned int	options);

kern_return_t	pmap_expand_pdpt(
			pmap_t		map,
			vm_map_offset_t	v,
			unsigned int	options);

void		phys_attribute_set(
			ppnum_t		phys,
			int		bits);

void		pmap_set_reference(
			ppnum_t		pn);

boolean_t	phys_page_exists(
			ppnum_t		pn);

void
pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);

void
pmap_update_cache_attributes_locked(ppnum_t, unsigned);

extern const boolean_t	cpu_64bit;

/*
 * Private data structures.
 */

/*
 * For each vm_page_t, there is a list of all currently
 * valid virtual mappings of that page. An entry is
 * a pv_rooted_entry_t; the list is the pv_table.
 *
 * N.B. with the new combo rooted/hashed scheme it is
 * only possible to remove individual non-rooted entries
 * if they are found via the hashed chains, as there is no
 * way to unlink the singly linked hashed entries if navigated to
 * via the queue list off the rooted entries. Think of it as
 * hash/walk/pull, keeping track of the prev pointer while walking
 * the singly linked hash list. All of this is to save memory and
 * keep both types of pv_entries as small as possible.
 */

/*

PV HASHING Changes - JK 1/2007

Pve's establish physical to virtual mappings. These are used for aliasing of a
physical page to (potentially many) virtual addresses within pmaps. In the
previous implementation the structure of the pv_entries (each 16 bytes in size) was

typedef struct pv_entry {
    struct pv_entry_t	next;
    pmap_t		pmap;
    vm_map_offset_t	va;
} *pv_entry_t;

An initial array of these is created at boot time, one per physical page of
memory, indexed by the physical page number. Additionally, a pool of entries
is created from a pv_zone to be used as needed by pmap_enter() when it is
creating new mappings. Originally, we kept this pool around because the code
in pmap_enter() was unable to block if it needed an entry and none were
available - we'd panic. Some time ago I restructured the pmap_enter() code
so that for user pmaps it can block while zalloc'ing a pv structure and restart,
removing a panic from the code (in the case of the kernel pmap we cannot block
and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
The pool has not been removed since there is a large performance gain keeping
freed pv's around for reuse and not suffering the overhead of zalloc for every
new pv we need.

As pmap_enter() created new mappings it linked the new pve's for them off the
fixed pv array for that ppn (off the next pointer). These pve's are accessed
for several operations, one of them being address space teardown. In that case,
we basically do this

	for (every page/pte in the space) {
		calc pve_ptr from the ppn in the pte
		for (every pv in the list for the ppn) {
			if (this pv is for this pmap/vaddr) {
				do housekeeping
				unlink/free the pv
			}
		}
	}

The problem arose when we were running, say 8000 (or even 2000) apache or
other processes and one or all terminated. The list hanging off each pv array
entry could have thousands of entries. We were continuously linearly searching
each of these lists as we stepped through the address space we were tearing
down. Because of the locks we hold, likely taking a cache miss for each node,
and interrupt disabling for MP issues, the system became completely unresponsive
for many seconds while we did this.

Realizing that pve's are accessed in two distinct ways (linearly running the
list by ppn for operations like pmap_page_protect, and finding and
modifying/removing a single pve as part of pmap_enter processing) has led to
modifying the pve structures and databases.

There are now two types of pve structures. A "rooted" structure which is
basically the original structure accessed in an array by ppn, and a "hashed"
structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
designed with the two goals of minimizing wired memory and making the lookup of
a ppn faster. Since a vast majority of pages in the system are not aliased
and hence represented by a single pv entry, I've kept the rooted entry size as
small as possible because there is one of these dedicated for every physical
page of memory. The hashed pve's are larger due to the addition of the hash
link and the ppn entry needed for matching while running the hash list to find
the entry we are looking for. This way, only systems that have lots of
aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
structures have the same first three fields allowing some simplification in
the code.

They have these shapes

typedef struct pv_rooted_entry {
	queue_head_t		qlink;
	vm_map_offset_t		va;
	pmap_t			pmap;
} *pv_rooted_entry_t;


typedef struct pv_hashed_entry {
	queue_head_t		qlink;
	vm_map_offset_t		va;
	pmap_t			pmap;
	ppnum_t			ppn;
	struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

The main flow difference is that the code is now aware of the rooted entry and
the hashed entries. Code that runs the pv list still starts with the rooted
entry and then continues down the qlink onto the hashed entries. Code that is
looking up a specific pv entry first checks the rooted entry and then hashes
and runs the hash list for the match. The hash list lengths are much smaller
than the original pv lists that contained all aliases for the specific ppn.

*/

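/*
 * A minimal sketch of the lookup pattern described above (assumed-typical
 * usage, not a verbatim excerpt from the pmap code): to find one specific
 * [pmap, vaddr] mapping, check the rooted entry first, then walk the short
 * hash chain; running *all* mappings of a page instead starts at the rooted
 * entry and follows the qlink circle.
 *
 *	pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
 *	if (!(PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap)) {
 *		pv_hashed_entry_t pvh_e = *pvhash(pvhashidx(pmap, vaddr));
 *		while (pvh_e != PV_HASHED_ENTRY_NULL &&
 *		       !(pvh_e->pmap == pmap && PVE_VA(pvh_e) == vaddr && pvh_e->ppn == ppn))
 *			pvh_e = pvh_e->nexth;
 *		// pvh_e is the matching hashed entry, or NULL
 *	}
 */
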
typedef struct pv_rooted_entry {
	/* first three entries must match pv_hashed_entry_t */
	queue_head_t		qlink;
	vm_map_offset_t		va_and_flags;	/* virtual address for mapping */
	pmap_t			pmap;		/* pmap where mapping lies */
} *pv_rooted_entry_t;

#define PV_ROOTED_ENTRY_NULL	((pv_rooted_entry_t) 0)

typedef struct pv_hashed_entry {
	/* first three entries must match pv_rooted_entry_t */
	queue_head_t		qlink;
	vm_map_offset_t		va_and_flags;
	pmap_t			pmap;
	ppnum_t			ppn;
	struct pv_hashed_entry	*nexth;
} *pv_hashed_entry_t;

#define PV_HASHED_ENTRY_NULL	((pv_hashed_entry_t)0)

#define PVE_VA(pve)	((pve)->va_and_flags & ~PAGE_MASK)
#define PVE_FLAGS(pve)	((pve)->va_and_flags & PAGE_MASK)
#define PVE_IS_ALTACCT	0x001
#define PVE_IS_ALTACCT_PAGE(pve) \
	(((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)

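/*
 * Illustrative sketch (an assumption for clarity, not part of the interface):
 * mapped virtual addresses are page aligned, so the low PAGE_MASK bits of
 * va_and_flags are free to carry per-mapping flags such as PVE_IS_ALTACCT.
 *
 *	pve->va_and_flags = (vaddr & ~PAGE_MASK) | PVE_IS_ALTACCT;
 *	vm_map_offset_t va  = PVE_VA(pve);		// vaddr, flags stripped
 *	boolean_t       alt = PVE_IS_ALTACCT_PAGE(pve);	// TRUE
 */
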
//#define PV_DEBUG 1		/* uncomment to enable some PV debugging code */
#ifdef PV_DEBUG
#define CHK_NPVHASH()	if (0 == npvhashmask) panic("npvhash uninitialized");
#else
#define CHK_NPVHASH(x)
#endif

#define NPVHASHBUCKETS (4096)
#define NPVHASHMASK ((NPVHASHBUCKETS) - 1)	/* MUST BE 2^N - 1 */
#define PV_HASHED_LOW_WATER_MARK_DEFAULT	5000
#define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT	2000
#define PV_HASHED_ALLOC_CHUNK_INITIAL		2000
#define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL	200

extern volatile uint32_t	mappingrecurse;
extern uint32_t	pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;

/*
 * PV hash locking
 */

#define LOCK_PV_HASH(hash)	lock_hash_hash(hash)
#define UNLOCK_PV_HASH(hash)	unlock_hash_hash(hash)
extern uint32_t npvhashmask;
extern pv_hashed_entry_t	*pv_hash_table;  /* hash lists */
extern pv_hashed_entry_t	pv_hashed_free_list;
extern pv_hashed_entry_t	pv_hashed_kern_free_list;
decl_simple_lock_data(extern, pv_hashed_free_list_lock)
decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
decl_simple_lock_data(extern, pv_hash_table_lock)
decl_simple_lock_data(extern, phys_backup_lock)

extern zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry
						 * structures */

extern uint32_t		pv_hashed_free_count;
extern uint32_t		pv_hashed_kern_free_count;
/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the address of
 *	the frame they lock.
 */
#define pv_lock_table_size(n)		(((n)+BYTE_SIZE-1)/BYTE_SIZE)
#define pv_hash_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
extern char		*pv_lock_table;		/* pointer to array of bits */
extern char		*pv_hash_lock_table;
extern pv_rooted_entry_t pv_head_table;		/* array of entries, one per page */

extern event_t mapping_replenish_event;

static inline void	PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
	pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_free_list_lock);
	/* If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
	if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
		pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
		pv_hashed_free_count--;
	}

	simple_unlock(&pv_hashed_free_list_lock);

	if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
		if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse))
			thread_wakeup(&mapping_replenish_event);
	}
}

static inline void	PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
	simple_lock(&pv_hashed_free_list_lock);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
	pv_hashed_free_list = pvh_eh;
	pv_hashed_free_count += pv_cnt;
	simple_unlock(&pv_hashed_free_list_lock);
}

extern unsigned pmap_kern_reserve_alloc_stat;

static inline void	PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
	pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_kern_free_list_lock);

	if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
		pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
		pv_hashed_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	simple_unlock(&pv_hashed_kern_free_list_lock);

	if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
		if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse))
			thread_wakeup(&mapping_replenish_event);
	}
}

static inline void	PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
	simple_lock(&pv_hashed_kern_free_list_lock);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
	pv_hashed_kern_free_list = pvh_eh;
	pv_hashed_kern_free_count += pv_cnt;
	simple_unlock(&pv_hashed_kern_free_list_lock);
}

extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
extern event_t pmap_user_pv_throttle_event;

static inline void pmap_pv_throttle(__unused pmap_t p) {
	pmap_assert(p != kernel_pmap);
	/* Apply throttle on non-kernel mappings */
	if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
		pmap_pv_throttle_stat++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
		pmap_pv_throttled_waiters++;
		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
		thread_block(THREAD_CONTINUE_NULL);
	}
}

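/*
 * Hypothetical allocation flow tying the helpers above together (a sketch of
 * how a pmap_enter()-style path might use them; real callers differ in
 * detail and ordering):
 *
 *	pv_hashed_entry_t pvh_e = PV_HASHED_ENTRY_NULL;
 *	if (pmap == kernel_pmap)
 *		PV_HASHED_KERN_ALLOC(&pvh_e);	// dip into the kernel reserve
 *	else {
 *		PV_HASHED_ALLOC(&pvh_e);	// common free list
 *		if (pvh_e == PV_HASHED_ENTRY_NULL) {
 *			pmap_pv_throttle(pmap);	// back off while reserves refill
 *			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 *		}
 *	}
 *	// ... use pvh_e for the new mapping ...
 *	PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);	// return a single entry when done
 */
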
/*
 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
 */

#define pa_index(pa)		(i386_btop(pa))
#define ppn_to_pai(ppn)		((int)ppn)

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
#define pvhash(idx)		(&pv_hash_table[idx])
#define lock_hash_hash(hash)	bit_lock(hash, (void *)pv_hash_lock_table)
#define unlock_hash_hash(hash)	bit_unlock(hash, (void *)pv_hash_lock_table)

#define IS_MANAGED_PAGE(x)				\
	((unsigned int)(x) <= last_managed_page &&	\
	 (pmap_phys_attributes[x] & PHYS_MANAGED))
#define IS_INTERNAL_PAGE(x)				\
	(IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL))
#define IS_REUSABLE_PAGE(x)				\
	(IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE))
#define IS_ALTACCT_PAGE(x, pve)				\
	(IS_MANAGED_PAGE((x)) &&			\
	 (PVE_IS_ALTACCT_PAGE((pve))))

/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define PHYS_MANAGED	INTEL_PTE_VALID	/* page is managed */
#define PHYS_NOENCRYPT	INTEL_PTE_USER	/* no need to encrypt this page in the hibernation image */
#define	PHYS_NCACHE	INTEL_PTE_NCACHE
#define	PHYS_PTA	INTEL_PTE_PTA
#define	PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
#define PHYS_INTERNAL	INTEL_PTE_WTHRU	/* page from internal object */
#define PHYS_REUSABLE	INTEL_PTE_WRITE	/* page is "reusable" */

extern boolean_t	pmap_disable_kheap_nx;
extern boolean_t	pmap_disable_kstack_nx;

#define PMAP_EXPAND_OPTIONS_NONE	(0x0)
#define PMAP_EXPAND_OPTIONS_NOWAIT	(PMAP_OPTIONS_NOWAIT)
#define PMAP_EXPAND_OPTIONS_NOENTER	(PMAP_OPTIONS_NOENTER)

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))


/*
 * Locking and TLB invalidation
 */

/*
 *	Locking Protocols: (changed 2/2007 JK)
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *
 *	The system wide pmap lock has been removed. Now, paths take a lock
 *	on the pmap before changing its 'shape' and the reverse order lockers
 *	(coming in by phys ppn) take a lock on the corresponding pv and then
 *	retest to be sure nothing changed during the window before they locked
 *	and can then run up/down the pv lists holding the list lock. This also
 *	lets the pmap layer run (nearly completely) interrupt enabled, unlike
 *	previously.
 */

/*
 * PV locking
 */

#define LOCK_PVH(index)	{		\
	mp_disable_preemption();	\
	lock_pvh_pai(index);		\
}

#define UNLOCK_PVH(index) {		\
	unlock_pvh_pai(index);		\
	mp_enable_preemption();		\
}
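
/*
 * A sketch (assumed-typical, not lifted from an actual caller) of the
 * reverse-order, physical-page based path described in the Locking Protocols
 * comment above: lock the page's pv list, then visit every mapping of it.
 *
 *	int pai = ppn_to_pai(ppn);
 *	LOCK_PVH(pai);					// also disables preemption
 *	pv_rooted_entry_t pv_h = pai_to_pvh(pai);
 *	if (pv_h->pmap != PMAP_NULL) {
 *		pv_rooted_entry_t pv_e = pv_h;
 *		do {
 *			// retest/modify the PTE for [pv_e->pmap, PVE_VA(pv_e)]
 *			pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink);
 *		} while (pv_e != pv_h);
 *	}
 *	UNLOCK_PVH(pai);
 */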

extern uint64_t pde_mapped_size;

extern char		*pmap_phys_attributes;
extern ppnum_t		last_managed_page;

extern ppnum_t	lowest_lo;
extern ppnum_t	lowest_hi;
extern ppnum_t	highest_hi;

/*
 * When spinning through pmap_remove, ensure that we don't spend too
 * much time with preemption disabled.  The current threshold is 20us.
 */
#define MAX_PREEMPTION_LATENCY_NS 20000
extern uint64_t max_preemption_latency_tsc;

/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
#ifdef DEBUGINTERRUPTS
#define pmap_intr_assert() {							\
	if (processor_avail_count > 1 && !ml_get_interrupts_enabled())		\
		panic("pmap interrupt assert %s, %d", __FILE__, __LINE__);	\
}
#else
#define pmap_intr_assert()
#endif

extern int		nx_enabled;
extern unsigned int	inuse_ptepages_count;

static inline uint32_t
pvhashidx(pmap_t pmap, vm_map_offset_t va)
{
	uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
		((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
		npvhashmask;
	return hashidx;
}


/*
 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
 * properly deals with the anchor.
 * must be called with the hash locked, does not unlock it
 */
static inline void
pmap_pvh_unlink(pv_hashed_entry_t pvh)
{
	pv_hashed_entry_t	curh;
	pv_hashed_entry_t	*pprevh;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));

	pprevh = pvhash(pvhash_idx);

#if PV_DEBUG
	if (NULL == *pprevh)
		panic("pvh_unlink null anchor"); /* JK DEBUG */
#endif
	curh = *pprevh;

	while (PV_HASHED_ENTRY_NULL != curh) {
		if (pvh == curh)
			break;
		pprevh = &curh->nexth;
		curh = curh->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
	*pprevh = pvh->nexth;
	return;
}

static inline void
pv_hash_add(pv_hashed_entry_t	pvh_e,
	    pv_rooted_entry_t	pv_h)
{
	pv_hashed_entry_t	*hashp;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	insque(&pvh_e->qlink, &pv_h->qlink);
	hashp = pvhash(pvhash_idx);
#if PV_DEBUG
	if (NULL == hashp)
		panic("pv_hash_add(%p) null hash bucket", pvh_e);
#endif
	pvh_e->nexth = *hashp;
	*hashp = pvh_e;
	UNLOCK_PV_HASH(pvhash_idx);
}

static inline void
pv_hash_remove(pv_hashed_entry_t pvh_e)
{
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	remque(&pvh_e->qlink);
	pmap_pvh_unlink(pvh_e);
	UNLOCK_PV_HASH(pvhash_idx);
}

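/* TRUE when at most one bit of 'distance' is set; the XOR'ed operands it is
 * typically fed therefore differ by a Hamming distance of zero or one. */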
static inline boolean_t popcnt1(uint64_t distance) {
	return ((distance & (distance - 1)) == 0);
}

/*
 * Routines to handle suppression of/recovery from some forms of pagetable corruption
 * incidents observed in the field. These can be either software induced (wild
 * stores to the mapwindows where applicable, use-after-free errors
 * (typically of pages addressed physically), mis-directed DMAs, etc.), or due
 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
 * still assert on potential software races, but attempt recovery from incidents
 * identifiable as occurring due to issues beyond the control of the pmap module.
 * The latter includes single-bit errors and malformed pagetable entries.
 * We currently limit ourselves to recovery/suppression of one incident per
 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
 * are logged.
 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
 */

typedef enum {
	PTE_VALID		= 0x0,
	PTE_INVALID		= 0x1,
	PTE_RSVD		= 0x2,
	PTE_SUPERVISOR		= 0x4,
	PTE_BITFLIP		= 0x8,
	PV_BITFLIP		= 0x10,
	PTE_INVALID_CACHEABILITY = 0x20
} pmap_pagetable_corruption_t;

typedef enum {
	ROOT_PRESENT = 0,
	ROOT_ABSENT = 1
} pmap_pv_assertion_t;

typedef enum {
	PMAP_ACTION_IGNORE	= 0x0,
	PMAP_ACTION_ASSERT	= 0x1,
	PMAP_ACTION_RETRY	= 0x2,
	PMAP_ACTION_RETRY_RELOCK = 0x4
} pmap_pagetable_corruption_action_t;

#define	PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
extern uint64_t pmap_pagetable_corruption_interval_abstime;

extern uint32_t pmap_pagetable_corruption_incidents;
#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
typedef struct {
	pmap_pv_assertion_t incident;
	pmap_pagetable_corruption_t reason;
	pmap_pagetable_corruption_action_t action;
	pmap_t	pmap;
	vm_map_offset_t vaddr;
	pt_entry_t pte;
	ppnum_t ppn;
	pmap_t pvpmap;
	vm_map_offset_t pvva;
	uint64_t abstime;
} pmap_pagetable_corruption_record_t;

extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
extern uint64_t pmap_pagetable_corruption_last_abstime;
extern thread_call_t	pmap_pagetable_corruption_log_call;
extern boolean_t pmap_pagetable_corruption_timeout;

static inline void
pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
	uint32_t pmap_pagetable_corruption_log_index;
	pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
	/* Asynchronously log */
	thread_call_enter(pmap_pagetable_corruption_log_call);
}

static inline pmap_pagetable_corruption_action_t
pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
	pmap_pagetable_corruption_action_t	action = PMAP_ACTION_ASSERT;
	pmap_pagetable_corruption_t	suppress_reason = PTE_VALID;
	ppnum_t			suppress_ppn = 0;
	pt_entry_t		cpte = *ptep;
	ppnum_t			cpn = pa_index(pte_to_pa(cpte));
	ppnum_t			ppn = *ppnp;
	pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_rooted_entry_t	pv_e = pv_h;
	uint32_t		bitdex;
	pmap_t			pvpmap = pv_h->pmap;
	vm_map_offset_t		pvva = PVE_VA(pv_h);
	vm_map_offset_t		pve_flags;
	boolean_t		ppcd = FALSE;
	boolean_t		is_ept;

	/* Ideally, we'd consult the Mach VM here to definitively determine
	 * the nature of the mapping for this address space and address.
	 * As that would be a layering violation in this context, we
	 * use various heuristics to recover from single bit errors,
	 * malformed pagetable entries etc. These are not intended
	 * to be comprehensive.
	 */

	/* As a precautionary measure, mark A+D */
	pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
	is_ept = is_ept_pmap(pmap);

	/*
	 * Correct potential single bit errors in either (but not both) element
	 * of the PV
	 */
	do {
		if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
		    (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
			pve_flags = PVE_FLAGS(pv_e);
			pv_e->pmap = pmap;
			pv_h->va_and_flags = vaddr | pve_flags;
			suppress_reason = PV_BITFLIP;
			action = PMAP_ACTION_RETRY;
			goto pmap_cpc_exit;
		}
	} while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));

	/* Discover root entries with a Hamming
	 * distance of 1 from the supplied
	 * physical page frame.
	 */
	for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
		ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
		if (IS_MANAGED_PAGE(npn)) {
			pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
			if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
				suppress_reason = PTE_BITFLIP;
				suppress_ppn = npn;
				action = PMAP_ACTION_RETRY_RELOCK;
				UNLOCK_PVH(ppn_to_pai(ppn));
				*ppnp = npn;
				goto pmap_cpc_exit;
			}
		}
	}

	if (pmap == kernel_pmap) {
		action = PMAP_ACTION_ASSERT;
		goto pmap_cpc_exit;
	}

	/*
	 * Check for malformed/inconsistent entries.
	 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
	 */
	if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_INVALID_CACHEABILITY;
	}
	else if (cpte & INTEL_PTE_RSVD) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_RSVD;
	}
	else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_SUPERVISOR;
	}
pmap_cpc_exit:
	PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));

	if (debug_boot_arg && !ppcd) {
		action = PMAP_ACTION_ASSERT;
	}

	if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
		action = PMAP_ACTION_ASSERT;
		pmap_pagetable_corruption_timeout = TRUE;
	}
	else
	{
		pmap_pagetable_corruption_last_abstime = mach_absolute_time();
	}
	pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
	return action;
}

/*
 * Remove pv list entry.
 * Called with pv_head_table entry locked.
 * Returns pv entry to be freed (or NULL).
 */
static inline __attribute__((always_inline)) pv_hashed_entry_t
pmap_pv_remove(pmap_t		pmap,
	       vm_map_offset_t	vaddr,
	       ppnum_t		*ppnp,
	       pt_entry_t	*pte,
	       boolean_t	*was_altacct)
{
	pv_hashed_entry_t	pvh_e;
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	*pprevh;
	int			pvhash_idx;
	uint32_t		pv_cnt;
	ppnum_t			ppn;

	*was_altacct = FALSE;
pmap_pv_remove_retry:
	ppn = *ppnp;
	pvh_e = PV_HASHED_ENTRY_NULL;
	pv_h = pai_to_pvh(ppn_to_pai(ppn));

	if (__improbable(pv_h->pmap == PMAP_NULL)) {
		pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
		if (pac == PMAP_ACTION_IGNORE)
			goto pmap_pv_remove_exit;
		else if (pac == PMAP_ACTION_ASSERT)
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pmap_pagetable_corruption_incidents);
		else if (pac == PMAP_ACTION_RETRY_RELOCK) {
			LOCK_PVH(ppn_to_pai(*ppnp));
			pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
			goto pmap_pv_remove_retry;
		}
		else if (pac == PMAP_ACTION_RETRY)
			goto pmap_pv_remove_retry;
	}

	if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
		*was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pv_h);
		/*
		 * Header is the pv_rooted_entry.
		 * We can't free that. If there is a queued
		 * entry after this one we remove that
		 * from the ppn queue, we remove it from the hash chain
		 * and copy it to the rooted entry. Then free it instead.
		 */
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
		if (pv_h != (pv_rooted_entry_t) pvh_e) {
			/*
			 * Entry queued to root, remove this from hash
			 * and install as new root.
			 */
			CHK_NPVHASH();
			pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
			LOCK_PV_HASH(pvhash_idx);
			remque(&pvh_e->qlink);
			pprevh = pvhash(pvhash_idx);
			if (PV_HASHED_ENTRY_NULL == *pprevh) {
				panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
				      "empty hash, removing rooted, priors: %d",
				      pmap, vaddr, ppn, pmap_pagetable_corruption_incidents);
			}
			pmap_pvh_unlink(pvh_e);
			UNLOCK_PV_HASH(pvhash_idx);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va_and_flags = pvh_e->va_and_flags;
			/* dispose of pvh_e */
		} else {
			/* none queued after rooted */
			pv_h->pmap = PMAP_NULL;
			pvh_e = PV_HASHED_ENTRY_NULL;
		}
	} else {
		/*
		 * not removing rooted pv. find it on hash chain, remove from
		 * ppn queue and hash chain and free it
		 */
		CHK_NPVHASH();
		pvhash_idx = pvhashidx(pmap, vaddr);
		LOCK_PV_HASH(pvhash_idx);
		pprevh = pvhash(pvhash_idx);
		if (PV_HASHED_ENTRY_NULL == *pprevh) {
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash, priors: %d",
			      pmap, vaddr, ppn, *pte, pte, pmap_pagetable_corruption_incidents);
		}
		pvh_e = *pprevh;
		pmap_pv_hashlist_walks++;
		pv_cnt = 0;
		while (PV_HASHED_ENTRY_NULL != pvh_e) {
			pv_cnt++;
			if (pvh_e->pmap == pmap &&
			    PVE_VA(pvh_e) == vaddr &&
			    pvh_e->ppn == ppn)
				break;
			pprevh = &pvh_e->nexth;
			pvh_e = pvh_e->nexth;
		}

		if (PV_HASHED_ENTRY_NULL == pvh_e) {
			pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);

			if (pac == PMAP_ACTION_ASSERT)
				panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h), pmap_pagetable_corruption_incidents);
			else {
				UNLOCK_PV_HASH(pvhash_idx);
				if (pac == PMAP_ACTION_RETRY_RELOCK) {
					LOCK_PVH(ppn_to_pai(*ppnp));
					pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
					goto pmap_pv_remove_retry;
				}
				else if (pac == PMAP_ACTION_RETRY) {
					goto pmap_pv_remove_retry;
				}
				else if (pac == PMAP_ACTION_IGNORE) {
					goto pmap_pv_remove_exit;
				}
			}
		}

		*was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pvh_e);

		pmap_pv_hashlist_cnts += pv_cnt;
		if (pmap_pv_hashlist_max < pv_cnt)
			pmap_pv_hashlist_max = pv_cnt;
		*pprevh = pvh_e->nexth;
		remque(&pvh_e->qlink);
		UNLOCK_PV_HASH(pvhash_idx);
	}
pmap_pv_remove_exit:
	return pvh_e;
}

static inline __attribute__((always_inline)) boolean_t
pmap_pv_is_altacct(
	pmap_t		pmap,
	vm_map_offset_t	vaddr,
	ppnum_t		ppn)
{
	pv_hashed_entry_t	pvh_e;
	pv_rooted_entry_t	pv_h;
	int			pvhash_idx;
	boolean_t		is_altacct;

	pvh_e = PV_HASHED_ENTRY_NULL;
	pv_h = pai_to_pvh(ppn_to_pai(ppn));

	if (__improbable(pv_h->pmap == PMAP_NULL)) {
		return FALSE;
	}

	if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
		/*
		 * Header is the pv_rooted_entry.
		 */
		return IS_ALTACCT_PAGE(ppn, pv_h);
	}

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pmap, vaddr);
	LOCK_PV_HASH(pvhash_idx);
	pvh_e = *(pvhash(pvhash_idx));
	if (PV_HASHED_ENTRY_NULL == pvh_e) {
		panic("Possible memory corruption: pmap_pv_is_altacct(%p,0x%llx,0x%x): empty hash",
		      pmap, vaddr, ppn);
	}
	while (PV_HASHED_ENTRY_NULL != pvh_e) {
		if (pvh_e->pmap == pmap &&
		    PVE_VA(pvh_e) == vaddr &&
		    pvh_e->ppn == ppn)
			break;
		pvh_e = pvh_e->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == pvh_e) {
		is_altacct = FALSE;
	} else {
		is_altacct = IS_ALTACCT_PAGE(ppn, pvh_e);
	}
	UNLOCK_PV_HASH(pvhash_idx);

	return is_altacct;
}

extern int	pt_fake_zone_index;
static inline void
PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
}

static inline void
PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
}

extern boolean_t	pmap_initialized;	/* Has pmap_init completed? */
#define valid_page(x) (pmap_initialized && pmap_valid_page(x))

// XXX
#define HIGH_MEM_BASE  ((uint32_t)( -NBPDE) )  /* shared gdt etc seg addr */ /* XXX64 ?? */
// XXX

int		phys_attribute_test(
			ppnum_t		phys,
			int		bits);
void		phys_attribute_clear(
			ppnum_t		phys,
			int		bits,
			unsigned int	options,
			void		*arg);

//#define PCID_DEBUG 1
#if	PCID_DEBUG
#define pmap_pcid_log(fmt, args...)	\
	do {				\
		kprintf(fmt, ##args);	\
		printf(fmt, ##args);	\
	} while(0)
#else
#define pmap_pcid_log(fmt, args...)
#endif
void	pmap_pcid_configure(void);

/*
 * Atomic 64-bit compare and exchange of a page table entry.
 */
static inline boolean_t
pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
{
	boolean_t ret;

	/*
	 * Load the old value into %rax
	 * Load the new value into another register
	 * Compare-exchange-quad at address entryp
	 * If the compare succeeds, the new value is stored, return TRUE.
	 * Otherwise, no swap is made, return FALSE.
	 */
	asm volatile(
		"	lock; cmpxchgq %2,(%3)	\n\t"
		"	setz	%%al		\n\t"
		"	movzbl	%%al,%0"
		: "=a" (ret)
		: "a" (old),
		  "r" (new),
		  "r" (entryp)
		: "memory");
	return ret;
}

extern uint32_t pmap_update_clear_pte_count;

static inline void pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits) {
	pt_entry_t npte, opte;
	do {
		opte = *mptep;
		if (__improbable(opte == 0)) {
			pmap_update_clear_pte_count++;
			break;
		}
		npte = opte & ~(pclear_bits);
		npte |= pset_bits;
	} while (!pmap_cmpx_pte(mptep, opte, npte));
}

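/*
 * Illustrative use of pmap_update_pte() (an assumed example, not a caller
 * copied from this file): atomically clear and set bits in a live PTE, then
 * invalidate the now-stale translation for that virtual range.
 *
 *	pt_entry_t *ptep = pmap_pte(pmap, vaddr);	// see the walkers below
 *	if (ptep != NULL) {
 *		pmap_update_pte(ptep, INTEL_PTE_WRITE, 0);	// revoke write permission
 *		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 *	}
 */
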
#if	defined(__x86_64__)
/*
 * The single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap.  We allocate this page in kernel VM.
 * This returns the address of the requested PML4 entry in the top-level page.
 */
static inline
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
			 (vaddr < 0xFFFF800000000000ULL))) {
		return (NULL);
	}

#if	DEBUG
	return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
#else
	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
#endif
}

/*
 * Returns address of requested PDPT entry in the physmap.
 */
static inline pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;
	boolean_t	is_ept;

	pml4 = pmap64_pml4(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
		newpf = *pml4 & PG_FRAME;
		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
	}
	return (NULL);
}
/*
 * Returns the address of the requested PDE entry in the physmap.
 */
static inline pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;
	boolean_t	is_ept;

	pdpt = pmap64_pdpt(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pdpt && (*pdpt & PTE_VALID_MASK(is_ept))) {
		newpf = *pdpt & PG_FRAME;
		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDSHIFT) & (NPDPG-1)];
	}
	return (NULL);
}

static inline pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t	*pde;

	pde = pmap64_pde(m, v);

	return pde;
}


/*
 * Return the address of the mapped PTE for vaddr in pmap.
 *
 * If the PDE maps a superpage, return the PDE, which in this case
 * is the actual page table entry.
 */
static inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;
	boolean_t	is_ept;

	assert(pmap);
	pde = pmap64_pde(pmap, vaddr);

	is_ept = is_ept_pmap(pmap);

	if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
		if (*pde & PTE_PS)
			return pde;
		newpf = *pde & PG_FRAME;
		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
			[i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
	}
	return (NULL);
}
#endif
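
/*
 * A minimal sketch (an assumption, for illustration only) of how the walkers
 * above compose: pmap_pte() walks PML4 -> PDPT -> PDE -> PTE for a virtual
 * address, already handling the superpage case internally, and the resulting
 * entry can be turned into a physical page number with the macros above.
 *
 *	pt_entry_t *ptep = pmap_pte(pmap, vaddr);
 *	if (ptep != NULL && (*ptep & PTE_VALID_MASK(is_ept_pmap(pmap)))) {
 *		ppnum_t ppn = (ppnum_t) pa_index(pte_to_pa(*ptep));
 *		// ppn now names the physical page backing vaddr
 *	}
 */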
#if	DEBUG
#define DPRINTF(x...)	kprintf(x)
#else
#define DPRINTF(x...)
#endif

#endif /* MACH_KERNEL_PRIVATE */
#endif /* _I386_PMAP_INTERNAL_ */