/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifndef _I386_PMAP_INTERNAL_
#define _I386_PMAP_INTERNAL_

#ifdef MACH_KERNEL_PRIVATE

#include <vm/pmap.h>
#include <sys/kdebug.h>
#include <kern/ledger.h>
#include <kern/simple_lock.h>
#include <i386/bit_routines.h>
static inline void
PMAP_LOCK_EXCLUSIVE(pmap_t p)
{
	mp_disable_preemption();
	lck_rw_lock_exclusive(&p->pmap_rwl);
}

static inline void
PMAP_LOCK_SHARED(pmap_t p)
{
	mp_disable_preemption();
	lck_rw_lock_shared(&p->pmap_rwl);
}

static inline void
PMAP_LOCK_SHARED_TO_EXCLUSIVE(pmap_t p)
{
	lck_rw_lock_shared_to_exclusive(&p->pmap_rwl);
}

static inline void
PMAP_LOCK_EXCLUSIVE_TO_SHARED(pmap_t p)
{
	lck_rw_lock_exclusive_to_shared(&p->pmap_rwl);
}

static inline void
PMAP_UNLOCK_EXCLUSIVE(pmap_t p)
{
	lck_rw_unlock_exclusive(&p->pmap_rwl);
	mp_enable_preemption();
}

static inline void
PMAP_UNLOCK_SHARED(pmap_t p)
{
	lck_rw_unlock_shared(&p->pmap_rwl);
	mp_enable_preemption();
}
#define iswired(pte)	((pte) & INTEL_PTE_WIRED)

#ifdef PMAP_TRACES
extern boolean_t pmap_trace;
#define PMAP_TRACE(...) \
	if (pmap_trace) { \
	        KDBG_RELEASE(__VA_ARGS__); \
	}
#else
#define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
#endif /* PMAP_TRACES */

#define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
kern_return_t pmap_expand_pml4(
	pmap_t map,
	vm_map_offset_t v,
	unsigned int options);

kern_return_t pmap_expand_pdpt(
	pmap_t map,
	vm_map_offset_t v,
	unsigned int options);

void phys_attribute_set(
	ppnum_t phys,
	int bits);

void pmap_set_reference(
	ppnum_t pn);

boolean_t phys_page_exists(
	ppnum_t pn);

void
pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);

void
pmap_update_cache_attributes_locked(ppnum_t, unsigned);
static inline void
PMAP_UPDATE_TLBS(pmap_t fp, addr64_t s, addr64_t e)
{
	pmap_flush_tlbs(fp, s, e, 0, NULL);
}

#define PMAP_DELAY_TLB_FLUSH	0x01

static inline void
PMAP_UPDATE_TLBS_DELAYED(pmap_t fp, addr64_t s, addr64_t e, pmap_flush_context *pfc)
{
	pmap_flush_tlbs(fp, s, e, PMAP_DELAY_TLB_FLUSH, pfc);
}
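
/*
 * Usage sketch (illustrative, not part of the original header): the delayed
 * variant lets a caller batch TLB shootdowns for many mappings and issue a
 * single flush at the end, via the pmap_flush_context interfaces declared in
 * i386/pmap.h (pmap_flush_context_init()/pmap_flush() are assumed here).
 *
 *	pmap_flush_context pfc;
 *	pmap_flush_context_init(&pfc);
 *	for (each mapping being changed) {
 *		// ... update the PTE(s) covering [s, e) ...
 *		PMAP_UPDATE_TLBS_DELAYED(map, s, e, &pfc);
 *	}
 *	pmap_flush(&pfc);	// one shootdown covering everything recorded
 */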
/*
 * Private data structures.
 */

/*
 * For each vm_page_t, there is a list of all currently
 * valid virtual mappings of that page.  An entry is
 * a pv_rooted_entry_t; the list is the pv_table.
 *
 * N.B. with the new combo rooted/hashed scheme it is
 * only possible to remove individual non-rooted entries
 * if they are found via the hashed chains, as there is no
 * way to unlink the singly linked hashed entries if navigated to
 * via the queue list off the rooted entries.  Think of it as
 * hash/walk/pull, keeping track of the prev pointer while walking
 * the singly linked hash list.  All of this is to save memory and
 * keep both types of pv_entries as small as possible.
 */
/*
 * PV HASHING Changes - JK 1/2007
 *
 * Pve's establish physical to virtual mappings. These are used for aliasing of a
 * physical page to (potentially many) virtual addresses within pmaps. In the
 * previous implementation the structure of the pv_entries (each 16 bytes in size) was
 *
 * typedef struct pv_entry {
 *     struct pv_entry_t  next;
 *     pmap_t             pmap;
 *     vm_map_offset_t    va;
 * } *pv_entry_t;
 *
 * An initial array of these is created at boot time, one per physical page of
 * memory, indexed by the physical page number. Additionally, a pool of entries
 * is created from a pv_zone to be used as needed by pmap_enter() when it is
 * creating new mappings. Originally, we kept this pool around because the code
 * in pmap_enter() was unable to block if it needed an entry and none were
 * available - we'd panic. Some time ago I restructured the pmap_enter() code
 * so that for user pmaps it can block while zalloc'ing a pv structure and restart,
 * removing a panic from the code (in the case of the kernel pmap we cannot block
 * and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
 * The pool has not been removed since there is a large performance gain keeping
 * freed pv's around for reuse and not suffering the overhead of zalloc for every
 * new pv we need.
 *
 * As pmap_enter() creates new mappings, it links the new pve's for them off the
 * fixed pv array for that ppn (off the next pointer). These pve's are accessed
 * for several operations, one of them being address space teardown. In that case,
 * we basically do this
 *
 *	for (every page/pte in the space) {
 *		calc pve_ptr from the ppn in the pte
 *		for (every pv in the list for the ppn) {
 *			if (this pv is for this pmap/vaddr) {
 *				unlink it, do the needed operations, and break;
 *			}
 *		}
 *	}
 *
 * The problem arose when we were running, say, 8000 (or even 2000) apache or
 * other processes and one or all terminate. The list hanging off each pv array
 * entry could have thousands of entries. We were continuously linearly searching
 * each of these lists as we stepped through the address space we were tearing
 * down. Because of the locks we hold, likely taking a cache miss for each node,
 * and interrupt disabling for MP issues, the system became completely unresponsive
 * for many seconds while we did this.
 *
 * Realizing that pve's are accessed in two distinct ways (linearly running the
 * list by ppn for operations like pmap_page_protect, and finding and
 * modifying/removing a single pve as part of pmap_enter processing) has led to
 * modifying the pve structures and databases.
 *
 * There are now two types of pve structures: a "rooted" structure, which is
 * basically the original structure accessed in an array by ppn, and a "hashed"
 * structure, accessed on a hash list via a hash of [pmap, vaddr]. These have been
 * designed with the two goals of minimizing wired memory and making the lookup of
 * a ppn faster. Since the vast majority of pages in the system are not aliased
 * and hence represented by a single pv entry, I've kept the rooted entry size as
 * small as possible because there is one of these dedicated for every physical
 * page of memory. The hashed pve's are larger due to the addition of the hash
 * link and the ppn entry needed for matching while running the hash list to find
 * the entry we are looking for. This way, only systems that have lots of
 * aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
 * structures have the same first three fields, allowing some simplification in
 * the code.
 *
 * They have these shapes:
 *
 * typedef struct pv_rooted_entry {
 *	queue_head_t		qlink;
 *	vm_map_offset_t		va;
 *	pmap_t			pmap;
 * } *pv_rooted_entry_t;
 *
 * typedef struct pv_hashed_entry {
 *	queue_head_t		qlink;
 *	vm_map_offset_t		va;
 *	pmap_t			pmap;
 *	ppnum_t			ppn;
 *	struct pv_hashed_entry *nexth;
 * } *pv_hashed_entry_t;
 *
 * The main flow difference is that the code is now aware of the rooted entry and
 * the hashed entries. Code that runs the pv list still starts with the rooted
 * entry and then continues down the qlink onto the hashed entries. Code that is
 * looking up a specific pv entry first checks the rooted entry and then hashes
 * and runs the hash list for the match. The hash list lengths are much smaller
 * than the original pv lists that contained all aliases for the specific ppn.
 */
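
/*
 * Illustrative sketch (not part of the original interface): how a lookup of
 * [pmap, vaddr] against a given ppn proceeds under this scheme, using the
 * accessors defined later in this header (pai_to_pvh(), pvhashidx(), pvhash(),
 * PVE_VA()).  The rooted entry is checked first; only on a miss is the
 * (much shorter) hash chain walked:
 *
 *	pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
 *	if (pv_h->pmap == pmap && PVE_VA(pv_h) == vaddr) {
 *		// hit on the rooted entry -- the common, unaliased case
 *	} else {
 *		uint32_t idx = pvhashidx(pmap, vaddr);
 *		pv_hashed_entry_t e;
 *		LOCK_PV_HASH(idx);
 *		for (e = *pvhash(idx); e != PV_HASHED_ENTRY_NULL; e = e->nexth) {
 *			if (e->pmap == pmap && PVE_VA(e) == vaddr && e->ppn == ppn) {
 *				break;	// found the aliased mapping
 *			}
 *		}
 *		UNLOCK_PV_HASH(idx);
 *	}
 */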
typedef struct pv_rooted_entry {
	/* first three entries must match pv_hashed_entry_t */
	queue_head_t            qlink;
	vm_map_offset_t         va_and_flags;   /* virtual address for mapping */
	pmap_t                  pmap;           /* pmap where mapping lies */
} *pv_rooted_entry_t;

#define PV_ROOTED_ENTRY_NULL    ((pv_rooted_entry_t) 0)

typedef struct pv_hashed_entry {
	/* first three entries must match pv_rooted_entry_t */
	queue_head_t            qlink;
	vm_map_offset_t         va_and_flags;
	pmap_t                  pmap;
	ppnum_t                 ppn;
	struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)

#define PVE_VA(pve) ((pve)->va_and_flags & (vm_map_offset_t)~PAGE_MASK)
#define PVE_FLAGS(pve) ((pve)->va_and_flags & PAGE_MASK)
#define PVE_IS_ALTACCT 0x001
#define PVE_IS_ALTACCT_PAGE(pve) \
	(((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)
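
/*
 * Usage note (illustrative, not from the original header): va_and_flags packs
 * a page-aligned virtual address together with per-mapping flag bits in the
 * low PAGE_MASK bits, so the entry can carry e.g. alternate-accounting state
 * without growing.  For a hypothetical entry "pve" mapping 0x7fff5fc00000
 * with PVE_IS_ALTACCT set:
 *
 *	pve->va_and_flags = 0x7fff5fc00000ULL | PVE_IS_ALTACCT;
 *	PVE_VA(pve);              // -> 0x7fff5fc00000 (flags masked off)
 *	PVE_FLAGS(pve);           // -> 0x001
 *	PVE_IS_ALTACCT_PAGE(pve); // -> TRUE
 */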
//#define PV_DEBUG 1   /* uncomment to enable some PV debugging code */
#ifdef PV_DEBUG
#define CHK_NPVHASH() if (0 == npvhashmask) panic("npvhash uninitialized");
#else
#define CHK_NPVHASH(x)
#endif

#define NPVHASHBUCKETS (4096)
#define NPVHASHMASK ((NPVHASHBUCKETS) - 1) /* MUST BE 2^N - 1 */
#define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000
#define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000
#define PV_HASHED_ALLOC_CHUNK_INITIAL 2000
#define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL 200

extern volatile uint32_t mappingrecurse;
extern uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;
#define LOCK_PV_HASH(hash)      lock_hash_hash(hash)
#define UNLOCK_PV_HASH(hash)    unlock_hash_hash(hash)
extern uint32_t npvhashmask;
extern pv_hashed_entry_t        *pv_hash_table;         /* hash lists */
extern pv_hashed_entry_t        pv_hashed_free_list;
extern pv_hashed_entry_t        pv_hashed_kern_free_list;
decl_simple_lock_data(extern, pv_hashed_free_list_lock);
decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock);
decl_simple_lock_data(extern, pv_hash_table_lock);
decl_simple_lock_data(extern, phys_backup_lock);

extern zone_t           pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */

extern uint32_t         pv_hashed_free_count;
extern uint32_t         pv_hashed_kern_free_count;
/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the address of
 *	the frame they lock.
 */

#define pv_lock_table_size(n)       (((n)+BYTE_SIZE-1)/BYTE_SIZE)
#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
extern char     *pv_lock_table;         /* pointer to array of bits */
extern char     *pv_hash_lock_table;
extern pv_rooted_entry_t pv_head_table; /* array of entries, one per page */

extern event_t mapping_replenish_event;
static inline void
PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep)
{
	pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_free_list_lock, LCK_GRP_NULL);
	/* If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
	if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
		pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
		pv_hashed_free_count--;
	}

	simple_unlock(&pv_hashed_free_list_lock);

	if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
		if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}
static inline void
PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt)
{
	simple_lock(&pv_hashed_free_list_lock, LCK_GRP_NULL);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
	pv_hashed_free_list = pvh_eh;
	pv_hashed_free_count += (uint32_t)pv_cnt;
	simple_unlock(&pv_hashed_free_list_lock);
}
extern unsigned pmap_kern_reserve_alloc_stat;

static inline void
PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e)
{
	pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_kern_free_list_lock, LCK_GRP_NULL);

	if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
		pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
		pv_hashed_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	simple_unlock(&pv_hashed_kern_free_list_lock);

	if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
		if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}
static inline void
PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt)
{
	simple_lock(&pv_hashed_kern_free_list_lock, LCK_GRP_NULL);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
	pv_hashed_kern_free_list = pvh_eh;
	pv_hashed_kern_free_count += (uint32_t)pv_cnt;
	simple_unlock(&pv_hashed_kern_free_list_lock);
}
extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
extern event_t pmap_user_pv_throttle_event;

static inline void
pmap_pv_throttle(__unused pmap_t p)
{
	pmap_assert(p != kernel_pmap);
	/* Apply throttle on non-kernel mappings */
	if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
		pmap_pv_throttle_stat++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
		pmap_pv_throttled_waiters++;
		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
		thread_block(THREAD_CONTINUE_NULL);
	}
}
/*
 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
 */

#define pa_index(pa)            (i386_btop(pa))
#define ppn_to_pai(ppn)         ((int)ppn)

#define pai_to_pvh(pai)         (&pv_head_table[pai])
#define lock_pvh_pai(pai)       bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)     bit_unlock(pai, (void *)pv_lock_table)
#define pvhash(idx)             (&pv_hash_table[idx])
#define lock_hash_hash(hash)    bit_lock(hash, (void *)pv_hash_lock_table)
#define unlock_hash_hash(hash)  bit_unlock(hash, (void *)pv_hash_lock_table)

#define IS_MANAGED_PAGE(x)                              \
	((unsigned int)(x) <= last_managed_page &&      \
	 ((unsigned long long)pmap_phys_attributes[x] & PHYS_MANAGED))
#define IS_INTERNAL_PAGE(x)                     \
	(IS_MANAGED_PAGE(x) && ((unsigned long long)pmap_phys_attributes[x] & PHYS_INTERNAL))
#define IS_REUSABLE_PAGE(x)                     \
	(IS_MANAGED_PAGE(x) && ((unsigned long long)pmap_phys_attributes[x] & PHYS_REUSABLE))
#define IS_ALTACCT_PAGE(x, pve)                         \
	(IS_MANAGED_PAGE((x)) &&                        \
	 (PVE_IS_ALTACCT_PAGE((pve))))
/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define PHYS_MODIFIED   INTEL_PTE_MOD           /* page modified */
#define PHYS_REFERENCED INTEL_PTE_REF           /* page referenced */
#define PHYS_MANAGED    INTEL_PTE_VALID         /* page is managed */
#define PHYS_NOENCRYPT  INTEL_PTE_USER          /* no need to encrypt this page in the hibernation image */
#define PHYS_NCACHE     INTEL_PTE_NCACHE
#define PHYS_PAT        INTEL_PTE_PAT
#define PHYS_CACHEABILITY_MASK (INTEL_PTE_PAT | INTEL_PTE_NCACHE)
#define PHYS_INTERNAL   INTEL_PTE_WTHRU         /* page from internal object */
#define PHYS_REUSABLE   INTEL_PTE_WRITE         /* page is "reusable" */
#if DEVELOPMENT || DEBUG
extern boolean_t pmap_disable_kheap_nx;
extern boolean_t pmap_disable_kstack_nx;
#endif

#define PMAP_EXPAND_OPTIONS_NONE (0x0)
#define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
#define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)
#define PMAP_EXPAND_OPTIONS_ALIASMAP (0x40000000U)
/*
 * Amount of virtual memory mapped by one
 * page-directory entry.
 */
#define PDE_MAPPED_SIZE         (pdetova(1))
/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols: (changed 2/2007 JK)
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however,
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *
 *	The system-wide pmap lock has been removed.  Now, paths take a lock
 *	on the pmap before changing its 'shape', and the reverse-order lockers
 *	(coming in by phys ppn) take a lock on the corresponding pv and then
 *	retest to be sure nothing changed during the window before they locked,
 *	and can then run up/down the pv lists holding the list lock.  This also
 *	lets the pmap layer run (nearly completely) interrupt enabled, unlike
 *	previously.
 */

#define LOCK_PVH(index) {                       \
	mp_disable_preemption();                \
	lock_pvh_pai(index);                    \
}

#define UNLOCK_PVH(index) {                     \
	unlock_pvh_pai(index);                  \
	mp_enable_preemption();                 \
}
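
/*
 * Illustrative sketch (not part of the original header) of the reverse-order
 * locking described above, as used by the physical-page-based paths: lock the
 * pv_head entry for the page, then walk the mappings recorded for it.  The
 * retest of pv_h->pmap after taking the lock guards against the window
 * mentioned in the comment.
 *
 *	int pai = ppn_to_pai(ppn);
 *	LOCK_PVH(pai);				// also disables preemption
 *	pv_rooted_entry_t pv_h = pai_to_pvh(pai);
 *	if (pv_h->pmap != PMAP_NULL) {		// retest: page still mapped?
 *		// run up/down the pv list rooted at pv_h, holding the lock
 *	}
 *	UNLOCK_PVH(pai);
 */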
extern uint64_t pde_mapped_size;

extern char     *pmap_phys_attributes;
extern ppnum_t  last_managed_page;

/*
 * Used to record high memory allocated to kernel before
 * pmap_init() gets called.
 */
extern ppnum_t pmap_high_used_top;
extern ppnum_t pmap_high_used_bottom;
extern ppnum_t pmap_middle_used_top;
extern ppnum_t pmap_middle_used_bottom;

/*
 * When spinning through pmap_remove, ensure that we don't spend too much
 * time with preemption disabled.  The current threshold is set to 20us
 * (MAX_PREEMPTION_LATENCY_NS).
 */
#define MAX_PREEMPTION_LATENCY_NS 20000
extern uint64_t max_preemption_latency_tsc;
#if DEVELOPMENT || DEBUG
#define PMAP_INTR_DEBUG (1)
#endif

#if PMAP_INTR_DEBUG
#define pmap_intr_assert() {                                                                            \
	if (processor_avail_count > 1 && !ml_get_interrupts_enabled())                                  \
	        panic("pmap interrupt assert %d %s, %d", processor_avail_count, __FILE__, __LINE__);    \
}
#else
#define pmap_intr_assert()
#endif

#if DEVELOPMENT || DEBUG
extern int nx_enabled;
#endif
extern unsigned int inuse_ptepages_count;
static inline uint32_t
pvhashidx(pmap_t pmap, vm_map_offset_t va)
{
	uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
	    ((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
	    npvhashmask;
	return hashidx;
}
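
/*
 * Worked example (illustrative, hypothetical values): npvhashmask is sized as
 * a power of two minus one (see NPVHASHMASK above), so the final AND reduces
 * the XOR of the pmap pointer and the virtual page number to a valid bucket
 * index of pv_hash_table.  Assuming PAGE_SHIFT == 12 and npvhashmask == 4095:
 *
 *	(uint32_t)(uintptr_t)pmap	0x12345680
 *	va >> PAGE_SHIFT		0x0010c0a3	(va == 0x10c0a3000)
 *	XOR				0x12249623
 *	& npvhashmask			0x623		-> bucket pvhash(0x623)
 */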
/*
 * Unlinks the pv_hashed_entry_t pvh from the singly linked hash chain,
 * properly dealing with the anchor.
 * Must be called with the hash locked; does not unlock it.
 */
static inline void
pmap_pvh_unlink(pv_hashed_entry_t pvh)
{
	pv_hashed_entry_t curh;
	pv_hashed_entry_t *pprevh;
	uint32_t pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));

	pprevh = pvhash(pvhash_idx);

#if PV_DEBUG
	if (NULL == *pprevh) {
		panic("pvh_unlink null anchor"); /* JK DEBUG */
	}
#endif
	curh = *pprevh;

	while (PV_HASHED_ENTRY_NULL != curh) {
		if (pvh == curh) {
			break;
		}
		pprevh = &curh->nexth;
		curh = curh->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == curh) {
		panic("pmap_pvh_unlink no pvh");
	}
	*pprevh = pvh->nexth;
	return;
}
static inline void
pv_hash_add(pv_hashed_entry_t pvh_e,
    pv_rooted_entry_t pv_h)
{
	pv_hashed_entry_t *hashp;
	uint32_t pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	insque(&pvh_e->qlink, &pv_h->qlink);
	hashp = pvhash(pvhash_idx);
#if PV_DEBUG
	if (NULL == hashp) {
		panic("pv_hash_add(%p) null hash bucket", pvh_e);
	}
#endif
	pvh_e->nexth = *hashp;
	*hashp = pvh_e;
	UNLOCK_PV_HASH(pvhash_idx);
}
static inline void
pv_hash_remove(pv_hashed_entry_t pvh_e)
{
	uint32_t pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	remque(&pvh_e->qlink);
	pmap_pvh_unlink(pvh_e);
	UNLOCK_PV_HASH(pvhash_idx);
}
/* Returns TRUE if 'distance' has at most one bit set (population count <= 1). */
static inline boolean_t
popcnt1(uint64_t distance)
{
	return (distance & (distance - 1)) == 0;
}
/*
 * Routines to handle suppression of/recovery from some forms of pagetable corruption
 * incidents observed in the field.  These can be either software induced (wild
 * stores to the mapwindows where applicable, use-after-free errors
 * (typically of pages addressed physically), mis-directed DMAs, etc.), or due
 * to DRAM/memory hierarchy/interconnect errors.  Given the theoretical rarity of these errors,
 * the recording mechanism is deliberately not MP-safe.  The overarching goal is to
 * still assert on potential software races, but attempt recovery from incidents
 * identifiable as occurring due to issues beyond the control of the pmap module.
 * The latter includes single-bit errors and malformed pagetable entries.
 * We currently limit ourselves to recovery/suppression of one incident per
 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
 * are recorded in a small ring of log records (PMAP_PAGETABLE_CORRUPTION_MAX_LOG).
 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
 */

typedef enum {
	PTE_VALID                = 0x0,
	PTE_INVALID              = 0x1,
	PTE_RSVD                 = 0x2,
	PTE_SUPERVISOR           = 0x4,
	PTE_BITFLIP              = 0x8,
	PV_BITFLIP               = 0x10,
	PTE_INVALID_CACHEABILITY = 0x20,
	PTE_NXBITFLIP            = 0x40
} pmap_pagetable_corruption_t;
typedef enum {
	ROOT_PRESENT = 0,
	ROOT_ABSENT = 1
} pmap_pv_assertion_t;
typedef enum {
	PMAP_ACTION_IGNORE       = 0x0,
	PMAP_ACTION_ASSERT       = 0x1,
	PMAP_ACTION_RETRY        = 0x2,
	PMAP_ACTION_RETRY_RELOCK = 0x4
} pmap_pagetable_corruption_action_t;
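
/*
 * Illustrative sketch (not part of the original header): callers of
 * pmap_classify_pagetable_corruption() (see pmap_pv_remove() below) dispatch
 * on the returned action roughly as follows; the PV head lock has been
 * dropped on the RETRY_RELOCK path and must be retaken before retrying.
 *
 *	pmap_pagetable_corruption_action_t pac =
 *	    pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, ptep, incident);
 *	switch (pac) {
 *	case PMAP_ACTION_IGNORE:	// suppress: skip the operation
 *		break;
 *	case PMAP_ACTION_RETRY:		// state was repaired in place; retry
 *		goto retry;
 *	case PMAP_ACTION_RETRY_RELOCK:	// retake LOCK_PVH(ppn_to_pai(*ppnp)), then retry
 *		goto relock_and_retry;
 *	case PMAP_ACTION_ASSERT:
 *	default:
 *		panic("possible memory corruption ...");
 *	}
 */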
#define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
extern uint64_t pmap_pagetable_corruption_interval_abstime;

extern uint32_t pmap_pagetable_corruption_incidents;
#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
typedef struct {
	pmap_pv_assertion_t incident;
	pmap_pagetable_corruption_t reason;
	pmap_pagetable_corruption_action_t action;
	pmap_t pmap;
	vm_map_offset_t vaddr;
	pt_entry_t pte;
	ppnum_t ppn;
	pmap_t pvpmap;
	vm_map_offset_t pvva;
	uint64_t abstime;
	int adj_ptes_count;
#define PMPTCR_MAX_ADJ_PTES (2)
	uint64_t adj_ptes[PMPTCR_MAX_ADJ_PTES];
} pmap_pagetable_corruption_record_t;

extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
extern uint64_t pmap_pagetable_corruption_last_abstime;
extern thread_call_t pmap_pagetable_corruption_log_call;
extern boolean_t pmap_pagetable_corruption_timeout;
static inline pmap_pagetable_corruption_action_t
pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason,
    pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep,
    ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva, int adj_pteps_cnt, uint64_t **adj_pteps)
{
	uint32_t pmap_pagetable_corruption_log_index;
	uint64_t curtime = mach_absolute_time();

	if ((curtime - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
		pmap_pagetable_corruption_timeout = TRUE;
		action = PMAP_ACTION_ASSERT;
	} else {
		pmap_pagetable_corruption_last_abstime = curtime;
	}

	pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = curtime;
	if (adj_pteps_cnt > 0 && adj_pteps != NULL) {
		pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].adj_ptes_count = MIN(adj_pteps_cnt, PMPTCR_MAX_ADJ_PTES);
		for (int i = 0; i < pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].adj_ptes_count; i++) {
			pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].adj_ptes[i] = *adj_pteps[i];
		}
	}
	/* Asynchronously log */
	thread_call_enter(pmap_pagetable_corruption_log_call);

	return action;
}
static inline pmap_pagetable_corruption_action_t
pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident)
{
	pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
	pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
	ppnum_t suppress_ppn = 0;
	pt_entry_t cpte = *ptep;
	ppnum_t cpn = pa_index(pte_to_pa(cpte));
	ppnum_t ppn = *ppnp;
	pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_rooted_entry_t pv_e = pv_h;
	unsigned bitdex;
	pmap_t pvpmap = pv_h->pmap;
	vm_map_offset_t pvva = PVE_VA(pv_h);
	vm_map_offset_t pve_flags;
	boolean_t ppcd = FALSE;
	boolean_t is_ept;

	/* Ideally, we'd consult the Mach VM here to definitively determine
	 * the nature of the mapping for this address space and address.
	 * As that would be a layering violation in this context, we
	 * use various heuristics to recover from single bit errors,
	 * malformed pagetable entries etc. These are not intended
	 * to be comprehensive.
	 */

	/* As a precautionary measure, mark A+D */
	pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
	is_ept = is_ept_pmap(pmap);

	/*
	 * Correct potential single bit errors in either (but not both) element
	 * of the PV
	 */
	do {
		if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
		    (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
			pve_flags = PVE_FLAGS(pv_e);
			pv_e->pmap = pmap;
			pv_h->va_and_flags = vaddr | pve_flags;
			suppress_reason = PV_BITFLIP;
			action = PMAP_ACTION_RETRY;
			goto pmap_cpc_exit;
		}
	} while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));

	/* Discover root entries with a Hamming
	 * distance of 1 from the supplied
	 * physical page frame.
	 */
	for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
		ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
		if (IS_MANAGED_PAGE(npn)) {
			pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
			if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
				suppress_reason = PTE_BITFLIP;
				suppress_ppn = npn;
				action = PMAP_ACTION_RETRY_RELOCK;
				UNLOCK_PVH(ppn_to_pai(ppn));
				*ppnp = npn;
				goto pmap_cpc_exit;
			}
		}
	}

	if (pmap == kernel_pmap) {
		action = PMAP_ACTION_ASSERT;
		goto pmap_cpc_exit;
	}

	/*
	 * Check for malformed/inconsistent entries.
	 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
	 */
	if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PAT)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_INVALID_CACHEABILITY;
	} else if (cpte & INTEL_PTE_RSVD) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_RSVD;
	} else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_SUPERVISOR;
	}
pmap_cpc_exit:
	PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));

	if (debug_boot_arg && !ppcd) {
		action = PMAP_ACTION_ASSERT;
	}

	return pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva, 0, 0);
}
static inline boolean_t
pmap_compressed_pte_corruption_repair(uint64_t pte, uint64_t *pte_addr, uint64_t *ptep, pmap_t pmap,
    vm_map_offset_t vaddr)
{
	uint64_t *adj_pteps[2];
	int pteidx = ((uintptr_t)ptep & INTEL_OFFMASK) / sizeof(pt_entry_t);
	pmap_pagetable_corruption_action_t action = PMAP_ACTION_IGNORE;

	/*
	 * Grab pointers to PTEs on either side of the PTE in question, unless we're at the start of
	 * a PT (grab pointers to the next and next-next PTEs) or the end of a PT (grab the previous
	 * 2 PTEs).
	 */
	if (pteidx == 0) {
		adj_pteps[0] = ptep + 1;
		adj_pteps[1] = ptep + 2;
	} else if (pteidx == (NPTPG - 1)) {
		adj_pteps[0] = ptep - 2;
		adj_pteps[1] = ptep - 1;
	} else {
		adj_pteps[0] = ptep - 1;
		adj_pteps[1] = ptep + 1;
	}

	/*
	 * Since the compressed PTE no longer has a PTE associated, we cannot pass in the pv data to
	 * pmap_pagetable_corruption_log, so instead supply adjacent PTEs for logging.
	 */
	if (pmap_pagetable_corruption_log(ROOT_ABSENT, (pte & INTEL_PTE_NX) ? PTE_NXBITFLIP : PTE_BITFLIP,
	    action, pmap, vaddr, ptep, (ppnum_t)~0UL, 0, 0, sizeof(adj_pteps) / sizeof(adj_pteps[0]),
	    adj_pteps) != PMAP_ACTION_ASSERT) {
		/* Correct the flipped bit(s) and continue */
		pmap_store_pte(ptep, pte & INTEL_PTE_COMPRESSED_MASK);
		pmap->corrected_compressed_ptes_count++;
		return TRUE; /* Returning TRUE to indicate this is now a valid compressed PTE (we hope) */
	}

	panic("compressed PTE %p 0x%llx has extra bits 0x%llx: corrupted? Adjacent PTEs: 0x%llx@%p, 0x%llx@%p",
	    pte_addr, pte, pte & ~INTEL_PTE_COMPRESSED_MASK, *adj_pteps[0], adj_pteps[0], *adj_pteps[1], adj_pteps[1]);
}
/*
 * Remove pv list entry.
 * Called with pv_head_table entry locked.
 * Returns pv entry to be freed (or NULL).
 */
static inline __attribute__((always_inline)) pv_hashed_entry_t
pmap_pv_remove(pmap_t pmap,
    vm_map_offset_t vaddr,
    ppnum_t *ppnp,
    pt_entry_t *pte,
    boolean_t *was_altacct)
{
	pv_hashed_entry_t pvh_e;
	pv_rooted_entry_t pv_h;
	pv_hashed_entry_t *pprevh;
	uint32_t pvhash_idx;
	uint32_t pv_cnt;
	ppnum_t ppn;

	*was_altacct = FALSE;
pmap_pv_remove_retry:
	ppn = *ppnp;
	pvh_e = PV_HASHED_ENTRY_NULL;
	pv_h = pai_to_pvh(ppn_to_pai(ppn));

	if (__improbable(pv_h->pmap == PMAP_NULL)) {
		pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
		if (pac == PMAP_ACTION_IGNORE) {
			goto pmap_pv_remove_exit;
		} else if (pac == PMAP_ACTION_ASSERT) {
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pmap_pagetable_corruption_incidents);
		} else if (pac == PMAP_ACTION_RETRY_RELOCK) {
			LOCK_PVH(ppn_to_pai(*ppnp));
			pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
			goto pmap_pv_remove_retry;
		} else if (pac == PMAP_ACTION_RETRY) {
			goto pmap_pv_remove_retry;
		}
	}

	if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
		*was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pv_h);
		/*
		 * Header is the pv_rooted_entry.
		 * We can't free that. If there is a queued
		 * entry after this one we remove that
		 * from the ppn queue, we remove it from the hash chain
		 * and copy it to the rooted entry. Then free it instead.
		 */
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
		if (pv_h != (pv_rooted_entry_t) pvh_e) {
			/*
			 * Entry queued to root, remove this from hash
			 * and install as new root.
			 */
			CHK_NPVHASH();
			pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
			LOCK_PV_HASH(pvhash_idx);
			remque(&pvh_e->qlink);
			pprevh = pvhash(pvhash_idx);
			if (PV_HASHED_ENTRY_NULL == *pprevh) {
				panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
				    "empty hash, removing rooted, priors: %d",
				    pmap, vaddr, ppn, pmap_pagetable_corruption_incidents);
			}
			pmap_pvh_unlink(pvh_e);
			UNLOCK_PV_HASH(pvhash_idx);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va_and_flags = pvh_e->va_and_flags;
			/* dispose of pvh_e */
		} else {
			/* none queued after rooted */
			pv_h->pmap = PMAP_NULL;
			pvh_e = PV_HASHED_ENTRY_NULL;
		}
	} else {
		/*
		 * not removing rooted pv. find it on hash chain, remove from
		 * ppn queue and hash chain and free it
		 */
		CHK_NPVHASH();
		pvhash_idx = pvhashidx(pmap, vaddr);
		LOCK_PV_HASH(pvhash_idx);
		pprevh = pvhash(pvhash_idx);
		if (PV_HASHED_ENTRY_NULL == *pprevh) {
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash, priors: %d",
			    pmap, vaddr, ppn, *pte, pte, pmap_pagetable_corruption_incidents);
		}
		pvh_e = *pprevh;
		pmap_pv_hashlist_walks++;
		pv_cnt = 0;
		while (PV_HASHED_ENTRY_NULL != pvh_e) {
			pv_cnt++;
			if (pvh_e->pmap == pmap &&
			    PVE_VA(pvh_e) == vaddr &&
			    pvh_e->ppn == ppn) {
				break;
			}
			pprevh = &pvh_e->nexth;
			pvh_e = pvh_e->nexth;
		}

		if (PV_HASHED_ENTRY_NULL == pvh_e) {
			pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);

			if (pac == PMAP_ACTION_ASSERT) {
				panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h), pmap_pagetable_corruption_incidents);
			} else {
				UNLOCK_PV_HASH(pvhash_idx);
				if (pac == PMAP_ACTION_RETRY_RELOCK) {
					LOCK_PVH(ppn_to_pai(*ppnp));
					pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
					goto pmap_pv_remove_retry;
				} else if (pac == PMAP_ACTION_RETRY) {
					goto pmap_pv_remove_retry;
				} else if (pac == PMAP_ACTION_IGNORE) {
					goto pmap_pv_remove_exit;
				}
			}
		}

		*was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pvh_e);

		pmap_pv_hashlist_cnts += pv_cnt;
		if (pmap_pv_hashlist_max < pv_cnt) {
			pmap_pv_hashlist_max = pv_cnt;
		}
		*pprevh = pvh_e->nexth;
		remque(&pvh_e->qlink);
		UNLOCK_PV_HASH(pvhash_idx);
	}
pmap_pv_remove_exit:
	return pvh_e;
}
static inline __attribute__((always_inline)) boolean_t
pmap_pv_is_altacct(
	pmap_t          pmap,
	vm_map_offset_t vaddr,
	ppnum_t         ppn)
{
	pv_hashed_entry_t pvh_e;
	pv_rooted_entry_t pv_h;
	uint32_t pvhash_idx;
	boolean_t is_altacct;

	pvh_e = PV_HASHED_ENTRY_NULL;
	pv_h = pai_to_pvh(ppn_to_pai(ppn));

	if (__improbable(pv_h->pmap == PMAP_NULL)) {
		return FALSE;
	}

	if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
		/*
		 * Header is the pv_rooted_entry.
		 */
		return IS_ALTACCT_PAGE(ppn, pv_h);
	}

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pmap, vaddr);
	LOCK_PV_HASH(pvhash_idx);
	pvh_e = *(pvhash(pvhash_idx));
	while (PV_HASHED_ENTRY_NULL != pvh_e) {
		if (pvh_e->pmap == pmap &&
		    PVE_VA(pvh_e) == vaddr &&
		    pvh_e->ppn == ppn) {
			break;
		}
		pvh_e = pvh_e->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == pvh_e) {
		is_altacct = FALSE;
	} else {
		is_altacct = IS_ALTACCT_PAGE(ppn, pvh_e);
	}
	UNLOCK_PV_HASH(pvhash_idx);

	return is_altacct;
}
static inline void
PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, (ledger_amount_t)bytes);
}

static inline void
PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, (ledger_amount_t)bytes);
}

static inline void
PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_shared, (ledger_amount_t)bytes);
}

static inline void
PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_shared, (ledger_amount_t)bytes);
}
extern boolean_t pmap_initialized;      /* Has pmap_init completed? */
#define valid_page(x) (pmap_initialized && pmap_valid_page(x))

int phys_attribute_test(
	ppnum_t phys,
	int bits);
void phys_attribute_clear(
	ppnum_t phys,
	int bits,
	unsigned int options,
	void *arg);
//#define PCID_DEBUG 1
#if PCID_DEBUG
#define pmap_pcid_log(fmt, args...)     \
	do {                            \
	        kprintf(fmt, ##args);   \
	        printf(fmt, ##args);    \
	} while (0)
#else
#define pmap_pcid_log(fmt, args...)
#endif
void pmap_pcid_configure(void);
/*
 * Atomic 64-bit compare and exchange of a page table entry.
 */

#include <machine/atomic.h>
static inline boolean_t
pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
{
	return __c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)entryp, &old, new,
	           memory_order_acq_rel_smp, memory_order_relaxed);
}

extern uint32_t pmap_update_clear_pte_count;
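
/*
 * Illustrative sketch (not from the original header): the usual way to update
 * a live PTE with pmap_cmpx_pte() is a read/modify/compare-exchange loop, so
 * that a concurrent hardware A/D-bit update or another CPU's store is never
 * lost; pmap_update_pte() below is the canonical form.  E.g. wiring a mapping:
 *
 *	pt_entry_t opte, npte;
 *	do {
 *		opte = *ptep;
 *		npte = opte | INTEL_PTE_WIRED;
 *	} while (!pmap_cmpx_pte(ptep, opte, npte));
 */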
static inline void
pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits)
{
	pt_entry_t npte, opte;
	do {
		opte = *mptep;
		if (__improbable(opte == 0)) {
#if DEVELOPMENT || DEBUG
			pmap_update_clear_pte_count++;
#endif
			break;
		}
		npte = opte & ~(pclear_bits);
		npte |= pset_bits;
	} while (!pmap_cmpx_pte(mptep, opte, npte));
}
/*
 * The single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap.  We allocate this page in kernel VM.
 * This returns the address of the requested pml4 entry in the top-level page.
 */
static inline pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
	    (vaddr < 0xFFFF800000000000ULL))) {
		return NULL;
	}

#if DEBUG
	return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG - 1)]);
#else
	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG - 1)];
#endif
}
static inline pml4_entry_t *
pmap64_user_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
	    (vaddr < 0xFFFF800000000000ULL))) {
		return NULL;
	}

#if DEBUG
	return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_ucr3)[(vaddr >> PML4SHIFT) & (NPML4PG - 1)]);
#else
	return &pmap->pm_upml4[(vaddr >> PML4SHIFT) & (NPML4PG - 1)];
#endif
}
/*
 * Returns the address of the requested PDPT entry in the physmap.
 */
static inline pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t    newpf;
	pml4_entry_t    *pml4;
	boolean_t       is_ept;

	pml4 = pmap64_pml4(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
		newpf = *pml4 & PG_FRAME;
		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
		       [(vaddr >> PDPTSHIFT) & (NPDPTPG - 1)];
	}
	return NULL;
}
/*
 * Returns the address of the requested PDE entry in the physmap.
 */
static inline pd_entry_t *
pmap_pde_internal1(vm_map_offset_t vaddr, boolean_t is_ept, pdpt_entry_t *pdpte)
{
	if (*pdpte & PTE_VALID_MASK(is_ept)) {
		pdpt_entry_t newpf = *pdpte & PG_FRAME;
		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
		       [(vaddr >> PDSHIFT) & (NPDPG - 1)];
	} else {
		return NULL;
	}
}
static inline pd_entry_t *
pmap_pde_internal0(pmap_t pmap, vm_map_offset_t vaddr, boolean_t is_ept)
{
	pdpt_entry_t *pdpt;

	pdpt = pmap64_pdpt(pmap, vaddr);
	if (pdpt) {
		return pmap_pde_internal1(vaddr, is_ept, pdpt);
	} else {
		return NULL;
	}
}
static inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t    *pdpt;
	boolean_t       is_ept;

	pdpt = pmap64_pdpt(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pdpt) {
		return pmap_pde_internal1(vaddr, is_ept, pdpt);
	} else {
		return NULL;
	}
}
/*
 * Return the address of the mapped PTE for vaddr in the given pmap.
 *
 * In case the PDE maps a superpage, return the PDE, which in that case
 * is the actual page table entry.
 */
static inline pt_entry_t *
pmap_pte_internal(vm_map_offset_t vaddr, boolean_t is_ept, pd_entry_t *pde)
{
	if (*pde & PTE_VALID_MASK(is_ept)) {
		if (__improbable(*pde & PTE_PS)) {
			return pde;
		}
		pd_entry_t newpf = *pde & PG_FRAME;

		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
		       [i386_btop(vaddr) & (ppnum_t)(NPTEPG - 1)];
	} else {
		return NULL;
	}
}
static inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t      *pde;
	boolean_t       is_ept;

	is_ept = is_ept_pmap(pmap);

	pde = pmap_pde_internal0(pmap, vaddr, is_ept);
	if (pde) {
		return pmap_pte_internal(vaddr, is_ept, pde);
	} else {
		return NULL;
	}
}
extern void pmap_alias(
	vm_offset_t     ava,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       prot,
	unsigned int    options);

#if DEBUG
#define DPRINTF(x...)   kprintf(x)
#else
#define DPRINTF(x...)
#endif

#endif /* MACH_KERNEL_PRIVATE */
#endif /* _I386_PMAP_INTERNAL_ */