/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _I386_PMAP_INTERNAL_
#define _I386_PMAP_INTERNAL_
#ifdef MACH_KERNEL_PRIVATE

#include <vm/pmap.h>
#include <sys/kdebug.h>
#include <kern/ledger.h>
#include <kern/simple_lock.h>
#include <i386/bit_routines.h>

/*
 * pmap locking
 */

#define PMAP_LOCK(pmap) {		\
	simple_lock(&(pmap)->lock);	\
}

#define PMAP_UNLOCK(pmap) {		\
	simple_unlock(&(pmap)->lock);	\
}

#define PMAP_UPDATE_TLBS(pmap, s, e)	\
	pmap_flush_tlbs(pmap, s, e, 0, NULL)

#define	PMAP_DELAY_TLB_FLUSH		0x01

#define PMAP_UPDATE_TLBS_DELAYED(pmap, s, e, c)	\
	pmap_flush_tlbs(pmap, s, e, PMAP_DELAY_TLB_FLUSH, c)

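/*
 * Illustrative sketch only (not part of this interface): callers in
 * osfmk/i386/pmap.c generally pair the lock macros with a TLB update over
 * the range of PTEs they modified, along these lines:
 *
 *	PMAP_LOCK(pmap);
 *	... edit the PTEs covering [s, e) ...
 *	PMAP_UPDATE_TLBS(pmap, s, e);
 *	PMAP_UNLOCK(pmap);
 *
 * The delayed variant gathers the shootdown into a pmap_flush_context so a
 * caller can batch several ranges and issue the shootdown once later (see
 * pmap_flush()).
 */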

#define iswired(pte)	((pte) & INTEL_PTE_WIRED)

#ifdef	PMAP_TRACES
extern	boolean_t	pmap_trace;
#define PMAP_TRACE(x,a,b,c,d,e) \
	if (pmap_trace) { \
		KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e); \
	}
#else
#define PMAP_TRACE(x,a,b,c,d,e)	KERNEL_DEBUG(x,a,b,c,d,e)
#endif /* PMAP_TRACES */

#define PMAP_TRACE_CONSTANT(x,a,b,c,d,e) \
	KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e);

kern_return_t	pmap_expand_pml4(
			pmap_t map,
			vm_map_offset_t v,
			unsigned int options);

kern_return_t	pmap_expand_pdpt(
			pmap_t map,
			vm_map_offset_t v,
			unsigned int options);

void		phys_attribute_set(
			ppnum_t phys,
			int bits);

void		pmap_set_reference(
			ppnum_t pn);

boolean_t	phys_page_exists(
			ppnum_t pn);

void
pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);

void
pmap_update_cache_attributes_locked(ppnum_t, unsigned);

extern const boolean_t cpu_64bit;

/*
 * Private data structures.
 */

/*
 * For each vm_page_t, there is a list of all currently
 * valid virtual mappings of that page. An entry is
 * a pv_rooted_entry_t; the list is the pv_table.
 *
 * N.B. with the new combo rooted/hashed scheme it is
 * only possible to remove individual non-rooted entries
 * if they are found via the hashed chains, as there is no
 * way to unlink the singly linked hashed entries if navigated to
 * via the queue list off the rooted entries. Think of it as
 * hash/walk/pull, keeping track of the prev pointer while walking
 * the singly linked hash list. All of this is to save memory and
 * keep both types of pv_entries as small as possible.
 */

/*

PV HASHING Changes - JK 1/2007

Pve's establish physical to virtual mappings. These are used for aliasing of a
physical page to (potentially many) virtual addresses within pmaps. In the
previous implementation, the structure of the pv_entries (each 16 bytes in size) was

typedef struct pv_entry {
	struct pv_entry_t	next;
	pmap_t			pmap;
	vm_map_offset_t		va;
} *pv_entry_t;

An initial array of these is created at boot time, one per physical page of
memory, indexed by the physical page number. Additionally, a pool of entries
is created from a pv_zone to be used as needed by pmap_enter() when it is
creating new mappings. Originally, we kept this pool around because the code
in pmap_enter() was unable to block if it needed an entry and none were
available - we'd panic. Some time ago I restructured the pmap_enter() code
so that for user pmaps it can block while zalloc'ing a pv structure and restart,
removing a panic from the code (in the case of the kernel pmap we cannot block
and still panic, so we keep a separate hot pool for use only on kernel pmaps).
The pool has not been removed since there is a large performance gain from keeping
freed pv's around for reuse and not suffering the overhead of zalloc for every
new pv we need.

As pmap_enter() created new mappings it linked the new pve's for them off the
fixed pv array for that ppn (off the next pointer). These pve's are accessed
for several operations, one of them being address space teardown. In that case,
we basically do this

	for (every page/pte in the space) {
		calc pve_ptr from the ppn in the pte
		for (every pv in the list for the ppn) {
			if (this pv is for this pmap/vaddr) {
				do housekeeping
				unlink/free the pv
			}
		}
	}

The problem arose when we were running, say, 8000 (or even 2000) apache or
other processes and one or all terminate. The list hanging off each pv array
entry could have thousands of entries. We were continuously linearly searching
each of these lists as we stepped through the address space we were tearing
down. Because of the locks we hold, likely taking a cache miss for each node,
and interrupt disabling for MP issues, the system became completely unresponsive
for many seconds while we did this.

Realizing that pve's are accessed in two distinct ways (linearly running the
list by ppn for operations like pmap_page_protect, and finding and
modifying/removing a single pve as part of pmap_enter processing) has led to
modifying the pve structures and databases.

There are now two types of pve structures: a "rooted" structure, which is
basically the original structure accessed in an array by ppn, and a "hashed"
structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
designed with the two goals of minimizing wired memory and making the lookup of
a ppn faster. Since a vast majority of pages in the system are not aliased
and hence represented by a single pv entry, I've kept the rooted entry size as
small as possible because there is one of these dedicated for every physical
page of memory. The hashed pve's are larger due to the addition of the hash
link and the ppn entry needed for matching while running the hash list to find
the entry we are looking for. This way, only systems that have lots of
aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
structures have the same first three fields, allowing some simplification in
the code.

They have these shapes

typedef struct pv_rooted_entry {
	queue_head_t		qlink;
	vm_map_offset_t		va;
	pmap_t			pmap;
} *pv_rooted_entry_t;


typedef struct pv_hashed_entry {
	queue_head_t		qlink;
	vm_map_offset_t		va;
	pmap_t			pmap;
	ppnum_t			ppn;
	struct pv_hashed_entry	*nexth;
} *pv_hashed_entry_t;

The main flow difference is that the code is now aware of the rooted entry and
the hashed entries. Code that runs the pv list still starts with the rooted
entry and then continues down the qlink onto the hashed entries. Code that is
looking up a specific pv entry first checks the rooted entry and then hashes
and runs the hash list for the match. The hash list lengths are much smaller
than the original pv lists that contained all aliases for the specific ppn.

*/

typedef struct pv_rooted_entry {
	/* first three entries must match pv_hashed_entry_t */
	queue_head_t		qlink;
	vm_map_offset_t		va_and_flags;	/* virtual address for mapping */
	pmap_t			pmap;		/* pmap where mapping lies */
} *pv_rooted_entry_t;

#define PV_ROOTED_ENTRY_NULL	((pv_rooted_entry_t) 0)

typedef struct pv_hashed_entry {
	/* first three entries must match pv_rooted_entry_t */
	queue_head_t		qlink;
	vm_map_offset_t		va_and_flags;
	pmap_t			pmap;
	ppnum_t			ppn;
	struct pv_hashed_entry	*nexth;
} *pv_hashed_entry_t;

#define PV_HASHED_ENTRY_NULL	((pv_hashed_entry_t)0)

#define PVE_VA(pve)		((pve)->va_and_flags & ~PAGE_MASK)
#define PVE_FLAGS(pve)		((pve)->va_and_flags & PAGE_MASK)
#define PVE_IS_ALTACCT		0x001
#define PVE_IS_ALTACCT_PAGE(pve) \
	(((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)

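/*
 * Illustration only (spelled out from the macros above, not a new interface):
 * because mapped VAs are page aligned, the low PAGE_MASK bits of va_and_flags
 * are free to carry per-entry flags such as PVE_IS_ALTACCT:
 *
 *	pve->va_and_flags = (vaddr & ~PAGE_MASK) | PVE_IS_ALTACCT;
 *	vm_map_offset_t va  = PVE_VA(pve);		// flags stripped
 *	boolean_t       alt = PVE_IS_ALTACCT_PAGE(pve);	// TRUE in this example
 */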
//#define PV_DEBUG 1		/* uncomment to enable some PV debugging code */
#ifdef PV_DEBUG
#define CHK_NPVHASH()	if (0 == npvhashmask) panic("npvhash uninitialized");
#else
#define CHK_NPVHASH()
#endif

#define NPVHASHBUCKETS (4096)
#define NPVHASHMASK ((NPVHASHBUCKETS) - 1)	/* MUST BE 2^N - 1 */
#define PV_HASHED_LOW_WATER_MARK_DEFAULT	5000
#define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT	2000
#define PV_HASHED_ALLOC_CHUNK_INITIAL		2000
#define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL	200

extern volatile uint32_t	mappingrecurse;
extern uint32_t	pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;

/*
 * PV hash locking
 */

#define LOCK_PV_HASH(hash)	lock_hash_hash(hash)
#define UNLOCK_PV_HASH(hash)	unlock_hash_hash(hash)
extern uint32_t npvhashmask;
extern pv_hashed_entry_t	*pv_hash_table;  /* hash lists */
extern pv_hashed_entry_t	pv_hashed_free_list;
extern pv_hashed_entry_t	pv_hashed_kern_free_list;
decl_simple_lock_data(extern, pv_hashed_free_list_lock)
decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
decl_simple_lock_data(extern, pv_hash_table_lock)
decl_simple_lock_data(extern, phys_backup_lock)

extern zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry
						 * structures */

extern uint32_t		pv_hashed_free_count;
extern uint32_t		pv_hashed_kern_free_count;
/*
 * Each entry in the pv_head_table is locked by a bit in the
 * pv_lock_table.  The lock bits are accessed by the address of
 * the frame they lock.
 */
#define pv_lock_table_size(n)		(((n)+BYTE_SIZE-1)/BYTE_SIZE)
#define pv_hash_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
extern char		*pv_lock_table;		/* pointer to array of bits */
extern char		*pv_hash_lock_table;
extern pv_rooted_entry_t pv_head_table;		/* array of entries, one per page */

extern event_t mapping_replenish_event;

static inline void	PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
	pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_free_list_lock);
	/* If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
	if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
		pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
		pv_hashed_free_count--;
	}

	simple_unlock(&pv_hashed_free_list_lock);

	if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
		if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse))
			thread_wakeup(&mapping_replenish_event);
	}
}

static inline void	PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
	simple_lock(&pv_hashed_free_list_lock);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
	pv_hashed_free_list = pvh_eh;
	pv_hashed_free_count += pv_cnt;
	simple_unlock(&pv_hashed_free_list_lock);
}

extern unsigned pmap_kern_reserve_alloc_stat;

static inline void	PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
	pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
	simple_lock(&pv_hashed_kern_free_list_lock);

	if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
		pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
		pv_hashed_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	simple_unlock(&pv_hashed_kern_free_list_lock);

	if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
		if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse))
			thread_wakeup(&mapping_replenish_event);
	}
}

static inline void	PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
	simple_lock(&pv_hashed_kern_free_list_lock);
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
	pv_hashed_kern_free_list = pvh_eh;
	pv_hashed_kern_free_count += pv_cnt;
	simple_unlock(&pv_hashed_kern_free_list_lock);
}

extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
extern event_t pmap_user_pv_throttle_event;

static inline void pmap_pv_throttle(__unused pmap_t p) {
	pmap_assert(p != kernel_pmap);
	/* Apply throttle on non-kernel mappings */
	if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
		pmap_pv_throttle_stat++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
		pmap_pv_throttled_waiters++;
		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
		thread_block(THREAD_CONTINUE_NULL);
	}
}

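/*
 * Rough sketch of how these helpers are expected to combine on the PV
 * allocation path (hedged -- the authoritative sequence lives in the
 * pmap_enter path in osfmk/i386/pmap.c):
 *
 *	pv_hashed_entry_t pvh_e = PV_HASHED_ENTRY_NULL;
 *	PV_HASHED_ALLOC(&pvh_e);			// common free list first
 *	if (pvh_e == PV_HASHED_ENTRY_NULL) {
 *		if (pmap == kernel_pmap) {
 *			PV_HASHED_KERN_ALLOC(&pvh_e);	// reserved kernel pool
 *		} else {
 *			pmap_pv_throttle(pmap);		// may briefly block
 *			pvh_e = (pv_hashed_entry_t)zalloc(pv_hashed_list_zone);
 *		}
 *	}
 */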
/*
 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
 */

#define pa_index(pa)		(i386_btop(pa))
#define ppn_to_pai(ppn)		((int)ppn)

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
#define pvhash(idx)		(&pv_hash_table[idx])
#define lock_hash_hash(hash)	bit_lock(hash, (void *)pv_hash_lock_table)
#define unlock_hash_hash(hash)	bit_unlock(hash, (void *)pv_hash_lock_table)

#define IS_MANAGED_PAGE(x)				\
	((unsigned int)(x) <= last_managed_page &&	\
	 (pmap_phys_attributes[x] & PHYS_MANAGED))
#define IS_INTERNAL_PAGE(x)				\
	(IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL))
#define IS_REUSABLE_PAGE(x)				\
	(IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE))
#define IS_ALTACCT_PAGE(x)				\
	(IS_MANAGED_PAGE((x)) &&			\
	 (PVE_IS_ALTACCT_PAGE(&pv_head_table[(x)])))

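/*
 * Example use of the indexing macros above (illustrative only, not a new
 * interface): given a physical address pa, locate and lock its
 * pv_head_table entry:
 *
 *	int pai = ppn_to_pai(pa_index(pa));
 *	if (IS_MANAGED_PAGE(pai)) {
 *		LOCK_PVH(pai);		// defined below; disables preemption
 *		pv_rooted_entry_t pv_h = pai_to_pvh(pai);
 *		... inspect or edit the PV list rooted at pv_h ...
 *		UNLOCK_PVH(pai);
 *	}
 */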
/*
 * Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define	PHYS_MANAGED	INTEL_PTE_VALID	/* page is managed */
#define	PHYS_NOENCRYPT	INTEL_PTE_USER	/* no need to encrypt this page in the hibernation image */
#define	PHYS_NCACHE	INTEL_PTE_NCACHE
#define	PHYS_PTA	INTEL_PTE_PTA
#define	PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
#define	PHYS_INTERNAL	INTEL_PTE_WTHRU	/* page from internal object */
#define	PHYS_REUSABLE	INTEL_PTE_WRITE	/* page is "reusable" */

extern boolean_t	pmap_disable_kheap_nx;
extern boolean_t	pmap_disable_kstack_nx;

#define PMAP_EXPAND_OPTIONS_NONE (0x0)
#define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
#define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)

/*
 * Amount of virtual memory mapped by one
 * page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))


/*
 * Locking and TLB invalidation
 */

/*
 * Locking Protocols: (changed 2/2007 JK)
 *
 * There are two structures in the pmap module that need locking:
 * the pmaps themselves, and the per-page pv_lists (which are locked
 * by locking the pv_lock_table entry that corresponds to the pv_head
 * for the list in question.)  Most routines want to lock a pmap and
 * then do operations in it that require pv_list locking -- however
 * pmap_remove_all and pmap_copy_on_write operate on a physical page
 * basis and want to do the locking in the reverse order, i.e. lock
 * a pv_list and then go through all the pmaps referenced by that list.
 *
 * The system wide pmap lock has been removed. Now, paths take a lock
 * on the pmap before changing its 'shape' and the reverse order lockers
 * (coming in by phys ppn) take a lock on the corresponding pv and then
 * retest to be sure nothing changed during the window before they locked
 * and can then run up/down the pv lists holding the list lock. This also
 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
 * previously.
 */

/*
 * PV locking
 */

#define LOCK_PVH(index)	{		\
	mp_disable_preemption();	\
	lock_pvh_pai(index);		\
}

#define UNLOCK_PVH(index) {		\
	unlock_pvh_pai(index);		\
	mp_enable_preemption();		\
}

extern uint64_t pde_mapped_size;

extern char		*pmap_phys_attributes;
extern ppnum_t		last_managed_page;

extern ppnum_t	lowest_lo;
extern ppnum_t	lowest_hi;
extern ppnum_t	highest_hi;

/*
 * when spinning through pmap_remove
 * ensure that we don't spend too much
 * time with preemption disabled.
 * I'm setting the current threshold
 * to 20us
 */
#define MAX_PREEMPTION_LATENCY_NS 20000
extern uint64_t max_preemption_latency_tsc;

/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
#ifdef DEBUGINTERRUPTS
#define pmap_intr_assert() {							\
	if (processor_avail_count > 1 && !ml_get_interrupts_enabled())		\
		panic("pmap interrupt assert %s, %d", __FILE__, __LINE__);	\
}
#else
#define pmap_intr_assert()
#endif

extern int		nx_enabled;
extern unsigned int	inuse_ptepages_count;

static inline uint32_t
pvhashidx(pmap_t pmap, vm_map_offset_t va)
{
	uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
		((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
		npvhashmask;
	return hashidx;
}

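/*
 * Illustrative lookup (this mirrors what the helpers below and
 * pmap_pv_remove() do; shown here only to make the hashing scheme concrete):
 *
 *	int idx = pvhashidx(pmap, vaddr);
 *	LOCK_PV_HASH(idx);
 *	pv_hashed_entry_t e;
 *	for (e = *pvhash(idx); e != PV_HASHED_ENTRY_NULL; e = e->nexth) {
 *		if (e->pmap == pmap && PVE_VA(e) == vaddr)
 *			break;			// found the aliased mapping
 *	}
 *	UNLOCK_PV_HASH(idx);
 */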

/*
 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
 * properly deals with the anchor.
 * must be called with the hash locked, does not unlock it
 */
static inline void
pmap_pvh_unlink(pv_hashed_entry_t pvh)
{
	pv_hashed_entry_t	curh;
	pv_hashed_entry_t	*pprevh;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));

	pprevh = pvhash(pvhash_idx);

#if PV_DEBUG
	if (NULL == *pprevh)
		panic("pvh_unlink null anchor"); /* JK DEBUG */
#endif
	curh = *pprevh;

	while (PV_HASHED_ENTRY_NULL != curh) {
		if (pvh == curh)
			break;
		pprevh = &curh->nexth;
		curh = curh->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
	*pprevh = pvh->nexth;
	return;
}

static inline void
pv_hash_add(pv_hashed_entry_t	pvh_e,
	    pv_rooted_entry_t	pv_h)
{
	pv_hashed_entry_t	*hashp;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	insque(&pvh_e->qlink, &pv_h->qlink);
	hashp = pvhash(pvhash_idx);
#if PV_DEBUG
	if (NULL == hashp)
		panic("pv_hash_add(%p) null hash bucket", pvh_e);
#endif
	pvh_e->nexth = *hashp;
	*hashp = pvh_e;
	UNLOCK_PV_HASH(pvhash_idx);
}

static inline void
pv_hash_remove(pv_hashed_entry_t pvh_e)
{
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
	LOCK_PV_HASH(pvhash_idx);
	remque(&pvh_e->qlink);
	pmap_pvh_unlink(pvh_e);
	UNLOCK_PV_HASH(pvhash_idx);
}

/* TRUE if 'distance' has at most one bit set, i.e. a Hamming distance of 0 or 1 */
static inline boolean_t popcnt1(uint64_t distance) {
	return ((distance & (distance - 1)) == 0);
}

/*
 * Routines to handle suppression of/recovery from some forms of pagetable corruption
 * incidents observed in the field. These can be either software induced (wild
 * stores to the mapwindows where applicable, use-after-free errors
 * (typically of pages addressed physically), mis-directed DMAs, etc.) or due
 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
 * still assert on potential software races, but attempt recovery from incidents
 * identifiable as occurring due to issues beyond the control of the pmap module.
 * The latter includes single-bit errors and malformed pagetable entries.
 * We currently limit ourselves to recovery/suppression of one incident per
 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
 * are logged.
 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
 */

typedef enum {
	PTE_VALID		= 0x0,
	PTE_INVALID		= 0x1,
	PTE_RSVD		= 0x2,
	PTE_SUPERVISOR		= 0x4,
	PTE_BITFLIP		= 0x8,
	PV_BITFLIP		= 0x10,
	PTE_INVALID_CACHEABILITY = 0x20
} pmap_pagetable_corruption_t;

typedef enum {
	ROOT_PRESENT = 0,
	ROOT_ABSENT = 1
} pmap_pv_assertion_t;

typedef enum {
	PMAP_ACTION_IGNORE	= 0x0,
	PMAP_ACTION_ASSERT	= 0x1,
	PMAP_ACTION_RETRY	= 0x2,
	PMAP_ACTION_RETRY_RELOCK = 0x4
} pmap_pagetable_corruption_action_t;

#define	PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
extern uint64_t pmap_pagetable_corruption_interval_abstime;

extern uint32_t pmap_pagetable_corruption_incidents;
#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
typedef struct {
	pmap_pv_assertion_t incident;
	pmap_pagetable_corruption_t reason;
	pmap_pagetable_corruption_action_t action;
	pmap_t	pmap;
	vm_map_offset_t vaddr;
	pt_entry_t pte;
	ppnum_t ppn;
	pmap_t pvpmap;
	vm_map_offset_t pvva;
	uint64_t abstime;
} pmap_pagetable_corruption_record_t;

extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
extern uint64_t pmap_pagetable_corruption_last_abstime;
extern thread_call_t	pmap_pagetable_corruption_log_call;
extern boolean_t pmap_pagetable_corruption_timeout;

static inline void
pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
	uint32_t pmap_pagetable_corruption_log_index;
	pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
	pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
	/* Asynchronously log */
	thread_call_enter(pmap_pagetable_corruption_log_call);
}

static inline pmap_pagetable_corruption_action_t
pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
	pmap_pagetable_corruption_action_t	action = PMAP_ACTION_ASSERT;
	pmap_pagetable_corruption_t	suppress_reason = PTE_VALID;
	ppnum_t			suppress_ppn = 0;
	pt_entry_t		cpte = *ptep;
	ppnum_t			cpn = pa_index(pte_to_pa(cpte));
	ppnum_t			ppn = *ppnp;
	pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_rooted_entry_t	pv_e = pv_h;
	uint32_t		bitdex;
	pmap_t			pvpmap = pv_h->pmap;
	vm_map_offset_t		pvva = PVE_VA(pv_h);
	vm_map_offset_t		pve_flags = PVE_FLAGS(pv_h);
	boolean_t		ppcd = FALSE;
	boolean_t		is_ept;

	/* Ideally, we'd consult the Mach VM here to definitively determine
	 * the nature of the mapping for this address space and address.
	 * As that would be a layering violation in this context, we
	 * use various heuristics to recover from single bit errors,
	 * malformed pagetable entries etc. These are not intended
	 * to be comprehensive.
	 */

	/* As a precautionary measure, mark A+D */
	pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
	is_ept = is_ept_pmap(pmap);

	/*
	 * Correct potential single bit errors in either (but not both) element
	 * of the PV
	 */
	do {
		if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
		    (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
			pv_e->pmap = pmap;
			if (pv_e == pv_h) {
				pv_h->va_and_flags = vaddr | pve_flags;
			} else {
				pv_e->va_and_flags = vaddr;
			}
			suppress_reason = PV_BITFLIP;
			action = PMAP_ACTION_RETRY;
			goto pmap_cpc_exit;
		}
	} while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));

	/* Discover root entries with a Hamming
	 * distance of 1 from the supplied
	 * physical page frame.
	 */
	for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
		ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
		if (IS_MANAGED_PAGE(npn)) {
			pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
			if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
				suppress_reason = PTE_BITFLIP;
				suppress_ppn = npn;
				action = PMAP_ACTION_RETRY_RELOCK;
				UNLOCK_PVH(ppn_to_pai(ppn));
				*ppnp = npn;
				goto pmap_cpc_exit;
			}
		}
	}

	if (pmap == kernel_pmap) {
		action = PMAP_ACTION_ASSERT;
		goto pmap_cpc_exit;
	}

	/*
	 * Check for malformed/inconsistent entries.
	 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
	 */
	if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_INVALID_CACHEABILITY;
	}
	else if (cpte & INTEL_PTE_RSVD) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_RSVD;
	}
	else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
		action = PMAP_ACTION_IGNORE;
		suppress_reason = PTE_SUPERVISOR;
	}
pmap_cpc_exit:
	PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));

	if (debug_boot_arg && !ppcd) {
		action = PMAP_ACTION_ASSERT;
	}

	if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
		action = PMAP_ACTION_ASSERT;
		pmap_pagetable_corruption_timeout = TRUE;
	}
	else
	{
		pmap_pagetable_corruption_last_abstime = mach_absolute_time();
	}
	pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
	return action;
}

/*
 * Remove pv list entry.
 * Called with pv_head_table entry locked.
 * Returns pv entry to be freed (or NULL).
 */
static inline __attribute__((always_inline)) pv_hashed_entry_t
pmap_pv_remove(pmap_t		pmap,
	       vm_map_offset_t	vaddr,
	       ppnum_t		*ppnp,
	       pt_entry_t	*pte)
{
	pv_hashed_entry_t	pvh_e;
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	*pprevh;
	int			pvhash_idx;
	uint32_t		pv_cnt;
	ppnum_t			ppn;

pmap_pv_remove_retry:
	ppn = *ppnp;
	pvh_e = PV_HASHED_ENTRY_NULL;
	pv_h = pai_to_pvh(ppn_to_pai(ppn));

	if (__improbable(pv_h->pmap == PMAP_NULL)) {
		pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
		if (pac == PMAP_ACTION_IGNORE)
			goto pmap_pv_remove_exit;
		else if (pac == PMAP_ACTION_ASSERT)
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte);
		else if (pac == PMAP_ACTION_RETRY_RELOCK) {
			LOCK_PVH(ppn_to_pai(*ppnp));
			pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
			goto pmap_pv_remove_retry;
		}
		else if (pac == PMAP_ACTION_RETRY)
			goto pmap_pv_remove_retry;
	}

	if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
		/*
		 * Header is the pv_rooted_entry.
		 * We can't free that. If there is a queued
		 * entry after this one we remove that
		 * from the ppn queue, we remove it from the hash chain
		 * and copy it to the rooted entry. Then free it instead.
		 */
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
		if (pv_h != (pv_rooted_entry_t) pvh_e) {
			vm_map_offset_t pve_flags;

			/*
			 * Entry queued to root, remove this from hash
			 * and install as new root.
			 */
			CHK_NPVHASH();
			pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
			LOCK_PV_HASH(pvhash_idx);
			remque(&pvh_e->qlink);
			pprevh = pvhash(pvhash_idx);
			if (PV_HASHED_ENTRY_NULL == *pprevh) {
				panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
				      "empty hash, removing rooted",
				      pmap, vaddr, ppn);
			}
			pmap_pvh_unlink(pvh_e);
			UNLOCK_PV_HASH(pvhash_idx);
			pv_h->pmap = pvh_e->pmap;
			pve_flags = PVE_FLAGS(pv_h);
			pv_h->va_and_flags = PVE_VA(pvh_e) | pve_flags;
			/* dispose of pvh_e */
		} else {
			/* none queued after rooted */
			pv_h->pmap = PMAP_NULL;
			pvh_e = PV_HASHED_ENTRY_NULL;
		}
	} else {
		/*
		 * not removing rooted pv. find it on hash chain, remove from
		 * ppn queue and hash chain and free it
		 */
		CHK_NPVHASH();
		pvhash_idx = pvhashidx(pmap, vaddr);
		LOCK_PV_HASH(pvhash_idx);
		pprevh = pvhash(pvhash_idx);
		if (PV_HASHED_ENTRY_NULL == *pprevh) {
			panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash",
			      pmap, vaddr, ppn, *pte, pte);
		}
		pvh_e = *pprevh;
		pmap_pv_hashlist_walks++;
		pv_cnt = 0;
		while (PV_HASHED_ENTRY_NULL != pvh_e) {
			pv_cnt++;
			if (pvh_e->pmap == pmap &&
			    PVE_VA(pvh_e) == vaddr &&
			    pvh_e->ppn == ppn)
				break;
			pprevh = &pvh_e->nexth;
			pvh_e = pvh_e->nexth;
		}

		if (PV_HASHED_ENTRY_NULL == pvh_e) {
			pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);

			if (pac == PMAP_ACTION_ASSERT)
				panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h));
			else {
				UNLOCK_PV_HASH(pvhash_idx);
				if (pac == PMAP_ACTION_RETRY_RELOCK) {
					LOCK_PVH(ppn_to_pai(*ppnp));
					pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
					goto pmap_pv_remove_retry;
				}
				else if (pac == PMAP_ACTION_RETRY) {
					goto pmap_pv_remove_retry;
				}
				else if (pac == PMAP_ACTION_IGNORE) {
					goto pmap_pv_remove_exit;
				}
			}
		}

		pmap_pv_hashlist_cnts += pv_cnt;
		if (pmap_pv_hashlist_max < pv_cnt)
			pmap_pv_hashlist_max = pv_cnt;
		*pprevh = pvh_e->nexth;
		remque(&pvh_e->qlink);
		UNLOCK_PV_HASH(pvhash_idx);
	}
pmap_pv_remove_exit:
	return pvh_e;
}


extern int	pt_fake_zone_index;
static inline void
PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
}

static inline void
PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
}

extern boolean_t	pmap_initialized;	/* Has pmap_init completed? */
#define valid_page(x) (pmap_initialized && pmap_valid_page(x))

// XXX
#define HIGH_MEM_BASE  ((uint32_t)( -NBPDE) )  /* shared gdt etc seg addr */ /* XXX64 ?? */
// XXX


int		phys_attribute_test(
			ppnum_t		phys,
			int		bits);
void		phys_attribute_clear(
			ppnum_t		phys,
			int		bits,
			unsigned int	options,
			void		*arg);

//#define PCID_DEBUG 1
#if	PCID_DEBUG
#define pmap_pcid_log(fmt, args...)					\
	do {								\
		kprintf(fmt, ##args);					\
		printf(fmt, ##args);					\
	} while(0)
#else
#define pmap_pcid_log(fmt, args...)
#endif
void	pmap_pcid_configure(void);


/*
 * Atomic 64-bit compare and exchange of a page table entry.
 */
static inline boolean_t
pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
{
	boolean_t ret;

	/*
	 * Load the old value into %rax
	 * Load the new value into another register
	 * Compare-exchange-quad at address entryp
	 * If the compare succeeds, the new value is stored, return TRUE.
	 * Otherwise, no swap is made, return FALSE.
	 */
	asm volatile(
		"	lock; cmpxchgq %2,(%3)	\n\t"
		"	setz	%%al		\n\t"
		"	movzbl	%%al,%0"
		: "=a" (ret)
		: "a" (old),
		  "r" (new),
		  "r" (entryp)
		: "memory");
	return ret;
}

extern uint32_t pmap_update_clear_pte_count;

static inline void pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits) {
	pt_entry_t npte, opte;
	do {
		opte = *mptep;
		if (__improbable(opte == 0)) {
			pmap_update_clear_pte_count++;
			break;
		}
		npte = opte & ~(pclear_bits);
		npte |= pset_bits;
	} while (!pmap_cmpx_pte(mptep, opte, npte));
}

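/*
 * Usage sketch (an illustration mirroring callers in pmap.c, not part of the
 * interface): atomically drop write permission and set the no-execute bit on
 * a PTE without losing concurrent accessed/dirty updates made by the MMU:
 *
 *	pmap_update_pte(ptep, INTEL_PTE_WRITE, INTEL_PTE_NX);
 *
 * The cmpxchg loop in pmap_update_pte() re-reads the entry and retries on
 * failure, so racing hardware A/D bit stores are merged rather than lost.
 */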
#if	defined(__x86_64__)
/*
 * The single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap. We allocate this page in kernel vm.
 * This returns the address of the requested pml4 entry in the top level page.
 */
static inline
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
		(vaddr < 0xFFFF800000000000ULL))) {
		return (NULL);
	}

#if	DEBUG
	return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
#else
	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
#endif
}

/*
 * Returns address of requested PDPT entry in the physmap.
 */
static inline pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;
	boolean_t	is_ept;

	pml4 = pmap64_pml4(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
		newpf = *pml4 & PG_FRAME;
		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
	}
	return (NULL);
}
/*
 * Returns the address of the requested PDE entry in the physmap.
 */
static inline pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;
	boolean_t	is_ept;

	pdpt = pmap64_pdpt(pmap, vaddr);
	is_ept = is_ept_pmap(pmap);

	if (pdpt && (*pdpt & PTE_VALID_MASK(is_ept))) {
		newpf = *pdpt & PG_FRAME;
		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDSHIFT) & (NPDPG-1)];
	}
	return (NULL);
}

static inline pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t	*pde;

	pde = pmap64_pde(m, v);

	return pde;
}


/*
 * Return the address of the mapped PTE for vaddr in the given pmap.
 *
 * In case the pde maps a superpage, return the pde, which, in this case
 * is the actual page table entry.
 */
static inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;
	boolean_t	is_ept;

	assert(pmap);
	pde = pmap64_pde(pmap, vaddr);

	is_ept = is_ept_pmap(pmap);

	if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
		if (*pde & PTE_PS)
			return pde;
		newpf = *pde & PG_FRAME;
		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
			[i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
	}
	return (NULL);
}
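/*
 * Walk summary (descriptive only): pmap_pte() composes the helpers above to
 * resolve a full four-level walk through the physmap window:
 *
 *	pml4e = pmap64_pml4(pmap, va);	// top level, from pm_pml4 / pm_cr3
 *	pdpte = pmap64_pdpt(pmap, va);	// follows *pml4e when valid
 *	pde   = pmap64_pde(pmap, va);	// follows *pdpte when valid
 *	pte   = pmap_pte(pmap, va);	// follows *pde, or returns the PDE
 *					// itself for a PTE_PS superpage
 *
 * Each step returns NULL when the next-lower table is not present.
 */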
#endif
#if	DEBUG
#define DPRINTF(x...)	kprintf(x)
#else
#define DPRINTF(x...)
#endif

#endif /* MACH_KERNEL_PRIVATE */
#endif /* _I386_PMAP_INTERNAL_ */