osfmk/i386/pmap_internal.h (xnu-4570.71.2)
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #ifndef _I386_PMAP_INTERNAL_
31 #define _I386_PMAP_INTERNAL_
32 #ifdef MACH_KERNEL_PRIVATE
33
34 #include <vm/pmap.h>
35 #include <sys/kdebug.h>
36 #include <kern/ledger.h>
37 #include <kern/simple_lock.h>
38 #include <i386/bit_routines.h>
39
40 /*
41 * pmap locking
42 */
43
44 #define PMAP_LOCK(pmap) { \
45 simple_lock(&(pmap)->lock); \
46 }
47
48 #define PMAP_UNLOCK(pmap) { \
49 simple_unlock(&(pmap)->lock); \
50 }
51
52 #define PMAP_UPDATE_TLBS(pmap, s, e) \
53 pmap_flush_tlbs(pmap, s, e, 0, NULL)
54
55
56 #define PMAP_DELAY_TLB_FLUSH 0x01
57
58 #define PMAP_UPDATE_TLBS_DELAYED(pmap, s, e, c) \
59 pmap_flush_tlbs(pmap, s, e, PMAP_DELAY_TLB_FLUSH, c)
60
61
62 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
63
64 #ifdef PMAP_TRACES
65 extern boolean_t pmap_trace;
66 #define PMAP_TRACE(...) \
67 if (pmap_trace) { \
68 KDBG_RELEASE(__VA_ARGS__); \
69 }
70 #else
71 #define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
72 #endif /* PMAP_TRACES */
73
74 #define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
75
76 kern_return_t pmap_expand_pml4(
77 pmap_t map,
78 vm_map_offset_t v,
79 unsigned int options);
80
81 kern_return_t pmap_expand_pdpt(
82 pmap_t map,
83 vm_map_offset_t v,
84 unsigned int options);
85
86 void phys_attribute_set(
87 ppnum_t phys,
88 int bits);
89
90 void pmap_set_reference(
91 ppnum_t pn);
92
93 boolean_t phys_page_exists(
94 ppnum_t pn);
95
96 void
97 pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);
98
99 void
100 pmap_update_cache_attributes_locked(ppnum_t, unsigned);
101
102 extern const boolean_t cpu_64bit;
103
104 /*
105 * Private data structures.
106 */
107
108 /*
109 * For each vm_page_t, there is a list of all currently
110 * valid virtual mappings of that page. An entry is
111 * a pv_rooted_entry_t; the list is the pv_table.
112 *
113 * N.B. with the new combo rooted/hashed scheme it is
115 * only possible to remove individual non-rooted entries
115 * if they are found via the hashed chains as there is no
116 * way to unlink the singly linked hashed entries if navigated to
117 * via the queue list off the rooted entries. Think of it as
118 * hash/walk/pull, keeping track of the prev pointer while walking
119 * the singly linked hash list. All of this is to save memory and
120 * keep both types of pv_entries as small as possible.
121 */
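/*
 * (See pmap_pvh_unlink() below for the implementation of this
 * hash/walk/pull pattern: it walks the singly linked hash chain while
 * tracking the previous link, then splices the target entry out.)
 */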
122
123 /*
124
125 PV HASHING Changes - JK 1/2007
126
127 Pve's establish physical to virtual mappings. These are used for aliasing of a
128 physical page to (potentially many) virtual addresses within pmaps. In the
129 previous implementation the structure of the pv_entries (each 16 bytes in size) was
130
131 typedef struct pv_entry {
132 struct pv_entry *next;
133 pmap_t pmap;
134 vm_map_offset_t va;
135 } *pv_entry_t;
136
137 An initial array of these is created at boot time, one per physical page of
138 memory, indexed by the physical page number. Additionally, a pool of entries
139 is created from a pv_zone to be used as needed by pmap_enter() when it is
140 creating new mappings. Originally, we kept this pool around because the code
141 in pmap_enter() was unable to block if it needed an entry and none were
142 available - we'd panic. Some time ago I restructured the pmap_enter() code
143 so that for user pmaps it can block while zalloc'ing a pv structure and restart,
144 removing a panic from the code (in the case of the kernel pmap we cannot block
145 and still panic, so we keep a separate hot pool for use only on kernel pmaps).
146 The pool has not been removed since there is a large performance gain keeping
147 freed pv's around for reuse and not suffering the overhead of zalloc for every
148 new pv we need.
149
150 As pmap_enter() created new mappings it linked the new pve's for them off the
151 fixed pv array for that ppn (off the next pointer). These pve's are accessed
152 for several operations, one of them being address space teardown. In that case,
153 we basically do this
154
155 for (every page/pte in the space) {
156 calc pve_ptr from the ppn in the pte
157 for (every pv in the list for the ppn) {
158 if (this pv is for this pmap/vaddr) {
159 do housekeeping
160 unlink/free the pv
161 }
162 }
163 }
164
165 The problem arose when we were running, say, 8000 (or even 2000) apache or
166 other processes and one or all terminate. The list hanging off each pv array
167 entry could have thousands of entries. We were continuously linearly searching
168 each of these lists as we stepped through the address space we were tearing
169 down. Because of the locks we hold, likely taking a cache miss for each node,
170 and interrupt disabling for MP issues, the system became completely unresponsive
171 for many seconds while we did this.
172
173 Realizing that pve's are accessed in two distinct ways (linearly running the
174 list by ppn for operations like pmap_page_protect and finding and
175 modifying/removing a single pve as part of pmap_enter processing) has led to
176 modifying the pve structures and databases.
177
178 There are now two types of pve structures: a "rooted" structure, which is
179 basically the original structure accessed in an array by ppn, and a "hashed"
180 structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
181 designed with the two goals of minimizing wired memory and making the lookup of
182 a ppn faster. Since a vast majority of pages in the system are not aliased
183 and hence represented by a single pv entry, I've kept the rooted entry size as
184 small as possible because there is one of these dedicated for every physical
185 page of memory. The hashed pve's are larger due to the addition of the hash
186 link and the ppn entry needed for matching while running the hash list to find
187 the entry we are looking for. This way, only systems that have lots of
188 aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
189 structures have the same first three fields allowing some simplification in
190 the code.
191
192 They have these shapes
193
194 typedef struct pv_rooted_entry {
195 queue_head_t qlink;
196 vm_map_offset_t va;
197 pmap_t pmap;
198 } *pv_rooted_entry_t;
199
200
201 typedef struct pv_hashed_entry {
202 queue_head_t qlink;
203 vm_map_offset_t va;
204 pmap_t pmap;
205 ppnum_t ppn;
206 struct pv_hashed_entry *nexth;
207 } *pv_hashed_entry_t;
208
209 The main flow difference is that the code is now aware of the rooted entry and
210 the hashed entries. Code that runs the pv list still starts with the rooted
211 entry and then continues down the qlink onto the hashed entries. Code that is
212 looking up a specific pv entry first checks the rooted entry and then hashes
213 and runs the hash list for the match. The hash list lengths are much smaller
214 than the original pv lists that contained all aliases for the specific ppn.
215
216 */
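/*
 * Illustrative sketch (not part of the original header) of the lookup flow
 * described above, using the macros and types defined below. It assumes the
 * caller holds the relevant PV head and hash-bucket locks, as the real code
 * in pmap_pv_remove()/pmap_pv_is_altacct() does.
 *
 *	pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
 *
 *	if (pv_h->pmap == pmap && PVE_VA(pv_h) == vaddr) {
 *		// hit on the rooted entry: the common, unaliased case
 *	} else {
 *		// aliased page: hash [pmap, vaddr] and walk the (short) chain
 *		pv_hashed_entry_t pvh_e = *pvhash(pvhashidx(pmap, vaddr));
 *		while (pvh_e != PV_HASHED_ENTRY_NULL &&
 *		    !(pvh_e->pmap == pmap && PVE_VA(pvh_e) == vaddr && pvh_e->ppn == ppn))
 *			pvh_e = pvh_e->nexth;
 *		// pvh_e now points at the match, or is PV_HASHED_ENTRY_NULL
 *	}
 */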
217
218 typedef struct pv_rooted_entry {
219 /* first three entries must match pv_hashed_entry_t */
220 queue_head_t qlink;
221 vm_map_offset_t va_and_flags; /* virtual address for mapping */
222 pmap_t pmap; /* pmap where mapping lies */
223 } *pv_rooted_entry_t;
224
225 #define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0)
226
227 typedef struct pv_hashed_entry {
228 /* first three entries must match pv_rooted_entry_t */
229 queue_head_t qlink;
230 vm_map_offset_t va_and_flags;
231 pmap_t pmap;
232 ppnum_t ppn;
233 struct pv_hashed_entry *nexth;
234 } *pv_hashed_entry_t;
235
236 #define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
237
238 #define PVE_VA(pve) ((pve)->va_and_flags & ~PAGE_MASK)
239 #define PVE_FLAGS(pve) ((pve)->va_and_flags & PAGE_MASK)
240 #define PVE_IS_ALTACCT 0x001
241 #define PVE_IS_ALTACCT_PAGE(pve) \
242 (((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)
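/*
 * Illustrative example (not original text): va_and_flags packs the
 * page-aligned virtual address in the upper bits and per-mapping flag bits,
 * such as PVE_IS_ALTACCT, in the low PAGE_MASK bits.
 *
 *	pve->va_and_flags = (vaddr & ~(vm_map_offset_t)PAGE_MASK) | PVE_IS_ALTACCT;
 *	vm_map_offset_t va = PVE_VA(pve);		// page-aligned vaddr, flags stripped
 *	boolean_t alt = PVE_IS_ALTACCT_PAGE(pve);	// TRUE
 */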
243
244 //#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */
245 #ifdef PV_DEBUG
246 #define CHK_NPVHASH() if (0 == npvhashmask) panic("npvhash uninitialized");
247 #else
248 #define CHK_NPVHASH()
249 #endif
250
251 #define NPVHASHBUCKETS (4096)
252 #define NPVHASHMASK ((NPVHASHBUCKETS) - 1) /* MUST BE 2^N - 1 */
253 #define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000
254 #define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000
255 #define PV_HASHED_ALLOC_CHUNK_INITIAL 2000
256 #define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL 200
257
258 extern volatile uint32_t mappingrecurse;
259 extern uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;
260
261 /*
262 * PV hash locking
263 */
264
265 #define LOCK_PV_HASH(hash) lock_hash_hash(hash)
266 #define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
267 extern uint32_t npvhashmask;
268 extern pv_hashed_entry_t *pv_hash_table; /* hash lists */
269 extern pv_hashed_entry_t pv_hashed_free_list;
270 extern pv_hashed_entry_t pv_hashed_kern_free_list;
271 decl_simple_lock_data(extern, pv_hashed_free_list_lock)
272 decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
273 decl_simple_lock_data(extern, pv_hash_table_lock)
274 decl_simple_lock_data(extern, phys_backup_lock)
275
276 extern zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry
277 * structures */
278
279 extern uint32_t pv_hashed_free_count;
280 extern uint32_t pv_hashed_kern_free_count;
281 /*
282 * Each entry in the pv_head_table is locked by a bit in the
283 * pv_lock_table. The lock bits are accessed by the address of
284 * the frame they lock.
285 */
286 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
287 #define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
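/*
 * Worked example (not original text): with BYTE_SIZE == 8, there is one lock
 * bit per page, so pv_lock_table_size(8192) rounds up to 1024 bytes of lock
 * bits covering 8192 physical pages.
 */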
288 extern char *pv_lock_table; /* pointer to array of bits */
289 extern char *pv_hash_lock_table;
290 extern pv_rooted_entry_t pv_head_table; /* array of entries, one per page */
291
292 extern event_t mapping_replenish_event;
293
294 static inline void PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
295 pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
296 simple_lock(&pv_hashed_free_list_lock);
297 /* If the kernel reserved pool is low, let non-kernel mappings allocate
298 * synchronously, possibly subject to a throttle.
299 */
300 if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
301 pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
302 pv_hashed_free_count--;
303 }
304
305 simple_unlock(&pv_hashed_free_list_lock);
306
307 if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
308 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
309 thread_wakeup(&mapping_replenish_event);
310 }
311 }
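/*
 * Rough caller sketch (illustrative only; the real flow is in pmap_enter()).
 * If the fast path above yields nothing because the kernel reserve is low or
 * the free list is empty, a user-pmap caller may throttle and fall back to a
 * blocking zalloc, per the comment above.
 *
 *	pv_hashed_entry_t pvh_new = PV_HASHED_ENTRY_NULL;
 *	PV_HASHED_ALLOC(&pvh_new);
 *	if (pvh_new == PV_HASHED_ENTRY_NULL) {
 *		if (pmap != kernel_pmap)
 *			pmap_pv_throttle(pmap);
 *		pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 *	}
 */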
312
313 static inline void PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
314 simple_lock(&pv_hashed_free_list_lock);
315 pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
316 pv_hashed_free_list = pvh_eh;
317 pv_hashed_free_count += pv_cnt;
318 simple_unlock(&pv_hashed_free_list_lock);
319 }
320
321 extern unsigned pmap_kern_reserve_alloc_stat;
322
323 static inline void PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
324 pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
325 simple_lock(&pv_hashed_kern_free_list_lock);
326
327 if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
328 pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
329 pv_hashed_kern_free_count--;
330 pmap_kern_reserve_alloc_stat++;
331 }
332
333 simple_unlock(&pv_hashed_kern_free_list_lock);
334
335 if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
336 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
337 thread_wakeup(&mapping_replenish_event);
338 }
339 }
340
341 static inline void PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
342 simple_lock(&pv_hashed_kern_free_list_lock);
343 pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
344 pv_hashed_kern_free_list = pvh_eh;
345 pv_hashed_kern_free_count += pv_cnt;
346 simple_unlock(&pv_hashed_kern_free_list_lock);
347 }
348
349 extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
350 extern event_t pmap_user_pv_throttle_event;
351
352 static inline void pmap_pv_throttle(__unused pmap_t p) {
353 pmap_assert(p != kernel_pmap);
354 /* Apply throttle on non-kernel mappings */
355 if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
356 pmap_pv_throttle_stat++;
357 /* This doesn't need to be strictly accurate, merely a hint
358 * to eliminate the timeout when the reserve is replenished.
359 */
360 pmap_pv_throttled_waiters++;
361 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
362 thread_block(THREAD_CONTINUE_NULL);
363 }
364 }
365
366 /*
367 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
368 */
369
370 #define pa_index(pa) (i386_btop(pa))
371 #define ppn_to_pai(ppn) ((int)ppn)
372
373 #define pai_to_pvh(pai) (&pv_head_table[pai])
374 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
375 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
376 #define pvhash(idx) (&pv_hash_table[idx])
377 #define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table)
378 #define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
379
380 #define IS_MANAGED_PAGE(x) \
381 ((unsigned int)(x) <= last_managed_page && \
382 (pmap_phys_attributes[x] & PHYS_MANAGED))
383 #define IS_INTERNAL_PAGE(x) \
384 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL))
385 #define IS_REUSABLE_PAGE(x) \
386 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE))
387 #define IS_ALTACCT_PAGE(x,pve) \
388 (IS_MANAGED_PAGE((x)) && \
389 (PVE_IS_ALTACCT_PAGE((pve))))
390
391 /*
392 * Physical page attributes. Copy bits from PTE definition.
393 */
394 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
395 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
396 #define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
397 #define PHYS_NOENCRYPT INTEL_PTE_USER /* no need to encrypt this page in the hibernation image */
398 #define PHYS_NCACHE INTEL_PTE_NCACHE
399 #define PHYS_PTA INTEL_PTE_PTA
400 #define PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
401 #define PHYS_INTERNAL INTEL_PTE_WTHRU /* page from internal object */
402 #define PHYS_REUSABLE INTEL_PTE_WRITE /* page is "reusable" */
403
404 extern boolean_t pmap_disable_kheap_nx;
405 extern boolean_t pmap_disable_kstack_nx;
406
407 #define PMAP_EXPAND_OPTIONS_NONE (0x0)
408 #define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
409 #define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)
410 #define PMAP_EXPAND_OPTIONS_ALIASMAP (0x40000000U)
411 /*
412 * Amount of virtual memory mapped by one
413 * page-directory entry.
414 */
415 #define PDE_MAPPED_SIZE (pdetova(1))
416
417
418 /*
419 * Locking and TLB invalidation
420 */
421
422 /*
423 * Locking Protocols: (changed 2/2007 JK)
424 *
425 * There are two structures in the pmap module that need locking:
426 * the pmaps themselves, and the per-page pv_lists (which are locked
427 * by locking the pv_lock_table entry that corresponds to the pv_head
428 * for the list in question.) Most routines want to lock a pmap and
429 * then do operations in it that require pv_list locking -- however
430 * pmap_remove_all and pmap_copy_on_write operate on a physical page
431 * basis and want to do the locking in the reverse order, i.e. lock
432 * a pv_list and then go through all the pmaps referenced by that list.
433 *
434 * The system wide pmap lock has been removed. Now, paths take a lock
435 * on the pmap before changing its 'shape' and the reverse order lockers
436 * (coming in by phys ppn) take a lock on the corresponding pv and then
437 * retest to be sure nothing changed during the window before they locked
438 * and can then run up/down the pv lists holding the list lock. This also
439 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
440 * previously.
441 */
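/*
 * Illustrative sketch of the two lock orders described above (not original
 * text; the actual paths live in the pmap implementation):
 *
 *	// "shape changing" path: pmap lock first, then the per-page PV lock
 *	PMAP_LOCK(pmap);
 *	LOCK_PVH(pai);
 *	// ... modify the mapping and its pv entry ...
 *	UNLOCK_PVH(pai);
 *	PMAP_UNLOCK(pmap);
 *
 *	// reverse-order path (entered by physical page): take the PV lock,
 *	// then retest that nothing changed before walking the pv list
 *	LOCK_PVH(pai);
 *	// revalidate, then run up/down the pv list holding the list lock
 *	UNLOCK_PVH(pai);
 */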
442
443 /*
444 * PV locking
445 */
446
447 #define LOCK_PVH(index) { \
448 mp_disable_preemption(); \
449 lock_pvh_pai(index); \
450 }
451
452 #define UNLOCK_PVH(index) { \
453 unlock_pvh_pai(index); \
454 mp_enable_preemption(); \
455 }
456
457 extern uint64_t pde_mapped_size;
458
459 extern char *pmap_phys_attributes;
460 extern ppnum_t last_managed_page;
461
462 extern ppnum_t lowest_lo;
463 extern ppnum_t lowest_hi;
464 extern ppnum_t highest_hi;
465
466 /*
467 * When spinning through pmap_remove,
468 * ensure that we don't spend too much
469 * time with preemption disabled.
470 * The current threshold is set
471 * to 20us.
472 */
473 #define MAX_PREEMPTION_LATENCY_NS 20000
474 extern uint64_t max_preemption_latency_tsc;
475
476 #if DEBUG
477 #define PMAP_INTR_DEBUG (1)
478 #endif
479
480 #if PMAP_INTR_DEBUG
481 #define pmap_intr_assert() { \
482 if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \
483 panic("pmap interrupt assert %d %s, %d", processor_avail_count, __FILE__, __LINE__); \
484 }
485 #else
486 #define pmap_intr_assert()
487 #endif
488
489 extern int nx_enabled;
490 extern unsigned int inuse_ptepages_count;
491
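/*
 * pvhashidx() below hashes a [pmap, vaddr] pair by XORing the low 32 bits of
 * the pmap pointer with the virtual page number (va >> PAGE_SHIFT) and
 * masking with npvhashmask, a power-of-two-minus-one bucket mask
 * (typically NPVHASHMASK == NPVHASHBUCKETS - 1).
 */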
492 static inline uint32_t
493 pvhashidx(pmap_t pmap, vm_map_offset_t va)
494 {
495 uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
496 ((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
497 npvhashmask;
498 return hashidx;
499 }
500
501 /*
502 * Unlinks the pv_hashed_entry_t pvh from the singly linked hash chain,
503 * properly dealing with the anchor.
504 * Must be called with the hash locked; it does not unlock it.
505 */
506 static inline void
507 pmap_pvh_unlink(pv_hashed_entry_t pvh)
508 {
509 pv_hashed_entry_t curh;
510 pv_hashed_entry_t *pprevh;
511 int pvhash_idx;
512
513 CHK_NPVHASH();
514 pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));
515
516 pprevh = pvhash(pvhash_idx);
517
518 #if PV_DEBUG
519 if (NULL == *pprevh)
520 panic("pvh_unlink null anchor"); /* JK DEBUG */
521 #endif
522 curh = *pprevh;
523
524 while (PV_HASHED_ENTRY_NULL != curh) {
525 if (pvh == curh)
526 break;
527 pprevh = &curh->nexth;
528 curh = curh->nexth;
529 }
530 if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
531 *pprevh = pvh->nexth;
532 return;
533 }
534
535 static inline void
536 pv_hash_add(pv_hashed_entry_t pvh_e,
537 pv_rooted_entry_t pv_h)
538 {
539 pv_hashed_entry_t *hashp;
540 int pvhash_idx;
541
542 CHK_NPVHASH();
543 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
544 LOCK_PV_HASH(pvhash_idx);
545 insque(&pvh_e->qlink, &pv_h->qlink);
546 hashp = pvhash(pvhash_idx);
547 #if PV_DEBUG
548 if (NULL==hashp)
549 panic("pv_hash_add(%p) null hash bucket", pvh_e);
550 #endif
551 pvh_e->nexth = *hashp;
552 *hashp = pvh_e;
553 UNLOCK_PV_HASH(pvhash_idx);
554 }
555
556 static inline void
557 pv_hash_remove(pv_hashed_entry_t pvh_e)
558 {
559 int pvhash_idx;
560
561 CHK_NPVHASH();
562 pvhash_idx = pvhashidx(pvh_e->pmap,PVE_VA(pvh_e));
563 LOCK_PV_HASH(pvhash_idx);
564 remque(&pvh_e->qlink);
565 pmap_pvh_unlink(pvh_e);
566 UNLOCK_PV_HASH(pvhash_idx);
567 }
568
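/*
 * popcnt1() returns TRUE when its argument has at most one bit set, i.e. it
 * is zero or a power of two. Callers pass the XOR of two values, so a TRUE
 * result means the values differ in at most one bit position.
 */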
569 static inline boolean_t popcnt1(uint64_t distance) {
570 return ((distance & (distance - 1)) == 0);
571 }
572
573 /*
574 * Routines to handle suppression of/recovery from some forms of pagetable corruption
575 * incidents observed in the field. These can be either software induced (wild
576 * stores to the mapwindows where applicable, use-after-free errors
577 * (typically of pages addressed physically), mis-directed DMAs, etc.), or due
578 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
579 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
580 * still assert on potential software races, but attempt recovery from incidents
581 * identifiable as occurring due to issues beyond the control of the pmap module.
582 * The latter includes single-bit errors and malformed pagetable entries.
583 * We currently limit ourselves to recovery/suppression of one incident per
584 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
585 * are logged.
586 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
587 */
588
589 typedef enum {
590 PTE_VALID = 0x0,
591 PTE_INVALID = 0x1,
592 PTE_RSVD = 0x2,
593 PTE_SUPERVISOR = 0x4,
594 PTE_BITFLIP = 0x8,
595 PV_BITFLIP = 0x10,
596 PTE_INVALID_CACHEABILITY = 0x20
597 } pmap_pagetable_corruption_t;
598
599 typedef enum {
600 ROOT_PRESENT = 0,
601 ROOT_ABSENT = 1
602 } pmap_pv_assertion_t;
603
604 typedef enum {
605 PMAP_ACTION_IGNORE = 0x0,
606 PMAP_ACTION_ASSERT = 0x1,
607 PMAP_ACTION_RETRY = 0x2,
608 PMAP_ACTION_RETRY_RELOCK = 0x4
609 } pmap_pagetable_corruption_action_t;
610
611 #define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
612 extern uint64_t pmap_pagetable_corruption_interval_abstime;
613
614 extern uint32_t pmap_pagetable_corruption_incidents;
615 #define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
616 typedef struct {
617 pmap_pv_assertion_t incident;
618 pmap_pagetable_corruption_t reason;
619 pmap_pagetable_corruption_action_t action;
620 pmap_t pmap;
621 vm_map_offset_t vaddr;
622 pt_entry_t pte;
623 ppnum_t ppn;
624 pmap_t pvpmap;
625 vm_map_offset_t pvva;
626 uint64_t abstime;
627 } pmap_pagetable_corruption_record_t;
628
629 extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
630 extern uint64_t pmap_pagetable_corruption_last_abstime;
631 extern thread_call_t pmap_pagetable_corruption_log_call;
632 extern boolean_t pmap_pagetable_corruption_timeout;
633
634 static inline void
635 pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
636 uint32_t pmap_pagetable_corruption_log_index;
637 pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
638 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
639 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
640 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
641 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
642 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
643 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
644 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
645 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
646 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
647 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
648 /* Asynchronously log */
649 thread_call_enter(pmap_pagetable_corruption_log_call);
650 }
651
652 static inline pmap_pagetable_corruption_action_t
653 pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
654 pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
655 pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
656 ppnum_t suppress_ppn = 0;
657 pt_entry_t cpte = *ptep;
658 ppnum_t cpn = pa_index(pte_to_pa(cpte));
659 ppnum_t ppn = *ppnp;
660 pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
661 pv_rooted_entry_t pv_e = pv_h;
662 uint32_t bitdex;
663 pmap_t pvpmap = pv_h->pmap;
664 vm_map_offset_t pvva = PVE_VA(pv_h);
665 vm_map_offset_t pve_flags;
666 boolean_t ppcd = FALSE;
667 boolean_t is_ept;
668
669 /* Ideally, we'd consult the Mach VM here to definitively determine
670 * the nature of the mapping for this address space and address.
671 * As that would be a layering violation in this context, we
672 * use various heuristics to recover from single bit errors,
673 * malformed pagetable entries etc. These are not intended
674 * to be comprehensive.
675 */
676
677 /* As a precautionary measure, mark A+D */
678 pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
679 is_ept = is_ept_pmap(pmap);
680
681 /*
682 * Correct potential single bit errors in either (but not both) element
683 * of the PV
684 */
685 do {
686 if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
687 (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
688 pve_flags = PVE_FLAGS(pv_e);
689 pv_e->pmap = pmap;
690 pv_h->va_and_flags = vaddr | pve_flags;
691 suppress_reason = PV_BITFLIP;
692 action = PMAP_ACTION_RETRY;
693 goto pmap_cpc_exit;
694 }
695 } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));
696
697 /* Discover root entries with a Hamming
698 * distance of 1 from the supplied
699 * physical page frame.
700 */
701 for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
702 ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
703 if (IS_MANAGED_PAGE(npn)) {
704 pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
705 if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
706 suppress_reason = PTE_BITFLIP;
707 suppress_ppn = npn;
708 action = PMAP_ACTION_RETRY_RELOCK;
709 UNLOCK_PVH(ppn_to_pai(ppn));
710 *ppnp = npn;
711 goto pmap_cpc_exit;
712 }
713 }
714 }
715
716 if (pmap == kernel_pmap) {
717 action = PMAP_ACTION_ASSERT;
718 goto pmap_cpc_exit;
719 }
720
721 /*
722 * Check for malformed/inconsistent entries.
723 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
724 */
725 if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
726 action = PMAP_ACTION_IGNORE;
727 suppress_reason = PTE_INVALID_CACHEABILITY;
728 }
729 else if (cpte & INTEL_PTE_RSVD) {
730 action = PMAP_ACTION_IGNORE;
731 suppress_reason = PTE_RSVD;
732 }
733 else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
734 action = PMAP_ACTION_IGNORE;
735 suppress_reason = PTE_SUPERVISOR;
736 }
737 pmap_cpc_exit:
738 PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
739
740 if (debug_boot_arg && !ppcd) {
741 action = PMAP_ACTION_ASSERT;
742 }
743
744 if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
745 action = PMAP_ACTION_ASSERT;
746 pmap_pagetable_corruption_timeout = TRUE;
747 }
748 else
749 {
750 pmap_pagetable_corruption_last_abstime = mach_absolute_time();
751 }
752 pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
753 return action;
754 }
755
756 /*
757 * Remove pv list entry.
758 * Called with pv_head_table entry locked.
759 * Returns pv entry to be freed (or NULL).
760 */
761 static inline __attribute__((always_inline)) pv_hashed_entry_t
762 pmap_pv_remove(pmap_t pmap,
763 vm_map_offset_t vaddr,
764 ppnum_t *ppnp,
765 pt_entry_t *pte,
766 boolean_t *was_altacct)
767 {
768 pv_hashed_entry_t pvh_e;
769 pv_rooted_entry_t pv_h;
770 pv_hashed_entry_t *pprevh;
771 int pvhash_idx;
772 uint32_t pv_cnt;
773 ppnum_t ppn;
774
775 *was_altacct = FALSE;
776 pmap_pv_remove_retry:
777 ppn = *ppnp;
778 pvh_e = PV_HASHED_ENTRY_NULL;
779 pv_h = pai_to_pvh(ppn_to_pai(ppn));
780
781 if (__improbable(pv_h->pmap == PMAP_NULL)) {
782 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
783 if (pac == PMAP_ACTION_IGNORE)
784 goto pmap_pv_remove_exit;
785 else if (pac == PMAP_ACTION_ASSERT)
786 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pmap_pagetable_corruption_incidents);
787 else if (pac == PMAP_ACTION_RETRY_RELOCK) {
788 LOCK_PVH(ppn_to_pai(*ppnp));
789 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
790 goto pmap_pv_remove_retry;
791 }
792 else if (pac == PMAP_ACTION_RETRY)
793 goto pmap_pv_remove_retry;
794 }
795
796 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
797 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pv_h);
798 /*
799 * Header is the pv_rooted_entry.
800 * We can't free that. If there is a queued
801 * entry after this one, we remove it
802 * from the ppn queue and from the hash chain,
803 * copy it to the rooted entry, and then free it instead.
804 */
805 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
806 if (pv_h != (pv_rooted_entry_t) pvh_e) {
807 /*
808 * Entry queued to root, remove this from hash
809 * and install as new root.
810 */
811 CHK_NPVHASH();
812 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
813 LOCK_PV_HASH(pvhash_idx);
814 remque(&pvh_e->qlink);
815 pprevh = pvhash(pvhash_idx);
816 if (PV_HASHED_ENTRY_NULL == *pprevh) {
817 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
818 "empty hash, removing rooted, priors: %d",
819 pmap, vaddr, ppn, pmap_pagetable_corruption_incidents);
820 }
821 pmap_pvh_unlink(pvh_e);
822 UNLOCK_PV_HASH(pvhash_idx);
823 pv_h->pmap = pvh_e->pmap;
824 pv_h->va_and_flags = pvh_e->va_and_flags;
825 /* dispose of pvh_e */
826 } else {
827 /* none queued after rooted */
828 pv_h->pmap = PMAP_NULL;
829 pvh_e = PV_HASHED_ENTRY_NULL;
830 }
831 } else {
832 /*
833 * Not removing the rooted pv. Find it on the hash chain, remove it from
834 * the ppn queue and the hash chain, and free it.
835 */
836 CHK_NPVHASH();
837 pvhash_idx = pvhashidx(pmap, vaddr);
838 LOCK_PV_HASH(pvhash_idx);
839 pprevh = pvhash(pvhash_idx);
840 if (PV_HASHED_ENTRY_NULL == *pprevh) {
841 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash, priors: %d",
842 pmap, vaddr, ppn, *pte, pte, pmap_pagetable_corruption_incidents);
843 }
844 pvh_e = *pprevh;
845 pmap_pv_hashlist_walks++;
846 pv_cnt = 0;
847 while (PV_HASHED_ENTRY_NULL != pvh_e) {
848 pv_cnt++;
849 if (pvh_e->pmap == pmap &&
850 PVE_VA(pvh_e) == vaddr &&
851 pvh_e->ppn == ppn)
852 break;
853 pprevh = &pvh_e->nexth;
854 pvh_e = pvh_e->nexth;
855 }
856
857 if (PV_HASHED_ENTRY_NULL == pvh_e) {
858 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
859
860 if (pac == PMAP_ACTION_ASSERT)
861 panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h), pmap_pagetable_corruption_incidents);
862 else {
863 UNLOCK_PV_HASH(pvhash_idx);
864 if (pac == PMAP_ACTION_RETRY_RELOCK) {
865 LOCK_PVH(ppn_to_pai(*ppnp));
866 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
867 goto pmap_pv_remove_retry;
868 }
869 else if (pac == PMAP_ACTION_RETRY) {
870 goto pmap_pv_remove_retry;
871 }
872 else if (pac == PMAP_ACTION_IGNORE) {
873 goto pmap_pv_remove_exit;
874 }
875 }
876 }
877
878 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pvh_e);
879
880 pmap_pv_hashlist_cnts += pv_cnt;
881 if (pmap_pv_hashlist_max < pv_cnt)
882 pmap_pv_hashlist_max = pv_cnt;
883 *pprevh = pvh_e->nexth;
884 remque(&pvh_e->qlink);
885 UNLOCK_PV_HASH(pvhash_idx);
886 }
887 pmap_pv_remove_exit:
888 return pvh_e;
889 }
890
891 static inline __attribute__((always_inline)) boolean_t
892 pmap_pv_is_altacct(
893 pmap_t pmap,
894 vm_map_offset_t vaddr,
895 ppnum_t ppn)
896 {
897 pv_hashed_entry_t pvh_e;
898 pv_rooted_entry_t pv_h;
899 int pvhash_idx;
900 boolean_t is_altacct;
901
902 pvh_e = PV_HASHED_ENTRY_NULL;
903 pv_h = pai_to_pvh(ppn_to_pai(ppn));
904
905 if (__improbable(pv_h->pmap == PMAP_NULL)) {
906 return FALSE;
907 }
908
909 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
910 /*
911 * Header is the pv_rooted_entry.
912 */
913 return IS_ALTACCT_PAGE(ppn, pv_h);
914 }
915
916 CHK_NPVHASH();
917 pvhash_idx = pvhashidx(pmap, vaddr);
918 LOCK_PV_HASH(pvhash_idx);
919 pvh_e = *(pvhash(pvhash_idx));
920 if (PV_HASHED_ENTRY_NULL == pvh_e) {
921 panic("Possible memory corruption: pmap_pv_is_altacct(%p,0x%llx,0x%x): empty hash",
922 pmap, vaddr, ppn);
923 }
924 while (PV_HASHED_ENTRY_NULL != pvh_e) {
925 if (pvh_e->pmap == pmap &&
926 PVE_VA(pvh_e) == vaddr &&
927 pvh_e->ppn == ppn)
928 break;
929 pvh_e = pvh_e->nexth;
930 }
931 if (PV_HASHED_ENTRY_NULL == pvh_e) {
932 is_altacct = FALSE;
933 } else {
934 is_altacct = IS_ALTACCT_PAGE(ppn, pvh_e);
935 }
936 UNLOCK_PV_HASH(pvhash_idx);
937
938 return is_altacct;
939 }
940
941 extern int pt_fake_zone_index;
942 static inline void
943 PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
944 {
945 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
946 }
947
948 static inline void
949 PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
950 {
951 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
952 }
953
954 static inline void
955 PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
956 {
957 pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
958 }
959
960 static inline void
961 PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
962 {
963 pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
964 }
965
966 extern boolean_t pmap_initialized; /* Has pmap_init completed? */
967 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
968
969 int phys_attribute_test(
970 ppnum_t phys,
971 int bits);
972 void phys_attribute_clear(
973 ppnum_t phys,
974 int bits,
975 unsigned int options,
976 void *arg);
977
978 //#define PCID_DEBUG 1
979 #if PCID_DEBUG
980 #define pmap_pcid_log(fmt, args...) \
981 do { \
982 kprintf(fmt, ##args); \
983 printf(fmt, ##args); \
984 } while(0)
985 #else
986 #define pmap_pcid_log(fmt, args...)
987 #endif
988 void pmap_pcid_configure(void);
989
990
991 /*
992 * Atomic 64-bit compare and exchange of a page table entry.
993 */
994 static inline boolean_t
995 pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
996 {
997 boolean_t ret;
998
999 /*
1000 * Load the old value into %rax
1001 * Load the new value into another register
1002 * Compare-exchange-quad at address entryp
1003 * If the compare succeeds, the new value is stored, return TRUE.
1004 * Otherwise, no swap is made, return FALSE.
1005 */
1006 asm volatile(
1007 " lock; cmpxchgq %2,(%3) \n\t"
1008 " setz %%al \n\t"
1009 " movzbl %%al,%0"
1010 : "=a" (ret)
1011 : "a" (old),
1012 "r" (new),
1013 "r" (entryp)
1014 : "memory");
1015 return ret;
1016 }
1017
1018 extern uint32_t pmap_update_clear_pte_count;
1019
1020 static inline void pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits) {
1021 pt_entry_t npte, opte;
1022 do {
1023 opte = *mptep;
1024 if (__improbable(opte == 0)) {
1025 pmap_update_clear_pte_count++;
1026 break;
1027 }
1028 npte = opte & ~(pclear_bits);
1029 npte |= pset_bits;
1030 } while (!pmap_cmpx_pte(mptep, opte, npte));
1031 }
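/*
 * Illustrative usage (not original text): pmap_update_pte() atomically clears
 * and/or sets bits in a live PTE, retrying if it races another update. For
 * example, to make a mapping read-only while marking it referenced:
 *
 *	pmap_update_pte(ptep, INTEL_PTE_WRITE, INTEL_PTE_REF);
 *
 * The caller remains responsible for any needed TLB invalidation, e.g. via
 * PMAP_UPDATE_TLBS().
 */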
1032
1033 /*
1034 * The single pml4 page per pmap is allocated at pmap create time and exists
1035 * for the duration of the pmap. We allocate this page in kernel VM.
1036 * This returns the address of the requested pml4 entry in the top-level page.
1037 */
1038 static inline
1039 pml4_entry_t *
1040 pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1041 {
1042 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1043 (vaddr < 0xFFFF800000000000ULL))) {
1044 return (NULL);
1045 }
1046
1047 #if DEBUG
1048 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
1049 #else
1050 return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
1051 #endif
1052 }
1053
1054 static inline pml4_entry_t *
1055 pmap64_user_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1056 {
1057 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1058 (vaddr < 0xFFFF800000000000ULL))) {
1059 return (NULL);
1060 }
1061
1062 #if DEBUG
1063 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_ucr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
1064 #else
1065 return &pmap->pm_upml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
1066 #endif
1067 }
1068
1069 /*
1070 * Returns address of requested PDPT entry in the physmap.
1071 */
1072 static inline pdpt_entry_t *
1073 pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
1074 {
1075 pml4_entry_t newpf;
1076 pml4_entry_t *pml4;
1077 boolean_t is_ept;
1078
1079 pml4 = pmap64_pml4(pmap, vaddr);
1080 is_ept = is_ept_pmap(pmap);
1081
1082 if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
1083 newpf = *pml4 & PG_FRAME;
1084 return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
1085 [(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
1086 }
1087 return (NULL);
1088 }
1089 /*
1090 * Returns the address of the requested PDE entry in the physmap.
1091 */
1092 static inline pd_entry_t *
1093 pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
1094 {
1095 pdpt_entry_t newpf;
1096 pdpt_entry_t *pdpt;
1097 boolean_t is_ept;
1098
1099 pdpt = pmap64_pdpt(pmap, vaddr);
1100 is_ept = is_ept_pmap(pmap);
1101
1102 if (pdpt && (*pdpt & PTE_VALID_MASK(is_ept))) {
1103 newpf = *pdpt & PG_FRAME;
1104 return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
1105 [(vaddr >> PDSHIFT) & (NPDPG-1)];
1106 }
1107 return (NULL);
1108 }
1109
1110 static inline pd_entry_t *
1111 pmap_pde(pmap_t m, vm_map_offset_t v)
1112 {
1113 pd_entry_t *pde;
1114
1115 pde = pmap64_pde(m, v);
1116
1117 return pde;
1118 }
1119
1120
1121 /*
1122 * Return the address of the mapped PTE for vaddr va in pmap pmap.
1123 *
1124 * In case the PDE maps a superpage, return the PDE, which, in this case,
1125 * is the actual page table entry.
1126 */
1127 static inline pt_entry_t *
1128 pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
1129 {
1130 pd_entry_t *pde;
1131 pd_entry_t newpf;
1132 boolean_t is_ept;
1133
1134 assert(pmap);
1135 pde = pmap64_pde(pmap, vaddr);
1136
1137 is_ept = is_ept_pmap(pmap);
1138
1139 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1140 if (*pde & PTE_PS)
1141 return pde;
1142 newpf = *pde & PG_FRAME;
1143 return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
1144 [i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
1145 }
1146 return (NULL);
1147 }
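/*
 * Illustrative walk (not original text): translating a VA to its PTE and then
 * to a physical page number, assuming the caller holds the pmap lock so the
 * paging hierarchy cannot be torn down underneath it.
 *
 *	pt_entry_t *ptep = pmap_pte(pmap, vaddr);
 *	if (ptep != NULL && (*ptep & PTE_VALID_MASK(is_ept_pmap(pmap)))) {
 *		ppnum_t pn = (ppnum_t) pa_index(pte_to_pa(*ptep));
 *		// ... use pn ...
 *	}
 */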
1148 extern void pmap_alias(
1149 vm_offset_t ava,
1150 vm_map_offset_t start,
1151 vm_map_offset_t end,
1152 vm_prot_t prot,
1153 unsigned int options);
1154
1155 #if DEBUG
1156 #define DPRINTF(x...) kprintf(x)
1157 #else
1158 #define DPRINTF(x...)
1159 #endif
1160
1161 #endif /* MACH_KERNEL_PRIVATE */
1162 #endif /* _I386_PMAP_INTERNAL_ */