osfmk/i386/pmap_internal.h
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #ifndef _I386_PMAP_INTERNAL_
31 #define _I386_PMAP_INTERNAL_
32 #ifdef MACH_KERNEL_PRIVATE
33
34 #include <vm/pmap.h>
35 #include <sys/kdebug.h>
36 #include <kern/ledger.h>
37 #include <kern/simple_lock.h>
38 #include <i386/bit_routines.h>
39
40 /*
41 * pmap locking
42 */
43
44 static inline void
45 PMAP_LOCK_EXCLUSIVE(pmap_t p)
46 {
47 mp_disable_preemption();
48 lck_rw_lock_exclusive(&p->pmap_rwl);
49 }
50
51 static inline void
52 PMAP_LOCK_SHARED(pmap_t p)
53 {
54 mp_disable_preemption();
55 lck_rw_lock_shared(&p->pmap_rwl);
56 }
57
58 static inline void
59 PMAP_LOCK_SHARED_TO_EXCLUSIVE(pmap_t p)
60 {
61 lck_rw_lock_shared_to_exclusive(&p->pmap_rwl);
62 }
63
64 static inline void
65 PMAP_LOCK_EXCLUSIVE_TO_SHARED(pmap_t p)
66 {
67 lck_rw_lock_exclusive_to_shared(&p->pmap_rwl);
68 }
69
70 static inline void
71 PMAP_UNLOCK_EXCLUSIVE(pmap_t p)
72 {
73 lck_rw_unlock_exclusive(&p->pmap_rwl);
74 mp_enable_preemption();
75 }
76
77 static inline void
78 PMAP_UNLOCK_SHARED(pmap_t p)
79 {
80 lck_rw_unlock_shared(&p->pmap_rwl);
81 mp_enable_preemption();
82 }
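/*
 * Illustrative usage sketch (not part of the build): the wrappers above
 * pair preemption control with the pmap rwlock, so a typical mutation
 * path looks like the following (names such as "map" are hypothetical):
 *
 *	PMAP_LOCK_EXCLUSIVE(map);
 *	... modify pagetable / pv state ...
 *	PMAP_UNLOCK_EXCLUSIVE(map);
 *
 * Read-mostly paths take PMAP_LOCK_SHARED()/PMAP_UNLOCK_SHARED() and may
 * upgrade with PMAP_LOCK_SHARED_TO_EXCLUSIVE(); note that the upgrade and
 * downgrade helpers do not themselves change the preemption level.
 */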
83
84 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
85
86 #ifdef PMAP_TRACES
87 extern boolean_t pmap_trace;
88 #define PMAP_TRACE(...) \
89 if (pmap_trace) { \
90 KDBG_RELEASE(__VA_ARGS__); \
91 }
92 #else
93 #define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
94 #endif /* PMAP_TRACES */
95
96 #define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
97
98 kern_return_t pmap_expand_pml4(
99 pmap_t map,
100 vm_map_offset_t v,
101 unsigned int options);
102
103 kern_return_t pmap_expand_pdpt(
104 pmap_t map,
105 vm_map_offset_t v,
106 unsigned int options);
107
108 void phys_attribute_set(
109 ppnum_t phys,
110 int bits);
111
112 void pmap_set_reference(
113 ppnum_t pn);
114
115 boolean_t phys_page_exists(
116 ppnum_t pn);
117
118 void
119 pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);
120
121 void
122 pmap_update_cache_attributes_locked(ppnum_t, unsigned);
123
124
125 static inline void
126 PMAP_UPDATE_TLBS(pmap_t fp, addr64_t s, addr64_t e)
127 {
128 pmap_flush_tlbs(fp, s, e, 0, NULL);
129 }
130
131 #define PMAP_DELAY_TLB_FLUSH 0x01
132
133 static inline void
134 PMAP_UPDATE_TLBS_DELAYED(pmap_t fp, addr64_t s, addr64_t e, pmap_flush_context *pfc)
135 {
136 pmap_flush_tlbs(fp, s, e, PMAP_DELAY_TLB_FLUSH, pfc);
137 }
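/*
 * Illustrative sketch (not part of the build): the delayed variant lets a
 * caller batch TLB shootdowns for several ranges and issue one flush at
 * the end. This assumes the pmap_flush_context_init()/pmap_flush()
 * interfaces declared in <vm/pmap.h>; "map" and the ranges are hypothetical:
 *
 *	pmap_flush_context pfc;
 *
 *	pmap_flush_context_init(&pfc);
 *	PMAP_UPDATE_TLBS_DELAYED(map, start1, end1, &pfc);
 *	PMAP_UPDATE_TLBS_DELAYED(map, start2, end2, &pfc);
 *	pmap_flush(&pfc);
 *
 * so that all deferred ranges are flushed with a single round of IPIs.
 */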
138
139 /*
140 * Private data structures.
141 */
142
143 /*
144 * For each vm_page_t, there is a list of all currently
145 * valid virtual mappings of that page. An entry is
146 * a pv_rooted_entry_t; the list is the pv_table.
147 *
148 * N.B. with the new combo rooted/hashed scheme it is
 149  * only possible to remove individual non-rooted entries
 150  * if they are found via the hashed chains, as there is no
 151  * way to unlink the singly linked hashed entries when reached
152 * via the queue list off the rooted entries. Think of it as
153 * hash/walk/pull, keeping track of the prev pointer while walking
154 * the singly linked hash list. All of this is to save memory and
155 * keep both types of pv_entries as small as possible.
156 */
157
158 /*
159 *
160 * PV HASHING Changes - JK 1/2007
161 *
162 * Pve's establish physical to virtual mappings. These are used for aliasing of a
163 * physical page to (potentially many) virtual addresses within pmaps. In the
164 * previous implementation the structure of the pv_entries (each 16 bytes in size) was
165 *
166 * typedef struct pv_entry {
 167  *    struct pv_entry   *next;
168 * pmap_t pmap;
169 * vm_map_offset_t va;
170 * } *pv_entry_t;
171 *
172 * An initial array of these is created at boot time, one per physical page of
173 * memory, indexed by the physical page number. Additionally, a pool of entries
174 * is created from a pv_zone to be used as needed by pmap_enter() when it is
175 * creating new mappings. Originally, we kept this pool around because the code
176 * in pmap_enter() was unable to block if it needed an entry and none were
177 * available - we'd panic. Some time ago I restructured the pmap_enter() code
178 * so that for user pmaps it can block while zalloc'ing a pv structure and restart,
179 * removing a panic from the code (in the case of the kernel pmap we cannot block
 180  * and so still panic; hence we keep a separate hot pool for use only on kernel pmaps).
181 * The pool has not been removed since there is a large performance gain keeping
182 * freed pv's around for reuse and not suffering the overhead of zalloc for every
183 * new pv we need.
184 *
185 * As pmap_enter() created new mappings it linked the new pve's for them off the
186 * fixed pv array for that ppn (off the next pointer). These pve's are accessed
187 * for several operations, one of them being address space teardown. In that case,
188 * we basically do this
189 *
190 * for (every page/pte in the space) {
191 * calc pve_ptr from the ppn in the pte
192 * for (every pv in the list for the ppn) {
193 * if (this pv is for this pmap/vaddr) {
194 * do housekeeping
195 * unlink/free the pv
196 * }
197 * }
198 * }
199 *
 200  * The problem arose when we were running, say, 8000 (or even 2000) apache or
 201  * other processes and one or all of them terminated. The list hanging off each pv array
 202  * entry could have thousands of entries. We were continuously linearly searching
 203  * each of these lists as we stepped through the address space we were tearing
 204  * down. Because of the locks we held, the likely cache miss for each node,
 205  * and the interrupt disabling needed for MP safety, the system became completely
 206  * unresponsive for many seconds while we did this.
207 *
208 * Realizing that pve's are accessed in two distinct ways (linearly running the
209 * list by ppn for operations like pmap_page_protect and finding and
210 * modifying/removing a single pve as part of pmap_enter processing) has led to
211 * modifying the pve structures and databases.
212 *
213 * There are now two types of pve structures. A "rooted" structure which is
 214  * basically the original structure accessed in an array by ppn, and a "hashed"
 215  * structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
 216  * designed with the two goals of minimizing wired memory and making the lookup of
 217  * a ppn faster. Since the vast majority of pages in the system are not aliased
 218  * and hence represented by a single pv entry, I've kept the rooted entry size as
219 * small as possible because there is one of these dedicated for every physical
220 * page of memory. The hashed pve's are larger due to the addition of the hash
221 * link and the ppn entry needed for matching while running the hash list to find
222 * the entry we are looking for. This way, only systems that have lots of
223 * aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
224 * structures have the same first three fields allowing some simplification in
225 * the code.
226 *
227 * They have these shapes
228 *
229 * typedef struct pv_rooted_entry {
230 * queue_head_t qlink;
231 * vm_map_offset_t va;
232 * pmap_t pmap;
233 * } *pv_rooted_entry_t;
234 *
235 *
236 * typedef struct pv_hashed_entry {
237 * queue_head_t qlink;
238 * vm_map_offset_t va;
239 * pmap_t pmap;
240 * ppnum_t ppn;
241 * struct pv_hashed_entry *nexth;
242 * } *pv_hashed_entry_t;
243 *
244 * The main flow difference is that the code is now aware of the rooted entry and
245 * the hashed entries. Code that runs the pv list still starts with the rooted
246 * entry and then continues down the qlink onto the hashed entries. Code that is
247 * looking up a specific pv entry first checks the rooted entry and then hashes
248 * and runs the hash list for the match. The hash list lengths are much smaller
249 * than the original pv lists that contained all aliases for the specific ppn.
250 *
251 */
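/*
 * Illustrative lookup sketch (not part of the build), following the flow
 * described above: check the rooted entry for the ppn first, then hash
 * [pmap, vaddr] and walk that chain (the macros and inlines used here are
 * defined later in this header):
 *
 *	pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
 *
 *	if (pv_h->pmap == pmap && PVE_VA(pv_h) == vaddr)
 *		... the rooted entry is the mapping we want ...
 *	else {
 *		uint32_t idx = pvhashidx(pmap, vaddr);
 *		LOCK_PV_HASH(idx);
 *		for (pv_hashed_entry_t e = *pvhash(idx);
 *		    e != PV_HASHED_ENTRY_NULL; e = e->nexth)
 *			if (e->pmap == pmap && PVE_VA(e) == vaddr && e->ppn == ppn)
 *				... found the aliased mapping ...
 *		UNLOCK_PV_HASH(idx);
 *	}
 */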
252
253 typedef struct pv_rooted_entry {
254 /* first three entries must match pv_hashed_entry_t */
255 queue_head_t qlink;
256 vm_map_offset_t va_and_flags; /* virtual address for mapping */
257 pmap_t pmap; /* pmap where mapping lies */
258 } *pv_rooted_entry_t;
259
260 #define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0)
261
262 typedef struct pv_hashed_entry {
263 /* first three entries must match pv_rooted_entry_t */
264 queue_head_t qlink;
265 vm_map_offset_t va_and_flags;
266 pmap_t pmap;
267 ppnum_t ppn;
268 struct pv_hashed_entry *nexth;
269 } *pv_hashed_entry_t;
270
271 #define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
272
273 #define PVE_VA(pve) ((pve)->va_and_flags & ~PAGE_MASK)
274 #define PVE_FLAGS(pve) ((pve)->va_and_flags & PAGE_MASK)
275 #define PVE_IS_ALTACCT 0x001
276 #define PVE_IS_ALTACCT_PAGE(pve) \
277 (((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)
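/*
 * Illustrative sketch (not part of the build): va_and_flags packs the
 * page-aligned virtual address together with per-mapping flag bits kept
 * in the low PAGE_MASK bits, so for some hypothetical entry "pve" and
 * page-aligned "vaddr":
 *
 *	pve->va_and_flags = vaddr | PVE_IS_ALTACCT;
 *	assert(PVE_VA(pve) == vaddr);
 *	assert(PVE_FLAGS(pve) == PVE_IS_ALTACCT);
 */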
278
279 //#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */
280 #ifdef PV_DEBUG
 281 #define CHK_NPVHASH() if (0 == npvhashmask) panic("npvhash uninitialized");
 282 #else
 283 #define CHK_NPVHASH()
284 #endif
285
286 #define NPVHASHBUCKETS (4096)
287 #define NPVHASHMASK ((NPVHASHBUCKETS) - 1) /* MUST BE 2^N - 1 */
288 #define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000
289 #define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000
290 #define PV_HASHED_ALLOC_CHUNK_INITIAL 2000
291 #define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL 200
292
293 extern volatile uint32_t mappingrecurse;
294 extern uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;
295
296 /*
297 * PV hash locking
298 */
299
300 #define LOCK_PV_HASH(hash) lock_hash_hash(hash)
301 #define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
302 extern uint32_t npvhashmask;
303 extern pv_hashed_entry_t *pv_hash_table; /* hash lists */
304 extern pv_hashed_entry_t pv_hashed_free_list;
305 extern pv_hashed_entry_t pv_hashed_kern_free_list;
306 decl_simple_lock_data(extern, pv_hashed_free_list_lock)
307 decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
308 decl_simple_lock_data(extern, pv_hash_table_lock)
309 decl_simple_lock_data(extern, phys_backup_lock)
310
311 extern zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry
312 * structures */
313
314 extern uint32_t pv_hashed_free_count;
315 extern uint32_t pv_hashed_kern_free_count;
316 /*
317 * Each entry in the pv_head_table is locked by a bit in the
318 * pv_lock_table. The lock bits are accessed by the address of
319 * the frame they lock.
320 */
321 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
322 #define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
323 extern char *pv_lock_table; /* pointer to array of bits */
324 extern char *pv_hash_lock_table;
325 extern pv_rooted_entry_t pv_head_table; /* array of entries, one per page */
326
327 extern event_t mapping_replenish_event;
328
329 static inline void
330 PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep)
331 {
332 pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
333 simple_lock(&pv_hashed_free_list_lock, LCK_GRP_NULL);
334 /* If the kernel reserved pool is low, let non-kernel mappings allocate
335 * synchronously, possibly subject to a throttle.
336 */
337 if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
338 pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
339 pv_hashed_free_count--;
340 }
341
342 simple_unlock(&pv_hashed_free_list_lock);
343
344 if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
345 if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse)) {
346 thread_wakeup(&mapping_replenish_event);
347 }
348 }
349 }
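/*
 * Illustrative caller sketch (not part of the build), paraphrasing the
 * PV HASHING comment above rather than the real pmap_enter() code: a
 * mapping path tries the common free list first; kernel pmaps fall back
 * to the dedicated reserve, while user pmaps may block in zalloc:
 *
 *	pv_hashed_entry_t pvh_e = PV_HASHED_ENTRY_NULL;
 *
 *	PV_HASHED_ALLOC(&pvh_e);
 *	if (pvh_e == PV_HASHED_ENTRY_NULL) {
 *		if (pmap == kernel_pmap)
 *			PV_HASHED_KERN_ALLOC(&pvh_e);
 *		else
 *			pvh_e = (pv_hashed_entry_t)zalloc(pv_hashed_list_zone);
 *	}
 */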
350
351 static inline void
352 PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt)
353 {
354 simple_lock(&pv_hashed_free_list_lock, LCK_GRP_NULL);
355 pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
356 pv_hashed_free_list = pvh_eh;
357 pv_hashed_free_count += pv_cnt;
358 simple_unlock(&pv_hashed_free_list_lock);
359 }
360
361 extern unsigned pmap_kern_reserve_alloc_stat;
362
363 static inline void
364 PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e)
365 {
366 pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
367 simple_lock(&pv_hashed_kern_free_list_lock, LCK_GRP_NULL);
368
369 if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
370 pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
371 pv_hashed_kern_free_count--;
372 pmap_kern_reserve_alloc_stat++;
373 }
374
375 simple_unlock(&pv_hashed_kern_free_list_lock);
376
377 if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
378 if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse)) {
379 thread_wakeup(&mapping_replenish_event);
380 }
381 }
382 }
383
384 static inline void
385 PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt)
386 {
387 simple_lock(&pv_hashed_kern_free_list_lock, LCK_GRP_NULL);
388 pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
389 pv_hashed_kern_free_list = pvh_eh;
390 pv_hashed_kern_free_count += pv_cnt;
391 simple_unlock(&pv_hashed_kern_free_list_lock);
392 }
393
394 extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
395 extern event_t pmap_user_pv_throttle_event;
396
397 static inline void
398 pmap_pv_throttle(__unused pmap_t p)
399 {
400 pmap_assert(p != kernel_pmap);
401 /* Apply throttle on non-kernel mappings */
402 if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
403 pmap_pv_throttle_stat++;
404 /* This doesn't need to be strictly accurate, merely a hint
405 * to eliminate the timeout when the reserve is replenished.
406 */
407 pmap_pv_throttled_waiters++;
408 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
409 thread_block(THREAD_CONTINUE_NULL);
410 }
411 }
412
413 /*
414 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
415 */
416
417 #define pa_index(pa) (i386_btop(pa))
418 #define ppn_to_pai(ppn) ((int)ppn)
419
420 #define pai_to_pvh(pai) (&pv_head_table[pai])
421 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
422 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
423 #define pvhash(idx) (&pv_hash_table[idx])
424 #define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table)
425 #define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
426
427 #define IS_MANAGED_PAGE(x) \
428 ((unsigned int)(x) <= last_managed_page && \
429 (pmap_phys_attributes[x] & PHYS_MANAGED))
430 #define IS_INTERNAL_PAGE(x) \
431 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL))
432 #define IS_REUSABLE_PAGE(x) \
433 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE))
434 #define IS_ALTACCT_PAGE(x, pve) \
435 (IS_MANAGED_PAGE((x)) && \
436 (PVE_IS_ALTACCT_PAGE((pve))))
437
438 /*
439 * Physical page attributes. Copy bits from PTE definition.
440 */
441 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
442 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
443 #define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
444 #define PHYS_NOENCRYPT INTEL_PTE_USER /* no need to encrypt this page in the hibernation image */
445 #define PHYS_NCACHE INTEL_PTE_NCACHE
446 #define PHYS_PAT INTEL_PTE_PAT
447 #define PHYS_CACHEABILITY_MASK (INTEL_PTE_PAT | INTEL_PTE_NCACHE)
448 #define PHYS_INTERNAL INTEL_PTE_WTHRU /* page from internal object */
449 #define PHYS_REUSABLE INTEL_PTE_WRITE /* page is "reusable" */
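/*
 * Illustrative sketch (not part of the build): these attribute bits are
 * recorded per physical page in the pmap_phys_attributes[] byte array,
 * indexed by page index, e.g. to test whether a managed page has been
 * written through any mapping:
 *
 *	int pai = ppn_to_pai(ppn);
 *
 *	if (IS_MANAGED_PAGE(pai) &&
 *	    (pmap_phys_attributes[pai] & PHYS_MODIFIED))
 *		... page is dirty ...
 */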
450
451 #if DEVELOPMENT || DEBUG
452 extern boolean_t pmap_disable_kheap_nx;
453 extern boolean_t pmap_disable_kstack_nx;
454 #endif
455
456 #define PMAP_EXPAND_OPTIONS_NONE (0x0)
457 #define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
458 #define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)
459 #define PMAP_EXPAND_OPTIONS_ALIASMAP (0x40000000U)
460 /*
461 * Amount of virtual memory mapped by one
462 * page-directory entry.
463 */
464 #define PDE_MAPPED_SIZE (pdetova(1))
465
466 /*
467 * Locking and TLB invalidation
468 */
469
470 /*
471 * Locking Protocols: (changed 2/2007 JK)
472 *
473 * There are two structures in the pmap module that need locking:
474 * the pmaps themselves, and the per-page pv_lists (which are locked
475 * by locking the pv_lock_table entry that corresponds to the pv_head
476 * for the list in question.) Most routines want to lock a pmap and
477 * then do operations in it that require pv_list locking -- however
478 * pmap_remove_all and pmap_copy_on_write operate on a physical page
479 * basis and want to do the locking in the reverse order, i.e. lock
480 * a pv_list and then go through all the pmaps referenced by that list.
481 *
482 * The system wide pmap lock has been removed. Now, paths take a lock
483 * on the pmap before changing its 'shape' and the reverse order lockers
484 * (coming in by phys ppn) take a lock on the corresponding pv and then
485 * retest to be sure nothing changed during the window before they locked
486 * and can then run up/down the pv lists holding the list lock. This also
487 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
488 * previously.
489 */
490
491 /*
492 * PV locking
493 */
494
495 #define LOCK_PVH(index) { \
496 mp_disable_preemption(); \
497 lock_pvh_pai(index); \
498 }
499
500 #define UNLOCK_PVH(index) { \
501 unlock_pvh_pai(index); \
502 mp_enable_preemption(); \
503 }
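/*
 * Illustrative sketch (not part of the build) of the "reverse order"
 * locking described above: physical-page based operations lock the pv
 * entry first and then visit the pmaps referenced by the list:
 *
 *	int pai = ppn_to_pai(ppn);
 *
 *	LOCK_PVH(pai);			(also disables preemption)
 *	pv_rooted_entry_t pv_h = pai_to_pvh(pai);
 *	if (pv_h->pmap != PMAP_NULL) {
 *		... retest state, then walk pv_h->qlink and operate on
 *		    each (pmap, va) still mapping this page ...
 *	}
 *	UNLOCK_PVH(pai);
 */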
504
505 extern uint64_t pde_mapped_size;
506
507 extern char *pmap_phys_attributes;
508 extern ppnum_t last_managed_page;
509
510 extern ppnum_t lowest_lo;
511 extern ppnum_t lowest_hi;
512 extern ppnum_t highest_hi;
513
514 /*
 515  * When spinning through pmap_remove,
 516  * ensure that we don't spend too much
 517  * time with preemption disabled.
 518  * I'm setting the current threshold
 519  * to 20us.
520 */
521 #define MAX_PREEMPTION_LATENCY_NS 20000
522 extern uint64_t max_preemption_latency_tsc;
523
524 #if DEBUG
525 #define PMAP_INTR_DEBUG (1)
526 #endif
527
528 #if PMAP_INTR_DEBUG
529 #define pmap_intr_assert() { \
530 if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \
531 panic("pmap interrupt assert %d %s, %d", processor_avail_count, __FILE__, __LINE__); \
532 }
533 #else
534 #define pmap_intr_assert()
535 #endif
536 #if DEVELOPMENT || DEBUG
537 extern int nx_enabled;
538 #endif
539 extern unsigned int inuse_ptepages_count;
540
541 static inline uint32_t
542 pvhashidx(pmap_t pmap, vm_map_offset_t va)
543 {
544 uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
545 ((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
546 npvhashmask;
547 return hashidx;
548 }
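/*
 * Illustrative sketch (not part of the build): because npvhashmask is of
 * the form 2^n - 1 (see NPVHASHMASK), the bitwise AND above reduces the
 * [pmap, vaddr] hash to a bucket index, and each bucket's chain has its
 * own lock bit ("map" and "va" are hypothetical):
 *
 *	uint32_t idx = pvhashidx(map, va);	bucket in [0, npvhashmask]
 *
 *	LOCK_PV_HASH(idx);
 *	... walk *pvhash(idx) via the nexth links ...
 *	UNLOCK_PV_HASH(idx);
 */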
549
550 /*
 551  * Unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
 552  * Properly deals with the anchor.
 553  * Must be called with the hash locked; does not unlock it.
554 */
555 static inline void
556 pmap_pvh_unlink(pv_hashed_entry_t pvh)
557 {
558 pv_hashed_entry_t curh;
559 pv_hashed_entry_t *pprevh;
560 int pvhash_idx;
561
562 CHK_NPVHASH();
563 pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));
564
565 pprevh = pvhash(pvhash_idx);
566
567 #if PV_DEBUG
568 if (NULL == *pprevh) {
569 panic("pvh_unlink null anchor"); /* JK DEBUG */
570 }
571 #endif
572 curh = *pprevh;
573
574 while (PV_HASHED_ENTRY_NULL != curh) {
575 if (pvh == curh) {
576 break;
577 }
578 pprevh = &curh->nexth;
579 curh = curh->nexth;
580 }
581 if (PV_HASHED_ENTRY_NULL == curh) {
582 panic("pmap_pvh_unlink no pvh");
583 }
584 *pprevh = pvh->nexth;
585 return;
586 }
587
588 static inline void
589 pv_hash_add(pv_hashed_entry_t pvh_e,
590 pv_rooted_entry_t pv_h)
591 {
592 pv_hashed_entry_t *hashp;
593 int pvhash_idx;
594
595 CHK_NPVHASH();
596 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
597 LOCK_PV_HASH(pvhash_idx);
598 insque(&pvh_e->qlink, &pv_h->qlink);
599 hashp = pvhash(pvhash_idx);
600 #if PV_DEBUG
601 if (NULL == hashp) {
602 panic("pv_hash_add(%p) null hash bucket", pvh_e);
603 }
604 #endif
605 pvh_e->nexth = *hashp;
606 *hashp = pvh_e;
607 UNLOCK_PV_HASH(pvhash_idx);
608 }
609
610 static inline void
611 pv_hash_remove(pv_hashed_entry_t pvh_e)
612 {
613 int pvhash_idx;
614
615 CHK_NPVHASH();
616 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
617 LOCK_PV_HASH(pvhash_idx);
618 remque(&pvh_e->qlink);
619 pmap_pvh_unlink(pvh_e);
620 UNLOCK_PV_HASH(pvhash_idx);
621 }
622
623 static inline boolean_t
624 popcnt1(uint64_t distance)
625 {
626 return (distance & (distance - 1)) == 0;
627 }
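/*
 * Note (illustrative, not part of the build): despite the name, popcnt1()
 * returns TRUE when at most one bit is set, so popcnt1(a ^ b) tests
 * whether a and b differ in at most one bit position, which is the
 * Hamming-distance-of-one check used by the corruption classifier below:
 *
 *	popcnt1(0x40 ^ 0x40)	TRUE  (identical values)
 *	popcnt1(0x40 ^ 0x44)	TRUE  (single bit flipped)
 *	popcnt1(0x40 ^ 0x45)	FALSE (two bits differ)
 */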
628
629 /*
630 * Routines to handle suppression of/recovery from some forms of pagetable corruption
631 * incidents observed in the field. These can be either software induced (wild
 632  * stores to the mapwindows where applicable, use-after-free errors
 633  * (typically of pages addressed physically), mis-directed DMAs, etc.), or due
634 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
635 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
636 * still assert on potential software races, but attempt recovery from incidents
637 * identifiable as occurring due to issues beyond the control of the pmap module.
638 * The latter includes single-bit errors and malformed pagetable entries.
639 * We currently limit ourselves to recovery/suppression of one incident per
640 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
641 * are logged.
642 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
643 */
644
645 typedef enum {
646 PTE_VALID = 0x0,
647 PTE_INVALID = 0x1,
648 PTE_RSVD = 0x2,
649 PTE_SUPERVISOR = 0x4,
650 PTE_BITFLIP = 0x8,
651 PV_BITFLIP = 0x10,
652 PTE_INVALID_CACHEABILITY = 0x20
653 } pmap_pagetable_corruption_t;
654
655 typedef enum {
656 ROOT_PRESENT = 0,
657 ROOT_ABSENT = 1
658 } pmap_pv_assertion_t;
659
660 typedef enum {
661 PMAP_ACTION_IGNORE = 0x0,
662 PMAP_ACTION_ASSERT = 0x1,
663 PMAP_ACTION_RETRY = 0x2,
664 PMAP_ACTION_RETRY_RELOCK = 0x4
665 } pmap_pagetable_corruption_action_t;
666
667 #define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
668 extern uint64_t pmap_pagetable_corruption_interval_abstime;
669
670 extern uint32_t pmap_pagetable_corruption_incidents;
671 #define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
672 typedef struct {
673 pmap_pv_assertion_t incident;
674 pmap_pagetable_corruption_t reason;
675 pmap_pagetable_corruption_action_t action;
676 pmap_t pmap;
677 vm_map_offset_t vaddr;
678 pt_entry_t pte;
679 ppnum_t ppn;
680 pmap_t pvpmap;
681 vm_map_offset_t pvva;
682 uint64_t abstime;
683 } pmap_pagetable_corruption_record_t;
684
685 extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
686 extern uint64_t pmap_pagetable_corruption_last_abstime;
687 extern thread_call_t pmap_pagetable_corruption_log_call;
688 extern boolean_t pmap_pagetable_corruption_timeout;
689
690 static inline void
691 pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva)
692 {
693 uint32_t pmap_pagetable_corruption_log_index;
694 pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
695 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
696 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
697 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
698 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
699 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
700 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
701 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
702 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
703 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
704 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
705 /* Asynchronously log */
706 thread_call_enter(pmap_pagetable_corruption_log_call);
707 }
708
709 static inline pmap_pagetable_corruption_action_t
710 pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident)
711 {
712 pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
713 pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
714 ppnum_t suppress_ppn = 0;
715 pt_entry_t cpte = *ptep;
716 ppnum_t cpn = pa_index(pte_to_pa(cpte));
717 ppnum_t ppn = *ppnp;
718 pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
719 pv_rooted_entry_t pv_e = pv_h;
720 uint32_t bitdex;
721 pmap_t pvpmap = pv_h->pmap;
722 vm_map_offset_t pvva = PVE_VA(pv_h);
723 vm_map_offset_t pve_flags;
724 boolean_t ppcd = FALSE;
725 boolean_t is_ept;
726
727 /* Ideally, we'd consult the Mach VM here to definitively determine
728 * the nature of the mapping for this address space and address.
729 * As that would be a layering violation in this context, we
730 * use various heuristics to recover from single bit errors,
731 * malformed pagetable entries etc. These are not intended
732 * to be comprehensive.
733 */
734
735 /* As a precautionary measure, mark A+D */
736 pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
737 is_ept = is_ept_pmap(pmap);
738
739 /*
740 * Correct potential single bit errors in either (but not both) element
741 * of the PV
742 */
743 do {
744 if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
745 (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
746 pve_flags = PVE_FLAGS(pv_e);
747 pv_e->pmap = pmap;
748 pv_h->va_and_flags = vaddr | pve_flags;
749 suppress_reason = PV_BITFLIP;
750 action = PMAP_ACTION_RETRY;
751 goto pmap_cpc_exit;
752 }
753 } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));
754
755 /* Discover root entries with a Hamming
756 * distance of 1 from the supplied
757 * physical page frame.
758 */
759 for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
760 ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
761 if (IS_MANAGED_PAGE(npn)) {
762 pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
763 if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
764 suppress_reason = PTE_BITFLIP;
765 suppress_ppn = npn;
766 action = PMAP_ACTION_RETRY_RELOCK;
767 UNLOCK_PVH(ppn_to_pai(ppn));
768 *ppnp = npn;
769 goto pmap_cpc_exit;
770 }
771 }
772 }
773
774 if (pmap == kernel_pmap) {
775 action = PMAP_ACTION_ASSERT;
776 goto pmap_cpc_exit;
777 }
778
779 /*
780 * Check for malformed/inconsistent entries.
781 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
782 */
783 if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PAT)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
784 action = PMAP_ACTION_IGNORE;
785 suppress_reason = PTE_INVALID_CACHEABILITY;
786 } else if (cpte & INTEL_PTE_RSVD) {
787 action = PMAP_ACTION_IGNORE;
788 suppress_reason = PTE_RSVD;
789 } else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
790 action = PMAP_ACTION_IGNORE;
791 suppress_reason = PTE_SUPERVISOR;
792 }
793 pmap_cpc_exit:
794 PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
795
796 if (debug_boot_arg && !ppcd) {
797 action = PMAP_ACTION_ASSERT;
798 }
799
800 if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
801 action = PMAP_ACTION_ASSERT;
802 pmap_pagetable_corruption_timeout = TRUE;
803 } else {
804 pmap_pagetable_corruption_last_abstime = mach_absolute_time();
805 }
806 pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
807 return action;
808 }
809
810 /*
811 * Remove pv list entry.
812 * Called with pv_head_table entry locked.
813 * Returns pv entry to be freed (or NULL).
814 */
815 static inline __attribute__((always_inline)) pv_hashed_entry_t
816 pmap_pv_remove(pmap_t pmap,
817 vm_map_offset_t vaddr,
818 ppnum_t *ppnp,
819 pt_entry_t *pte,
820 boolean_t *was_altacct)
821 {
822 pv_hashed_entry_t pvh_e;
823 pv_rooted_entry_t pv_h;
824 pv_hashed_entry_t *pprevh;
825 int pvhash_idx;
826 uint32_t pv_cnt;
827 ppnum_t ppn;
828
829 *was_altacct = FALSE;
830 pmap_pv_remove_retry:
831 ppn = *ppnp;
832 pvh_e = PV_HASHED_ENTRY_NULL;
833 pv_h = pai_to_pvh(ppn_to_pai(ppn));
834
835 if (__improbable(pv_h->pmap == PMAP_NULL)) {
836 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
837 if (pac == PMAP_ACTION_IGNORE) {
838 goto pmap_pv_remove_exit;
839 } else if (pac == PMAP_ACTION_ASSERT) {
840 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pmap_pagetable_corruption_incidents);
841 } else if (pac == PMAP_ACTION_RETRY_RELOCK) {
842 LOCK_PVH(ppn_to_pai(*ppnp));
843 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
844 goto pmap_pv_remove_retry;
845 } else if (pac == PMAP_ACTION_RETRY) {
846 goto pmap_pv_remove_retry;
847 }
848 }
849
850 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
851 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pv_h);
852 /*
 853  * We can't free that. If there is a queued
 854  * entry after this one, we remove it
 855  * from the ppn queue and from the hash chain,
 856  * copy it to the rooted entry, and then free it instead.
857 * and copy it to the rooted entry. Then free it instead.
858 */
859 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
860 if (pv_h != (pv_rooted_entry_t) pvh_e) {
861 /*
862 * Entry queued to root, remove this from hash
863 * and install as new root.
864 */
865 CHK_NPVHASH();
866 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
867 LOCK_PV_HASH(pvhash_idx);
868 remque(&pvh_e->qlink);
869 pprevh = pvhash(pvhash_idx);
870 if (PV_HASHED_ENTRY_NULL == *pprevh) {
871 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
872 "empty hash, removing rooted, priors: %d",
873 pmap, vaddr, ppn, pmap_pagetable_corruption_incidents);
874 }
875 pmap_pvh_unlink(pvh_e);
876 UNLOCK_PV_HASH(pvhash_idx);
877 pv_h->pmap = pvh_e->pmap;
878 pv_h->va_and_flags = pvh_e->va_and_flags;
879 /* dispose of pvh_e */
880 } else {
881 /* none queued after rooted */
882 pv_h->pmap = PMAP_NULL;
883 pvh_e = PV_HASHED_ENTRY_NULL;
884 }
885 } else {
886 /*
 887  * Not removing the rooted pv. Find it on the hash chain, remove it from
 888  * the ppn queue and the hash chain, and free it.
889 */
890 CHK_NPVHASH();
891 pvhash_idx = pvhashidx(pmap, vaddr);
892 LOCK_PV_HASH(pvhash_idx);
893 pprevh = pvhash(pvhash_idx);
894 if (PV_HASHED_ENTRY_NULL == *pprevh) {
895 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash, priors: %d",
896 pmap, vaddr, ppn, *pte, pte, pmap_pagetable_corruption_incidents);
897 }
898 pvh_e = *pprevh;
899 pmap_pv_hashlist_walks++;
900 pv_cnt = 0;
901 while (PV_HASHED_ENTRY_NULL != pvh_e) {
902 pv_cnt++;
903 if (pvh_e->pmap == pmap &&
904 PVE_VA(pvh_e) == vaddr &&
905 pvh_e->ppn == ppn) {
906 break;
907 }
908 pprevh = &pvh_e->nexth;
909 pvh_e = pvh_e->nexth;
910 }
911
912 if (PV_HASHED_ENTRY_NULL == pvh_e) {
913 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
914
915 if (pac == PMAP_ACTION_ASSERT) {
916 panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h), pmap_pagetable_corruption_incidents);
917 } else {
918 UNLOCK_PV_HASH(pvhash_idx);
919 if (pac == PMAP_ACTION_RETRY_RELOCK) {
920 LOCK_PVH(ppn_to_pai(*ppnp));
921 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
922 goto pmap_pv_remove_retry;
923 } else if (pac == PMAP_ACTION_RETRY) {
924 goto pmap_pv_remove_retry;
925 } else if (pac == PMAP_ACTION_IGNORE) {
926 goto pmap_pv_remove_exit;
927 }
928 }
929 }
930
931 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pvh_e);
932
933 pmap_pv_hashlist_cnts += pv_cnt;
934 if (pmap_pv_hashlist_max < pv_cnt) {
935 pmap_pv_hashlist_max = pv_cnt;
936 }
937 *pprevh = pvh_e->nexth;
938 remque(&pvh_e->qlink);
939 UNLOCK_PV_HASH(pvhash_idx);
940 }
941 pmap_pv_remove_exit:
942 return pvh_e;
943 }
944
945 static inline __attribute__((always_inline)) boolean_t
946 pmap_pv_is_altacct(
947 pmap_t pmap,
948 vm_map_offset_t vaddr,
949 ppnum_t ppn)
950 {
951 pv_hashed_entry_t pvh_e;
952 pv_rooted_entry_t pv_h;
953 int pvhash_idx;
954 boolean_t is_altacct;
955
956 pvh_e = PV_HASHED_ENTRY_NULL;
957 pv_h = pai_to_pvh(ppn_to_pai(ppn));
958
959 if (__improbable(pv_h->pmap == PMAP_NULL)) {
960 return FALSE;
961 }
962
963 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
964 /*
965 * Header is the pv_rooted_entry.
966 */
967 return IS_ALTACCT_PAGE(ppn, pv_h);
968 }
969
970 CHK_NPVHASH();
971 pvhash_idx = pvhashidx(pmap, vaddr);
972 LOCK_PV_HASH(pvhash_idx);
973 pvh_e = *(pvhash(pvhash_idx));
974 while (PV_HASHED_ENTRY_NULL != pvh_e) {
975 if (pvh_e->pmap == pmap &&
976 PVE_VA(pvh_e) == vaddr &&
977 pvh_e->ppn == ppn) {
978 break;
979 }
980 pvh_e = pvh_e->nexth;
981 }
982 if (PV_HASHED_ENTRY_NULL == pvh_e) {
983 is_altacct = FALSE;
984 } else {
985 is_altacct = IS_ALTACCT_PAGE(ppn, pvh_e);
986 }
987 UNLOCK_PV_HASH(pvhash_idx);
988
989 return is_altacct;
990 }
991
992 extern int pt_fake_zone_index;
993 static inline void
994 PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
995 {
996 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
997 }
998
999 static inline void
1000 PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
1001 {
1002 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
1003 }
1004
1005 static inline void
1006 PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
1007 {
1008 pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
1009 }
1010
1011 static inline void
1012 PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
1013 {
1014 pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
1015 }
1016
 1017 extern boolean_t pmap_initialized; /* Has pmap_init completed? */
1018 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1019
1020 int phys_attribute_test(
1021 ppnum_t phys,
1022 int bits);
1023 void phys_attribute_clear(
1024 ppnum_t phys,
1025 int bits,
1026 unsigned int options,
1027 void *arg);
1028
1029 //#define PCID_DEBUG 1
1030 #if PCID_DEBUG
1031 #define pmap_pcid_log(fmt, args...) \
1032 do { \
1033 kprintf(fmt, ##args); \
1034 printf(fmt, ##args); \
1035 } while(0)
1036 #else
1037 #define pmap_pcid_log(fmt, args...)
1038 #endif
1039 void pmap_pcid_configure(void);
1040
1041
1042 /*
1043 * Atomic 64-bit compare and exchange of a page table entry.
1044 */
1045
1046 #include <machine/atomic.h>
1047 static inline boolean_t
1048 pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
1049 {
1050 return __c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)entryp, &old, new,
1051 memory_order_acq_rel_smp, memory_order_relaxed);
1052 }
1053
1054 extern uint32_t pmap_update_clear_pte_count;
1055
1056 static inline void
1057 pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits)
1058 {
1059 pt_entry_t npte, opte;
1060 do {
1061 opte = *mptep;
1062 if (__improbable(opte == 0)) {
1063 #if DEVELOPMENT || DEBUG
1064 pmap_update_clear_pte_count++;
1065 #endif
1066 break;
1067 }
1068 npte = opte & ~(pclear_bits);
1069 npte |= pset_bits;
1070 } while (!pmap_cmpx_pte(mptep, opte, npte));
1071 }
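/*
 * Illustrative usage (not part of the build): pmap_update_pte() is the
 * way to flip PTE bits without racing concurrent updates (for example
 * hardware setting the accessed/dirty bits), e.g. clearing the wired bit:
 *
 *	pmap_update_pte(ptep, INTEL_PTE_WIRED, 0);
 *
 * The compare-and-exchange loop retries if the PTE changed underneath us,
 * and deliberately leaves PTEs that are already zero untouched.
 */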
1072
1073 /*
1074 * The single pml4 page per pmap is allocated at pmap create time and exists
 1075  * for the duration of the pmap. We allocate this page in kernel VM.
 1076  * This returns the address of the requested pml4 entry in the top-level page.
1077 */
1078 static inline
1079 pml4_entry_t *
1080 pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1081 {
1082 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1083 (vaddr < 0xFFFF800000000000ULL))) {
1084 return NULL;
1085 }
1086
1087 #if DEBUG
1088 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG - 1)]);
1089 #else
1090 return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG - 1)];
1091 #endif
1092 }
1093
1094 static inline pml4_entry_t *
1095 pmap64_user_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1096 {
1097 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1098 (vaddr < 0xFFFF800000000000ULL))) {
1099 return NULL;
1100 }
1101
1102 #if DEBUG
1103 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_ucr3)[(vaddr >> PML4SHIFT) & (NPML4PG - 1)]);
1104 #else
1105 return &pmap->pm_upml4[(vaddr >> PML4SHIFT) & (NPML4PG - 1)];
1106 #endif
1107 }
1108
1109 /*
 1110  * Returns the address of the requested PDPT entry in the physmap.
1111 */
1112 static inline pdpt_entry_t *
1113 pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
1114 {
1115 pml4_entry_t newpf;
1116 pml4_entry_t *pml4;
1117 boolean_t is_ept;
1118
1119 pml4 = pmap64_pml4(pmap, vaddr);
1120 is_ept = is_ept_pmap(pmap);
1121
1122 if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
1123 newpf = *pml4 & PG_FRAME;
1124 return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
1125 [(vaddr >> PDPTSHIFT) & (NPDPTPG - 1)];
1126 }
1127 return NULL;
1128 }
1129 /*
1130 * Returns the address of the requested PDE entry in the physmap.
1131 */
1132 static inline pd_entry_t *
1133 pmap_pde_internal1(vm_map_offset_t vaddr, boolean_t is_ept, pdpt_entry_t *pdpte)
1134 {
1135 if (*pdpte & PTE_VALID_MASK(is_ept)) {
1136 pdpt_entry_t newpf = *pdpte & PG_FRAME;
1137 return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
1138 [(vaddr >> PDSHIFT) & (NPDPG - 1)];
1139 } else {
1140 return NULL;
1141 }
1142 }
1143
1144 static inline pd_entry_t *
1145 pmap_pde_internal0(pmap_t pmap, vm_map_offset_t vaddr, boolean_t is_ept)
1146 {
1147 pdpt_entry_t *pdpt;
1148
1149 pdpt = pmap64_pdpt(pmap, vaddr);
1150 if (pdpt) {
1151 return pmap_pde_internal1(vaddr, is_ept, pdpt);
1152 } else {
1153 return NULL;
1154 }
1155 }
1156
1157
1158 static inline pd_entry_t *
1159 pmap_pde(pmap_t pmap, vm_map_offset_t vaddr)
1160 {
1161 pdpt_entry_t *pdpt;
1162 boolean_t is_ept;
1163
1164 pdpt = pmap64_pdpt(pmap, vaddr);
1165 is_ept = is_ept_pmap(pmap);
1166
1167 if (pdpt) {
1168 return pmap_pde_internal1(vaddr, is_ept, pdpt);
1169 } else {
1170 return NULL;
1171 }
1172 }
1173
1174
1175 /*
 1176  * Return the address of the mapped pte for vaddr va in pmap pmap.
 1177  *
 1178  * In case the pde maps a superpage, return the pde, which, in this case,
 1179  * is the actual page table entry.
1180 */
1181
1182
1183 static inline pt_entry_t *
1184 pmap_pte_internal(vm_map_offset_t vaddr, boolean_t is_ept, pd_entry_t *pde)
1185 {
1186 if (*pde & PTE_VALID_MASK(is_ept)) {
1187 if (__improbable(*pde & PTE_PS)) {
1188 return pde;
1189 }
1190 pd_entry_t newpf = *pde & PG_FRAME;
1191
1192 return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
1193 [i386_btop(vaddr) & (ppnum_t)(NPTEPG - 1)];
1194 } else {
1195 return NULL;
1196 }
1197 }
1198
1199 static inline pt_entry_t *
1200 pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
1201 {
1202 pd_entry_t *pde;
1203
1204 boolean_t is_ept;
1205
1206 is_ept = is_ept_pmap(pmap);
1207
1208 pde = pmap_pde_internal0(pmap, vaddr, is_ept);
1209
1210 if (pde) {
1211 return pmap_pte_internal(vaddr, is_ept, pde);
1212 } else {
1213 return NULL;
1214 }
1215 }
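/*
 * Illustrative sketch (not part of the build): a simple virtual-to-physical
 * lookup for a 4K mapping can be built on pmap_pte(). The caller is assumed
 * to hold the pmap lock and to handle superpages separately, since
 * pmap_pte() returns the PDE itself when PTE_PS is set ("map" and "va"
 * are hypothetical):
 *
 *	boolean_t ept = is_ept_pmap(map);
 *	pt_entry_t *ptep = pmap_pte(map, va);
 *
 *	if (ptep != NULL && (*ptep & PTE_VALID_MASK(ept))) {
 *		uint64_t pa = pte_to_pa(*ptep) + (va & PAGE_MASK);
 *		...
 *	}
 */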
1216
1217 extern void pmap_alias(
1218 vm_offset_t ava,
1219 vm_map_offset_t start,
1220 vm_map_offset_t end,
1221 vm_prot_t prot,
1222 unsigned int options);
1223
1224 #if DEBUG
1225 #define DPRINTF(x...) kprintf(x)
1226 #else
1227 #define DPRINTF(x...)
1228 #endif
1229
1230 #endif /* MACH_KERNEL_PRIVATE */
1231 #endif /* _I386_PMAP_INTERNAL_ */