]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/pmap_internal.h
xnu-4570.51.1.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap_internal.h
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b 28
1c79356b 29
316670eb
A
30#ifndef _I386_PMAP_INTERNAL_
31#define _I386_PMAP_INTERNAL_
b0d623f7 32#ifdef MACH_KERNEL_PRIVATE
1c79356b 33
316670eb
A
34#include <vm/pmap.h>
35#include <sys/kdebug.h>
36#include <kern/ledger.h>
fe8ab488
A
37#include <kern/simple_lock.h>
38#include <i386/bit_routines.h>
316670eb 39
b0d623f7
A
40/*
41 * pmap locking
2d21ac55 42 */
0b4e3aa0 43
b0d623f7
A
44#define PMAP_LOCK(pmap) { \
45 simple_lock(&(pmap)->lock); \
46}
0b4e3aa0 47
b0d623f7
A
48#define PMAP_UNLOCK(pmap) { \
49 simple_unlock(&(pmap)->lock); \
50}
1c79356b 51
39236c6e
A
52#define PMAP_UPDATE_TLBS(pmap, s, e) \
53 pmap_flush_tlbs(pmap, s, e, 0, NULL)
54
55
56#define PMAP_DELAY_TLB_FLUSH 0x01
57
58#define PMAP_UPDATE_TLBS_DELAYED(pmap, s, e, c) \
59 pmap_flush_tlbs(pmap, s, e, PMAP_DELAY_TLB_FLUSH, c)
60
1c79356b 61
b0d623f7 62#define iswired(pte) ((pte) & INTEL_PTE_WIRED)
1c79356b 63
b0d623f7
A
64#ifdef PMAP_TRACES
65extern boolean_t pmap_trace;
5ba3f43e
A
66#define PMAP_TRACE(...) \
67 if (pmap_trace) { \
68 KDBG_RELEASE(__VA_ARGS__); \
b0d623f7
A
69 }
70#else
5ba3f43e 71#define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
b0d623f7 72#endif /* PMAP_TRACES */
1c79356b 73
5ba3f43e 74#define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
6d2010ae 75
316670eb 76kern_return_t pmap_expand_pml4(
b0d623f7 77 pmap_t map,
316670eb
A
78 vm_map_offset_t v,
79 unsigned int options);
b0d623f7 80
316670eb 81kern_return_t pmap_expand_pdpt(
b0d623f7 82 pmap_t map,
316670eb
A
83 vm_map_offset_t v,
84 unsigned int options);
b7266188 85
6d2010ae
A
86void phys_attribute_set(
87 ppnum_t phys,
88 int bits);
89
90void pmap_set_reference(
91 ppnum_t pn);
92
93boolean_t phys_page_exists(
94 ppnum_t pn);
95
39236c6e
A
96void
97pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *);
6d2010ae
A
98
99void
100pmap_update_cache_attributes_locked(ppnum_t, unsigned);
101
6d2010ae 102extern const boolean_t cpu_64bit;
b0d623f7 103
b7266188
A
104/*
105 * Private data structures.
106 */
107
108/*
109 * For each vm_page_t, there is a list of all currently
110 * valid virtual mappings of that page. An entry is
111 * a pv_rooted_entry_t; the list is the pv_table.
112 *
113 * N.B. with the new combo rooted/hashed scheme it is
114 * only possible to remove individual non-rooted entries
115 * if they are found via the hashed chains as there is no
116 * way to unlink the singly linked hashed entries if navigated to
117 * via the queue list off the rooted entries. Think of it as
118 * hash/walk/pull, keeping track of the prev pointer while walking
119 * the singly linked hash list. All of this is to save memory and
120 * keep both types of pv_entries as small as possible.
121 */
122
123/*
124
125PV HASHING Changes - JK 1/2007
126
127Pve's establish physical to virtual mappings. These are used for aliasing of a
6d2010ae
A
128physical page to (potentially many) virtual addresses within pmaps. In the
129previous implementation the structure of the pv_entries (each 16 bytes in size) was
b7266188
A
130
131typedef struct pv_entry {
132 struct pv_entry *next;
133 pmap_t pmap;
134 vm_map_offset_t va;
135} *pv_entry_t;
136
6d2010ae
A
137An initial array of these is created at boot time, one per physical page of
138memory, indexed by the physical page number. Additionally, a pool of entries
139is created from a pv_zone to be used as needed by pmap_enter() when it is
140creating new mappings. Originally, we kept this pool around because the code
141in pmap_enter() was unable to block if it needed an entry and none were
142available - we'd panic. Some time ago I restructured the pmap_enter() code
143so that for user pmaps it can block while zalloc'ing a pv structure and restart,
144removing a panic from the code (in the case of the kernel pmap we cannot block
145and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
146The pool has not been removed since there is a large performance gain keeping
147freed pv's around for reuse and not suffering the overhead of zalloc for every
148new pv we need.
149
150As pmap_enter() created new mappings it linked the new pve's for them off the
151fixed pv array for that ppn (off the next pointer). These pve's are accessed
152for several operations, one of them being address space teardown. In that case,
153we basically do this
b7266188
A
154
155 for (every page/pte in the space) {
156 calc pve_ptr from the ppn in the pte
157 for (every pv in the list for the ppn) {
158 if (this pv is for this pmap/vaddr) {
159 do housekeeping
160 unlink/free the pv
161 }
162 }
163 }
164
6d2010ae
A
165The problem arose when we were running, say 8000 (or even 2000) apache or
166other processes and one or all terminate. The list hanging off each pv array
167entry could have thousands of entries. We were continuously linearly searching
168each of these lists as we stepped through the address space we were tearing
169down. Because of the locks we hold, likely taking a cache miss for each node,
170and interrupt disabling for MP issues the system became completely unresponsive
171for many seconds while we did this.
172
173Realizing that pve's are accessed in two distinct ways (linearly running the
174list by ppn for operations like pmap_page_protect and finding and
175modifying/removing a single pve as part of pmap_enter processing) has led to
176modifying the pve structures and databases.
177
178There are now two types of pve structures. A "rooted" structure which is
179basically the original structure accessed in an array by ppn, and a ''hashed''
180structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
181designed with the two goals of minimizing wired memory and making the lookup of
182a ppn faster. Since a vast majority of pages in the system are not aliased
183and hence represented by a single pv entry I've kept the rooted entry size as
184small as possible because there is one of these dedicated for every physical
185page of memory. The hashed pve's are larger due to the addition of the hash
186link and the ppn entry needed for matching while running the hash list to find
187the entry we are looking for. This way, only systems that have lots of
188aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
189structures have the same first three fields allowing some simplification in
190the code.
b7266188
A
191
192They have these shapes
193
194typedef struct pv_rooted_entry {
6d2010ae
A
195 queue_head_t qlink;
196 vm_map_offset_t va;
197 pmap_t pmap;
b7266188
A
198} *pv_rooted_entry_t;
199
200
201typedef struct pv_hashed_entry {
6d2010ae
A
202 queue_head_t qlink;
203 vm_map_offset_t va;
204 pmap_t pmap;
205 ppnum_t ppn;
206 struct pv_hashed_entry *nexth;
b7266188
A
207} *pv_hashed_entry_t;
208
6d2010ae
A
209The main flow difference is that the code is now aware of the rooted entry and
210the hashed entries. Code that runs the pv list still starts with the rooted
211entry and then continues down the qlink onto the hashed entries. Code that is
212looking up a specific pv entry first checks the rooted entry and then hashes
213and runs the hash list for the match. The hash list lengths are much smaller
214than the original pv lists that contained all aliases for the specific ppn.
b7266188
A
215
216*/
217
6d2010ae
A
218typedef struct pv_rooted_entry {
219 /* first three entries must match pv_hashed_entry_t */
220 queue_head_t qlink;
39037602 221 vm_map_offset_t va_and_flags; /* virtual address for mapping */
6d2010ae 222 pmap_t pmap; /* pmap where mapping lies */
b7266188
A
223} *pv_rooted_entry_t;
224
225#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0)
226
6d2010ae
A
227typedef struct pv_hashed_entry {
228 /* first three entries must match pv_rooted_entry_t */
229 queue_head_t qlink;
39037602 230 vm_map_offset_t va_and_flags;
6d2010ae
A
231 pmap_t pmap;
232 ppnum_t ppn;
233 struct pv_hashed_entry *nexth;
b7266188
A
234} *pv_hashed_entry_t;
235
236#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
237
39037602
A
238#define PVE_VA(pve) ((pve)->va_and_flags & ~PAGE_MASK)
239#define PVE_FLAGS(pve) ((pve)->va_and_flags & PAGE_MASK)
240#define PVE_IS_ALTACCT 0x001
241#define PVE_IS_ALTACCT_PAGE(pve) \
242 (((pve)->va_and_flags & PVE_IS_ALTACCT) ? TRUE : FALSE)
243
//#define PV_DEBUG 1   /* uncomment to enable some PV debugging code */

/*
 * CHK_NPVHASH(): with PV_DEBUG defined, panic if npvhashmask is still zero;
 * otherwise expand to nothing.
 *
 * Both variants are variadic so that they accept exactly the same
 * invocations regardless of configuration, and the debug variant is a
 * single statement so that it is safe in an unbraced if/else.
 */
#ifdef PV_DEBUG
#define CHK_NPVHASH(...)	do {					\
	if (0 == npvhashmask)						\
		panic("npvhash uninitialized");				\
} while (0)
#else
#define CHK_NPVHASH(...)
#endif
250
fe8ab488
A
251#define NPVHASHBUCKETS (4096)
252#define NPVHASHMASK ((NPVHASHBUCKETS) - 1) /* MUST BE 2^N - 1 */
6d2010ae
A
253#define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000
254#define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000
255#define PV_HASHED_ALLOC_CHUNK_INITIAL 2000
256#define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL 200
257
258extern volatile uint32_t mappingrecurse;
259extern uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;
260
261/*
262 * PV hash locking
263 */
264
265#define LOCK_PV_HASH(hash) lock_hash_hash(hash)
266#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
fe8ab488 267extern uint32_t npvhashmask;
6d2010ae
A
268extern pv_hashed_entry_t *pv_hash_table; /* hash lists */
269extern pv_hashed_entry_t pv_hashed_free_list;
270extern pv_hashed_entry_t pv_hashed_kern_free_list;
271decl_simple_lock_data(extern, pv_hashed_free_list_lock)
272decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
273decl_simple_lock_data(extern, pv_hash_table_lock)
fe8ab488 274decl_simple_lock_data(extern, phys_backup_lock)
6d2010ae
A
275
276extern zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry
277 * structures */
278
279extern uint32_t pv_hashed_free_count;
280extern uint32_t pv_hashed_kern_free_count;
281/*
282 * Each entry in the pv_head_table is locked by a bit in the
283 * pv_lock_table. The lock bits are accessed by the address of
284 * the frame they lock.
285 */
286#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
287#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
288extern char *pv_lock_table; /* pointer to array of bits */
289extern char *pv_hash_lock_table;
290extern pv_rooted_entry_t pv_head_table; /* array of entries, one per page */
291
292extern event_t mapping_replenish_event;
293
294static inline void PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
316670eb 295 pmap_assert(*pvh_ep == PV_HASHED_ENTRY_NULL);
6d2010ae
A
296 simple_lock(&pv_hashed_free_list_lock);
297 /* If the kernel reserved pool is low, let non-kernel mappings allocate
298 * synchronously, possibly subject to a throttle.
299 */
316670eb 300 if ((pv_hashed_kern_free_count > pv_hashed_kern_low_water_mark) && ((*pvh_ep = pv_hashed_free_list) != 0)) {
6d2010ae
A
301 pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
302 pv_hashed_free_count--;
303 }
304
305 simple_unlock(&pv_hashed_free_list_lock);
306
316670eb 307 if (pv_hashed_free_count <= pv_hashed_low_water_mark) {
6d2010ae
A
308 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
309 thread_wakeup(&mapping_replenish_event);
310 }
b7266188
A
311}
312
6d2010ae
A
313static inline void PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
314 simple_lock(&pv_hashed_free_list_lock);
315 pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
316 pv_hashed_free_list = pvh_eh;
317 pv_hashed_free_count += pv_cnt;
318 simple_unlock(&pv_hashed_free_list_lock);
b7266188
A
319}
320
6d2010ae
A
321extern unsigned pmap_kern_reserve_alloc_stat;
322
323static inline void PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
316670eb 324 pmap_assert(*pvh_e == PV_HASHED_ENTRY_NULL);
6d2010ae
A
325 simple_lock(&pv_hashed_kern_free_list_lock);
326
327 if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
328 pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
329 pv_hashed_kern_free_count--;
330 pmap_kern_reserve_alloc_stat++;
331 }
332
333 simple_unlock(&pv_hashed_kern_free_list_lock);
334
335 if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
336 if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
337 thread_wakeup(&mapping_replenish_event);
338 }
b7266188
A
339}
340
6d2010ae
A
341static inline void PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
342 simple_lock(&pv_hashed_kern_free_list_lock);
343 pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
344 pv_hashed_kern_free_list = pvh_eh;
345 pv_hashed_kern_free_count += pv_cnt;
346 simple_unlock(&pv_hashed_kern_free_list_lock);
347}
348
349extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
350extern event_t pmap_user_pv_throttle_event;
351
352static inline void pmap_pv_throttle(__unused pmap_t p) {
353 pmap_assert(p != kernel_pmap);
354 /* Apply throttle on non-kernel mappings */
355 if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
356 pmap_pv_throttle_stat++;
357 /* This doesn't need to be strictly accurate, merely a hint
358 * to eliminate the timeout when the reserve is replenished.
359 */
360 pmap_pv_throttled_waiters++;
361 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
362 thread_block(THREAD_CONTINUE_NULL);
363 }
b7266188
A
364}
365
366/*
367 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
368 */
369
370#define pa_index(pa) (i386_btop(pa))
/*
 * Convert a physical page number to a pv_head_table / attribute index.
 * The argument is parenthesized so that the cast applies to the whole
 * expression passed in, not just to its first operand.
 */
#define ppn_to_pai(ppn)		((int)(ppn))
372
373#define pai_to_pvh(pai) (&pv_head_table[pai])
374#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
375#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
376#define pvhash(idx) (&pv_hash_table[idx])
b7266188
A
377#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table)
378#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
379
380#define IS_MANAGED_PAGE(x) \
381 ((unsigned int)(x) <= last_managed_page && \
382 (pmap_phys_attributes[x] & PHYS_MANAGED))
39236c6e
A
383#define IS_INTERNAL_PAGE(x) \
384 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL))
385#define IS_REUSABLE_PAGE(x) \
386 (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE))
d190cdc3 387#define IS_ALTACCT_PAGE(x,pve) \
39037602 388 (IS_MANAGED_PAGE((x)) && \
d190cdc3 389 (PVE_IS_ALTACCT_PAGE((pve))))
b7266188
A
390
391/*
392 * Physical page attributes. Copy bits from PTE definition.
393 */
394#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
395#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
3e170ce0
A
396#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
397#define PHYS_NOENCRYPT INTEL_PTE_USER /* no need to encrypt this page in the hibernation image */
6d2010ae
A
398#define PHYS_NCACHE INTEL_PTE_NCACHE
399#define PHYS_PTA INTEL_PTE_PTA
400#define PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
3e170ce0
A
401#define PHYS_INTERNAL INTEL_PTE_WTHRU /* page from internal object */
402#define PHYS_REUSABLE INTEL_PTE_WRITE /* page is "reusable" */
b7266188 403
3e170ce0
A
404extern boolean_t pmap_disable_kheap_nx;
405extern boolean_t pmap_disable_kstack_nx;
316670eb
A
406
407#define PMAP_EXPAND_OPTIONS_NONE (0x0)
408#define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
409#define PMAP_EXPAND_OPTIONS_NOENTER (PMAP_OPTIONS_NOENTER)
5c9f4661 410#define PMAP_EXPAND_OPTIONS_ALIASMAP (0x40000000U)
b7266188
A
411/*
412 * Amount of virtual memory mapped by one
413 * page-directory entry.
414 */
415#define PDE_MAPPED_SIZE (pdetova(1))
416
417
418/*
419 * Locking and TLB invalidation
420 */
421
422/*
423 * Locking Protocols: (changed 2/2007 JK)
424 *
425 * There are two structures in the pmap module that need locking:
426 * the pmaps themselves, and the per-page pv_lists (which are locked
427 * by locking the pv_lock_table entry that corresponds to the pv_head
428 * for the list in question.) Most routines want to lock a pmap and
429 * then do operations in it that require pv_list locking -- however
430 * pmap_remove_all and pmap_copy_on_write operate on a physical page
431 * basis and want to do the locking in the reverse order, i.e. lock
432 * a pv_list and then go through all the pmaps referenced by that list.
433 *
434 * The system wide pmap lock has been removed. Now, paths take a lock
435 * on the pmap before changing its 'shape' and the reverse order lockers
436 * (coming in by phys ppn) take a lock on the corresponding pv and then
437 * retest to be sure nothing changed during the window before they locked
438 * and can then run up/down the pv lists holding the list lock. This also
439 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
440 * previously.
441 */
442
443/*
444 * PV locking
445 */
446
447#define LOCK_PVH(index) { \
448 mp_disable_preemption(); \
449 lock_pvh_pai(index); \
450}
451
452#define UNLOCK_PVH(index) { \
453 unlock_pvh_pai(index); \
454 mp_enable_preemption(); \
455}
b7266188 456
b7266188
A
457extern uint64_t pde_mapped_size;
458
459extern char *pmap_phys_attributes;
316670eb 460extern ppnum_t last_managed_page;
b7266188 461
060df5ea
A
462extern ppnum_t lowest_lo;
463extern ppnum_t lowest_hi;
464extern ppnum_t highest_hi;
465
b7266188
A
466/*
467 * when spinning through pmap_remove
468 * ensure that we don't spend too much
469 * time with preemption disabled.
470 * I'm setting the current threshold
471 * to 20us
472 */
473#define MAX_PREEMPTION_LATENCY_NS 20000
474extern uint64_t max_preemption_latency_tsc;
475
5c9f4661
A
476#if DEBUG
477#define PMAP_INTR_DEBUG (1)
478#endif
479
480#if PMAP_INTR_DEBUG
b7266188
A
481#define pmap_intr_assert() { \
482 if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \
5c9f4661 483 panic("pmap interrupt assert %d %s, %d", processor_avail_count, __FILE__, __LINE__); \
b7266188
A
484}
485#else
486#define pmap_intr_assert()
487#endif
488
6d2010ae
A
489extern int nx_enabled;
490extern unsigned int inuse_ptepages_count;
b7266188
A
491
492static inline uint32_t
493pvhashidx(pmap_t pmap, vm_map_offset_t va)
494{
fe8ab488 495 uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^
6d2010ae 496 ((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
fe8ab488
A
497 npvhashmask;
498 return hashidx;
b7266188
A
499}
500
501/*
502 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
503 * properly deals with the anchor.
504 * must be called with the hash locked, does not unlock it
505 */
b7266188
A
506static inline void
507pmap_pvh_unlink(pv_hashed_entry_t pvh)
508{
509 pv_hashed_entry_t curh;
510 pv_hashed_entry_t *pprevh;
511 int pvhash_idx;
512
513 CHK_NPVHASH();
39037602 514 pvhash_idx = pvhashidx(pvh->pmap, PVE_VA(pvh));
b7266188
A
515
516 pprevh = pvhash(pvhash_idx);
517
518#if PV_DEBUG
519 if (NULL == *pprevh)
520 panic("pvh_unlink null anchor"); /* JK DEBUG */
521#endif
522 curh = *pprevh;
523
524 while (PV_HASHED_ENTRY_NULL != curh) {
525 if (pvh == curh)
526 break;
527 pprevh = &curh->nexth;
528 curh = curh->nexth;
529 }
530 if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
531 *pprevh = pvh->nexth;
532 return;
533}
534
535static inline void
536pv_hash_add(pv_hashed_entry_t pvh_e,
537 pv_rooted_entry_t pv_h)
538{
539 pv_hashed_entry_t *hashp;
540 int pvhash_idx;
541
542 CHK_NPVHASH();
39037602 543 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
b7266188
A
544 LOCK_PV_HASH(pvhash_idx);
545 insque(&pvh_e->qlink, &pv_h->qlink);
546 hashp = pvhash(pvhash_idx);
547#if PV_DEBUG
548 if (NULL==hashp)
549 panic("pv_hash_add(%p) null hash bucket", pvh_e);
550#endif
551 pvh_e->nexth = *hashp;
552 *hashp = pvh_e;
553 UNLOCK_PV_HASH(pvhash_idx);
554}
555
556static inline void
557pv_hash_remove(pv_hashed_entry_t pvh_e)
558{
559 int pvhash_idx;
560
561 CHK_NPVHASH();
39037602 562 pvhash_idx = pvhashidx(pvh_e->pmap,PVE_VA(pvh_e));
b7266188
A
563 LOCK_PV_HASH(pvhash_idx);
564 remque(&pvh_e->qlink);
565 pmap_pvh_unlink(pvh_e);
566 UNLOCK_PV_HASH(pvhash_idx);
6d2010ae 567}
b7266188
A
568
569static inline boolean_t popcnt1(uint64_t distance) {
570 return ((distance & (distance - 1)) == 0);
571}
572
573/*
574 * Routines to handle suppression of/recovery from some forms of pagetable corruption
575 * incidents observed in the field. These can be either software induced (wild
576 * stores to the mapwindows where applicable, use after free errors
577 * (typically of pages addressed physically), mis-directed DMAs etc., or due
578 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
579 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
580 * still assert on potential software races, but attempt recovery from incidents
581 * identifiable as occurring due to issues beyond the control of the pmap module.
582 * The latter includes single-bit errors and malformed pagetable entries.
583 * We currently limit ourselves to recovery/suppression of one incident per
584 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
585 * are logged.
586 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
587 */
588
589typedef enum {
590 PTE_VALID = 0x0,
591 PTE_INVALID = 0x1,
592 PTE_RSVD = 0x2,
593 PTE_SUPERVISOR = 0x4,
594 PTE_BITFLIP = 0x8,
595 PV_BITFLIP = 0x10,
596 PTE_INVALID_CACHEABILITY = 0x20
597} pmap_pagetable_corruption_t;
598
599typedef enum {
600 ROOT_PRESENT = 0,
601 ROOT_ABSENT = 1
602} pmap_pv_assertion_t;
603
604typedef enum {
605 PMAP_ACTION_IGNORE = 0x0,
606 PMAP_ACTION_ASSERT = 0x1,
607 PMAP_ACTION_RETRY = 0x2,
608 PMAP_ACTION_RETRY_RELOCK = 0x4
609} pmap_pagetable_corruption_action_t;
610
611#define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
612extern uint64_t pmap_pagetable_corruption_interval_abstime;
613
614extern uint32_t pmap_pagetable_corruption_incidents;
615#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
616typedef struct {
617 pmap_pv_assertion_t incident;
618 pmap_pagetable_corruption_t reason;
619 pmap_pagetable_corruption_action_t action;
620 pmap_t pmap;
621 vm_map_offset_t vaddr;
622 pt_entry_t pte;
623 ppnum_t ppn;
624 pmap_t pvpmap;
625 vm_map_offset_t pvva;
626 uint64_t abstime;
627} pmap_pagetable_corruption_record_t;
628
629extern pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[];
630extern uint64_t pmap_pagetable_corruption_last_abstime;
631extern thread_call_t pmap_pagetable_corruption_log_call;
632extern boolean_t pmap_pagetable_corruption_timeout;
633
634static inline void
635pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t *ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva) {
636 uint32_t pmap_pagetable_corruption_log_index;
637 pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
638 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
639 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
640 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
641 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
642 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
643 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
644 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
645 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
646 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
647 pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
648 /* Asynchronously log */
649 thread_call_enter(pmap_pagetable_corruption_log_call);
650}
651
652static inline pmap_pagetable_corruption_action_t
653pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *ppnp, pt_entry_t *ptep, pmap_pv_assertion_t incident) {
316670eb 654 pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
b7266188
A
655 pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
656 ppnum_t suppress_ppn = 0;
657 pt_entry_t cpte = *ptep;
658 ppnum_t cpn = pa_index(pte_to_pa(cpte));
659 ppnum_t ppn = *ppnp;
660 pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));
661 pv_rooted_entry_t pv_e = pv_h;
662 uint32_t bitdex;
663 pmap_t pvpmap = pv_h->pmap;
39037602 664 vm_map_offset_t pvva = PVE_VA(pv_h);
d190cdc3 665 vm_map_offset_t pve_flags;
b7266188 666 boolean_t ppcd = FALSE;
3e170ce0 667 boolean_t is_ept;
b7266188
A
668
669 /* Ideally, we'd consult the Mach VM here to definitively determine
670 * the nature of the mapping for this address space and address.
671 * As that would be a layering violation in this context, we
672 * use various heuristics to recover from single bit errors,
673 * malformed pagetable entries etc. These are not intended
674 * to be comprehensive.
675 */
676
677 /* As a precautionary measure, mark A+D */
678 pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
3e170ce0 679 is_ept = is_ept_pmap(pmap);
b7266188
A
680
681 /*
682 * Correct potential single bit errors in either (but not both) element
683 * of the PV
684 */
685 do {
39037602
A
686 if ((popcnt1((uintptr_t)pv_e->pmap ^ (uintptr_t)pmap) && PVE_VA(pv_e) == vaddr) ||
687 (pv_e->pmap == pmap && popcnt1(PVE_VA(pv_e) ^ vaddr))) {
d190cdc3 688 pve_flags = PVE_FLAGS(pv_e);
b7266188 689 pv_e->pmap = pmap;
d190cdc3 690 pv_h->va_and_flags = vaddr | pve_flags;
b7266188
A
691 suppress_reason = PV_BITFLIP;
692 action = PMAP_ACTION_RETRY;
693 goto pmap_cpc_exit;
694 }
316670eb 695 } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));
b7266188
A
696
697 /* Discover root entries with a Hamming
698 * distance of 1 from the supplied
699 * physical page frame.
700 */
701 for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
702 ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
703 if (IS_MANAGED_PAGE(npn)) {
704 pv_rooted_entry_t npv_h = pai_to_pvh(ppn_to_pai(npn));
39037602 705 if (PVE_VA(npv_h) == vaddr && npv_h->pmap == pmap) {
b7266188
A
706 suppress_reason = PTE_BITFLIP;
707 suppress_ppn = npn;
708 action = PMAP_ACTION_RETRY_RELOCK;
709 UNLOCK_PVH(ppn_to_pai(ppn));
710 *ppnp = npn;
711 goto pmap_cpc_exit;
712 }
713 }
714 }
715
716 if (pmap == kernel_pmap) {
717 action = PMAP_ACTION_ASSERT;
718 goto pmap_cpc_exit;
719 }
720
3e170ce0
A
721 /*
722 * Check for malformed/inconsistent entries.
723 * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
724 */
725 if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) == (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
b7266188
A
726 action = PMAP_ACTION_IGNORE;
727 suppress_reason = PTE_INVALID_CACHEABILITY;
728 }
729 else if (cpte & INTEL_PTE_RSVD) {
730 action = PMAP_ACTION_IGNORE;
731 suppress_reason = PTE_RSVD;
732 }
3e170ce0 733 else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
b7266188
A
734 action = PMAP_ACTION_IGNORE;
735 suppress_reason = PTE_SUPERVISOR;
736 }
737pmap_cpc_exit:
738 PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
739
740 if (debug_boot_arg && !ppcd) {
741 action = PMAP_ACTION_ASSERT;
742 }
743
744 if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
745 action = PMAP_ACTION_ASSERT;
746 pmap_pagetable_corruption_timeout = TRUE;
747 }
748 else
749 {
750 pmap_pagetable_corruption_last_abstime = mach_absolute_time();
751 }
752 pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
753 return action;
754}
6d2010ae 755
b7266188
A
756/*
757 * Remove pv list entry.
758 * Called with pv_head_table entry locked.
759 * Returns pv entry to be freed (or NULL).
760 */
b7266188 761static inline __attribute__((always_inline)) pv_hashed_entry_t
6d2010ae
A
762pmap_pv_remove(pmap_t pmap,
763 vm_map_offset_t vaddr,
d190cdc3
A
764 ppnum_t *ppnp,
765 pt_entry_t *pte,
766 boolean_t *was_altacct)
b7266188
A
767{
768 pv_hashed_entry_t pvh_e;
769 pv_rooted_entry_t pv_h;
770 pv_hashed_entry_t *pprevh;
771 int pvhash_idx;
772 uint32_t pv_cnt;
773 ppnum_t ppn;
774
d190cdc3 775 *was_altacct = FALSE;
b7266188
A
776pmap_pv_remove_retry:
777 ppn = *ppnp;
778 pvh_e = PV_HASHED_ENTRY_NULL;
779 pv_h = pai_to_pvh(ppn_to_pai(ppn));
780
316670eb 781 if (__improbable(pv_h->pmap == PMAP_NULL)) {
b7266188
A
782 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
783 if (pac == PMAP_ACTION_IGNORE)
784 goto pmap_pv_remove_exit;
785 else if (pac == PMAP_ACTION_ASSERT)
5ba3f43e 786 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pmap_pagetable_corruption_incidents);
b7266188
A
787 else if (pac == PMAP_ACTION_RETRY_RELOCK) {
788 LOCK_PVH(ppn_to_pai(*ppnp));
789 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
790 goto pmap_pv_remove_retry;
791 }
792 else if (pac == PMAP_ACTION_RETRY)
793 goto pmap_pv_remove_retry;
794 }
795
39037602 796 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
d190cdc3 797 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pv_h);
b7266188
A
798 /*
799 * Header is the pv_rooted_entry.
800 * We can't free that. If there is a queued
801 * entry after this one we remove that
802 * from the ppn queue, we remove it from the hash chain
803 * and copy it to the rooted entry. Then free it instead.
804 */
805 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
806 if (pv_h != (pv_rooted_entry_t) pvh_e) {
807 /*
808 * Entry queued to root, remove this from hash
809 * and install as new root.
810 */
811 CHK_NPVHASH();
39037602 812 pvhash_idx = pvhashidx(pvh_e->pmap, PVE_VA(pvh_e));
b7266188
A
813 LOCK_PV_HASH(pvhash_idx);
814 remque(&pvh_e->qlink);
815 pprevh = pvhash(pvhash_idx);
816 if (PV_HASHED_ENTRY_NULL == *pprevh) {
39236c6e 817 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): "
5ba3f43e
A
818 "empty hash, removing rooted, priors: %d",
819 pmap, vaddr, ppn, pmap_pagetable_corruption_incidents);
b7266188
A
820 }
821 pmap_pvh_unlink(pvh_e);
822 UNLOCK_PV_HASH(pvhash_idx);
823 pv_h->pmap = pvh_e->pmap;
d190cdc3 824 pv_h->va_and_flags = pvh_e->va_and_flags;
39037602 825 /* dispose of pvh_e */
b7266188
A
826 } else {
827 /* none queued after rooted */
828 pv_h->pmap = PMAP_NULL;
829 pvh_e = PV_HASHED_ENTRY_NULL;
830 }
831 } else {
832 /*
833 * not removing rooted pv. find it on hash chain, remove from
834 * ppn queue and hash chain and free it
835 */
836 CHK_NPVHASH();
837 pvhash_idx = pvhashidx(pmap, vaddr);
838 LOCK_PV_HASH(pvhash_idx);
839 pprevh = pvhash(pvhash_idx);
840 if (PV_HASHED_ENTRY_NULL == *pprevh) {
5ba3f43e
A
841 panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash, priors: %d",
842 pmap, vaddr, ppn, *pte, pte, pmap_pagetable_corruption_incidents);
b7266188
A
843 }
844 pvh_e = *pprevh;
845 pmap_pv_hashlist_walks++;
846 pv_cnt = 0;
847 while (PV_HASHED_ENTRY_NULL != pvh_e) {
848 pv_cnt++;
849 if (pvh_e->pmap == pmap &&
39037602 850 PVE_VA(pvh_e) == vaddr &&
b7266188
A
851 pvh_e->ppn == ppn)
852 break;
853 pprevh = &pvh_e->nexth;
854 pvh_e = pvh_e->nexth;
855 }
6d2010ae 856
b7266188
A
857 if (PV_HASHED_ENTRY_NULL == pvh_e) {
858 pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
859
860 if (pac == PMAP_ACTION_ASSERT)
5ba3f43e 861 panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx, priors: %d", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, PVE_VA(pv_h), pmap_pagetable_corruption_incidents);
b7266188
A
862 else {
863 UNLOCK_PV_HASH(pvhash_idx);
864 if (pac == PMAP_ACTION_RETRY_RELOCK) {
865 LOCK_PVH(ppn_to_pai(*ppnp));
866 pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
867 goto pmap_pv_remove_retry;
868 }
869 else if (pac == PMAP_ACTION_RETRY) {
870 goto pmap_pv_remove_retry;
871 }
872 else if (pac == PMAP_ACTION_IGNORE) {
873 goto pmap_pv_remove_exit;
874 }
875 }
876 }
6d2010ae 877
d190cdc3
A
878 *was_altacct = IS_ALTACCT_PAGE(ppn_to_pai(*ppnp), pvh_e);
879
b7266188
A
880 pmap_pv_hashlist_cnts += pv_cnt;
881 if (pmap_pv_hashlist_max < pv_cnt)
882 pmap_pv_hashlist_max = pv_cnt;
883 *pprevh = pvh_e->nexth;
884 remque(&pvh_e->qlink);
885 UNLOCK_PV_HASH(pvhash_idx);
886 }
887pmap_pv_remove_exit:
888 return pvh_e;
889}
890
d190cdc3
A
891static inline __attribute__((always_inline)) boolean_t
892pmap_pv_is_altacct(
893 pmap_t pmap,
894 vm_map_offset_t vaddr,
895 ppnum_t ppn)
896{
897 pv_hashed_entry_t pvh_e;
898 pv_rooted_entry_t pv_h;
899 int pvhash_idx;
900 boolean_t is_altacct;
901
902 pvh_e = PV_HASHED_ENTRY_NULL;
903 pv_h = pai_to_pvh(ppn_to_pai(ppn));
904
905 if (__improbable(pv_h->pmap == PMAP_NULL)) {
906 return FALSE;
907 }
908
909 if (PVE_VA(pv_h) == vaddr && pv_h->pmap == pmap) {
910 /*
911 * Header is the pv_rooted_entry.
912 */
913 return IS_ALTACCT_PAGE(ppn, pv_h);
914 }
915
916 CHK_NPVHASH();
917 pvhash_idx = pvhashidx(pmap, vaddr);
918 LOCK_PV_HASH(pvhash_idx);
919 pvh_e = *(pvhash(pvhash_idx));
920 if (PV_HASHED_ENTRY_NULL == pvh_e) {
921 panic("Possible memory corruption: pmap_pv_is_altacct(%p,0x%llx,0x%x): empty hash",
922 pmap, vaddr, ppn);
923 }
924 while (PV_HASHED_ENTRY_NULL != pvh_e) {
925 if (pvh_e->pmap == pmap &&
926 PVE_VA(pvh_e) == vaddr &&
927 pvh_e->ppn == ppn)
928 break;
929 pvh_e = pvh_e->nexth;
930 }
931 if (PV_HASHED_ENTRY_NULL == pvh_e) {
932 is_altacct = FALSE;
933 } else {
934 is_altacct = IS_ALTACCT_PAGE(ppn, pvh_e);
935 }
936 UNLOCK_PV_HASH(pvhash_idx);
937
938 return is_altacct;
939}
6d2010ae
A
940
941extern int pt_fake_zone_index;
942static inline void
316670eb 943PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
6d2010ae 944{
316670eb 945 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
6d2010ae
A
946}
947
948static inline void
316670eb 949PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
6d2010ae 950{
316670eb 951 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
6d2010ae
A
952}
953
316670eb
A
954static inline void
955PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
956{
957 pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
958}
959
960static inline void
961PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
962{
963 pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
964}
965
6d2010ae
A
966extern boolean_t pmap_initialized;/* Has pmap_init completed? */
967#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
968
6d2010ae
A
969int phys_attribute_test(
970 ppnum_t phys,
971 int bits);
972void phys_attribute_clear(
973 ppnum_t phys,
39236c6e
A
974 int bits,
975 unsigned int options,
976 void *arg);
6d2010ae
A
977
/* Uncomment to enable PCID logging via pmap_pcid_log(). */
//#define PCID_DEBUG 1
#if PCID_DEBUG
/* Emit the message through both kprintf() and printf(). */
#define pmap_pcid_log(fmt, ...)				\
	do {						\
		kprintf(fmt, ##__VA_ARGS__);		\
		printf(fmt, ##__VA_ARGS__);		\
	} while (0)
#else
/* Logging disabled: expands to nothing. */
#define pmap_pcid_log(fmt, ...)
#endif
988void pmap_pcid_configure(void);
989
316670eb
A
990
991/*
992 * Atomic 64-bit compare and exchange of a page table entry.
993 */
994static inline boolean_t
995pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
996{
997 boolean_t ret;
998
316670eb
A
999 /*
1000 * Load the old value into %rax
1001 * Load the new value into another register
1002 * Compare-exchange-quad at address entryp
1003 * If the compare succeeds, the new value is stored, return TRUE.
1004 * Otherwise, no swap is made, return FALSE.
1005 */
1006 asm volatile(
1007 " lock; cmpxchgq %2,(%3) \n\t"
1008 " setz %%al \n\t"
1009 " movzbl %%al,%0"
1010 : "=a" (ret)
1011 : "a" (old),
1012 "r" (new),
1013 "r" (entryp)
1014 : "memory");
316670eb
A
1015 return ret;
1016}
1017
1018extern uint32_t pmap_update_clear_pte_count;
1019
1020static inline void pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits) {
1021 pt_entry_t npte, opte;
1022 do {
1023 opte = *mptep;
1024 if (__improbable(opte == 0)) {
1025 pmap_update_clear_pte_count++;
1026 break;
1027 }
1028 npte = opte & ~(pclear_bits);
1029 npte |= pset_bits;
1030 } while (!pmap_cmpx_pte(mptep, opte, npte));
1031}
1032
6d2010ae
A
1033/*
1034 * The single pml4 page per pmap is allocated at pmap create time and exists
1035 * for the duration of the pmap. we allocate this page in kernel vm.
1036 * this returns the address of the requested pml4 entry in the top level page.
1037 */
1038static inline
1039pml4_entry_t *
1040pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1041{
316670eb
A
1042 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1043 (vaddr < 0xFFFF800000000000ULL))) {
1044 return (NULL);
1045 }
1046
39037602 1047#if DEBUG
6d2010ae
A
1048 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
1049#else
1050 return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
1051#endif
1052}
1053
5c9f4661
A
1054static inline pml4_entry_t *
1055pmap64_user_pml4(pmap_t pmap, vm_map_offset_t vaddr)
1056{
1057 if (__improbable((vaddr > 0x00007FFFFFFFFFFFULL) &&
1058 (vaddr < 0xFFFF800000000000ULL))) {
1059 return (NULL);
1060 }
1061
1062#if DEBUG
1063 return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_ucr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
1064#else
1065 return &pmap->pm_upml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
1066#endif
1067}
1068
6d2010ae
A
1069/*
1070 * Returns address of requested PDPT entry in the physmap.
1071 */
1072static inline pdpt_entry_t *
1073pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
1074{
1075 pml4_entry_t newpf;
1076 pml4_entry_t *pml4;
3e170ce0 1077 boolean_t is_ept;
6d2010ae 1078
6d2010ae 1079 pml4 = pmap64_pml4(pmap, vaddr);
3e170ce0
A
1080 is_ept = is_ept_pmap(pmap);
1081
1082 if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
6d2010ae
A
1083 newpf = *pml4 & PG_FRAME;
1084 return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
1085 [(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
1086 }
1087 return (NULL);
1088}
1089/*
1090 * Returns the address of the requested PDE entry in the physmap.
1091 */
1092static inline pd_entry_t *
1093pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
1094{
1095 pdpt_entry_t newpf;
1096 pdpt_entry_t *pdpt;
3e170ce0 1097 boolean_t is_ept;
6d2010ae 1098
6d2010ae 1099 pdpt = pmap64_pdpt(pmap, vaddr);
3e170ce0 1100 is_ept = is_ept_pmap(pmap);
6d2010ae 1101
3e170ce0 1102 if (pdpt && (*pdpt & PTE_VALID_MASK(is_ept))) {
6d2010ae
A
1103 newpf = *pdpt & PG_FRAME;
1104 return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
1105 [(vaddr >> PDSHIFT) & (NPDPG-1)];
1106 }
1107 return (NULL);
1108}
1109
1110static inline pd_entry_t *
1111pmap_pde(pmap_t m, vm_map_offset_t v)
1112{
1113 pd_entry_t *pde;
1114
6d2010ae
A
1115 pde = pmap64_pde(m, v);
1116
1117 return pde;
1118}
1119
1120
1121/*
1122 * return address of mapped pte for vaddr va in pmap pmap.
1123 *
1124 * In case the pde maps a superpage, return the pde, which, in this case
1125 * is the actual page table entry.
1126 */
1127static inline pt_entry_t *
1128pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
1129{
1130 pd_entry_t *pde;
1131 pd_entry_t newpf;
3e170ce0 1132 boolean_t is_ept;
6d2010ae
A
1133
1134 assert(pmap);
316670eb 1135 pde = pmap64_pde(pmap, vaddr);
6d2010ae 1136
3e170ce0
A
1137 is_ept = is_ept_pmap(pmap);
1138
1139 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1140 if (*pde & PTE_PS)
6d2010ae
A
1141 return pde;
1142 newpf = *pde & PG_FRAME;
1143 return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
1144 [i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
1145 }
1146 return (NULL);
1147}
5c9f4661
A
1148extern void pmap_alias(
1149 vm_offset_t ava,
1150 vm_map_offset_t start,
1151 vm_map_offset_t end,
1152 vm_prot_t prot,
1153 unsigned int options);
1154
316670eb
A
/* DPRINTF forwards to kprintf() in DEBUG builds and vanishes otherwise. */
#if DEBUG
#define DPRINTF(...)	kprintf(__VA_ARGS__)
#else
#define DPRINTF(...)
#endif
1160
b0d623f7 1161#endif /* MACH_KERNEL_PRIVATE */
316670eb 1162#endif /* _I386_PMAP_INTERNAL_ */