/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

/*
 *  Each entry in the pv_head_table is locked by a bit in the
 *  pv_lock_table.  The lock bits are accessed by the physical
 *  address of the page they lock.
 */
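
/*
 * Illustrative sketch (not part of the original source): the lock bits form a
 * simple bitmap indexed by the physical page index ("pai"), so locating the
 * bit that guards a given pv_head_table entry is plain shift/mask arithmetic.
 * Real callers go through the LOCK_PVH()/UNLOCK_PVH() macros in
 * pmap_internal.h rather than open-coding this.
 *
 *      int  pai  = ppn_to_pai(pn);
 *      char byte = pv_lock_table[pai >> 3];      -- 8 lock bits per char
 *      char mask = (char)(1 << (pai & 7));       -- bit within that byte
 *      boolean_t locked = ((byte & mask) != 0);
 */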

char                *pv_lock_table;         /* pointer to array of bits */
char                *pv_hash_lock_table;

pv_rooted_entry_t   pv_head_table;          /* array of entries, one per page */
uint32_t            pv_hashed_free_count = 0;
uint32_t            pv_hashed_kern_free_count = 0;

pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
uint32_t pmap_pagetable_corruption_incidents;
uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
uint64_t pmap_pagetable_corruption_interval_abstime;
thread_call_t              pmap_pagetable_corruption_log_call;
static thread_call_data_t  pmap_pagetable_corruption_log_call_data;
boolean_t pmap_pagetable_corruption_timeout = FALSE;

volatile uint32_t   mappingrecurse = 0;

uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark, pv_hashed_alloc_chunk, pv_hashed_kern_alloc_chunk;

thread_t    mapping_replenish_thread;
event_t     mapping_replenish_event, pmap_user_pv_throttle_event;

uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;

int pmap_asserts_enabled = (DEBUG);
int pmap_asserts_traced = 0;

unsigned
pmap_cache_attributes(ppnum_t pn)
{
    int cacheattr = pmap_get_cache_attributes(pn, FALSE);

    if (cacheattr & INTEL_PTE_NCACHE) {
        if (cacheattr & INTEL_PTE_PAT) {
            /* WC */
            return VM_WIMG_WCOMB;
        } else {
            return VM_WIMG_IO;
        }
    } else {
        return VM_WIMG_COPYBACK;
    }
}

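/*
 * Usage sketch (illustrative, not from this file): a caller that only needs a
 * cacheable/uncacheable decision can compare the VM_WIMG code returned above.
 *
 *      unsigned int wimg = pmap_cache_attributes(pn);
 *      boolean_t cacheable = (wimg == VM_WIMG_COPYBACK);
 */
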
void
pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr)
{
    unsigned int current, template = 0;
    int          pai;

    if (cacheattr & VM_MEM_NOT_CACHEABLE) {
        if (!(cacheattr & VM_MEM_GUARDED)) {
            template |= PHYS_PAT;
        }
        template |= PHYS_NCACHE;
    }

    assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        return;
    }

    /* override cache attributes for this phys page
     * Does not walk through existing mappings to adjust,
     * assumes page is disconnected
     */

    LOCK_PVH(pai);

    pmap_update_cache_attributes_locked(pn, template);

    current = pmap_phys_attributes[pai] & PHYS_CACHEABILITY_MASK;
    pmap_phys_attributes[pai] &= ~PHYS_CACHEABILITY_MASK;
    pmap_phys_attributes[pai] |= template;

    UNLOCK_PVH(pai);

    if ((template & PHYS_NCACHE) && !(current & PHYS_NCACHE)) {
        pmap_sync_page_attributes_phys(pn);
    }
}

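/*
 * Usage sketch (illustrative): per the template logic above, requesting
 * VM_MEM_NOT_CACHEABLE alone yields a PAT/write-combined mapping, while
 * adding VM_MEM_GUARDED yields a fully uncached one.
 *
 *      pmap_set_cache_attributes(pn, VM_MEM_NOT_CACHEABLE);                   -- WC
 *      pmap_set_cache_attributes(pn, VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED);  -- UC
 */
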
unsigned
pmap_get_cache_attributes(ppnum_t pn, boolean_t is_ept)
{
    if (last_managed_page == 0) {
        return 0;
    }

    if (!IS_MANAGED_PAGE(ppn_to_pai(pn))) {
        return PTE_NCACHE(is_ept);
    }

    /*
     * The cache attributes are read locklessly for efficiency.
     */
    unsigned int attr = pmap_phys_attributes[ppn_to_pai(pn)];
    unsigned int template = 0;

    /*
     * The PTA bit is currently unsupported for EPT PTEs.
     */
    if ((attr & PHYS_PAT) && !is_ept) {
        template |= INTEL_PTE_PAT;
    }

    /*
     * If the page isn't marked as NCACHE, the default for EPT entries
     * is WB.
     */
    if (attr & PHYS_NCACHE) {
        template |= PTE_NCACHE(is_ept);
    } else if (is_ept) {
        template |= INTEL_EPT_WB;
    }

    return template;
}

boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
    ppnum_t     pn, kdata_start, kdata_end;
    boolean_t   result;
    boot_args * args;

    args = (boot_args *) PE_state.bootArgs;

    // Allow pages that the booter added to the end of the kernel.
    // We may miss reporting some pages in this range that were freed
    // with ml_static_free()
    kdata_start = atop_32(args->kaddr);
    kdata_end   = atop_32(args->kaddr + args->ksize);

    assert(last_managed_page);
    assert(first <= last);

    for (result = FALSE, pn = first;
        !result
        && (pn <= last)
        && (pn <= last_managed_page);
        pn++) {
        if ((pn >= kdata_start) && (pn < kdata_end)) {
            continue;
        }
        result = (0 != (pmap_phys_attributes[pn] & PHYS_MANAGED));
    }

    return result;
}

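/*
 * Usage sketch (illustrative, hypothetical values): probing whether a
 * candidate physical range overlaps VM-managed memory before repurposing it.
 *
 *      ppnum_t base = atop_32(candidate_phys_base);
 *      ppnum_t end  = atop_32(candidate_phys_base + candidate_size - 1);
 *      if (pmap_has_managed_page(base, end)) {
 *          -- range is at least partly owned by the VM layer; leave it alone
 *      }
 */
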
boolean_t
pmap_is_noencrypt(ppnum_t pn)
{
    int pai;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        return FALSE;
    }

    if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) {
        return TRUE;
    }

    return FALSE;
}

void
pmap_set_noencrypt(ppnum_t pn)
{
    int pai;

    pai = ppn_to_pai(pn);

    if (IS_MANAGED_PAGE(pai)) {
        LOCK_PVH(pai);

        pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;

        UNLOCK_PVH(pai);
    }
}

void
pmap_clear_noencrypt(ppnum_t pn)
{
    int pai;

    pai = ppn_to_pai(pn);

    if (IS_MANAGED_PAGE(pai)) {
        /*
         * synchronization at VM layer prevents PHYS_NOENCRYPT
         * from changing state, so we don't need the lock to inspect
         */
        if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) {
            LOCK_PVH(pai);

            pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;

            UNLOCK_PVH(pai);
        }
    }
}

void
compute_pmap_gc_throttle(void *arg __unused)
{
}

void
pmap_lock_phys_page(ppnum_t pn)
{
    int pai;

    pai = ppn_to_pai(pn);

    if (IS_MANAGED_PAGE(pai)) {
        LOCK_PVH(pai);
    } else {
        simple_lock(&phys_backup_lock, LCK_GRP_NULL);
    }
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
    int pai;

    pai = ppn_to_pai(pn);

    if (IS_MANAGED_PAGE(pai)) {
        UNLOCK_PVH(pai);
    } else {
        simple_unlock(&phys_backup_lock);
    }
}

__private_extern__ void
pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1, 2))
{
    if (pmap_pagetable_corruption_incidents > 0) {
        int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
        (*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
        for (i = 0; i < e; i++) {
            (*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n",
                pmap_pagetable_corruption_records[i].incident,
                pmap_pagetable_corruption_records[i].reason,
                pmap_pagetable_corruption_records[i].action,
                pmap_pagetable_corruption_records[i].abstime);
        }
    }
}

void
pmap_pagetable_corruption_log_setup(void)
{
    if (pmap_pagetable_corruption_log_call == NULL) {
        nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
        thread_call_setup(&pmap_pagetable_corruption_log_call_data,
            (thread_call_func_t) pmap_pagetable_corruption_msg_log,
            (thread_call_param_t) &printf);
        pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
    }
}

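/*
 * Illustrative sketch (an assumption based on the generic thread_call KPI,
 * not copied from this file): once set up, the log call is armed by computing
 * a deadline pmap_pagetable_corruption_interval_abstime in the future and
 * handing it to thread_call_enter_delayed().
 *
 *      uint64_t deadline;
 *      clock_deadline_for_periodic_event(pmap_pagetable_corruption_interval_abstime,
 *          mach_absolute_time(), &deadline);
 *      thread_call_enter_delayed(pmap_pagetable_corruption_log_call, deadline);
 */
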
void
mapping_free_prime(void)
{
    int                 i;
    pv_hashed_entry_t   pvh_e;
    pv_hashed_entry_t   pvh_eh;
    pv_hashed_entry_t   pvh_et;
    int                 pv_cnt;

    /* Scale based on DRAM size */
    pv_hashed_low_water_mark = MAX(PV_HASHED_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 2000);
    pv_hashed_low_water_mark = MIN(pv_hashed_low_water_mark, 16000);
    /* Alterable via sysctl */
    pv_hashed_kern_low_water_mark = MAX(PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 1000);
    pv_hashed_kern_low_water_mark = MIN(pv_hashed_kern_low_water_mark, 16000);
    pv_hashed_kern_alloc_chunk = PV_HASHED_KERN_ALLOC_CHUNK_INITIAL;
    pv_hashed_alloc_chunk = PV_HASHED_ALLOC_CHUNK_INITIAL;

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

    for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK_INITIAL); i++) {
        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

        pvh_e->qlink.next = (queue_entry_t)pvh_eh;
        pvh_eh = pvh_e;

        if (pvh_et == PV_HASHED_ENTRY_NULL) {
            pvh_et = pvh_e;
        }
        pv_cnt++;
    }
    PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

    pv_cnt = 0;
    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
    for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK_INITIAL; i++) {
        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

        pvh_e->qlink.next = (queue_entry_t)pvh_eh;
        pvh_eh = pvh_e;

        if (pvh_et == PV_HASHED_ENTRY_NULL) {
            pvh_et = pvh_e;
        }
        pv_cnt++;
    }
    PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}

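/*
 * Worked example of the scaling above (illustrative): with 8 GB of DRAM,
 * (sane_size >> 30) is 8, so the user low-water mark evaluates to
 * MAX(default, 8 * 2000) = 16000 and the MIN(..., 16000) clamp leaves it at
 * 16000, while the kernel mark evaluates to MAX(default, 8 * 1000) = 8000
 * (assuming both defaults are below these products).
 */
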
void mapping_replenish(void);

void
mapping_adjust(void)
{
    kern_return_t mres;

    pmap_pagetable_corruption_log_setup();

    mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
    if (mres != KERN_SUCCESS) {
        panic("pmap: mapping_replenish_thread creation failed");
    }
    thread_deallocate(mapping_replenish_thread);
}

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_kernel_reserve_replenish_stat;
unsigned pmap_user_reserve_replenish_stat;
unsigned pmap_kern_reserve_alloc_stat;

__attribute__((noreturn))
void
mapping_replenish(void)
{
    pv_hashed_entry_t   pvh_e;
    pv_hashed_entry_t   pvh_eh;
    pv_hashed_entry_t   pvh_et;
    int                 pv_cnt;
    unsigned            i;

    /* We qualify for VM privileges...*/
    current_thread()->options |= TH_OPT_VMPRIV;

    for (;;) {
        while (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
            pv_cnt = 0;
            pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

            for (i = 0; i < pv_hashed_kern_alloc_chunk; i++) {
                pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
                pvh_e->qlink.next = (queue_entry_t)pvh_eh;
                pvh_eh = pvh_e;

                if (pvh_et == PV_HASHED_ENTRY_NULL) {
                    pvh_et = pvh_e;
                }
                pv_cnt++;
            }
            pmap_kernel_reserve_replenish_stat += pv_cnt;
            PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
        }

        pv_cnt = 0;
        pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

        if (pv_hashed_free_count < pv_hashed_low_water_mark) {
            for (i = 0; i < pv_hashed_alloc_chunk; i++) {
                pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

                pvh_e->qlink.next = (queue_entry_t)pvh_eh;
                pvh_eh = pvh_e;

                if (pvh_et == PV_HASHED_ENTRY_NULL) {
                    pvh_et = pvh_e;
                }
                pv_cnt++;
            }
            pmap_user_reserve_replenish_stat += pv_cnt;
            PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
        }
        /* Wake threads throttled while the kernel reserve was being replenished.
         */
        if (pmap_pv_throttled_waiters) {
            pmap_pv_throttled_waiters = 0;
            thread_wakeup(&pmap_user_pv_throttle_event);
        }
        /* Check if the kernel pool has been depleted since the
         * first pass, to reduce refill latency.
         */
        if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
            continue;
        }
        /* Block sans continuation to avoid yielding kernel stack */
        assert_wait(&mapping_replenish_event, THREAD_UNINT);
        mappingrecurse = 0;
        thread_block(THREAD_CONTINUE_NULL);
        pmap_mapping_thread_wakeups++;
    }
}

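/*
 * Illustrative sketch (an assumption, not copied from pmap_internal.h): the
 * PV allocation fast paths are what wake this thread. A consumer noticing a
 * free list below its low-water mark would do roughly the following, using
 * mappingrecurse so that only one wakeup is posted at a time.
 *
 *      if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark &&
 *          hw_compare_and_store(0, 1, &mappingrecurse)) {
 *          thread_wakeup(&mapping_replenish_event);
 *      }
 */
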
/*
 *  Set specified attribute bits.
 */
void
phys_attribute_set(
    ppnum_t         pn,
    int             bits)
{
    int             pai;

    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr) {
        return;
    }

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /* Not a managed page. */
        return;
    }

    LOCK_PVH(pai);
    pmap_phys_attributes[pai] |= bits;
    UNLOCK_PVH(pai);
}

/*
 *  Set the modify bit on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
    phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *  Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(ppnum_t pn)
{
    phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
}

/*
 *  pmap_is_modified:
 *
 *  Return whether or not the specified physical page is modified
 *  by any physical maps.
 */
boolean_t
pmap_is_modified(ppnum_t pn)
{
    if (phys_attribute_test(pn, PHYS_MODIFIED)) {
        return TRUE;
    }
    return FALSE;
}

/*
 *  pmap_clear_reference:
 *
 *  Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(ppnum_t pn)
{
    phys_attribute_clear(pn, PHYS_REFERENCED, 0, NULL);
}

void
pmap_set_reference(ppnum_t pn)
{
    phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *  pmap_is_referenced:
 *
 *  Return whether or not the specified physical page is referenced
 *  by any physical maps.
 */
boolean_t
pmap_is_referenced(ppnum_t pn)
{
    if (phys_attribute_test(pn, PHYS_REFERENCED)) {
        return TRUE;
    }
    return FALSE;
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pn)
{
    int          refmod;
    unsigned int retval = 0;

    refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);

    if (refmod & PHYS_MODIFIED) {
        retval |= VM_MEM_MODIFIED;
    }
    if (refmod & PHYS_REFERENCED) {
        retval |= VM_MEM_REFERENCED;
    }

    return retval;
}

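/*
 * Usage sketch (illustrative): a pageout-style caller deciding whether a page
 * is dirty from the generic refmod bits returned above.
 *
 *      unsigned int refmod = pmap_get_refmod(pn);
 *      boolean_t dirty = ((refmod & VM_MEM_MODIFIED) != 0);
 */
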
void
pmap_clear_refmod_options(ppnum_t pn, unsigned int mask, unsigned int options, void *arg)
{
    unsigned int x86Mask;

    x86Mask = (((mask & VM_MEM_MODIFIED)  ? PHYS_MODIFIED : 0)
        | ((mask & VM_MEM_REFERENCED) ? PHYS_REFERENCED : 0));

    phys_attribute_clear(pn, x86Mask, options, arg);
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pn, unsigned int mask)
{
    unsigned int x86Mask;

    x86Mask = (((mask & VM_MEM_MODIFIED)  ? PHYS_MODIFIED : 0)
        | ((mask & VM_MEM_REFERENCED) ? PHYS_REFERENCED : 0));

    phys_attribute_clear(pn, x86Mask, 0, NULL);
}

unsigned int
pmap_disconnect(ppnum_t pa)
{
    return pmap_disconnect_options(pa, 0, NULL);
}

/*
 *  pmap_disconnect_options
 *
 *  Disconnect all mappings for this page and return reference and change status
 *  in generic format.
 */
unsigned int
pmap_disconnect_options(ppnum_t pa, unsigned int options, void *arg)
{
    unsigned refmod, vmrefmod = 0;

    pmap_page_protect_options(pa, 0, options, arg); /* disconnect the page */

    pmap_assert(pa != vm_page_fictitious_addr);
    if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa) || (options & PMAP_OPTIONS_NOREFMOD)) {
        return 0;
    }
    refmod = pmap_phys_attributes[pa] & (PHYS_MODIFIED | PHYS_REFERENCED);

    if (refmod & PHYS_MODIFIED) {
        vmrefmod |= VM_MEM_MODIFIED;
    }
    if (refmod & PHYS_REFERENCED) {
        vmrefmod |= VM_MEM_REFERENCED;
    }

    return vmrefmod;
}
