/* apple/xnu (xnu-6153.141.1) - osfmk/i386/pmap_x86_common.c */
/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_assert.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void pmap_remove_range(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte);

void pmap_remove_range_options(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte,
	int options);

void pmap_reusable_range(
	pmap_t pmap,
	vm_map_offset_t va,
	pt_entry_t *spte,
	pt_entry_t *epte,
	boolean_t reusable);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
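/*
 * Note: pmap_nesting_size_max is computed as (0 - NBPDE), i.e. the largest
 * 64-bit value that is still aligned to a 2MiB (NBPDE) boundary.
 */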

/*
 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 * grand    = the pmap that we will nest subord into
 * subord   = the pmap that goes into grand
 * va_start = start of the range in grand to be nested
 * nstart   = start of the corresponding range in subord
 * size     = size of the nested area (up to 16TB)
 *
 * Inserts a pmap into another. This is used to implement shared segments.
 *
 * Note that we depend upon higher-level VM locks to ensure that things don't change while
 * we are doing this. For example, the VM should not perform any pmap enters while it is
 * nesting, nor perform two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */

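/*
 * Illustrative (hypothetical) call from the VM layer, nesting a shared-region
 * pmap into a task's pmap at the same, suitably aligned offset:
 *
 *	kr = pmap_nest(task_pmap, shared_region_pmap,
 *	    SHARED_REGION_BASE_X86_64, SHARED_REGION_BASE_X86_64,
 *	    SHARED_REGION_SIZE_X86_64);
 */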
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size)
{
	vm_map_offset_t vaddr, nvaddr;
	pd_entry_t      *pde, *npde;
	unsigned int    i;
	uint64_t        num_pde;

	assert(!is_ept_pmap(grand));
	assert(!is_ept_pmap(subord));

	if ((size & (pmap_nesting_size_min - 1)) ||
	    (va_start & (pmap_nesting_size_min - 1)) ||
	    (nstart & (pmap_nesting_size_min - 1)) ||
	    ((size >> 28) > 65536)) {   /* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
	}

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart) {
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(va_start));

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK_EXCLUSIVE(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		} else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0) {
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			}
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap_pde(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0) {
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			}
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}

/*
 * kern_return_t pmap_unnest(grand, vaddr)
 *
 * grand = the pmap that we will un-nest subord from
 * vaddr = start of range in pmap to be unnested
 *
 * Removes a pmap from another. This is used to implement shared segments.
 */

kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	if ((size & (pmap_nesting_size_min - 1)) ||
	    (vaddr & (pmap_nesting_size_min - 1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		    grand, vaddr, size);
	}

	assert(!is_ept_pmap(grand));

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE - 1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
	size = va_end - va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde;) {
		if (pdptnum(grand, vaddr) != npdpt) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) {
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		}
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}

kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	__unused uint64_t size,
	__unused unsigned int options)
{
	return pmap_unnest(grand, vaddr, size);
}

/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t
pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
{
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	PMAP_LOCK_EXCLUSIVE(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT - 1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT - 1));
		rval = TRUE;
	}

	PMAP_UNLOCK_EXCLUSIVE(p);

	return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
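/*
 * Caller sketch (hypothetical): the full physical address for "va" can be
 * rebuilt from the returned page number plus the page offset, e.g.
 *
 *	ppnum_t pn = pmap_find_phys(pmap, va);
 *	if (pn != 0) {
 *		pmap_paddr_t pa = ((pmap_paddr_t)i386_ptob(pn)) | (va & PAGE_MASK);
 *	}
 */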
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t *ptp;
	pd_entry_t *pdep;
	ppnum_t ppn = 0;
	pd_entry_t pde;
	pt_entry_t pte;
	boolean_t is_ept, locked = FALSE;

	is_ept = is_ept_pmap(pmap);

	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	return ppn;
}

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
	pv_rooted_entry_t pv_h, pv_e;
	pv_hashed_entry_t pvh_e, nexth;
	vm_map_offset_t vaddr;
	pmap_t pmap;
	pt_entry_t *ptep;
	boolean_t is_ept;
	unsigned ept_attributes;

	assert(IS_MANAGED_PAGE(pn));
	assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

	/* We don't support the PAT bit for EPT PTEs */
	if (attributes & INTEL_PTE_NCACHE) {
		ept_attributes = INTEL_EPT_NCACHE;
	} else {
		ept_attributes = INTEL_EPT_WB;
	}

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = PVE_VA(pv_e);
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep) {
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
			}

			is_ept = is_ept_pmap(pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			if (!is_ept) {
				pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			} else {
				pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
			}
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

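/*
 * Temporarily exclude this CPU from TLB-coherency (shootdown) interrupts:
 * marking the CPU's CR3 inactive lets remote shootdowns skip it; when
 * filtering is disabled again, the CR3 is marked active and any pending
 * invalidations are processed via pmap_update_interrupt().
 */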
void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
{
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		mfence();
		pmap_update_interrupt();
	}
}


/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte cannot be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information. That is, this routine must actually
 * insert this page into the given map NOW.
 */

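/*
 * Illustrative (hypothetical) call, establishing a wired, writable,
 * non-executable kernel mapping of physical page "pn" at "vaddr":
 *
 *	kr = pmap_enter(kernel_pmap, vaddr, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
 */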
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_offset_t vaddr,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
}

#define PTE_LOCK(EPT)   INTEL_PTE_SWLOCK
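/*
 * INTEL_PTE_SWLOCK is a software-available bit in the PTE, used below as a
 * per-PTE spinlock: PTE_LOCK_LOCK() spins (issuing PAUSE) until it can set
 * the bit with an atomic compare-exchange (acquire), and PTE_LOCK_UNLOCK()
 * clears it with an atomic AND (release).
 */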

static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);

void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
	pt_entry_t pte;
plretry:
	while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
		__builtin_ia32_pause();
	}
	if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
		return;
	}

	goto plretry;
}

void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
	__c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}

kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_offset_t vaddr,
	ppnum_t pn,
	vm_prot_t prot,
	__unused vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	void *arg)
{
	pt_entry_t *pte = NULL;
	pv_rooted_entry_t pv_h;
	ppnum_t pai;
	pv_hashed_entry_t pvh_e;
	pv_hashed_entry_t pvh_new;
	pt_entry_t template;
	pmap_paddr_t old_pa;
	pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t need_tlbflush = FALSE;
	boolean_t set_NX;
	char oattr;
	boolean_t old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t delpage_pm_obj = NULL;
	uint64_t delpage_pde_index = 0;
	pt_entry_t old_pte;
	kern_return_t kr = KERN_FAILURE;
	boolean_t is_ept;
	boolean_t is_altacct;
	boolean_t ptelocked = FALSE;

	pmap_intr_assert();

	if (__improbable(pmap == PMAP_NULL)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (__improbable(pn == vm_page_guard_addr)) {
		return KERN_INVALID_ARGUMENT;
	}

	is_ept = is_ept_pmap(pmap);

	/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
	 * unused value for that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);


	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
	    prot);

	if ((prot & VM_PROT_EXECUTE)) {
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

#if DEVELOPMENT || DEBUG
	if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
		set_NX = FALSE;
	}

	if (__improbable(set_NX && (pmap == kernel_pmap) &&
	    ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
	    (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}
#endif

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK_SHARED(pmap);

	/*
	 * Expand pmap to include this pte. Assume that
	 * pmap is always expanded to include enough hardware
	 * pages to map one VM page.
	 */
	if (__improbable(superpage)) {
		while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK_SHARED(pmap);
			kr = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr != KERN_SUCCESS) {
				goto done1;
			}
			PMAP_LOCK_SHARED(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap;
			 * going to grow pde-level page(s)
			 */
			PMAP_UNLOCK_SHARED(pmap);
			kr = pmap_expand(pmap, vaddr, options);
			if (kr != KERN_SUCCESS) {
				goto done1;
			}
			PMAP_LOCK_SHARED(pmap);
		}
	}

	if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
		PMAP_UNLOCK_SHARED(pmap);
		kr = KERN_SUCCESS;
		goto done1;
	}

	if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		pmap_store_pte(pte, 0);
	}

	PTE_LOCK_LOCK(pte);
	ptelocked = TRUE;

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	if (old_pa == 0 &&
	    PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
		/*
		 * "pmap" should be locked at this point, so this should
		 * not race with another pmap_enter() or pmap_remove_range().
		 */
		assert(pmap != kernel_pmap);

		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
		    PAGE_SIZE);
		if (*pte & PTE_COMPRESSED_ALT) {
			pmap_ledger_debit(
				pmap,
				task_ledgers.alternate_accounting_compressed,
				PAGE_SIZE);
		} else {
			/* was part of the footprint */
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
			    PAGE_SIZE);
		}
		/* marker will be cleared below */
	}

	/*
	 * If we have a previous managed page, lock the pv entry now. After
	 * we lock it, check to see if someone beat us to the lock and, if so,
	 * drop the lock.
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);        /* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 * Special case if the incoming physical page is already mapped
	 * at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));

		/*
		 * May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa);

		if (__probable(!is_ept)) {
			template |= INTEL_PTE_VALID;
		} else {
			template |= INTEL_EPT_IPAT;
		}

		template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

		/*
		 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
		 */
		if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
			if (!(flags & VM_MEM_GUARDED)) {
				template |= INTEL_PTE_PAT;
			}
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap && !is_ept) {
			template |= INTEL_PTE_USER;
		}

		if (prot & VM_PROT_READ) {
			template |= PTE_READ(is_ept);
		}

		if (prot & VM_PROT_WRITE) {
			template |= PTE_WRITE(is_ept);
			if (is_ept && !pmap_ept_support_ad) {
				template |= PTE_MOD(is_ept);
				if (old_pa_locked) {
					assert(IS_MANAGED_PAGE(pai));
					pmap_phys_attributes[pai] |= PHYS_MODIFIED;
				}
			}
		}
		if (prot & VM_PROT_EXECUTE) {
			assert(set_NX == 0);
			template = pte_set_ex(template, is_ept);
		}

		if (set_NX) {
			template = pte_remove_ex(template, is_ept);
		}

		if (wired) {
			template |= PTE_WIRED;
			if (!iswired(old_attributes)) {
				OSAddAtomic(+1, &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}

		if (superpage) {        /* this path can not be used */
			template |= PTE_PS;     /* to change the page size! */
		}
		if (old_attributes == template) {
			goto dont_update_pte;
		}

		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != PTE_WIRED);

		/* Optimisation: avoid TLB flush when adding writability */
		if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
			if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
				need_tlbflush = FALSE;
			}
		}

		/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
		if (__improbable(is_ept && !pmap_ept_support_ad)) {
			template |= PTE_REF(is_ept);
			if (old_pa_locked) {
				assert(IS_MANAGED_PAGE(pai));
				pmap_phys_attributes[pai] |= PHYS_REFERENCED;
			}
		}

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;

		assert((*pte & PTE_LOCK(is_ept)) != 0);

		do {
			opte = *pte;
			npte = template | (opte & (PTE_REF(is_ept) |
			    PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
		} while (!pmap_cmpx_pte(pte, opte, npte));

dont_update_pte:
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto done2;
	}

	/*
	 * Outline of code from here:
	 * 1) If va was mapped, update TLBs, remove the mapping
	 *    and remove old pvlist entry.
	 * 2) Add pvlist entry for new mapping
	 * 3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3). If the new physical page is not
	 * managed, step 2) is skipped.
	 */
	/* TODO: add opportunistic refmod collect */
	if (old_pa != (pmap_paddr_t) 0) {
		boolean_t was_altacct = FALSE;

		/*
		 * Don't do anything to pages outside valid memory here.
		 * Instead convince the code that enters a new mapping
		 * to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, PTE_LOCK(is_ept));

		if (IS_MANAGED_PAGE(pai)) {
			/*
			 * Remove the mapping from the pvlist for
			 * this physical page.
			 * We'll end up with either a rooted pv or a
			 * hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
		}

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (pmap != kernel_pmap) {
				/* update pmap stats */
				if (IS_REUSABLE_PAGE(pai)) {
					PMAP_STATS_ASSERTF(
						(pmap->stats.reusable > 0,
						"reusable %d",
						pmap->stats.reusable));
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					PMAP_STATS_ASSERTF(
						(pmap->stats.internal > 0,
						"internal %d",
						pmap->stats.internal));
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					PMAP_STATS_ASSERTF(
						(pmap->stats.external > 0,
						"external %d",
						pmap->stats.external));
					OSAddAtomic(-1, &pmap->stats.external);
				}

				/* update ledgers */
				if (was_altacct) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(!was_altacct);
					assert(IS_INTERNAL_PAGE(pai));
					/* was already not in phys_footprint */
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(!was_altacct);
					assert(!IS_REUSABLE_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
				} else {
					/* not an internal page */
				}
			}
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}

			if (!is_ept) {
				pmap_phys_attributes[pai] |= oattr;
			} else {
				pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
			}
		} else {
			/*
			 * old_pa is not managed.
			 * Do removal part of accounting.
			 */

			if (pmap != kernel_pmap) {
#if 00
				assert(pmap->stats.device > 0);
				OSAddAtomic(-1, &pmap->stats.device);
#endif
			}
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * If we had a previously managed page locked, unlock it now.
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);     /* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {
		/*
		 * Step 2) Enter the mapping in the PV list for this
		 * physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 * No mappings yet, use rooted pv
			 */
			pv_h->va_and_flags = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);

			if (options & PMAP_OPTIONS_INTERNAL) {
				pmap_phys_attributes[pai] |= PHYS_INTERNAL;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
			}
			if (options & PMAP_OPTIONS_REUSABLE) {
				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
			}
			if ((options & PMAP_OPTIONS_ALT_ACCT) &&
			    IS_INTERNAL_PAGE(pai)) {
				pv_h->va_and_flags |= PVE_IS_ALTACCT;
				is_altacct = TRUE;
			} else {
				pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
				is_altacct = FALSE;
			}
		} else {
			/*
			 * Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * The pv list is empty. If we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's;
					 * otherwise, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart, bringing the pv_e in with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PTE_LOCK_UNLOCK(pte);
						PMAP_UNLOCK_SHARED(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e) {
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
			}

			pvh_e->va_and_flags = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			if ((options & PMAP_OPTIONS_ALT_ACCT) &&
			    IS_INTERNAL_PAGE(pai)) {
				pvh_e->va_and_flags |= PVE_IS_ALTACCT;
				is_altacct = TRUE;
			} else {
				pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
				is_altacct = FALSE;
			}
			pv_hash_add(pvh_e, pv_h);

			/*
			 * Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
		if (pmap != kernel_pmap) {
			/* update pmap stats */
			if (IS_REUSABLE_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}

			/* update ledgers */
			if (is_altacct) {
				/* internal but also alternate accounting */
				assert(IS_INTERNAL_PAGE(pai));
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				/* alternate accounting, so not in footprint */
			} else if (IS_REUSABLE_PAGE(pai)) {
				assert(!is_altacct);
				assert(IS_INTERNAL_PAGE(pai));
				/* internal but reusable: not in footprint */
			} else if (IS_INTERNAL_PAGE(pai)) {
				assert(!is_altacct);
				assert(!IS_REUSABLE_PAGE(pai));
				/* internal: add to footprint */
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			} else {
				/* not internal: not in footprint */
			}
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap != kernel_pmap) {
#if 00
			OSAddAtomic(+1, &pmap->stats.device);
			PMAP_STATS_PEAK(pmap->stats.device);
#endif
		}
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa);

	if (!is_ept) {
		template |= INTEL_PTE_VALID;
	} else {
		template |= INTEL_EPT_IPAT;
	}


	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */

	template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

	/*
	 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
	 */
	if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
		if (!(flags & VM_MEM_GUARDED)) {
			template |= INTEL_PTE_PAT;
		}
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap && !is_ept) {
		template |= INTEL_PTE_USER;
	}
	if (prot & VM_PROT_READ) {
		template |= PTE_READ(is_ept);
	}
	if (prot & VM_PROT_WRITE) {
		template |= PTE_WRITE(is_ept);
		if (is_ept && !pmap_ept_support_ad) {
			template |= PTE_MOD(is_ept);
			if (IS_MANAGED_PAGE(pai)) {
				pmap_phys_attributes[pai] |= PHYS_MODIFIED;
			}
		}
	}
	if (prot & VM_PROT_EXECUTE) {
		assert(set_NX == 0);
		template = pte_set_ex(template, is_ept);
	}

	if (set_NX) {
		template = pte_remove_ex(template, is_ept);
	}
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (__improbable(superpage)) {
		template |= INTEL_PTE_PS;
	}

	/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
	if (__improbable(is_ept && !pmap_ept_support_ad)) {
		template |= PTE_REF(is_ept);
		if (IS_MANAGED_PAGE(pai)) {
			pmap_phys_attributes[pai] |= PHYS_REFERENCED;
		}
	}
	template |= PTE_LOCK(is_ept);
	pmap_store_pte(pte, template);

	/*
	 * If this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al. from finding it until the pte
	 * has been stored.
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
done2:
	if (need_tlbflush == TRUE) {
		if (options & PMAP_OPTIONS_NOFLUSH) {
			PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
		} else {
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		}
	}
	if (ptelocked) {
		PTE_LOCK_UNLOCK(pte);
	}
	PMAP_UNLOCK_SHARED(pmap);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
		if (m == VM_PAGE_NULL) {
			panic("pmap_enter: pte page not in object");
		}
		VM_PAGE_FREE(m);
		vm_object_unlock(delpage_pm_obj);
		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	kr = KERN_SUCCESS;
done1:
	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
	return kr;
}

/*
 * Remove a range of hardware page-table entries.
 * The entries given are the first (inclusive)
 * and last (exclusive) entries for the VM pages.
 * The virtual address is the va for the first pte.
 *
 * The pmap must be locked.
 * If the pmap is not the kernel pmap, the range must lie
 * entirely within one pte-page. This is NOT checked.
 * Assumes that the pte-page exists.
 */

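/*
 * pmap_remove_range_options() below works in two passes: the first pass
 * clears the valid bits to "freeze" the PTEs, a single PMAP_UPDATE_TLBS()
 * propagates the invalidations, and the second pass collects the ref/mod
 * bits, detaches the PV list entries and zeroes the PTEs.
 */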
void
pmap_remove_range(
	pmap_t pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t *spte,
	pt_entry_t *epte)
{
	pmap_remove_range_options(pmap, start_vaddr, spte, epte,
	    PMAP_OPTIONS_REMOVE);
}

void
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t *spte,
	pt_entry_t *epte,
	int options)
{
	pt_entry_t *cpte;
	pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t pvh_e;
	int pvh_cnt = 0;
	int num_removed, num_unwired, num_found, num_invalid;
	int stats_external, stats_internal, stats_reusable;
	uint64_t stats_compressed;
	int ledgers_internal, ledgers_alt_internal;
	uint64_t ledgers_compressed, ledgers_alt_compressed;
	ppnum_t pai;
	pmap_paddr_t pa;
	vm_map_offset_t vaddr;
	boolean_t is_ept = is_ept_pmap(pmap);
	boolean_t was_altacct;

	num_removed = 0;
	num_unwired = 0;
	num_found = 0;
	num_invalid = 0;
	stats_external = 0;
	stats_internal = 0;
	stats_reusable = 0;
	stats_compressed = 0;
	ledgers_internal = 0;
	ledgers_compressed = 0;
	ledgers_alt_internal = 0;
	ledgers_alt_compressed = 0;
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0) {
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				stats_compressed++;
				ledgers_compressed++;
				if (p & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				/* clear marker(s) */
				/* XXX probably does not need to be atomic! */
				pmap_update_pte(cpte, INTEL_PTE_COMPRESSED_MASK, 0);
			}
			continue;
		}
		num_found++;

		if (iswired(p)) {
			num_unwired++;
		}

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 * Outside range of managed physical memory.
			 * Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			continue;
		}

		if ((p & PTE_VALID_MASK(is_ept)) == 0) {
			num_invalid++;
		}

		/* invalidate the PTE */
		pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	    cpte < epte;
	    cpte++, vaddr += PAGE_SIZE_64) {
		pa = pte_to_pa(*cpte);
		if (pa == 0) {
check_pte_for_compressed_marker:
			/*
			 * This PTE could have been replaced with a
			 * "compressed" marker after our first "freeze"
			 * loop above, so check again.
			 */
			if ((options & PMAP_OPTIONS_REMOVE) &&
			    (PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
				assert(pmap != kernel_pmap);
				/* one less "compressed"... */
				stats_compressed++;
				ledgers_compressed++;
				if (*cpte & PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					ledgers_alt_compressed++;
				}
				pmap_store_pte(cpte, 0);
			}
			continue;
		}

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			goto check_pte_for_compressed_marker;
		}

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);

		num_removed++;
		/* update pmap stats */
		if (IS_REUSABLE_PAGE(pai)) {
			stats_reusable++;
		} else if (IS_INTERNAL_PAGE(pai)) {
			stats_internal++;
		} else {
			stats_external++;
		}
		/* update ledgers */
		if (was_altacct) {
			/* internal and alternate accounting */
			assert(IS_INTERNAL_PAGE(pai));
			ledgers_internal++;
			ledgers_alt_internal++;
		} else if (IS_REUSABLE_PAGE(pai)) {
			/* internal but reusable */
			assert(!was_altacct);
			assert(IS_INTERNAL_PAGE(pai));
		} else if (IS_INTERNAL_PAGE(pai)) {
			/* internal */
			assert(!was_altacct);
			assert(!IS_REUSABLE_PAGE(pai));
			ledgers_internal++;
		} else {
			/* not internal */
		}

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		if (!is_ept) {
			pmap_phys_attributes[pai] |=
			    *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
		} else {
			pmap_phys_attributes[pai] |=
			    ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
		}

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 * Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed) {
		panic("pmap_remove_range: resident_count");
	}
#endif
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	PMAP_STATS_ASSERTF((pmap->stats.resident_count >= num_removed,
	    "pmap=%p num_removed=%d stats.resident_count=%d",
	    pmap, num_removed, pmap->stats.resident_count));
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

	if (pmap != kernel_pmap) {
		PMAP_STATS_ASSERTF((pmap->stats.external >= stats_external,
		    "pmap=%p stats_external=%d stats.external=%d",
		    pmap, stats_external, pmap->stats.external));
		PMAP_STATS_ASSERTF((pmap->stats.internal >= stats_internal,
		    "pmap=%p stats_internal=%d stats.internal=%d",
		    pmap, stats_internal, pmap->stats.internal));
		PMAP_STATS_ASSERTF((pmap->stats.reusable >= stats_reusable,
		    "pmap=%p stats_reusable=%d stats.reusable=%d",
		    pmap, stats_reusable, pmap->stats.reusable));
		PMAP_STATS_ASSERTF((pmap->stats.compressed >= stats_compressed,
		    "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
		    pmap, stats_compressed, pmap->stats.compressed));

		/* update pmap stats */
		if (stats_external) {
			OSAddAtomic(-stats_external, &pmap->stats.external);
		}
		if (stats_internal) {
			OSAddAtomic(-stats_internal, &pmap->stats.internal);
		}
		if (stats_reusable) {
			OSAddAtomic(-stats_reusable, &pmap->stats.reusable);
		}
		if (stats_compressed) {
			OSAddAtomic64(-stats_compressed, &pmap->stats.compressed);
		}
		/* update ledgers */
		if (ledgers_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal,
			    machine_ptob(ledgers_internal));
		}
		if (ledgers_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.internal_compressed,
			    machine_ptob(ledgers_compressed));
		}
		if (ledgers_alt_internal) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting,
			    machine_ptob(ledgers_alt_internal));
		}
		if (ledgers_alt_compressed) {
			pmap_ledger_debit(pmap,
			    task_ledgers.alternate_accounting_compressed,
			    machine_ptob(ledgers_alt_compressed));
		}
		pmap_ledger_debit(pmap,
		    task_ledgers.phys_footprint,
		    machine_ptob((ledgers_internal -
		    ledgers_alt_internal) +
		    (ledgers_compressed -
		    ledgers_alt_compressed)));
	}

#if TESTING
	if (pmap->stats.wired_count < num_unwired) {
		panic("pmap_remove_range: wired_count");
	}
#endif
	PMAP_STATS_ASSERTF((pmap->stats.wired_count >= num_unwired,
	    "pmap=%p num_unwired=%d stats.wired_count=%d",
	    pmap, num_unwired, pmap->stats.wired_count));
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
	pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

	return;
}


/*
 * Remove the given range of addresses
 * from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t map,
	addr64_t s64,
	addr64_t e64)
{
	pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}
#define PLCHECK_THRESHOLD (8)
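/*
 * After PLCHECK_THRESHOLD PDE-sized chunks, pmap_remove_options() starts
 * checking rdtsc64() against a deadline and briefly drops the pmap lock
 * once the deadline passes, bounding preemption latency for large removals.
 */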

void
pmap_remove_options(
	pmap_t map,
	addr64_t s64,
	addr64_t e64,
	int options)
{
	pt_entry_t *pde;
	pt_entry_t *spte, *epte;
	addr64_t l64;
	uint64_t deadline = 0;
	boolean_t is_ept;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64) {
		return;
	}

	is_ept = is_ept_pmap(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	PMAP_LOCK_EXCLUSIVE(map);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		pml4_entry_t *pml4e = pmap64_pml4(map, s64);
		if ((pml4e == NULL) ||
		    ((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
			s64 = (s64 + NBPML4) & ~(PML4MASK);
			continue;
		}
		pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
		if ((pdpte == NULL) ||
		    ((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
			s64 = (s64 + NBPDPT) & ~(PDPTMASK);
			continue;
		}

		l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);

		if (l64 > e64) {
			l64 = e64;
		}

		pde = pmap_pde(map, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte + 1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range_options(map, s64, spte, epte,
			    options);
		}
		s64 = l64;

		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(map);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(map);
					deadline = rdtsc64() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}

void
pmap_page_protect(
	ppnum_t pn,
	vm_prot_t prot)
{
	pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 * Routine:	pmap_page_protect_options
 *
 * Function:
 *	Lower the permission for all mappings to a given
 *	page.
 */
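/*
 * Only VM_PROT_READ and VM_PROT_READ|VM_PROT_EXECUTE downgrade existing
 * mappings in place; VM_PROT_ALL is a no-op, and any other protection
 * removes the mappings entirely, possibly leaving a "compressed" marker
 * behind for internal pages.
 */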
1621void
39236c6e 1622pmap_page_protect_options(
0a7de745
A
1623 ppnum_t pn,
1624 vm_prot_t prot,
1625 unsigned int options,
1626 void *arg)
b7266188 1627{
0a7de745
A
1628 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1629 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1630 pv_hashed_entry_t nexth;
1631 int pvh_cnt = 0;
1632 pv_rooted_entry_t pv_h;
1633 pv_rooted_entry_t pv_e;
1634 pv_hashed_entry_t pvh_e;
1635 pt_entry_t *pte;
1636 int pai;
1637 pmap_t pmap;
1638 boolean_t remove;
1639 pt_entry_t new_pte_value;
1640 boolean_t is_ept;
b7266188
A
1641
1642 pmap_intr_assert();
1643 assert(pn != vm_page_fictitious_addr);
0a7de745 1644 if (pn == vm_page_guard_addr) {
b7266188 1645 return;
0a7de745 1646 }
b7266188
A
1647
1648 pai = ppn_to_pai(pn);
1649
1650 if (!IS_MANAGED_PAGE(pai)) {
1651 /*
0a7de745
A
1652 * Not a managed page.
1653 */
b7266188
A
1654 return;
1655 }
5ba3f43e
A
1656
1657 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
b7266188
A
1658
1659 /*
1660 * Determine the new protection.
1661 */
1662 switch (prot) {
1663 case VM_PROT_READ:
1664 case VM_PROT_READ | VM_PROT_EXECUTE:
1665 remove = FALSE;
1666 break;
1667 case VM_PROT_ALL:
0a7de745 1668 return; /* nothing to do */
b7266188
A
1669 default:
1670 remove = TRUE;
1671 break;
1672 }
1673
1674 pv_h = pai_to_pvh(pai);
1675
1676 LOCK_PVH(pai);
1677
1678
1679 /*
1680 * Walk down PV list, if any, changing or removing all mappings.
1681 */
0a7de745 1682 if (pv_h->pmap == PMAP_NULL) {
b7266188 1683 goto done;
0a7de745 1684 }
b7266188
A
1685
1686 pv_e = pv_h;
0a7de745 1687 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
b7266188
A
1688
1689 do {
1690 vm_map_offset_t vaddr;
1691
3e170ce0
A
1692 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1693 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1694 /* page was modified, so it will be compressed */
1695 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1696 options |= PMAP_OPTIONS_COMPRESSOR;
1697 }
1698
b7266188 1699 pmap = pv_e->pmap;
3e170ce0 1700 is_ept = is_ept_pmap(pmap);
39037602 1701 vaddr = PVE_VA(pv_e);
b7266188
A
1702 pte = pmap_pte(pmap, vaddr);
1703
6d2010ae
A
1704 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1705 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1706
b7266188
A
1707 if (0 == pte) {
1708 panic("pmap_page_protect() "
0a7de745
A
1709 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1710 pmap, pn, vaddr);
b7266188
A
1711 }
1712 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1713
1714 /*
1715 * Remove the mapping if new protection is NONE
b7266188 1716 */
6d2010ae 1717 if (remove) {
6d2010ae
A
1718 /* Remove per-pmap wired count */
1719 if (iswired(*pte)) {
1720 OSAddAtomic(-1, &pmap->stats.wired_count);
316670eb 1721 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6d2010ae
A
1722 }
1723
39236c6e
A
1724 if (pmap != kernel_pmap &&
1725 (options & PMAP_OPTIONS_COMPRESSOR) &&
1726 IS_INTERNAL_PAGE(pai)) {
cb323159 1727 assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
39037602 1728 /* mark this PTE as having been "compressed" */
3e170ce0 1729 new_pte_value = PTE_COMPRESSED;
d190cdc3 1730 if (IS_ALTACCT_PAGE(pai, pv_e)) {
39037602
A
1731 new_pte_value |= PTE_COMPRESSED_ALT;
1732 }
39236c6e
A
1733 } else {
1734 new_pte_value = 0;
1735 }
1736
1737 if (options & PMAP_OPTIONS_NOREFMOD) {
1738 pmap_store_pte(pte, new_pte_value);
b7266188 1739
0a7de745 1740 if (options & PMAP_OPTIONS_NOFLUSH) {
39236c6e 1741 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
0a7de745 1742 } else {
39236c6e 1743 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
0a7de745 1744 }
39236c6e
A
1745 } else {
1746 /*
1747 * Remove the mapping, collecting dirty bits.
1748 */
3e170ce0 1749 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
39236c6e 1750
0a7de745 1751 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
39037602
A
1752 if (!is_ept) {
1753 pmap_phys_attributes[pai] |=
0a7de745 1754 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
39037602
A
1755 } else {
1756 pmap_phys_attributes[pai] |=
0a7de745 1757 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
39037602 1758 }
3e170ce0 1759 if ((options &
0a7de745 1760 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
39037602
A
1761 IS_INTERNAL_PAGE(pai) &&
1762 (pmap_phys_attributes[pai] &
0a7de745 1763 PHYS_MODIFIED)) {
3e170ce0
A
1764 /*
1765 * Page is actually "modified" and
1766 * will be compressed. Start
1767 * accounting for it as "compressed".
1768 */
39037602 1769 assert(!(options & PMAP_OPTIONS_COMPRESSOR));
3e170ce0
A
1770 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1771 options |= PMAP_OPTIONS_COMPRESSOR;
39037602 1772 assert(new_pte_value == 0);
d190cdc3
A
1773 if (pmap != kernel_pmap) {
1774 new_pte_value = PTE_COMPRESSED;
1775 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1776 new_pte_value |= PTE_COMPRESSED_ALT;
1777 }
39037602 1778 }
3e170ce0 1779 }
1780 pmap_store_pte(pte, new_pte_value);
1781 }
3e170ce0 1782
b7266188 1783#if TESTING
0a7de745 1784 if (pmap->stats.resident_count < 1) {
b7266188 1785 panic("pmap_page_protect: resident_count");
0a7de745 1786 }
b7266188 1787#endif
316670eb 1788 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
b7266188 1789 assert(pmap->stats.resident_count >= 1);
0a7de745 1790 OSAddAtomic(-1, &pmap->stats.resident_count);
1791
1792 /*
1793 * We only ever compress internal pages.
1794 */
39236c6e 1795 if (options & PMAP_OPTIONS_COMPRESSOR) {
39037602 1796 assert(IS_INTERNAL_PAGE(pai));
39236c6e 1797 }
39236c6e 1798 if (pmap != kernel_pmap) {
d190cdc3 1799 /* update pmap stats */
1800 if (IS_REUSABLE_PAGE(pai)) {
1801 assert(pmap->stats.reusable > 0);
1802 OSAddAtomic(-1, &pmap->stats.reusable);
1803 } else if (IS_INTERNAL_PAGE(pai)) {
1804 assert(pmap->stats.internal > 0);
1805 OSAddAtomic(-1, &pmap->stats.internal);
1806 } else {
1807 assert(pmap->stats.external > 0);
1808 OSAddAtomic(-1, &pmap->stats.external);
1809 }
1810 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
1811 IS_INTERNAL_PAGE(pai)) {
1812 /* adjust "compressed" stats */
1813 OSAddAtomic64(+1, &pmap->stats.compressed);
1814 PMAP_STATS_PEAK(pmap->stats.compressed);
1815 pmap->stats.compressed_lifetime++;
1816 }
1817
1818 /* update ledgers */
1819 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1820 assert(IS_INTERNAL_PAGE(pai));
1821 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1822 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1823 if (options & PMAP_OPTIONS_COMPRESSOR) {
1824 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1825 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
1826 }
1827 } else if (IS_REUSABLE_PAGE(pai)) {
1828 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1829 assert(IS_INTERNAL_PAGE(pai));
1830 if (options & PMAP_OPTIONS_COMPRESSOR) {
1831 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1832 /* was not in footprint, but is now */
1833 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1834 }
1835 } else if (IS_INTERNAL_PAGE(pai)) {
1836 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1837 assert(!IS_REUSABLE_PAGE(pai));
1838 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1839 /*
1840 * Update all stats related to physical
1841 * footprint, which only deals with
1842 * internal pages.
1843 */
1844 if (options & PMAP_OPTIONS_COMPRESSOR) {
1845 /*
1846 * This removal is only being
1847 * done so we can send this page
1848 * to the compressor; therefore
1849 * it mustn't affect total task
1850 * footprint.
1851 */
1852 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1853 } else {
1854 /*
1855 * This internal page isn't
1856 * going to the compressor,
0a7de745 1857 * so adjust stats to keep
1858 * phys_footprint up to date.
1859 */
d190cdc3 1860 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1861 }
1862 }
1863 }
1864
b7266188 1865 /*
1866 * Deal with the pv_rooted_entry.
1867 */
1868
1869 if (pv_e == pv_h) {
1870 /*
1871 * Fix up head later.
1872 */
1873 pv_h->pmap = PMAP_NULL;
1874 } else {
1875 /*
1876 * Delete this entry.
1877 */
1878 pv_hash_remove(pvh_e);
1879 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1880 pvh_eh = pvh_e;
1881
0a7de745 1882 if (pvh_et == PV_HASHED_ENTRY_NULL) {
b7266188 1883 pvh_et = pvh_e;
0a7de745 1884 }
1885 pvh_cnt++;
1886 }
1887 } else {
1888 /*
1889 * Write-protect, after opportunistic refmod collect
1890 */
1891 if (!is_ept) {
1892 pmap_phys_attributes[pai] |=
0a7de745 1893 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1894 } else {
1895 pmap_phys_attributes[pai] |=
0a7de745 1896 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1897 }
1898 pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
39236c6e 1899
0a7de745 1900 if (options & PMAP_OPTIONS_NOFLUSH) {
39236c6e 1901 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1902 } else {
1903 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1904 }
1905 }
1906 pvh_e = nexth;
1907 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1908
1909
1910 /*
1911 * If pv_head mapping was removed, fix it up.
1912 */
1913 if (pv_h->pmap == PMAP_NULL) {
1914 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1915
1916 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1917 pv_hash_remove(pvh_e);
1918 pv_h->pmap = pvh_e->pmap;
d190cdc3 1919 pv_h->va_and_flags = pvh_e->va_and_flags;
1920 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1921 pvh_eh = pvh_e;
1922
0a7de745 1923 if (pvh_et == PV_HASHED_ENTRY_NULL) {
b7266188 1924 pvh_et = pvh_e;
0a7de745 1925 }
1926 pvh_cnt++;
1927 }
1928 }
1929 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1930 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1931 }
1932done:
1933 UNLOCK_PVH(pai);
1934
5ba3f43e 1935 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
1936}
1937
39236c6e 1938
1939/*
1940 * Clear specified attribute bits.
1941 */
b7266188 1942void
6d2010ae 1943phys_attribute_clear(
1944 ppnum_t pn,
1945 int bits,
1946 unsigned int options,
1947 void *arg)
b7266188 1948{
1949 pv_rooted_entry_t pv_h;
1950 pv_hashed_entry_t pv_e;
1951 pt_entry_t *pte = NULL;
1952 int pai;
1953 pmap_t pmap;
1954 char attributes = 0;
1955 boolean_t is_internal, is_reusable, is_altacct, is_ept;
1956 int ept_bits_to_clear;
1957 boolean_t ept_keep_global_mod = FALSE;
1958
1959 if ((bits & PHYS_MODIFIED) &&
1960 (options & PMAP_OPTIONS_NOFLUSH) &&
1961 arg == NULL) {
1962 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1963 "should not clear 'modified' without flushing TLBs\n",
1964 pn, bits, options, arg);
1965 }
1966
1967 /* We only support converting MOD and REF bits for EPT PTEs in this function */
1968 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
1969
1970 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
1971
1972 pmap_intr_assert();
1973 assert(pn != vm_page_fictitious_addr);
0a7de745 1974 if (pn == vm_page_guard_addr) {
6d2010ae 1975 return;
0a7de745 1976 }
b7266188 1977
6d2010ae 1978 pai = ppn_to_pai(pn);
b7266188 1979
1980 if (!IS_MANAGED_PAGE(pai)) {
1981 /*
1982 * Not a managed page.
1983 */
1984 return;
b7266188 1985 }
b7266188 1986
5ba3f43e 1987 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
b7266188 1988
6d2010ae 1989 pv_h = pai_to_pvh(pai);
b7266188 1990
6d2010ae 1991 LOCK_PVH(pai);
b7266188 1992
3e170ce0 1993
1994 /*
1995 * Walk down PV list, clearing all modify or reference bits.
1996 * We do not have to lock the pv_list because we have
316670eb 1997 * the per-pmap lock
1998 */
1999 if (pv_h->pmap != PMAP_NULL) {
2000 /*
2001 * There are some mappings.
2002 */
b7266188 2003
2004 is_internal = IS_INTERNAL_PAGE(pai);
2005 is_reusable = IS_REUSABLE_PAGE(pai);
2006
6d2010ae 2007 pv_e = (pv_hashed_entry_t)pv_h;
b7266188 2008
6d2010ae 2009 do {
0a7de745 2010 vm_map_offset_t va;
fe8ab488 2011 char pte_bits;
b7266188 2012
6d2010ae 2013 pmap = pv_e->pmap;
3e170ce0 2014 is_ept = is_ept_pmap(pmap);
d190cdc3 2015 is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
39037602 2016 va = PVE_VA(pv_e);
2017 pte_bits = 0;
2018
2019 if (bits) {
2020 pte = pmap_pte(pmap, va);
2021 /* grab ref/mod bits from this PTE */
3e170ce0 2022 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
fe8ab488 2023 /* propagate to page's global attributes */
2024 if (!is_ept) {
2025 attributes |= pte_bits;
2026 } else {
2027 attributes |= ept_refmod_to_physmap(pte_bits);
2028 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
2029 ept_keep_global_mod = TRUE;
2030 }
2031 }
fe8ab488 2032 /* which bits to clear for this PTE? */
2033 if (!is_ept) {
2034 pte_bits &= bits;
2035 } else {
2036 pte_bits &= ept_bits_to_clear;
2037 }
fe8ab488 2038 }
2039 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
2040 pte_bits |= PTE_WRITE(is_ept);
2041 }
b7266188 2042
2043 /*
2044 * Clear modify and/or reference bits.
2045 */
fe8ab488 2046 if (pte_bits) {
d9a64523 2047 pmap_update_pte(pte, pte_bits, 0);
2048
2049 /* Ensure all processors using this translation
2050 * invalidate this TLB entry. The invalidation
2051 * *must* follow the PTE update, to ensure that
2052 * the TLB shadow of the 'D' bit (in particular)
2053 * is synchronized with the updated PTE.
2054 */
0a7de745 2055 if (!(options & PMAP_OPTIONS_NOFLUSH)) {
 2056 /* flush TLBs now */
2057 PMAP_UPDATE_TLBS(pmap,
2058 va,
2059 va + PAGE_SIZE);
2060 } else if (arg) {
2061 /* delayed TLB flush: add "pmap" info */
2062 PMAP_UPDATE_TLBS_DELAYED(
2063 pmap,
2064 va,
2065 va + PAGE_SIZE,
2066 (pmap_flush_context *)arg);
2067 } else {
2068 /* no TLB flushing at all */
2069 }
2070 }
2071
2072 /* update pmap "reusable" stats */
2073 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
2074 is_reusable &&
2075 pmap != kernel_pmap) {
2076 /* one less "reusable" */
2077 assert(pmap->stats.reusable > 0);
2078 OSAddAtomic(-1, &pmap->stats.reusable);
2079 if (is_internal) {
2080 /* one more "internal" */
2081 OSAddAtomic(+1, &pmap->stats.internal);
2082 PMAP_STATS_PEAK(pmap->stats.internal);
39037602 2083 assert(pmap->stats.internal > 0);
2084 if (is_altacct) {
2085 /* no impact on ledgers */
39037602 2086 } else {
d190cdc3 2087 pmap_ledger_credit(pmap,
2088 task_ledgers.internal,
2089 PAGE_SIZE);
2090 pmap_ledger_credit(
2091 pmap,
2092 task_ledgers.phys_footprint,
2093 PAGE_SIZE);
2094 }
2095 } else {
2096 /* one more "external" */
2097 OSAddAtomic(+1, &pmap->stats.external);
2098 PMAP_STATS_PEAK(pmap->stats.external);
39037602 2099 assert(pmap->stats.external > 0);
2100 }
2101 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
2102 !is_reusable &&
2103 pmap != kernel_pmap) {
2104 /* one more "reusable" */
2105 OSAddAtomic(+1, &pmap->stats.reusable);
2106 PMAP_STATS_PEAK(pmap->stats.reusable);
39037602 2107 assert(pmap->stats.reusable > 0);
2108 if (is_internal) {
2109 /* one less "internal" */
2110 assert(pmap->stats.internal > 0);
2111 OSAddAtomic(-1, &pmap->stats.internal);
d190cdc3 2112 if (is_altacct) {
2113 /* no impact on footprint */
2114 } else {
d190cdc3 2115 pmap_ledger_debit(pmap,
2116 task_ledgers.internal,
2117 PAGE_SIZE);
2118 pmap_ledger_debit(
2119 pmap,
2120 task_ledgers.phys_footprint,
2121 PAGE_SIZE);
2122 }
2123 } else {
2124 /* one less "external" */
2125 assert(pmap->stats.external > 0);
2126 OSAddAtomic(-1, &pmap->stats.external);
2127 }
2128 }
b7266188 2129
6d2010ae 2130 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
6d2010ae 2131 } while (pv_e != (pv_hashed_entry_t)pv_h);
b7266188 2132 }
2133 /* Opportunistic refmod collection, annulled
2134 * if both REF and MOD are being cleared.
2135 */
b7266188 2136
6d2010ae 2137 pmap_phys_attributes[pai] |= attributes;
2138
2139 if (ept_keep_global_mod) {
2140 /*
2141 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2142 * requesting that we clear the modified bit for a phys page, we need
2143 * to ensure that there are no EPT mappings for the page with the
2144 * modified bit set. If there are, we cannot clear the global modified bit.
2145 */
2146 bits &= ~PHYS_MODIFIED;
2147 }
2148 pmap_phys_attributes[pai] &= ~(bits);
b7266188 2149
2150 /* update this page's "reusable" status */
2151 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
2152 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
2153 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
2154 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
2155 }
2156
6d2010ae 2157 UNLOCK_PVH(pai);
b7266188 2158
5ba3f43e 2159 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
2160}
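
/*
 * Illustrative sketch, not part of the original source: a minimal caller of
 * phys_attribute_clear().  With options == 0 the routine flushes TLBs
 * inline, so no pmap_flush_context argument is required.  The helper name
 * below is hypothetical.
 */
static inline void
example_clear_refmod(ppnum_t pn)
{
	/* Clear both attributes; TLBs are flushed inline by the callee. */
	phys_attribute_clear(pn, PHYS_REFERENCED | PHYS_MODIFIED, 0, NULL);
}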
2161
2162/*
2163 * Check specified attribute bits.
2164 */
2165int
2166phys_attribute_test(
2167 ppnum_t pn,
2168 int bits)
0b4c1975 2169{
2170 pv_rooted_entry_t pv_h;
2171 pv_hashed_entry_t pv_e;
2172 pt_entry_t *pte;
2173 int pai;
2174 pmap_t pmap;
2175 int attributes = 0;
2176 boolean_t is_ept;
2177
2178 pmap_intr_assert();
2179 assert(pn != vm_page_fictitious_addr);
3e170ce0 2180 assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
0a7de745 2181 if (pn == vm_page_guard_addr) {
6d2010ae 2182 return 0;
0a7de745 2183 }
2184
2185 pai = ppn_to_pai(pn);
2186
2187 if (!IS_MANAGED_PAGE(pai)) {
2188 /*
2189 * Not a managed page.
2190 */
2191 return 0;
2192 }
0b4c1975 2193
2194 /*
2195 * Fast check... if bits already collected
2196 * no need to take any locks...
2197 * if not set, we need to recheck after taking
2198 * the lock in case they got pulled in while
2199 * we were waiting for the lock
2200 */
0a7de745 2201 if ((pmap_phys_attributes[pai] & bits) == bits) {
6d2010ae 2202 return bits;
0a7de745 2203 }
0b4c1975 2204
6d2010ae 2205 pv_h = pai_to_pvh(pai);
0b4c1975 2206
6d2010ae 2207 LOCK_PVH(pai);
0b4c1975 2208
6d2010ae 2209 attributes = pmap_phys_attributes[pai] & bits;
0b4c1975 2210
0b4c1975 2211
2212 /*
2213 * Walk down PV list, checking the mappings until we
2214 * reach the end or we've found the desired attributes.
2215 */
2216 if (attributes != bits &&
2217 pv_h->pmap != PMAP_NULL) {
2218 /*
2219 * There are some mappings.
2220 */
2221 pv_e = (pv_hashed_entry_t)pv_h;
2222 do {
2223 vm_map_offset_t va;
0b4c1975 2224
6d2010ae 2225 pmap = pv_e->pmap;
3e170ce0 2226 is_ept = is_ept_pmap(pmap);
39037602 2227 va = PVE_VA(pv_e);
6d2010ae 2228 /*
0a7de745 2229 * pick up modify and/or reference bits from mapping
6d2010ae 2230 */
0b4c1975 2231
6d2010ae 2232 pte = pmap_pte(pmap, va);
2233 if (!is_ept) {
2234 attributes |= (int)(*pte & bits);
2235 } else {
2236 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
3e170ce0 2237 }
2238
2239 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
6d2010ae 2240 } while ((attributes != bits) &&
0a7de745 2241 (pv_e != (pv_hashed_entry_t)pv_h));
0b4c1975 2242 }
6d2010ae 2243 pmap_phys_attributes[pai] |= attributes;
0b4c1975 2244
6d2010ae 2245 UNLOCK_PVH(pai);
0a7de745 2246 return attributes;
6d2010ae 2247}
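
/*
 * Illustrative sketch, not part of the original source: callers typically
 * mask the returned attribute set for the single bit of interest.  The
 * wrapper name below is hypothetical.
 */
static inline boolean_t
example_page_is_modified(ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED) & PHYS_MODIFIED) ? TRUE : FALSE;
}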
0b4c1975 2248
2249/*
2250 * Routine: pmap_change_wiring
2251 * Function: Change the wiring attribute for a map/virtual-address
2252 * pair.
2253 * In/out conditions:
2254 * The mapping must already exist in the pmap.
2255 */
0b4c1975 2256void
6d2010ae 2257pmap_change_wiring(
2258 pmap_t map,
2259 vm_map_offset_t vaddr,
2260 boolean_t wired)
0b4c1975 2261{
0a7de745 2262 pt_entry_t *pte;
0b4c1975 2263
0a7de745 2264 PMAP_LOCK_SHARED(map);
0b4c1975 2265
0a7de745 2266 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
4bd07ac2 2267 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
2268 map, vaddr, wired);
2269 }
0b4c1975 2270
2271 if (wired && !iswired(*pte)) {
2272 /*
2273 * wiring down mapping
2274 */
316670eb 2275 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
0a7de745 2276 OSAddAtomic(+1, &map->stats.wired_count);
3e170ce0 2277 pmap_update_pte(pte, 0, PTE_WIRED);
0a7de745 2278 } else if (!wired && iswired(*pte)) {
2279 /*
2280 * unwiring mapping
2281 */
2282 assert(map->stats.wired_count >= 1);
0a7de745 2283 OSAddAtomic(-1, &map->stats.wired_count);
316670eb 2284 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
3e170ce0 2285 pmap_update_pte(pte, PTE_WIRED, 0);
060df5ea 2286 }
060df5ea 2287
0a7de745 2288 PMAP_UNLOCK_SHARED(map);
6d2010ae 2289}
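
/*
 * Illustrative sketch, not part of the original source: toggling the wired
 * attribute on an existing mapping.  Wiring credits the wired_mem ledger and
 * sets PTE_WIRED; unwiring reverses both.  The helper name is hypothetical.
 */
static inline void
example_wire_then_unwire(pmap_t map, vm_map_offset_t vaddr)
{
	/* A mapping for vaddr must already exist in map. */
	pmap_change_wiring(map, vaddr, TRUE);	/* page is now ineligible for pageout */
	pmap_change_wiring(map, vaddr, FALSE);	/* ledger and PTE_WIRED restored */
}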
2290
2291/*
2292 * "Backdoor" direct map routine for early mappings.
0a7de745 2293 * Useful for mapping memory outside the managed physical range (e.g. device memory).
2294 * Sets A, D and NC if requested
2295 */
2296
2297vm_offset_t
2298pmap_map_bd(
2299 vm_offset_t virt,
2300 vm_map_offset_t start_addr,
2301 vm_map_offset_t end_addr,
2302 vm_prot_t prot,
2303 unsigned int flags)
7ddcb079 2304{
2305 pt_entry_t template;
2306 pt_entry_t *ptep;
5c9f4661 2307
2308 vm_offset_t base = virt;
2309 boolean_t doflush = FALSE;
5c9f4661 2310
7ddcb079 2311 template = pa_to_pte(start_addr)
2312 | INTEL_PTE_REF
2313 | INTEL_PTE_MOD
2314 | INTEL_PTE_WIRED
2315 | INTEL_PTE_VALID;
2316
2317 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
2318 template |= INTEL_PTE_NCACHE;
2319 if (!(flags & (VM_MEM_GUARDED))) {
2320 template |= INTEL_PTE_PAT;
2321 }
7ddcb079 2322 }
316670eb 2323
0a7de745 2324 if ((prot & VM_PROT_EXECUTE) == 0) {
316670eb 2325 template |= INTEL_PTE_NX;
0a7de745 2326 }
316670eb 2327
0a7de745 2328 if (prot & VM_PROT_WRITE) {
7ddcb079 2329 template |= INTEL_PTE_WRITE;
2330 }
2331 vm_map_offset_t caddr = start_addr;
2332 while (caddr < end_addr) {
2333 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2334 if (ptep == PT_ENTRY_NULL) {
2335 panic("pmap_map_bd: Invalid kernel address");
2336 }
2337 if (pte_to_pa(*ptep)) {
2338 doflush = TRUE;
7ddcb079 2339 }
5c9f4661 2340 pmap_store_pte(ptep, template);
2341 pte_increment_pa(template);
2342 virt += PAGE_SIZE;
0a7de745 2343 caddr += PAGE_SIZE;
7ddcb079 2344 }
5c9f4661 2345 if (doflush) {
0a7de745 2346 pmap_tlbi_range(0, ~0ULL, true, 0);
2347 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2348 }
0a7de745 2349 return virt;
7ddcb079 2350}
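
/*
 * Illustrative sketch, not part of the original source: early boot code can
 * use pmap_map_bd() to map a physical window (e.g. a device aperture) into a
 * pre-existing kernel virtual range with uncached attributes.  The helper
 * name and parameters are hypothetical.
 */
static inline vm_offset_t
example_map_device_window(vm_offset_t kva, vm_map_offset_t phys_base, vm_map_offset_t phys_end)
{
	/* Returns the first virtual address past the new mappings. */
	return pmap_map_bd(kva, phys_base, phys_end,
	    VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}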
39236c6e 2351
2352/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
2353 * range. The aliased pagetable range is expanded if
2354 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
2355 * assumes caller has stabilized the source and destination ranges. Currently
2356 * used to populate sections of the trampoline "doublemap" at CPU startup.
2357 */
2358
2359void
2360pmap_alias(
2361 vm_offset_t ava,
2362 vm_map_offset_t start_addr,
2363 vm_map_offset_t end_addr,
2364 vm_prot_t prot,
2365 unsigned int eoptions)
5c9f4661 2366{
2367 pt_entry_t prot_template, template;
2368 pt_entry_t *aptep, *sptep;
2369
2370 prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
0a7de745 2371 if ((prot & VM_PROT_EXECUTE) == 0) {
5c9f4661 2372 prot_template |= INTEL_PTE_NX;
0a7de745 2373 }
5c9f4661 2374
0a7de745 2375 if (prot & VM_PROT_WRITE) {
5c9f4661 2376 prot_template |= INTEL_PTE_WRITE;
0a7de745 2377 }
2378 assert(((start_addr | end_addr) & PAGE_MASK) == 0);
2379 while (start_addr < end_addr) {
2380 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2381 if (aptep == PT_ENTRY_NULL) {
2382 if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
2383 pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
2384 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2385 } else {
2386 panic("pmap_alias: Invalid alias address");
2387 }
2388 }
2389 /* The aliased range should not have any active mappings */
2390 assert(pte_to_pa(*aptep) == 0);
2391
2392 sptep = pmap_pte(kernel_pmap, start_addr);
2393 assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
2394 template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
2395 pmap_store_pte(aptep, template);
2396
2397 ava += PAGE_SIZE;
2398 start_addr += PAGE_SIZE;
2399 }
2400}
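
/*
 * Illustrative sketch, not part of the original source: aliasing a
 * page-aligned kernel range at a second virtual address, expanding the alias
 * pagetables on demand.  The helper name and parameters are hypothetical.
 */
static inline void
example_alias_range(vm_offset_t alias_va, vm_map_offset_t src_start, vm_map_offset_t src_end)
{
	pmap_alias(alias_va, src_start, src_end,
	    VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP);
}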
2401
4bd07ac2 2402mach_vm_size_t
39236c6e 2403pmap_query_resident(
2404 pmap_t pmap,
2405 addr64_t s64,
2406 addr64_t e64,
2407 mach_vm_size_t *compressed_bytes_p)
2408{
2409 pt_entry_t *pde;
2410 pt_entry_t *spte, *epte;
2411 addr64_t l64;
2412 uint64_t deadline = 0;
2413 mach_vm_size_t resident_bytes;
2414 mach_vm_size_t compressed_bytes;
2415 boolean_t is_ept;
2416
2417 pmap_intr_assert();
2418
3e170ce0 2419 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
2420 if (compressed_bytes_p) {
2421 *compressed_bytes_p = 0;
3e170ce0 2422 }
39236c6e 2423 return 0;
2424 }
2425
2426 is_ept = is_ept_pmap(pmap);
2427
2428 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
2429 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
2430 VM_KERNEL_ADDRHIDE(e64));
39236c6e 2431
2432 resident_bytes = 0;
2433 compressed_bytes = 0;
39236c6e 2434
2435 PMAP_LOCK_EXCLUSIVE(pmap);
2436 uint32_t traverse_count = 0;
2437
2438 while (s64 < e64) {
2439 l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
2440 if (l64 > e64) {
39236c6e 2441 l64 = e64;
0a7de745 2442 }
2443 pde = pmap_pde(pmap, s64);
2444
2445 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
2446 if (*pde & PTE_PS) {
2447 /* superpage: not supported */
2448 } else {
2449 spte = pmap_pte(pmap,
0a7de745 2450 (s64 & ~(PDE_MAPPED_SIZE - 1)));
2451 spte = &spte[ptenum(s64)];
2452 epte = &spte[intel_btop(l64 - s64)];
2453
2454 for (; spte < epte; spte++) {
2455 if (pte_to_pa(*spte) != 0) {
4bd07ac2 2456 resident_bytes += PAGE_SIZE;
3e170ce0 2457 } else if (*spte & PTE_COMPRESSED) {
4bd07ac2 2458 compressed_bytes += PAGE_SIZE;
2459 }
2460 }
2461 }
2462 }
2463 s64 = l64;
2464
2465 if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
2466 if (deadline == 0) {
2467 deadline = rdtsc64() + max_preemption_latency_tsc;
2468 } else {
2469 if (rdtsc64() > deadline) {
2470 PMAP_UNLOCK_EXCLUSIVE(pmap);
2471 __builtin_ia32_pause();
2472 PMAP_LOCK_EXCLUSIVE(pmap);
2473 deadline = rdtsc64() + max_preemption_latency_tsc;
2474 }
2475 }
2476 }
2477 }
2478
0a7de745 2479 PMAP_UNLOCK_EXCLUSIVE(pmap);
2480
2481 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
0a7de745 2482 resident_bytes);
39236c6e 2483
2484 if (compressed_bytes_p) {
2485 *compressed_bytes_p = compressed_bytes;
3e170ce0 2486 }
4bd07ac2 2487 return resident_bytes;
39236c6e 2488}
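
/*
 * Illustrative sketch, not part of the original source: the optional out
 * parameter also reports how many bytes of the range are compressed.  The
 * helper name is hypothetical.
 */
static inline void
example_log_residency(pmap_t pmap, addr64_t start, addr64_t end)
{
	mach_vm_size_t compressed = 0;
	mach_vm_size_t resident = pmap_query_resident(pmap, start, end, &compressed);

	printf("pmap %p: 0x%llx bytes resident, 0x%llx bytes compressed\n",
	    pmap, (unsigned long long)resident, (unsigned long long)compressed);
}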
fe8ab488 2489
2490kern_return_t
2491pmap_query_page_info(
2492 pmap_t pmap,
2493 vm_map_offset_t va,
2494 int *disp_p)
39037602 2495{
2496 int disp;
2497 boolean_t is_ept;
2498 pmap_paddr_t pa;
2499 ppnum_t pai;
2500 pd_entry_t *pde;
2501 pt_entry_t *pte;
2502
2503 pmap_intr_assert();
2504 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
2505 *disp_p = 0;
2506 return KERN_INVALID_ARGUMENT;
2507 }
2508
2509 disp = 0;
2510 is_ept = is_ept_pmap(pmap);
2511
0a7de745 2512 PMAP_LOCK_EXCLUSIVE(pmap);
2513
2514 pde = pmap_pde(pmap, va);
2515 if (!pde ||
2516 !(*pde & PTE_VALID_MASK(is_ept)) ||
2517 (*pde & PTE_PS)) {
2518 goto done;
2519 }
2520
2521 pte = pmap_pte(pmap, va);
2522 if (pte == PT_ENTRY_NULL) {
2523 goto done;
2524 }
2525
2526 pa = pte_to_pa(*pte);
2527 if (pa == 0) {
cb323159 2528 if (PTE_IS_COMPRESSED(*pte, pte, pmap, va)) {
2529 disp |= PMAP_QUERY_PAGE_COMPRESSED;
2530 if (*pte & PTE_COMPRESSED_ALT) {
2531 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
2532 }
2533 }
2534 } else {
2535 disp |= PMAP_QUERY_PAGE_PRESENT;
2536 pai = pa_index(pa);
2537 if (!IS_MANAGED_PAGE(pai)) {
2538 } else if (pmap_pv_is_altacct(pmap, va, pai)) {
2539 assert(IS_INTERNAL_PAGE(pai));
2540 disp |= PMAP_QUERY_PAGE_INTERNAL;
2541 disp |= PMAP_QUERY_PAGE_ALTACCT;
2542 } else if (IS_REUSABLE_PAGE(pai)) {
2543 disp |= PMAP_QUERY_PAGE_REUSABLE;
2544 } else if (IS_INTERNAL_PAGE(pai)) {
2545 disp |= PMAP_QUERY_PAGE_INTERNAL;
2546 }
2547 }
2548
2549done:
0a7de745 2550 PMAP_UNLOCK_EXCLUSIVE(pmap);
2551 *disp_p = disp;
2552 return KERN_SUCCESS;
2553}
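
/*
 * Illustrative sketch, not part of the original source: decoding the
 * disposition bits returned for a single mapping.  The helper name is
 * hypothetical.
 */
static inline boolean_t
example_page_is_compressed(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return FALSE;
	}
	return (disp & PMAP_QUERY_PAGE_COMPRESSED) ? TRUE : FALSE;
}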
2554
2555void
2556pmap_set_jit_entitled(__unused pmap_t pmap)
39037602 2557{
2558 /* The x86 pmap layer does not care if a map has a JIT entry. */
2559 return;
2560}
2561
2562bool
2563pmap_has_prot_policy(__unused vm_prot_t prot)
fe8ab488 2564{
2565 /*
2566 * The x86 pmap layer does not apply any policy to any protection
2567 * types.
2568 */
2569 return FALSE;
2570}
2571
2572uint64_t
2573pmap_release_pages_fast(void)
2574{
2575 return 0;
2576}
2577
2578void
2579pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused addr64_t nstart, __unused uint64_t size)
2580{
2581 return;
fe8ab488 2582}
5ba3f43e 2583
cb323159 2584__dead2
2585void
2586pmap_ledger_alloc_init(size_t size)
2587{
2588 panic("%s: unsupported, "
2589 "size=%lu",
2590 __func__, size);
2591}
2592
cb323159 2593__dead2
2594ledger_t
2595pmap_ledger_alloc(void)
2596{
2597 panic("%s: unsupported",
0a7de745 2598 __func__);
2599}
2600
cb323159 2601__dead2
2602void
2603pmap_ledger_free(ledger_t ledger)
2604{
2605 panic("%s: unsupported, "
2606 "ledger=%p",
2607 __func__, ledger);
2608}
2609
2610size_t
2611pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
2612{
2613 return (size_t)-1;
2614}
2615
2616void *
2617pmap_map_compressor_page(ppnum_t pn)
2618{
2619 assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
2620 return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
2621}
2622
2623void
2624pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
2625{
2626}