]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/i386/pmap_x86_common.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap_x86_common.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach_assert.h>
30
31#include <vm/pmap.h>
32#include <vm/vm_map.h>
33#include <vm/vm_kern.h>
34#include <kern/ledger.h>
35#include <i386/pmap_internal.h>
36
37void pmap_remove_range(
38 pmap_t pmap,
39 vm_map_offset_t va,
40 pt_entry_t *spte,
41 pt_entry_t *epte);
42
43static void pmap_remove_range_options(
44 pmap_t pmap,
45 vm_map_offset_t va,
46 pt_entry_t *spte,
47 pt_entry_t *epte,
48 int options);
49
50void pmap_reusable_range(
51 pmap_t pmap,
52 vm_map_offset_t va,
53 pt_entry_t *spte,
54 pt_entry_t *epte,
55 boolean_t reusable);
56
57uint32_t pmap_update_clear_pte_count;
58
59/*
60 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
61 * on a NBPDE boundary.
62 */
63
64uint64_t
65pmap_shared_region_size_min(__unused pmap_t pmap)
66{
67 return NBPDE;
68}
69
70uint64_t
71pmap_commpage_size_min(__unused pmap_t pmap)
72{
73 return NBPDE;
74}
75
76uint64_t
77pmap_nesting_size_max(__unused pmap_t pmap)
78{
79 return 0llu - (uint64_t)NBPDE;
80}
81
82/*
83 * kern_return_t pmap_nest(grand, subord, va_start, size)
84 *
85 * grand = the pmap that we will nest subord into
86 * subord = the pmap that goes into the grand
87 * va_start = start of range in pmap to be inserted
88 * size = Size of nest area (up to 16TB)
89 *
90 * Inserts a pmap into another. This is used to implement shared segments.
91 *
92 * Note that we depend upon higher level VM locks to ensure that things don't change while
93 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
94 * or do 2 nests at once.
95 */
96
97/*
98 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
99 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
100 * container and the "grand" parent. A minor optimization to consider for the
101 * future: make the "subord" truly a container rather than a full-fledged
102 * pagetable hierarchy which can be unnecessarily sparse (DRK).
103 */
104
105kern_return_t
106pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
107{
108 vm_map_offset_t vaddr;
109 pd_entry_t *pde, *npde;
110 unsigned int i;
111 uint64_t num_pde;
112
113 assert(!is_ept_pmap(grand));
114 assert(!is_ept_pmap(subord));
115
116 if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
117 (va_start & (pmap_shared_region_size_min(grand) - 1)) ||
118 ((size >> 28) > 65536)) { /* Max size we can nest is 16TB */
119 return KERN_INVALID_VALUE;
120 }
121
122 if (size == 0) {
123 panic("pmap_nest: size is invalid - %016llX\n", size);
124 }
125
126 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
127 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
128 VM_KERNEL_ADDRHIDE(va_start));
129
130 vaddr = (vm_map_offset_t)va_start;
131 num_pde = size >> PDESHIFT;
132
133 PMAP_LOCK_EXCLUSIVE(subord);
134
135 subord->pm_shared = TRUE;
136
137 for (i = 0; i < num_pde;) {
138 if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
139 npde = pmap64_pdpt(subord, vaddr);
140
141 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
142 PMAP_UNLOCK_EXCLUSIVE(subord);
143 pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
144 PMAP_LOCK_EXCLUSIVE(subord);
145 npde = pmap64_pdpt(subord, vaddr);
146 }
147 *npde |= INTEL_PDPTE_NESTED;
148 vaddr += NBPDPT;
149 i += (uint32_t)NPDEPG;
150 } else {
151 npde = pmap_pde(subord, vaddr);
152
153 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
154 PMAP_UNLOCK_EXCLUSIVE(subord);
155 pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
156 PMAP_LOCK_EXCLUSIVE(subord);
157 npde = pmap_pde(subord, vaddr);
158 }
159 vaddr += NBPDE;
160 i++;
161 }
162 }
163
164 PMAP_UNLOCK_EXCLUSIVE(subord);
165
166 vaddr = (vm_map_offset_t)va_start;
167
168 PMAP_LOCK_EXCLUSIVE(grand);
169
170 for (i = 0; i < num_pde;) {
171 pd_entry_t tpde;
172
173 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
174 npde = pmap64_pdpt(subord, vaddr);
175 if (npde == 0) {
176 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
177 }
178 tpde = *npde;
179 pde = pmap64_pdpt(grand, vaddr);
180 if (0 == pde) {
181 PMAP_UNLOCK_EXCLUSIVE(grand);
182 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
183 PMAP_LOCK_EXCLUSIVE(grand);
184 pde = pmap64_pdpt(grand, vaddr);
185 }
186 if (pde == 0) {
187 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
188 }
189 pmap_store_pte(pde, tpde);
190 vaddr += NBPDPT;
191 i += (uint32_t) NPDEPG;
192 } else {
193 npde = pmap_pde(subord, vaddr);
194 if (npde == 0) {
195 panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
196 }
197 tpde = *npde;
198 pde = pmap_pde(grand, vaddr);
199 if (0 == pde) {
200 PMAP_UNLOCK_EXCLUSIVE(grand);
201 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
202 PMAP_LOCK_EXCLUSIVE(grand);
203 pde = pmap_pde(grand, vaddr);
204 }
205
206 if (pde == 0) {
207 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
208 }
209 vaddr += NBPDE;
210 pmap_store_pte(pde, tpde);
211 i++;
212 }
213 }
214
215 PMAP_UNLOCK_EXCLUSIVE(grand);
216
217 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);
218
219 return KERN_SUCCESS;
220}
221
222/*
223 * kern_return_t pmap_unnest(grand, vaddr, size)
224 *
225 * grand = the pmap that we will un-nest subord from
226 * vaddr = start of range in pmap to be unnested (size = length of that range)
227 *
228 * Removes a pmap from another. This is used to implement shared segments.
229 */
230
231kern_return_t
232pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
233{
234 pd_entry_t *pde;
235 unsigned int i;
236 uint64_t num_pde;
237 addr64_t va_start, va_end;
238 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
239
240 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
241 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
242
243 if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
244 (vaddr & (pmap_shared_region_size_min(grand) - 1))) {
245 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
246 grand, vaddr, size);
247 }
248
249 assert(!is_ept_pmap(grand));
250
251 /* align everything to PDE boundaries */
252 va_start = vaddr & ~(NBPDE - 1);
253 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
254 size = va_end - va_start;
255
256 PMAP_LOCK_EXCLUSIVE(grand);
257
258 num_pde = size >> PDESHIFT;
259 vaddr = va_start;
260
261 for (i = 0; i < num_pde;) {
262 if (pdptnum(grand, vaddr) != npdpt) {
263 npdpt = pdptnum(grand, vaddr);
264 pde = pmap64_pdpt(grand, vaddr);
265 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
266 pmap_store_pte(pde, (pd_entry_t)0);
267 i += (uint32_t) NPDEPG;
268 vaddr += NBPDPT;
269 continue;
270 }
271 }
272 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
273 if (pde == 0) {
274 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
275 }
276 pmap_store_pte(pde, (pd_entry_t)0);
277 i++;
278 vaddr += NBPDE;
279 }
280
281 PMAP_UPDATE_TLBS(grand, va_start, va_end);
282
283 PMAP_UNLOCK_EXCLUSIVE(grand);
284
285 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);
286
287 return KERN_SUCCESS;
288}
289
290kern_return_t
291pmap_unnest_options(
292 pmap_t grand,
293 addr64_t vaddr,
294 __unused uint64_t size,
295 __unused unsigned int options)
296{
297 return pmap_unnest(grand, vaddr, size);
298}
299
300/* Invoked by the Mach VM to determine the platform specific unnest region */
301
302boolean_t
303pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
304{
305 pd_entry_t *pdpte;
306 boolean_t rval = FALSE;
307
308 PMAP_LOCK_EXCLUSIVE(p);
309
310 pdpte = pmap64_pdpt(p, *s);
311 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
312 *s &= ~(NBPDPT - 1);
313 rval = TRUE;
314 }
315
316 pdpte = pmap64_pdpt(p, *e);
317 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
318 *e = ((*e + NBPDPT) & ~(NBPDPT - 1));
319 rval = TRUE;
320 }
321
322 PMAP_UNLOCK_EXCLUSIVE(p);
323
324 return rval;
325}
326
327pmap_paddr_t
328pmap_find_pa(pmap_t pmap, addr64_t va)
329{
330 pt_entry_t *ptp;
331 pd_entry_t *pdep;
332 pd_entry_t pde;
333 pt_entry_t pte;
334 boolean_t is_ept, locked = FALSE;
335 pmap_paddr_t pa = 0;
336
337 is_ept = is_ept_pmap(pmap);
338
339 if ((pmap != kernel_pmap) && not_in_kdp) {
340 PMAP_LOCK_EXCLUSIVE(pmap);
341 locked = TRUE;
342 } else {
343 mp_disable_preemption();
344 }
345
346 if (os_ref_get_count(&pmap->ref_count) == 0) {
347 goto pfp_exit;
348 }
349
350 pdep = pmap_pde(pmap, va);
351
352 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
353 if (pde & PTE_PS) {
354 pa = pte_to_pa(pde) + (va & I386_LPGMASK);
355 } else {
356 ptp = pmap_pte(pmap, va);
357 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
358 pa = pte_to_pa(pte) + (va & PAGE_MASK);
359 }
360 }
361 }
362pfp_exit:
363 if (locked) {
364 PMAP_UNLOCK_EXCLUSIVE(pmap);
365 } else {
366 mp_enable_preemption();
367 }
368
369 return pa;
370}
371
372/*
373 * pmap_find_phys returns the (4K) physical page number containing a
374 * given virtual address in a given pmap.
375 * Note that pmap_pte may return a pde if this virtual address is
376 * mapped by a large page and this is taken into account in order
377 * to return the correct page number in this case.
378 */
379ppnum_t
380pmap_find_phys(pmap_t pmap, addr64_t va)
381{
382 ppnum_t ppn = 0;
383 pmap_paddr_t pa = 0;
384
385 pa = pmap_find_pa(pmap, va);
386 ppn = (ppnum_t) i386_btop(pa);
387
388 return ppn;
389}
390
391ppnum_t
392pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
393{
394 if ((pmap == kernel_pmap) ||
395 ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
396 return pmap_find_phys(pmap, va);
397 }
398 return 0;
399}
400
401/*
402 * pmap_get_prot returns the equivalent VM page protections
403 * set on a given address, 'va'. This function is used in the
404 * ml_static_verify_page_protections() routine which is used
405 * by the kext loading code to validate that the TEXT segment
406 * of a kext is mapped executable.
407 */
408kern_return_t
409pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
410{
411 pt_entry_t *ptp;
412 pd_entry_t *pdep;
413 pd_entry_t pde;
414 pt_entry_t pte;
415 boolean_t is_ept, locked = FALSE;
416 kern_return_t retval = KERN_FAILURE;
417 vm_prot_t prot = 0;
418
419 is_ept = is_ept_pmap(pmap);
420
421 if ((pmap != kernel_pmap) && not_in_kdp) {
422 PMAP_LOCK_EXCLUSIVE(pmap);
423 locked = TRUE;
424 } else {
425 mp_disable_preemption();
426 }
427
428 if (os_ref_get_count(&pmap->ref_count) == 0) {
429 goto pfp_exit;
430 }
431
432 pdep = pmap_pde(pmap, va);
433
434 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
435 if (pde & PTE_PS) {
436 prot = VM_PROT_READ;
437
438 if (pde & PTE_WRITE(is_ept)) {
439 prot |= VM_PROT_WRITE;
440 }
441 if (PTE_IS_EXECUTABLE(is_ept, pde)) {
442 prot |= VM_PROT_EXECUTE;
443 }
444 retval = KERN_SUCCESS;
445 } else {
446 ptp = pmap_pte(pmap, va);
447 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
448 prot = VM_PROT_READ;
449
450 if (pte & PTE_WRITE(is_ept)) {
451 prot |= VM_PROT_WRITE;
452 }
453 if (PTE_IS_EXECUTABLE(is_ept, pte)) {
454 prot |= VM_PROT_EXECUTE;
455 }
456 retval = KERN_SUCCESS;
457 }
458 }
459 }
460
461pfp_exit:
462 if (locked) {
463 PMAP_UNLOCK_EXCLUSIVE(pmap);
464 } else {
465 mp_enable_preemption();
466 }
467
468 if (protp) {
469 *protp = prot;
470 }
471
472 return retval;
473}
474
475/*
476 * Update cache attributes for all extant managed mappings.
477 * Assumes PV for this page is locked, and that the page
478 * is managed. We assume that this physical page may be mapped in
479 * both EPT and normal Intel PTEs, so we convert the attributes
480 * to the corresponding format for each pmap.
481 *
482 * We assert that the passed set of attributes is a subset of the
483 * PHYS_CACHEABILITY_MASK.
484 */
485void
486pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
487{
488 pv_rooted_entry_t pv_h, pv_e;
489 pv_hashed_entry_t pvh_e, nexth;
490 vm_map_offset_t vaddr;
491 pmap_t pmap;
492 pt_entry_t *ptep;
493 boolean_t is_ept;
494 unsigned ept_attributes;
495
496 assert(IS_MANAGED_PAGE(pn));
497 assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
498
499 /* We don't support the PAT bit for EPT PTEs */
500 if (attributes & INTEL_PTE_NCACHE) {
501 ept_attributes = INTEL_EPT_NCACHE;
502 } else {
503 ept_attributes = INTEL_EPT_WB;
504 }
505
506 pv_h = pai_to_pvh(pn);
507 /* TODO: translate the PHYS_* bits to PTE bits, while they're
508 * currently identical, they may not remain so
509 * Potential optimization (here and in page_protect),
510 * parallel shootdowns, check for redundant
511 * attribute modifications.
512 */
513
514 /*
515 * Alter attributes on all mappings
516 */
517 if (pv_h->pmap != PMAP_NULL) {
518 pv_e = pv_h;
519 pvh_e = (pv_hashed_entry_t)pv_e;
520
521 do {
522 pmap = pv_e->pmap;
523 vaddr = PVE_VA(pv_e);
524 ptep = pmap_pte(pmap, vaddr);
525
526 if (0 == ptep) {
527 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
528 }
529
530 is_ept = is_ept_pmap(pmap);
531
532 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
533 if (!is_ept) {
534 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
535 } else {
536 pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
537 }
538 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
539 pvh_e = nexth;
540 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
541 }
542}
543
544void
545x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
546{
547 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
548
549 if (dofilter) {
550 CPU_CR3_MARK_INACTIVE();
551 } else {
552 CPU_CR3_MARK_ACTIVE();
553 mfence();
554 pmap_update_interrupt();
555 }
556}
557
558
559/*
560 * Insert the given physical page (p) at
561 * the specified virtual address (v) in the
562 * target physical map with the protection requested.
563 *
564 * If specified, the page will be wired down, meaning
565 * that the related pte cannot be reclaimed.
566 *
567 * NB: This is the only routine which MAY NOT lazy-evaluate
568 * or lose information. That is, this routine must actually
569 * insert this page into the given map NOW.
570 */
571
572kern_return_t
573pmap_enter(
574 pmap_t pmap,
575 vm_map_offset_t vaddr,
576 ppnum_t pn,
577 vm_prot_t prot,
578 vm_prot_t fault_type,
579 unsigned int flags,
580 boolean_t wired)
581{
582 return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
583}
584
585#define PTE_LOCK(EPT) INTEL_PTE_SWLOCK
586
587static inline void PTE_LOCK_LOCK(pt_entry_t *);
588static inline void PTE_LOCK_UNLOCK(pt_entry_t *);
589
590void
591PTE_LOCK_LOCK(pt_entry_t *lpte)
592{
593 pt_entry_t pte;
594plretry:
595 while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
596 __builtin_ia32_pause();
597 }
598 if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
599 return;
600 }
601
602 goto plretry;
603}
604
605void
606PTE_LOCK_UNLOCK(pt_entry_t *lpte)
607{
608 __c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
609}
610
611kern_return_t
612pmap_enter_options_addr(
613 pmap_t pmap,
614 vm_map_address_t v,
615 pmap_paddr_t pa,
616 vm_prot_t prot,
617 vm_prot_t fault_type,
618 unsigned int flags,
619 boolean_t wired,
620 unsigned int options,
621 __unused void *arg)
622{
623 return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg);
624}
625
626kern_return_t
627pmap_enter_options(
628 pmap_t pmap,
629 vm_map_offset_t vaddr,
630 ppnum_t pn,
631 vm_prot_t prot,
632 __unused vm_prot_t fault_type,
633 unsigned int flags,
634 boolean_t wired,
635 unsigned int options,
636 void *arg)
637{
638 pt_entry_t *pte = NULL;
639 pv_rooted_entry_t pv_h;
640 ppnum_t pai;
641 pv_hashed_entry_t pvh_e;
642 pv_hashed_entry_t pvh_new;
643 pt_entry_t template;
644 pmap_paddr_t old_pa;
645 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
646 boolean_t need_tlbflush = FALSE;
647 boolean_t set_NX;
648 char oattr;
649 boolean_t old_pa_locked;
650 /* 2MiB mappings are confined to x86_64 by VM */
651 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
652 vm_object_t delpage_pm_obj = NULL;
653 uint64_t delpage_pde_index = 0;
654 pt_entry_t old_pte;
655 kern_return_t kr = KERN_FAILURE;
656 boolean_t is_ept;
657 boolean_t is_altacct;
658 boolean_t ptelocked = FALSE;
659
660 pmap_intr_assert();
661
662 if (__improbable(pmap == PMAP_NULL)) {
663 return KERN_INVALID_ARGUMENT;
664 }
665 if (__improbable(pn == vm_page_guard_addr)) {
666 return KERN_INVALID_ARGUMENT;
667 }
668
669 is_ept = is_ept_pmap(pmap);
670
671 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
672 * unused value for that scenario.
673 */
674 assert(pn != vm_page_fictitious_addr);
675
676
677 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
678 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
679 prot);
680
681 if ((prot & VM_PROT_EXECUTE)) {
682 set_NX = FALSE;
683 } else {
684 set_NX = TRUE;
685 }
686
687#if DEVELOPMENT || DEBUG
688 if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
689 set_NX = FALSE;
690 }
691
692 if (__improbable(set_NX && (pmap == kernel_pmap) &&
693 ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
694 (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
695 set_NX = FALSE;
696 }
697#endif
698
699 pvh_new = PV_HASHED_ENTRY_NULL;
700Retry:
701 pvh_e = PV_HASHED_ENTRY_NULL;
702
703 PMAP_LOCK_SHARED(pmap);
704
705 /*
706 * Expand pmap to include this pte. Assume that
707 * pmap is always expanded to include enough hardware
708 * pages to map one VM page.
709 */
710 if (__improbable(superpage)) {
711 while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
712 /* need room for another pde entry */
713 PMAP_UNLOCK_SHARED(pmap);
714 kr = pmap_expand_pdpt(pmap, vaddr, options);
715 if (kr != KERN_SUCCESS) {
716 goto done1;
717 }
718 PMAP_LOCK_SHARED(pmap);
719 }
720 } else {
721 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
722 /*
723 * Must unlock to expand the pmap
724 * going to grow pde level page(s)
725 */
726 PMAP_UNLOCK_SHARED(pmap);
727 kr = pmap_expand(pmap, vaddr, options);
728 if (kr != KERN_SUCCESS) {
729 goto done1;
730 }
731 PMAP_LOCK_SHARED(pmap);
732 }
733 }
734
735 if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
736 PMAP_UNLOCK_SHARED(pmap);
737 kr = KERN_SUCCESS;
738 goto done1;
739 }
740
741 if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
742 /*
743 * There is still an empty page table mapped that
744 * was used for a previous base page mapping.
745 * Remember the PDE and the PDE index, so that we
746 * can free the page at the end of this function.
747 */
748 delpage_pde_index = pdeidx(pmap, vaddr);
749 delpage_pm_obj = pmap->pm_obj;
750 pmap_store_pte(pte, 0);
751 }
752
753 PTE_LOCK_LOCK(pte);
754 ptelocked = TRUE;
755
756 old_pa = pte_to_pa(*pte);
757 pai = pa_index(old_pa);
758 old_pa_locked = FALSE;
759
760 if (old_pa == 0 &&
761 PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
762 /*
763 * "pmap" should be locked at this point, so this should
764 * not race with another pmap_enter() or pmap_remove_range().
765 */
766 assert(pmap != kernel_pmap);
767
768 /* one less "compressed" */
769 OSAddAtomic64(-1, &pmap->stats.compressed);
770 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
771 PAGE_SIZE);
772 if (*pte & PTE_COMPRESSED_ALT) {
773 pmap_ledger_debit(
774 pmap,
775 task_ledgers.alternate_accounting_compressed,
776 PAGE_SIZE);
777 } else {
778 /* was part of the footprint */
779 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
780 PAGE_SIZE);
781 }
782 /* marker will be cleared below */
783 }
784
785 /*
786 * if we have a previous managed page, lock the pv entry now. after
787 * we lock it, check to see if someone beat us to the lock and if so
788 * drop the lock
789 */
790 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
791 LOCK_PVH(pai);
792 old_pa_locked = TRUE;
793 old_pa = pte_to_pa(*pte);
794 if (0 == old_pa) {
795 UNLOCK_PVH(pai); /* another path beat us to it */
796 old_pa_locked = FALSE;
797 }
798 }
799
800 /*
801 * Special case if the incoming physical page is already mapped
802 * at this address.
803 */
804 if (old_pa == pa) {
805 pt_entry_t old_attributes =
806 *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));
807
808 /*
809 * May be changing its wired attribute or protection
810 */
811
812 template = pa_to_pte(pa);
813
814 if (__probable(!is_ept)) {
815 template |= INTEL_PTE_VALID;
816 } else {
817 template |= INTEL_EPT_IPAT;
818 }
819
820 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
821
822 /*
823 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
824 */
825 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
826 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
827 if (!(flags & VM_MEM_GUARDED)) {
828 template |= INTEL_PTE_PAT;
829 }
830 template |= INTEL_PTE_NCACHE;
831 }
832 if (pmap != kernel_pmap && !is_ept) {
833 template |= INTEL_PTE_USER;
834 }
835
836 if (prot & VM_PROT_READ) {
837 template |= PTE_READ(is_ept);
838 }
839
840 if (prot & VM_PROT_WRITE) {
841 template |= PTE_WRITE(is_ept);
842 if (is_ept && !pmap_ept_support_ad) {
843 template |= PTE_MOD(is_ept);
844 if (old_pa_locked) {
845 assert(IS_MANAGED_PAGE(pai));
846 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
847 }
848 }
849 }
850 if (prot & VM_PROT_EXECUTE) {
851 assert(set_NX == 0);
852 template = pte_set_ex(template, is_ept);
853 }
854
855 if (set_NX) {
856 template = pte_remove_ex(template, is_ept);
857 }
858
859 if (wired) {
860 template |= PTE_WIRED;
861 if (!iswired(old_attributes)) {
862 OSAddAtomic(+1, &pmap->stats.wired_count);
863 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
864 }
865 } else {
866 if (iswired(old_attributes)) {
867 assert(pmap->stats.wired_count >= 1);
868 OSAddAtomic(-1, &pmap->stats.wired_count);
869 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
870 }
871 }
872
873 if (superpage) { /* this path can not be used */
874 template |= PTE_PS; /* to change the page size! */
875 }
876 if (old_attributes == template) {
877 goto dont_update_pte;
878 }
879
880 /* Determine delta, PV locked */
881 need_tlbflush =
882 ((old_attributes ^ template) != PTE_WIRED);
883
884 /* Optimisation: avoid TLB flush when adding writability */
885 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
886 if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
887 need_tlbflush = FALSE;
888 }
889 }
890
891 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
892 if (__improbable(is_ept && !pmap_ept_support_ad)) {
893 template |= PTE_REF(is_ept);
894 if (old_pa_locked) {
895 assert(IS_MANAGED_PAGE(pai));
896 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
897 }
898 }
899
900 /* store modified PTE and preserve RC bits */
901 pt_entry_t npte, opte;
902
903 assert((*pte & PTE_LOCK(is_ept)) != 0);
904
905 do {
906 opte = *pte;
907 npte = template | (opte & (PTE_REF(is_ept) |
908 PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
909 } while (!pmap_cmpx_pte(pte, opte, npte));
910
911dont_update_pte:
912 if (old_pa_locked) {
913 UNLOCK_PVH(pai);
914 old_pa_locked = FALSE;
915 }
916 goto done2;
917 }
918
919 /*
920 * Outline of code from here:
921 * 1) If va was mapped, update TLBs, remove the mapping
922 * and remove old pvlist entry.
923 * 2) Add pvlist entry for new mapping
924 * 3) Enter new mapping.
925 *
926 * If the old physical page is not managed step 1) is skipped
927 * (except for updating the TLBs), and the mapping is
928 * overwritten at step 3). If the new physical page is not
929 * managed, step 2) is skipped.
930 */
931 /* TODO: add opportunistic refmod collect */
932 if (old_pa != (pmap_paddr_t) 0) {
933 boolean_t was_altacct = FALSE;
934
935 /*
936 * Don't do anything to pages outside valid memory here.
937 * Instead convince the code that enters a new mapping
938 * to overwrite the old one.
939 */
940
941 /* invalidate the PTE */
942 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
943 /* propagate invalidate everywhere */
944 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
945 /* remember reference and change */
946 old_pte = *pte;
947 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
948 /* completely invalidate the PTE */
949 pmap_store_pte(pte, PTE_LOCK(is_ept));
950
951 if (IS_MANAGED_PAGE(pai)) {
952 /*
953 * Remove the mapping from the pvlist for
954 * this physical page.
955 * We'll end up with either a rooted pv or a
956 * hashed pv
957 */
958 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
959 }
960
961 if (IS_MANAGED_PAGE(pai)) {
962 pmap_assert(old_pa_locked == TRUE);
963 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
964 assert(pmap->stats.resident_count >= 1);
965 OSAddAtomic(-1, &pmap->stats.resident_count);
966 if (pmap != kernel_pmap) {
967 /* update pmap stats */
968 if (IS_REUSABLE_PAGE(pai)) {
969 PMAP_STATS_ASSERTF(
970 (pmap->stats.reusable > 0,
971 "reusable %d",
972 pmap->stats.reusable));
973 OSAddAtomic(-1, &pmap->stats.reusable);
974 } else if (IS_INTERNAL_PAGE(pai)) {
975 PMAP_STATS_ASSERTF(
976 (pmap->stats.internal > 0,
977 "internal %d",
978 pmap->stats.internal));
979 OSAddAtomic(-1, &pmap->stats.internal);
980 } else {
981 PMAP_STATS_ASSERTF(
982 (pmap->stats.external > 0,
983 "external %d",
984 pmap->stats.external));
985 OSAddAtomic(-1, &pmap->stats.external);
986 }
987
988 /* update ledgers */
989 if (was_altacct) {
990 assert(IS_INTERNAL_PAGE(pai));
991 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
992 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
993 } else if (IS_REUSABLE_PAGE(pai)) {
994 assert(!was_altacct);
995 assert(IS_INTERNAL_PAGE(pai));
996 /* was already not in phys_footprint */
997 } else if (IS_INTERNAL_PAGE(pai)) {
998 assert(!was_altacct);
999 assert(!IS_REUSABLE_PAGE(pai));
1000 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1001 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1002 } else {
1003 /* not an internal page */
1004 }
1005 }
1006 if (iswired(*pte)) {
1007 assert(pmap->stats.wired_count >= 1);
1008 OSAddAtomic(-1, &pmap->stats.wired_count);
1009 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
1010 PAGE_SIZE);
1011 }
1012
1013 if (!is_ept) {
1014 pmap_phys_attributes[pai] |= oattr;
1015 } else {
1016 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
1017 }
1018 } else {
1019 /*
1020 * old_pa is not managed.
1021 * Do removal part of accounting.
1022 */
1023
1024 if (pmap != kernel_pmap) {
1025#if 00
1026 assert(pmap->stats.device > 0);
1027 OSAddAtomic(-1, &pmap->stats.device);
1028#endif
1029 }
1030 if (iswired(*pte)) {
1031 assert(pmap->stats.wired_count >= 1);
1032 OSAddAtomic(-1, &pmap->stats.wired_count);
1033 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1034 }
1035 }
1036 }
1037
1038 /*
1039 * if we had a previously managed page locked, unlock it now
1040 */
1041 if (old_pa_locked) {
1042 UNLOCK_PVH(pai);
1043 old_pa_locked = FALSE;
1044 }
1045
1046 pai = pa_index(pa); /* now working with new incoming phys page */
1047 if (IS_MANAGED_PAGE(pai)) {
1048 /*
1049 * Step 2) Enter the mapping in the PV list for this
1050 * physical page.
1051 */
1052 pv_h = pai_to_pvh(pai);
1053
1054 LOCK_PVH(pai);
1055
1056 if (pv_h->pmap == PMAP_NULL) {
1057 /*
1058 * No mappings yet, use rooted pv
1059 */
1060 pv_h->va_and_flags = vaddr;
1061 pv_h->pmap = pmap;
1062 queue_init(&pv_h->qlink);
1063
1064 if (options & PMAP_OPTIONS_INTERNAL) {
1065 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
1066 } else {
1067 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
1068 }
1069 if (options & PMAP_OPTIONS_REUSABLE) {
1070 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1071 } else {
1072 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1073 }
1074 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1075 IS_INTERNAL_PAGE(pai)) {
1076 pv_h->va_and_flags |= PVE_IS_ALTACCT;
1077 is_altacct = TRUE;
1078 } else {
1079 pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
1080 is_altacct = FALSE;
1081 }
1082 } else {
1083 /*
1084 * Add new pv_hashed_entry after header.
1085 */
1086 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
1087 pvh_e = pvh_new;
1088 pvh_new = PV_HASHED_ENTRY_NULL;
1089 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
1090 PV_HASHED_ALLOC(&pvh_e);
1091 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1092 /*
1093 * the pv list is empty. if we are on
1094 * the kernel pmap we'll use one of
1095 * the special private kernel pv_e's,
1096 * else, we need to unlock
1097 * everything, zalloc a pv_e, and
1098 * restart bringing in the pv_e with
1099 * us.
1100 */
1101 if (kernel_pmap == pmap) {
1102 PV_HASHED_KERN_ALLOC(&pvh_e);
1103 } else {
1104 UNLOCK_PVH(pai);
1105 PTE_LOCK_UNLOCK(pte);
1106 PMAP_UNLOCK_SHARED(pmap);
1107 pmap_pv_throttle(pmap);
1108 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1109 goto Retry;
1110 }
1111 }
1112 }
1113
1114 if (PV_HASHED_ENTRY_NULL == pvh_e) {
1115 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
1116 }
1117
1118 pvh_e->va_and_flags = vaddr;
1119 pvh_e->pmap = pmap;
1120 pvh_e->ppn = pn;
1121 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
1122 IS_INTERNAL_PAGE(pai)) {
1123 pvh_e->va_and_flags |= PVE_IS_ALTACCT;
1124 is_altacct = TRUE;
1125 } else {
1126 pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
1127 is_altacct = FALSE;
1128 }
1129 pv_hash_add(pvh_e, pv_h);
1130
1131 /*
1132 * Remember that we used the pvlist entry.
1133 */
1134 pvh_e = PV_HASHED_ENTRY_NULL;
1135 }
1136
1137 /*
1138 * only count the mapping
1139 * for 'managed memory'
1140 */
1141 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1142 OSAddAtomic(+1, &pmap->stats.resident_count);
1143 if (pmap->stats.resident_count > pmap->stats.resident_max) {
1144 pmap->stats.resident_max = pmap->stats.resident_count;
1145 }
1146 if (pmap != kernel_pmap) {
1147 /* update pmap stats */
1148 if (IS_REUSABLE_PAGE(pai)) {
1149 OSAddAtomic(+1, &pmap->stats.reusable);
1150 PMAP_STATS_PEAK(pmap->stats.reusable);
1151 } else if (IS_INTERNAL_PAGE(pai)) {
1152 OSAddAtomic(+1, &pmap->stats.internal);
1153 PMAP_STATS_PEAK(pmap->stats.internal);
1154 } else {
1155 OSAddAtomic(+1, &pmap->stats.external);
1156 PMAP_STATS_PEAK(pmap->stats.external);
1157 }
1158
1159 /* update ledgers */
1160 if (is_altacct) {
1161 /* internal but also alternate accounting */
1162 assert(IS_INTERNAL_PAGE(pai));
1163 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1164 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1165 /* alternate accounting, so not in footprint */
1166 } else if (IS_REUSABLE_PAGE(pai)) {
1167 assert(!is_altacct);
1168 assert(IS_INTERNAL_PAGE(pai));
1169 /* internal but reusable: not in footprint */
1170 } else if (IS_INTERNAL_PAGE(pai)) {
1171 assert(!is_altacct);
1172 assert(!IS_REUSABLE_PAGE(pai));
1173 /* internal: add to footprint */
1174 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
1175 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1176 } else {
1177 /* not internal: not in footprint */
1178 }
1179 }
1180 } else if (last_managed_page == 0) {
1181 /* Account for early mappings created before "managed pages"
1182 * are determined. Consider consulting the available DRAM map.
1183 */
1184 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1185 OSAddAtomic(+1, &pmap->stats.resident_count);
1186 if (pmap != kernel_pmap) {
1187#if 00
1188 OSAddAtomic(+1, &pmap->stats.device);
1189 PMAP_STATS_PEAK(pmap->stats.device);
1190#endif
1191 }
1192 }
1193 /*
1194 * Step 3) Enter the mapping.
1195 *
1196 * Build a template to speed up entering -
1197 * only the pfn changes.
1198 */
1199 template = pa_to_pte(pa);
1200
1201 if (!is_ept) {
1202 template |= INTEL_PTE_VALID;
1203 } else {
1204 template |= INTEL_EPT_IPAT;
1205 }
1206
1207
1208 /*
1209 * DRK: It may be worth asserting on cache attribute flags that diverge
1210 * from the existing physical page attributes.
1211 */
1212
1213 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
1214
1215 /*
1216 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1217 */
1218 if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
1219 if (!(flags & VM_MEM_GUARDED)) {
1220 template |= INTEL_PTE_PAT;
1221 }
1222 template |= INTEL_PTE_NCACHE;
1223 }
1224 if (pmap != kernel_pmap && !is_ept) {
1225 template |= INTEL_PTE_USER;
1226 }
1227 if (prot & VM_PROT_READ) {
1228 template |= PTE_READ(is_ept);
1229 }
1230 if (prot & VM_PROT_WRITE) {
1231 template |= PTE_WRITE(is_ept);
1232 if (is_ept && !pmap_ept_support_ad) {
1233 template |= PTE_MOD(is_ept);
1234 if (IS_MANAGED_PAGE(pai)) {
1235 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
1236 }
1237 }
1238 }
1239 if (prot & VM_PROT_EXECUTE) {
1240 assert(set_NX == 0);
1241 template = pte_set_ex(template, is_ept);
1242 }
1243
1244 if (set_NX) {
1245 template = pte_remove_ex(template, is_ept);
1246 }
1247 if (wired) {
1248 template |= INTEL_PTE_WIRED;
1249 OSAddAtomic(+1, &pmap->stats.wired_count);
1250 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1251 }
1252 if (__improbable(superpage)) {
1253 template |= INTEL_PTE_PS;
1254 }
1255
1256 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1257 if (__improbable(is_ept && !pmap_ept_support_ad)) {
1258 template |= PTE_REF(is_ept);
1259 if (IS_MANAGED_PAGE(pai)) {
1260 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
1261 }
1262 }
1263 template |= PTE_LOCK(is_ept);
1264 pmap_store_pte(pte, template);
1265
1266 /*
1267 * if this was a managed page we delayed unlocking the pv until here
1268 * to prevent pmap_page_protect et al from finding it until the pte
1269 * has been stored
1270 */
1271 if (IS_MANAGED_PAGE(pai)) {
1272 UNLOCK_PVH(pai);
1273 }
1274done2:
1275 if (need_tlbflush == TRUE) {
1276 if (options & PMAP_OPTIONS_NOFLUSH) {
1277 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1278 } else {
1279 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1280 }
1281 }
1282 if (ptelocked) {
1283 PTE_LOCK_UNLOCK(pte);
1284 }
1285 PMAP_UNLOCK_SHARED(pmap);
1286
1287 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1288 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1289 }
1290 if (pvh_new != PV_HASHED_ENTRY_NULL) {
1291 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1292 }
1293
1294 if (delpage_pm_obj) {
1295 vm_page_t m;
1296
1297 vm_object_lock(delpage_pm_obj);
1298 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1299 if (m == VM_PAGE_NULL) {
1300 panic("pmap_enter: pte page not in object");
1301 }
1302 VM_PAGE_FREE(m);
1303 vm_object_unlock(delpage_pm_obj);
1304 OSAddAtomic(-1, &inuse_ptepages_count);
1305 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1306 }
1307
1308 kr = KERN_SUCCESS;
1309done1:
1310 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
1311 return kr;
1312}
1313
1314/*
1315 * Remove a range of hardware page-table entries.
1316 * The entries given are the first (inclusive)
1317 * and last (exclusive) entries for the VM pages.
1318 * The virtual address is the va for the first pte.
1319 *
1320 * The pmap must be locked.
1321 * If the pmap is not the kernel pmap, the range must lie
1322 * entirely within one pte-page. This is NOT checked.
1323 * Assumes that the pte-page exists.
1324 */
1325
1326void
1327pmap_remove_range(
1328 pmap_t pmap,
1329 vm_map_offset_t start_vaddr,
1330 pt_entry_t *spte,
1331 pt_entry_t *epte)
1332{
1333 pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1334 PMAP_OPTIONS_REMOVE);
1335}
1336
1337static void
1338pmap_remove_range_options(
1339 pmap_t pmap,
1340 vm_map_offset_t start_vaddr,
1341 pt_entry_t *spte,
1342 pt_entry_t *epte,
1343 int options)
1344{
1345 pt_entry_t *cpte;
1346 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1347 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1348 pv_hashed_entry_t pvh_e;
1349 int pvh_cnt = 0;
1350 int num_removed, num_unwired, num_found, num_invalid;
1351 int stats_external, stats_internal, stats_reusable;
1352 uint64_t stats_compressed;
1353 int ledgers_internal, ledgers_alt_internal;
1354 uint64_t ledgers_compressed, ledgers_alt_compressed;
1355 ppnum_t pai;
1356 pmap_paddr_t pa;
1357 vm_map_offset_t vaddr;
1358 boolean_t is_ept = is_ept_pmap(pmap);
1359 boolean_t was_altacct;
1360
1361 num_removed = 0;
1362 num_unwired = 0;
1363 num_found = 0;
1364 num_invalid = 0;
1365 stats_external = 0;
1366 stats_internal = 0;
1367 stats_reusable = 0;
1368 stats_compressed = 0;
1369 ledgers_internal = 0;
1370 ledgers_compressed = 0;
1371 ledgers_alt_internal = 0;
1372 ledgers_alt_compressed = 0;
1373
1374 /* invalidate the PTEs first to "freeze" them */
1375 for (cpte = spte, vaddr = start_vaddr;
1376 cpte < epte;
1377 cpte++, vaddr += PAGE_SIZE_64) {
1378 pt_entry_t p = *cpte;
1379
1380 pa = pte_to_pa(p);
1381 if (pa == 0) {
1382 if ((options & PMAP_OPTIONS_REMOVE) &&
1383 (PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
1384 assert(pmap != kernel_pmap);
1385 /* one less "compressed"... */
1386 stats_compressed++;
1387 ledgers_compressed++;
1388 if (p & PTE_COMPRESSED_ALT) {
1389 /* ... but it used to be "ALTACCT" */
1390 ledgers_alt_compressed++;
1391 }
1392 /* clear marker(s) */
1393 /* XXX probably does not need to be atomic! */
1394 pmap_update_pte(cpte, INTEL_PTE_COMPRESSED_MASK, 0);
1395 }
1396 continue;
1397 }
1398 num_found++;
1399
1400 if (iswired(p)) {
1401 num_unwired++;
1402 }
1403
1404 pai = pa_index(pa);
1405
1406 if (!IS_MANAGED_PAGE(pai)) {
1407 /*
1408 * Outside range of managed physical memory.
1409 * Just remove the mappings.
1410 */
1411 pmap_store_pte(cpte, 0);
1412 continue;
1413 }
1414
1415 if ((p & PTE_VALID_MASK(is_ept)) == 0) {
1416 num_invalid++;
1417 }
1418
1419 /* invalidate the PTE */
1420 pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
1421 }
1422
1423 if (num_found == 0) {
1424 /* nothing was changed: we're done */
1425 goto update_counts;
1426 }
1427
1428 /* propagate the invalidates to other CPUs */
1429
1430 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1431
1432 for (cpte = spte, vaddr = start_vaddr;
1433 cpte < epte;
1434 cpte++, vaddr += PAGE_SIZE_64) {
1435 pa = pte_to_pa(*cpte);
1436 if (pa == 0) {
1437check_pte_for_compressed_marker:
1438 /*
1439 * This PTE could have been replaced with a
1440 * "compressed" marker after our first "freeze"
1441 * loop above, so check again.
1442 */
1443 if ((options & PMAP_OPTIONS_REMOVE) &&
1444 (PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
1445 assert(pmap != kernel_pmap);
1446 /* one less "compressed"... */
1447 stats_compressed++;
1448 ledgers_compressed++;
1449 if (*cpte & PTE_COMPRESSED_ALT) {
1450 /* ... but it used to be "ALTACCT" */
1451 ledgers_alt_compressed++;
1452 }
1453 pmap_store_pte(cpte, 0);
1454 }
1455 continue;
1456 }
1457
1458 pai = pa_index(pa);
1459
1460 LOCK_PVH(pai);
1461
1462 pa = pte_to_pa(*cpte);
1463 if (pa == 0) {
1464 UNLOCK_PVH(pai);
1465 goto check_pte_for_compressed_marker;
1466 }
1467
1468 /*
1469 * Remove the mapping from the pvlist for this physical page.
1470 */
1471 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);
1472
1473 num_removed++;
1474 /* update pmap stats */
1475 if (IS_REUSABLE_PAGE(pai)) {
1476 stats_reusable++;
1477 } else if (IS_INTERNAL_PAGE(pai)) {
1478 stats_internal++;
1479 } else {
1480 stats_external++;
1481 }
1482 /* update ledgers */
1483 if (was_altacct) {
1484 /* internal and alternate accounting */
1485 assert(IS_INTERNAL_PAGE(pai));
1486 ledgers_internal++;
1487 ledgers_alt_internal++;
1488 } else if (IS_REUSABLE_PAGE(pai)) {
1489 /* internal but reusable */
1490 assert(!was_altacct);
1491 assert(IS_INTERNAL_PAGE(pai));
1492 } else if (IS_INTERNAL_PAGE(pai)) {
1493 /* internal */
1494 assert(!was_altacct);
1495 assert(!IS_REUSABLE_PAGE(pai));
1496 ledgers_internal++;
1497 } else {
1498 /* not internal */
1499 }
1500
1501 /*
1502 * Get the modify and reference bits, then
1503 * nuke the entry in the page table
1504 */
1505 /* remember reference and change */
1506 if (!is_ept) {
1507 pmap_phys_attributes[pai] |=
1508 *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
1509 } else {
1510 pmap_phys_attributes[pai] |=
1511 ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1512 }
1513
1514 /* completely invalidate the PTE */
1515 pmap_store_pte(cpte, 0);
1516
1517 UNLOCK_PVH(pai);
1518
1519 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1520 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1521 pvh_eh = pvh_e;
1522
1523 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1524 pvh_et = pvh_e;
1525 }
1526 pvh_cnt++;
1527 }
1528 /* We can encounter at most 'num_found' PTEs for this level
1529 * Fewer may be encountered if some were replaced by
1530 * compressed markers. No new valid PTEs can be created
1531 * since the pmap lock is held exclusively.
1532 */
1533 if (num_removed == num_found) {
1534 break;
1535 }
1536 } /* for loop */
1537
1538 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1539 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1540 }
1541update_counts:
1542 /*
1543 * Update the counts
1544 */
1545#if TESTING
1546 if (pmap->stats.resident_count < num_removed) {
1547 panic("pmap_remove_range: resident_count");
1548 }
1549#endif
1550 if (num_removed) {
1551 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1552 PMAP_STATS_ASSERTF((pmap->stats.resident_count >= num_removed,
1553 "pmap=%p num_removed=%d stats.resident_count=%d",
1554 pmap, num_removed, pmap->stats.resident_count));
1555 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1556 }
1557
1558 if (pmap != kernel_pmap) {
1559 PMAP_STATS_ASSERTF((pmap->stats.external >= stats_external,
1560 "pmap=%p stats_external=%d stats.external=%d",
1561 pmap, stats_external, pmap->stats.external));
1562 PMAP_STATS_ASSERTF((pmap->stats.internal >= stats_internal,
1563 "pmap=%p stats_internal=%d stats.internal=%d",
1564 pmap, stats_internal, pmap->stats.internal));
1565 PMAP_STATS_ASSERTF((pmap->stats.reusable >= stats_reusable,
1566 "pmap=%p stats_reusable=%d stats.reusable=%d",
1567 pmap, stats_reusable, pmap->stats.reusable));
1568 PMAP_STATS_ASSERTF((pmap->stats.compressed >= stats_compressed,
1569 "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
1570 pmap, stats_compressed, pmap->stats.compressed));
1571
1572 /* update pmap stats */
1573 if (stats_external) {
1574 OSAddAtomic(-stats_external, &pmap->stats.external);
1575 }
1576 if (stats_internal) {
1577 OSAddAtomic(-stats_internal, &pmap->stats.internal);
1578 }
1579 if (stats_reusable) {
1580 OSAddAtomic(-stats_reusable, &pmap->stats.reusable);
1581 }
1582 if (stats_compressed) {
1583 OSAddAtomic64(-stats_compressed, &pmap->stats.compressed);
1584 }
1585 /* update ledgers */
1586
1587 if (ledgers_internal) {
1588 pmap_ledger_debit(pmap,
1589 task_ledgers.internal,
1590 machine_ptob(ledgers_internal));
1591 }
1592 if (ledgers_compressed) {
1593 pmap_ledger_debit(pmap,
1594 task_ledgers.internal_compressed,
1595 machine_ptob(ledgers_compressed));
1596 }
1597 if (ledgers_alt_internal) {
1598 pmap_ledger_debit(pmap,
1599 task_ledgers.alternate_accounting,
1600 machine_ptob(ledgers_alt_internal));
1601 }
1602 if (ledgers_alt_compressed) {
1603 pmap_ledger_debit(pmap,
1604 task_ledgers.alternate_accounting_compressed,
1605 machine_ptob(ledgers_alt_compressed));
1606 }
1607
1608 uint64_t net_debit = (ledgers_internal - ledgers_alt_internal) + (ledgers_compressed - ledgers_alt_compressed);
1609 if (net_debit) {
1610 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(net_debit));
1611 }
1612 }
1613
1614#if TESTING
1615 if (pmap->stats.wired_count < num_unwired) {
1616 panic("pmap_remove_range: wired_count");
1617 }
1618#endif
1619 PMAP_STATS_ASSERTF((pmap->stats.wired_count >= num_unwired,
1620 "pmap=%p num_unwired=%d stats.wired_count=%d",
1621 pmap, num_unwired, pmap->stats.wired_count));
1622
1623 if (num_unwired != 0) {
1624 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
1625 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1626 }
1627 return;
1628}
1629
1630
1631/*
1632 * Remove the given range of addresses
1633 * from the specified map.
1634 *
1635 * It is assumed that the start and end are properly
1636 * rounded to the hardware page size.
1637 */
1638void
1639pmap_remove(
1640 pmap_t map,
1641 addr64_t s64,
1642 addr64_t e64)
1643{
1644 pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1645}
1646#define PLCHECK_THRESHOLD (2)
1647
1648void
1649pmap_remove_options(
1650 pmap_t map,
1651 addr64_t s64,
1652 addr64_t e64,
1653 int options)
1654{
1655 pt_entry_t *pde;
1656 pt_entry_t *spte, *epte;
1657 addr64_t l64;
1658 uint64_t deadline = 0;
1659 boolean_t is_ept;
1660
1661 pmap_intr_assert();
1662
1663 if (map == PMAP_NULL || s64 == e64) {
1664 return;
1665 }
1666
1667 is_ept = is_ept_pmap(map);
1668
1669 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1670 VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
1671 VM_KERNEL_ADDRHIDE(e64));
1672
1673 PMAP_LOCK_EXCLUSIVE(map);
1674 uint32_t traverse_count = 0;
1675
1676 while (s64 < e64) {
1677 pml4_entry_t *pml4e = pmap64_pml4(map, s64);
1678 if ((pml4e == NULL) ||
1679 ((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
1680 s64 = (s64 + NBPML4) & ~(PML4MASK);
1681 continue;
1682 }
1683 pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
1684 if ((pdpte == NULL) ||
1685 ((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
1686 s64 = (s64 + NBPDPT) & ~(PDPTMASK);
1687 continue;
1688 }
1689
1690 l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
1691
1692 if (l64 > e64) {
1693 l64 = e64;
1694 }
1695
1696 pde = pmap_pde(map, s64);
1697
1698 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1699 if (*pde & PTE_PS) {
1700 /*
1701 * If we're removing a superpage, pmap_remove_range()
1702 * must work on level 2 instead of level 1; and we're
1703 * only passing a single level 2 entry instead of a
1704 * level 1 range.
1705 */
1706 spte = pde;
1707 epte = spte + 1; /* excluded */
1708 } else {
1709 spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
1710 spte = &spte[ptenum(s64)];
1711 epte = &spte[intel_btop(l64 - s64)];
1712 }
1713 pmap_remove_range_options(map, s64, spte, epte,
1714 options);
1715 }
1716 s64 = l64;
1717
1718 if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
1719 if (deadline == 0) {
1720 deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
1721 } else {
1722 if (rdtsc64_nofence() > deadline) {
1723 PMAP_UNLOCK_EXCLUSIVE(map);
1724 __builtin_ia32_pause();
1725 PMAP_LOCK_EXCLUSIVE(map);
1726 deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
1727 }
1728 }
1729 }
1730 }
1731
1732 PMAP_UNLOCK_EXCLUSIVE(map);
1733
1734 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
1735}
1736
1737void
1738pmap_page_protect(
1739 ppnum_t pn,
1740 vm_prot_t prot)
1741{
1742 pmap_page_protect_options(pn, prot, 0, NULL);
1743}
1744
1745/*
1746 * Routine: pmap_page_protect_options
1747 *
1748 * Function:
1749 * Lower the permission for all mappings to a given
1750 * page.
1751 */
1752void
1753pmap_page_protect_options(
1754 ppnum_t pn,
1755 vm_prot_t prot,
1756 unsigned int options,
1757 void *arg)
1758{
1759 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1760 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1761 pv_hashed_entry_t nexth;
1762 int pvh_cnt = 0;
1763 pv_rooted_entry_t pv_h;
1764 pv_rooted_entry_t pv_e;
1765 pv_hashed_entry_t pvh_e;
1766 pt_entry_t *pte;
1767 int pai;
1768 pmap_t pmap;
1769 boolean_t remove;
1770 pt_entry_t new_pte_value;
1771 boolean_t is_ept;
1772
1773 pmap_intr_assert();
1774 assert(pn != vm_page_fictitious_addr);
1775 if (pn == vm_page_guard_addr) {
1776 return;
1777 }
1778
1779 pai = ppn_to_pai(pn);
1780
1781 if (!IS_MANAGED_PAGE(pai)) {
1782 /*
1783 * Not a managed page.
1784 */
1785 return;
1786 }
1787
1788 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
1789
1790 /*
1791 * Determine the new protection.
1792 */
1793 switch (prot) {
1794 case VM_PROT_READ:
1795 case VM_PROT_READ | VM_PROT_EXECUTE:
1796 remove = FALSE;
1797 break;
1798 case VM_PROT_ALL:
1799 return; /* nothing to do */
1800 default:
1801 remove = TRUE;
1802 break;
1803 }
1804
1805 pv_h = pai_to_pvh(pai);
1806
1807 LOCK_PVH(pai);
1808
1809
1810 /*
1811 * Walk down PV list, if any, changing or removing all mappings.
1812 */
1813 if (pv_h->pmap == PMAP_NULL) {
1814 goto done;
1815 }
1816
1817 pv_e = pv_h;
1818 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1819
1820 do {
1821 vm_map_offset_t vaddr;
1822
1823 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1824 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1825 /* page was modified, so it will be compressed */
1826 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1827 options |= PMAP_OPTIONS_COMPRESSOR;
1828 }
1829
1830 pmap = pv_e->pmap;
1831 is_ept = is_ept_pmap(pmap);
1832 vaddr = PVE_VA(pv_e);
1833 pte = pmap_pte(pmap, vaddr);
1834
1835 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1836 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1837
1838 if (0 == pte) {
1839 panic("pmap_page_protect() "
1840 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1841 pmap, pn, vaddr);
1842 }
1843 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1844
1845 /*
1846 * Remove the mapping if new protection is NONE
1847 */
1848 if (remove) {
1849 /* Remove per-pmap wired count */
1850 if (iswired(*pte)) {
1851 OSAddAtomic(-1, &pmap->stats.wired_count);
1852 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1853 }
1854
1855 if (pmap != kernel_pmap &&
1856 (options & PMAP_OPTIONS_COMPRESSOR) &&
1857 IS_INTERNAL_PAGE(pai)) {
1858 assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
1859 /* mark this PTE as having been "compressed" */
1860 new_pte_value = PTE_COMPRESSED;
1861 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1862 new_pte_value |= PTE_COMPRESSED_ALT;
1863 }
1864 } else {
1865 new_pte_value = 0;
1866 }
1867
1868 if (options & PMAP_OPTIONS_NOREFMOD) {
1869 pmap_store_pte(pte, new_pte_value);
1870
1871 if (options & PMAP_OPTIONS_NOFLUSH) {
1872 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1873 } else {
1874 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1875 }
1876 } else {
1877 /*
1878 * Remove the mapping, collecting dirty bits.
1879 */
1880 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
1881
1882 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1883 if (!is_ept) {
1884 pmap_phys_attributes[pai] |=
1885 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
1886 } else {
1887 pmap_phys_attributes[pai] |=
1888 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1889 }
1890 if ((options &
1891 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1892 IS_INTERNAL_PAGE(pai) &&
1893 (pmap_phys_attributes[pai] &
1894 PHYS_MODIFIED)) {
1895 /*
1896 * Page is actually "modified" and
1897 * will be compressed. Start
1898 * accounting for it as "compressed".
1899 */
1900 assert(!(options & PMAP_OPTIONS_COMPRESSOR));
1901 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1902 options |= PMAP_OPTIONS_COMPRESSOR;
1903 assert(new_pte_value == 0);
1904 if (pmap != kernel_pmap) {
1905 new_pte_value = PTE_COMPRESSED;
1906 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1907 new_pte_value |= PTE_COMPRESSED_ALT;
1908 }
1909 }
1910 }
1911 pmap_store_pte(pte, new_pte_value);
1912 }
1913
1914#if TESTING
1915 if (pmap->stats.resident_count < 1) {
1916 panic("pmap_page_protect: resident_count");
1917 }
1918#endif
1919 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1920 assert(pmap->stats.resident_count >= 1);
1921 OSAddAtomic(-1, &pmap->stats.resident_count);
1922
1923 /*
1924 * We only ever compress internal pages.
1925 */
1926 if (options & PMAP_OPTIONS_COMPRESSOR) {
1927 assert(IS_INTERNAL_PAGE(pai));
1928 }
1929 if (pmap != kernel_pmap) {
1930 /* update pmap stats */
1931 if (IS_REUSABLE_PAGE(pai)) {
1932 assert(pmap->stats.reusable > 0);
1933 OSAddAtomic(-1, &pmap->stats.reusable);
1934 } else if (IS_INTERNAL_PAGE(pai)) {
1935 assert(pmap->stats.internal > 0);
1936 OSAddAtomic(-1, &pmap->stats.internal);
1937 } else {
1938 assert(pmap->stats.external > 0);
1939 OSAddAtomic(-1, &pmap->stats.external);
1940 }
1941 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
1942 IS_INTERNAL_PAGE(pai)) {
1943 /* adjust "compressed" stats */
1944 OSAddAtomic64(+1, &pmap->stats.compressed);
1945 PMAP_STATS_PEAK(pmap->stats.compressed);
1946 pmap->stats.compressed_lifetime++;
1947 }
1948
1949 /* update ledgers */
1950 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1951 assert(IS_INTERNAL_PAGE(pai));
1952 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1953 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1954 if (options & PMAP_OPTIONS_COMPRESSOR) {
1955 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1956 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
1957 }
1958 } else if (IS_REUSABLE_PAGE(pai)) {
1959 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1960 assert(IS_INTERNAL_PAGE(pai));
1961 if (options & PMAP_OPTIONS_COMPRESSOR) {
1962 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1963 /* was not in footprint, but is now */
1964 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1965 }
1966 } else if (IS_INTERNAL_PAGE(pai)) {
1967 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1968 assert(!IS_REUSABLE_PAGE(pai));
1969 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1970 /*
1971 * Update all stats related to physical
1972 * footprint, which only deals with
1973 * internal pages.
1974 */
1975 if (options & PMAP_OPTIONS_COMPRESSOR) {
1976 /*
1977 * This removal is only being
1978 * done so we can send this page
1979 * to the compressor; therefore
1980 * it mustn't affect total task
1981 * footprint.
1982 */
1983 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1984 } else {
1985 /*
1986 * This internal page isn't
1987 * going to the compressor,
1988 * so adjust stats to keep
1989 * phys_footprint up to date.
1990 */
1991 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1992 }
1993 }
1994 }
1995
1996 /*
1997 * Deal with the pv_rooted_entry.
1998 */
1999
2000 if (pv_e == pv_h) {
2001 /*
2002 * Fix up head later.
2003 */
2004 pv_h->pmap = PMAP_NULL;
2005 } else {
2006 /*
2007 * Delete this entry.
2008 */
2009 pv_hash_remove(pvh_e);
2010 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
2011 pvh_eh = pvh_e;
2012
2013 if (pvh_et == PV_HASHED_ENTRY_NULL) {
2014 pvh_et = pvh_e;
2015 }
2016 pvh_cnt++;
2017 }
2018 } else {
2019 /*
2020 * Write-protect, after opportunistic refmod collect
2021 */
2022 if (!is_ept) {
2023 pmap_phys_attributes[pai] |=
2024 *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
2025 } else {
2026 pmap_phys_attributes[pai] |=
2027 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
2028 }
2029 pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
2030
2031 if (options & PMAP_OPTIONS_NOFLUSH) {
2032 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
2033 } else {
2034 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
2035 }
2036 }
2037 pvh_e = nexth;
2038 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
2039
2040
2041 /*
2042 * If pv_head mapping was removed, fix it up.
2043 */
2044 if (pv_h->pmap == PMAP_NULL) {
2045 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
2046
2047 if (pvh_e != (pv_hashed_entry_t) pv_h) {
2048 pv_hash_remove(pvh_e);
2049 pv_h->pmap = pvh_e->pmap;
2050 pv_h->va_and_flags = pvh_e->va_and_flags;
2051 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
2052 pvh_eh = pvh_e;
2053
2054 if (pvh_et == PV_HASHED_ENTRY_NULL) {
2055 pvh_et = pvh_e;
2056 }
2057 pvh_cnt++;
2058 }
2059 }
2060 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
2061 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
2062 }
2063done:
2064 UNLOCK_PVH(pai);
2065
2066 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
2067}
2068
2069
2070/*
2071 * Clear specified attribute bits.
2072 */
2073void
2074phys_attribute_clear(
2075 ppnum_t pn,
2076 int bits,
2077 unsigned int options,
2078 void *arg)
2079{
2080 pv_rooted_entry_t pv_h;
2081 pv_hashed_entry_t pv_e;
2082 pt_entry_t *pte = NULL;
2083 int pai;
2084 pmap_t pmap;
2085 char attributes = 0;
2086 boolean_t is_internal, is_reusable, is_altacct, is_ept;
2087 int ept_bits_to_clear;
2088 boolean_t ept_keep_global_mod = FALSE;
2089
2090 if ((bits & PHYS_MODIFIED) &&
2091 (options & PMAP_OPTIONS_NOFLUSH) &&
2092 arg == NULL) {
2093 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
2094 "should not clear 'modified' without flushing TLBs\n",
2095 pn, bits, options, arg);
2096 }
2097
2098 /* We only support converting MOD and REF bits for EPT PTEs in this function */
2099 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
2100
2101 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
2102
2103 pmap_intr_assert();
2104 assert(pn != vm_page_fictitious_addr);
2105 if (pn == vm_page_guard_addr) {
2106 return;
2107 }
2108
2109 pai = ppn_to_pai(pn);
2110
2111 if (!IS_MANAGED_PAGE(pai)) {
2112 /*
2113 * Not a managed page.
2114 */
2115 return;
2116 }
2117
2118 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
2119
2120 pv_h = pai_to_pvh(pai);
2121
2122 LOCK_PVH(pai);
2123
2124
2125 /*
2126 * Walk down PV list, clearing all modify or reference bits.
2127 * We do not have to lock the pv_list because we have
2128 * the per-pmap lock
2129 */
2130 if (pv_h->pmap != PMAP_NULL) {
2131 /*
2132 * There are some mappings.
2133 */
2134
2135 is_internal = IS_INTERNAL_PAGE(pai);
2136 is_reusable = IS_REUSABLE_PAGE(pai);
2137
2138 pv_e = (pv_hashed_entry_t)pv_h;
2139
2140 do {
2141 vm_map_offset_t va;
2142 char pte_bits;
2143
2144 pmap = pv_e->pmap;
2145 is_ept = is_ept_pmap(pmap);
2146 is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
2147 va = PVE_VA(pv_e);
2148 pte_bits = 0;
2149
2150 if (bits) {
2151 pte = pmap_pte(pmap, va);
2152 /* grab ref/mod bits from this PTE */
2153 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
2154 /* propagate to page's global attributes */
2155 if (!is_ept) {
2156 attributes |= pte_bits;
2157 } else {
2158 attributes |= ept_refmod_to_physmap(pte_bits);
2159 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
2160 ept_keep_global_mod = TRUE;
2161 }
2162 }
2163 /* which bits to clear for this PTE? */
2164 if (!is_ept) {
2165 pte_bits &= bits;
2166 } else {
2167 pte_bits &= ept_bits_to_clear;
2168 }
2169 }
2170 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
2171 pte_bits |= PTE_WRITE(is_ept);
2172 }
2173
2174 /*
2175 * Clear modify and/or reference bits.
2176 */
2177 if (pte_bits) {
2178 pmap_update_pte(pte, pte_bits, 0);
2179
2180 /* Ensure all processors using this translation
2181 * invalidate this TLB entry. The invalidation
2182 * *must* follow the PTE update, to ensure that
2183 * the TLB shadow of the 'D' bit (in particular)
2184 * is synchronized with the updated PTE.
2185 */
2186 if (!(options & PMAP_OPTIONS_NOFLUSH)) {
2187 /* flush TLBS now */
2188 PMAP_UPDATE_TLBS(pmap,
2189 va,
2190 va + PAGE_SIZE);
2191 } else if (arg) {
2192 /* delayed TLB flush: add "pmap" info */
2193 PMAP_UPDATE_TLBS_DELAYED(
2194 pmap,
2195 va,
2196 va + PAGE_SIZE,
2197 (pmap_flush_context *)arg);
2198 } else {
2199 /* no TLB flushing at all */
2200 }
2201 }
2202
2203 /* update pmap "reusable" stats */
2204 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
2205 is_reusable &&
2206 pmap != kernel_pmap) {
2207 /* one less "reusable" */
2208 assert(pmap->stats.reusable > 0);
2209 OSAddAtomic(-1, &pmap->stats.reusable);
2210 if (is_internal) {
2211 /* one more "internal" */
2212 OSAddAtomic(+1, &pmap->stats.internal);
2213 PMAP_STATS_PEAK(pmap->stats.internal);
2214 assert(pmap->stats.internal > 0);
2215 if (is_altacct) {
2216 /* no impact on ledgers */
2217 } else {
2218 pmap_ledger_credit(pmap,
2219 task_ledgers.internal,
2220 PAGE_SIZE);
2221 pmap_ledger_credit(
2222 pmap,
2223 task_ledgers.phys_footprint,
2224 PAGE_SIZE);
2225 }
2226 } else {
2227 /* one more "external" */
2228 OSAddAtomic(+1, &pmap->stats.external);
2229 PMAP_STATS_PEAK(pmap->stats.external);
2230 assert(pmap->stats.external > 0);
2231 }
2232 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
2233 !is_reusable &&
2234 pmap != kernel_pmap) {
2235 /* one more "reusable" */
2236 OSAddAtomic(+1, &pmap->stats.reusable);
2237 PMAP_STATS_PEAK(pmap->stats.reusable);
2238 assert(pmap->stats.reusable > 0);
2239 if (is_internal) {
2240 /* one less "internal" */
2241 assert(pmap->stats.internal > 0);
2242 OSAddAtomic(-1, &pmap->stats.internal);
2243 if (is_altacct) {
2244 /* no impact on footprint */
2245 } else {
2246 pmap_ledger_debit(pmap,
2247 task_ledgers.internal,
2248 PAGE_SIZE);
2249 pmap_ledger_debit(
2250 pmap,
2251 task_ledgers.phys_footprint,
2252 PAGE_SIZE);
2253 }
2254 } else {
2255 /* one less "external" */
2256 assert(pmap->stats.external > 0);
2257 OSAddAtomic(-1, &pmap->stats.external);
2258 }
2259 }
2260
2261 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2262 } while (pv_e != (pv_hashed_entry_t)pv_h);
2263 }
2264 /* Opportunistic refmod collection, annulled
2265 * if both REF and MOD are being cleared.
2266 */
2267
2268 pmap_phys_attributes[pai] |= attributes;
2269
2270 if (ept_keep_global_mod) {
2271 /*
2272 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2273 * requesting that we clear the modified bit for a phys page, we need
2274 * to ensure that there are no EPT mappings for the page with the
2275 * modified bit set. If there are, we cannot clear the global modified bit.
2276 */
2277 bits &= ~PHYS_MODIFIED;
2278 }
2279 pmap_phys_attributes[pai] &= ~(bits);
2280
2281 /* update this page's "reusable" status */
2282 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
2283 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
2284 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
2285 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
2286 }
2287
2288 UNLOCK_PVH(pai);
2289
2290 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
2291}
2292
2293/*
2294 * Check specified attribute bits.
2295 */
2296int
2297phys_attribute_test(
2298 ppnum_t pn,
2299 int bits)
2300{
2301 pv_rooted_entry_t pv_h;
2302 pv_hashed_entry_t pv_e;
2303 pt_entry_t *pte;
2304 int pai;
2305 pmap_t pmap;
2306 int attributes = 0;
2307 boolean_t is_ept;
2308
2309 pmap_intr_assert();
2310 assert(pn != vm_page_fictitious_addr);
2311 assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
2312 if (pn == vm_page_guard_addr) {
2313 return 0;
2314 }
2315
2316 pai = ppn_to_pai(pn);
2317
2318 if (!IS_MANAGED_PAGE(pai)) {
2319 /*
2320 * Not a managed page.
2321 */
2322 return 0;
2323 }
2324
2325 /*
2326 * Fast check... if bits already collected
2327 * no need to take any locks...
2328 * if not set, we need to recheck after taking
2329 * the lock in case they got pulled in while
2330 * we were waiting for the lock
2331 */
2332 if ((pmap_phys_attributes[pai] & bits) == bits) {
2333 return bits;
2334 }
2335
2336 pv_h = pai_to_pvh(pai);
2337
2338 LOCK_PVH(pai);
2339
2340 attributes = pmap_phys_attributes[pai] & bits;
2341
2342
2343 /*
2344 * Walk down PV list, checking the mappings until we
2345 * reach the end or we've found the desired attributes.
2346 */
2347 if (attributes != bits &&
2348 pv_h->pmap != PMAP_NULL) {
2349 /*
2350 * There are some mappings.
2351 */
2352 pv_e = (pv_hashed_entry_t)pv_h;
2353 do {
2354 vm_map_offset_t va;
2355
2356 pmap = pv_e->pmap;
2357 is_ept = is_ept_pmap(pmap);
2358 va = PVE_VA(pv_e);
2359 /*
2360 * pick up modify and/or reference bits from mapping
2361 */
2362
2363 pte = pmap_pte(pmap, va);
2364 if (!is_ept) {
2365 attributes |= (int)(*pte & bits);
2366 } else {
2367 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
2368 }
2369
2370 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2371 } while ((attributes != bits) &&
2372 (pv_e != (pv_hashed_entry_t)pv_h));
2373 }
2374 pmap_phys_attributes[pai] |= attributes;
2375
2376 UNLOCK_PVH(pai);
2377 return attributes;
2378}
2379
2380/*
2381 * Routine: pmap_change_wiring
2382 * Function: Change the wiring attribute for a map/virtual-address
2383 * pair.
2384 * In/out conditions:
2385 * The mapping must already exist in the pmap.
2386 */
2387void
2388pmap_change_wiring(
2389 pmap_t map,
2390 vm_map_offset_t vaddr,
2391 boolean_t wired)
2392{
2393 pt_entry_t *pte;
2394
2395 PMAP_LOCK_SHARED(map);
2396
2397 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
2398 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
2399 map, vaddr, wired);
2400 }
2401
2402 if (wired && !iswired(*pte)) {
2403 /*
2404 * wiring down mapping
2405 */
2406 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
2407 OSAddAtomic(+1, &map->stats.wired_count);
2408 pmap_update_pte(pte, 0, PTE_WIRED);
2409 } else if (!wired && iswired(*pte)) {
2410 /*
2411 * unwiring mapping
2412 */
2413 assert(map->stats.wired_count >= 1);
2414 OSAddAtomic(-1, &map->stats.wired_count);
2415 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
2416 pmap_update_pte(pte, PTE_WIRED, 0);
2417 }
2418
2419 PMAP_UNLOCK_SHARED(map);
2420}
2421
2422/*
2423 * "Backdoor" direct map routine for early mappings.
2424 * Useful for mapping memory outside the range
2425 * Sets A, D and NC if requested
2426 */
2427
2428vm_offset_t
2429pmap_map_bd(
2430 vm_offset_t virt,
2431 vm_map_offset_t start_addr,
2432 vm_map_offset_t end_addr,
2433 vm_prot_t prot,
2434 unsigned int flags)
2435{
2436 pt_entry_t template;
2437 pt_entry_t *ptep;
2438
2439 vm_offset_t base = virt;
2440 boolean_t doflush = FALSE;
2441
2442 template = pa_to_pte(start_addr)
2443 | INTEL_PTE_REF
2444 | INTEL_PTE_MOD
2445 | INTEL_PTE_WIRED
2446 | INTEL_PTE_VALID;
2447
2448 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
2449 template |= INTEL_PTE_NCACHE;
2450 if (!(flags & (VM_MEM_GUARDED))) {
2451 template |= INTEL_PTE_PAT;
2452 }
2453 }
2454
2455 if ((prot & VM_PROT_EXECUTE) == 0) {
2456 template |= INTEL_PTE_NX;
2457 }
2458
2459 if (prot & VM_PROT_WRITE) {
2460 template |= INTEL_PTE_WRITE;
2461 }
2462 vm_map_offset_t caddr = start_addr;
2463 while (caddr < end_addr) {
2464 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2465 if (ptep == PT_ENTRY_NULL) {
2466 panic("pmap_map_bd: Invalid kernel address");
2467 }
2468 if (pte_to_pa(*ptep)) {
2469 doflush = TRUE;
2470 }
2471 pmap_store_pte(ptep, template);
2472 pte_increment_pa(template);
2473 virt += PAGE_SIZE;
2474 caddr += PAGE_SIZE;
2475 }
2476 if (doflush) {
2477 pmap_tlbi_range(0, ~0ULL, true, 0);
2478 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2479 }
2480 return virt;
2481}
2482
2483/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
2484 * range. The aliased pagetable range is expanded if
2485 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
2486 * assumes caller has stabilized the source and destination ranges. Currently
2487 * used to populate sections of the trampoline "doublemap" at CPU startup.
2488 */
2489
2490void
2491pmap_alias(
2492 vm_offset_t ava,
2493 vm_map_offset_t start_addr,
2494 vm_map_offset_t end_addr,
2495 vm_prot_t prot,
2496 unsigned int eoptions)
2497{
2498 pt_entry_t prot_template, template;
2499 pt_entry_t *aptep, *sptep;
2500
2501 prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
2502 if ((prot & VM_PROT_EXECUTE) == 0) {
2503 prot_template |= INTEL_PTE_NX;
2504 }
2505
2506 if (prot & VM_PROT_WRITE) {
2507 prot_template |= INTEL_PTE_WRITE;
2508 }
2509 assert(((start_addr | end_addr) & PAGE_MASK) == 0);
2510 while (start_addr < end_addr) {
2511 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2512 if (aptep == PT_ENTRY_NULL) {
2513 if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
2514 pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
2515 aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
2516 } else {
2517 panic("pmap_alias: Invalid alias address");
2518 }
2519 }
2520 /* The aliased range should not have any active mappings */
2521 assert(pte_to_pa(*aptep) == 0);
2522
2523 sptep = pmap_pte(kernel_pmap, start_addr);
2524 assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
2525 template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
2526 pmap_store_pte(aptep, template);
2527
2528 ava += PAGE_SIZE;
2529 start_addr += PAGE_SIZE;
2530 }
2531}
2532
/*
 * Count the bytes in [s64, e64) of 'pmap' that are backed by a resident
 * page, and the bytes whose PTE carries the PTE_COMPRESSED marker.
 *
 * Returns the resident byte count; the compressed byte count is stored
 * through 'compressed_bytes_p' when that pointer is non-NULL.  A NULL
 * pmap, the kernel pmap, or an empty range (s64 == e64) yields 0 for both.
 * Ranges covered by a superpage PDE are skipped and contribute nothing.
 */
mach_vm_size_t
pmap_query_resident(
	pmap_t          pmap,
	addr64_t        s64,
	addr64_t        e64,
	mach_vm_size_t  *compressed_bytes_p)
{
	pt_entry_t     *pde;
	pt_entry_t     *spte, *epte;
	addr64_t        l64;
	uint64_t        deadline = 0;
	mach_vm_size_t  resident_bytes;
	mach_vm_size_t  compressed_bytes;
	boolean_t       is_ept;

	pmap_intr_assert();

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	is_ept = is_ept_pmap(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_LOCK_EXCLUSIVE(pmap);
	/* number of PDE-sized steps taken so far; gates the lock-yield logic below */
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		/* l64: end of this step, the next PDE boundary clamped to e64 */
		l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
		if (l64 > e64) {
			l64 = e64;
		}
		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/* superpage: not supported */
			} else {
				/*
				 * Locate the first PTE of the page table covering
				 * s64, then narrow [spte, epte) to the PTEs that
				 * map [s64, l64).
				 */
				spte = pmap_pte(pmap,
				    (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					/* a non-zero physical address counts as resident */
					if (pte_to_pa(*spte) != 0) {
						resident_bytes += PAGE_SIZE;
					} else if (*spte & PTE_COMPRESSED) {
						compressed_bytes += PAGE_SIZE;
					}
				}
			}
		}
		s64 = l64;

		/*
		 * Once more than PLCHECK_THRESHOLD steps have been taken with
		 * work still remaining, arm a TSC deadline; each time it
		 * expires, briefly drop and retake the pmap lock and re-arm.
		 */
		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(pmap);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(pmap);
					deadline = rdtsc64() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    resident_bytes);

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}
	return resident_bytes;
}
2620
2621kern_return_t
2622pmap_query_page_info(
2623 pmap_t pmap,
2624 vm_map_offset_t va,
2625 int *disp_p)
2626{
2627 int disp;
2628 boolean_t is_ept;
2629 pmap_paddr_t pa;
2630 ppnum_t pai;
2631 pd_entry_t *pde;
2632 pt_entry_t *pte;
2633
2634 pmap_intr_assert();
2635 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
2636 *disp_p = 0;
2637 return KERN_INVALID_ARGUMENT;
2638 }
2639
2640 disp = 0;
2641 is_ept = is_ept_pmap(pmap);
2642
2643 PMAP_LOCK_EXCLUSIVE(pmap);
2644
2645 pde = pmap_pde(pmap, va);
2646 if (!pde ||
2647 !(*pde & PTE_VALID_MASK(is_ept)) ||
2648 (*pde & PTE_PS)) {
2649 goto done;
2650 }
2651
2652 pte = pmap_pte(pmap, va);
2653 if (pte == PT_ENTRY_NULL) {
2654 goto done;
2655 }
2656
2657 pa = pte_to_pa(*pte);
2658 if (pa == 0) {
2659 if (PTE_IS_COMPRESSED(*pte, pte, pmap, va)) {
2660 disp |= PMAP_QUERY_PAGE_COMPRESSED;
2661 if (*pte & PTE_COMPRESSED_ALT) {
2662 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
2663 }
2664 }
2665 } else {
2666 disp |= PMAP_QUERY_PAGE_PRESENT;
2667 pai = pa_index(pa);
2668 if (!IS_MANAGED_PAGE(pai)) {
2669 } else if (pmap_pv_is_altacct(pmap, va, pai)) {
2670 assert(IS_INTERNAL_PAGE(pai));
2671 disp |= PMAP_QUERY_PAGE_INTERNAL;
2672 disp |= PMAP_QUERY_PAGE_ALTACCT;
2673 } else if (IS_REUSABLE_PAGE(pai)) {
2674 disp |= PMAP_QUERY_PAGE_REUSABLE;
2675 } else if (IS_INTERNAL_PAGE(pai)) {
2676 disp |= PMAP_QUERY_PAGE_INTERNAL;
2677 }
2678 }
2679
2680done:
2681 PMAP_UNLOCK_EXCLUSIVE(pmap);
2682 *disp_p = disp;
2683 return KERN_SUCCESS;
2684}
2685
2686void
2687pmap_set_vm_map_cs_enforced(
2688 pmap_t pmap,
2689 bool new_value)
2690{
2691 PMAP_LOCK_EXCLUSIVE(pmap);
2692 pmap->pm_vm_map_cs_enforced = new_value;
2693 PMAP_UNLOCK_EXCLUSIVE(pmap);
2694}
2695extern int cs_process_enforcement_enable;
2696bool
2697pmap_get_vm_map_cs_enforced(
2698 pmap_t pmap)
2699{
2700 if (cs_process_enforcement_enable) {
2701 return true;
2702 }
2703 return pmap->pm_vm_map_cs_enforced;
2704}
2705
2706void
2707pmap_set_jit_entitled(__unused pmap_t pmap)
2708{
2709 /* The x86 pmap layer does not care if a map has a JIT entry. */
2710 return;
2711}
2712
2713bool
2714pmap_get_jit_entitled(__unused pmap_t pmap)
2715{
2716 /* The x86 pmap layer does not care if a map is using JIT. */
2717 return false;
2718}
2719
2720bool
2721pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
2722{
2723 /*
2724 * The x86 pmap layer does not apply any policy to any protection
2725 * types.
2726 */
2727 return false;
2728}
2729
/*
 * Stub: this implementation releases nothing and always reports 0.
 */
uint64_t
pmap_release_pages_fast(void)
{
	const uint64_t released = 0;

	return released;
}
2735
2736void
2737pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
2738{
2739 return;
2740}
2741
2742__dead2
2743void
2744pmap_ledger_alloc_init(size_t size)
2745{
2746 panic("%s: unsupported, "
2747 "size=%lu",
2748 __func__, size);
2749}
2750
2751__dead2
2752ledger_t
2753pmap_ledger_alloc(void)
2754{
2755 panic("%s: unsupported",
2756 __func__);
2757}
2758
2759__dead2
2760void
2761pmap_ledger_free(ledger_t ledger)
2762{
2763 panic("%s: unsupported, "
2764 "ledger=%p",
2765 __func__, ledger);
2766}
2767
2768kern_return_t
2769pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
2770 unsigned int level_mask __unused, size_t *bytes_copied __unused)
2771{
2772 return KERN_NOT_SUPPORTED;
2773}
2774
2775void *
2776pmap_map_compressor_page(ppnum_t pn)
2777{
2778 assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
2779 return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
2780}
2781
2782void
2783pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
2784{
2785}
2786
2787bool
2788pmap_clear_refmod_range_options(
2789 pmap_t pmap __unused,
2790 vm_map_address_t start __unused,
2791 vm_map_address_t end __unused,
2792 unsigned int mask __unused,
2793 unsigned int options __unused)
2794{
2795 /*
2796 * x86 doesn't have ranged tlbi instructions, and we already have
2797 * the pmap_flush_context. This operation isn't implemented.
2798 */
2799 return false;
2800}