]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/pmap_x86_common.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / osfmk / i386 / pmap_x86_common.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach_assert.h>
30
31 #include <vm/pmap.h>
32 #include <vm/vm_map.h>
33 #include <vm/vm_kern.h>
34 #include <kern/ledger.h>
35 #include <i386/pmap_internal.h>
36
37 void pmap_remove_range(
38 pmap_t pmap,
39 vm_map_offset_t va,
40 pt_entry_t *spte,
41 pt_entry_t *epte);
42
43 void pmap_remove_range_options(
44 pmap_t pmap,
45 vm_map_offset_t va,
46 pt_entry_t *spte,
47 pt_entry_t *epte,
48 int options);
49
50 void pmap_reusable_range(
51 pmap_t pmap,
52 vm_map_offset_t va,
53 pt_entry_t *spte,
54 pt_entry_t *epte,
55 boolean_t reusable);
56
57 uint32_t pmap_update_clear_pte_count;
58
59 /*
60 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
61 * on a NBPDE boundary.
62 */
63
64 /* These symbols may be referenced directly by VM */
65 uint64_t pmap_nesting_size_min = NBPDE;
66 uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
67
68 /*
69 * kern_return_t pmap_nest(grand, subord, va_start, size)
70 *
71 * grand = the pmap that we will nest subord into
72 * subord = the pmap that goes into the grand
73 * va_start = start of range in pmap to be inserted
74 * nstart = start of range in pmap nested pmap
75 * size = Size of nest area (up to 16TB)
76 *
77 * Inserts a pmap into another. This is used to implement shared segments.
78 *
79 * Note that we depend upon higher level VM locks to insure that things don't change while
80 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
81 * or do 2 nests at once.
82 */
83
84 /*
85 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
86 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
87 * container and the "grand" parent. A minor optimization to consider for the
88 * future: make the "subord" truly a container rather than a full-fledged
89 * pagetable hierarchy which can be unnecessarily sparse (DRK).
90 */
91
92 kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
93 vm_map_offset_t vaddr, nvaddr;
94 pd_entry_t *pde,*npde;
95 unsigned int i;
96 uint64_t num_pde;
97
98 assert(!is_ept_pmap(grand));
99 assert(!is_ept_pmap(subord));
100
101 if ((size & (pmap_nesting_size_min-1)) ||
102 (va_start & (pmap_nesting_size_min-1)) ||
103 (nstart & (pmap_nesting_size_min-1)) ||
104 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
105 return KERN_INVALID_VALUE;
106
107 if(size == 0) {
108 panic("pmap_nest: size is invalid - %016llX\n", size);
109 }
110
111 if (va_start != nstart)
112 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
113
114 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
115 (uintptr_t) grand, (uintptr_t) subord,
116 (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
117
118 nvaddr = (vm_map_offset_t)nstart;
119 num_pde = size >> PDESHIFT;
120
121 PMAP_LOCK(subord);
122
123 subord->pm_shared = TRUE;
124
125 for (i = 0; i < num_pde;) {
126 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
127
128 npde = pmap64_pdpt(subord, nvaddr);
129
130 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
131 PMAP_UNLOCK(subord);
132 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
133 PMAP_LOCK(subord);
134 npde = pmap64_pdpt(subord, nvaddr);
135 }
136 *npde |= INTEL_PDPTE_NESTED;
137 nvaddr += NBPDPT;
138 i += (uint32_t)NPDEPG;
139 }
140 else {
141 npde = pmap_pde(subord, nvaddr);
142
143 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
144 PMAP_UNLOCK(subord);
145 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
146 PMAP_LOCK(subord);
147 npde = pmap_pde(subord, nvaddr);
148 }
149 nvaddr += NBPDE;
150 i++;
151 }
152 }
153
154 PMAP_UNLOCK(subord);
155
156 vaddr = (vm_map_offset_t)va_start;
157
158 PMAP_LOCK(grand);
159
160 for (i = 0;i < num_pde;) {
161 pd_entry_t tpde;
162
163 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
164 npde = pmap64_pdpt(subord, vaddr);
165 if (npde == 0)
166 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
167 tpde = *npde;
168 pde = pmap64_pdpt(grand, vaddr);
169 if (0 == pde) {
170 PMAP_UNLOCK(grand);
171 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
172 PMAP_LOCK(grand);
173 pde = pmap64_pdpt(grand, vaddr);
174 }
175 if (pde == 0)
176 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
177 pmap_store_pte(pde, tpde);
178 vaddr += NBPDPT;
179 i += (uint32_t) NPDEPG;
180 }
181 else {
182 npde = pmap_pde(subord, vaddr);
183 if (npde == 0)
184 panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
185 tpde = *npde;
186 pde = pmap_pde(grand, vaddr);
187 if ((0 == pde) && cpu_64bit) {
188 PMAP_UNLOCK(grand);
189 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
190 PMAP_LOCK(grand);
191 pde = pmap_pde(grand, vaddr);
192 }
193
194 if (pde == 0)
195 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
196 vaddr += NBPDE;
197 pmap_store_pte(pde, tpde);
198 i++;
199 }
200 }
201
202 PMAP_UNLOCK(grand);
203
204 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
205
206 return KERN_SUCCESS;
207 }
208
209 /*
210 * kern_return_t pmap_unnest(grand, vaddr)
211 *
212 * grand = the pmap that we will un-nest subord from
213 * vaddr = start of range in pmap to be unnested
214 *
215 * Removes a pmap from another. This is used to implement shared segments.
216 */
217
218 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
219
220 pd_entry_t *pde;
221 unsigned int i;
222 uint64_t num_pde;
223 addr64_t va_start, va_end;
224 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
225
226 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
227 (uintptr_t) grand,
228 (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
229
230 if ((size & (pmap_nesting_size_min-1)) ||
231 (vaddr & (pmap_nesting_size_min-1))) {
232 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
233 grand, vaddr, size);
234 }
235
236 assert(!is_ept_pmap(grand));
237
238 /* align everything to PDE boundaries */
239 va_start = vaddr & ~(NBPDE-1);
240 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
241 size = va_end - va_start;
242
243 PMAP_LOCK(grand);
244
245 num_pde = size >> PDESHIFT;
246 vaddr = va_start;
247
248 for (i = 0; i < num_pde; ) {
249 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
250 npdpt = pdptnum(grand, vaddr);
251 pde = pmap64_pdpt(grand, vaddr);
252 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
253 pmap_store_pte(pde, (pd_entry_t)0);
254 i += (uint32_t) NPDEPG;
255 vaddr += NBPDPT;
256 continue;
257 }
258 }
259 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
260 if (pde == 0)
261 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
262 pmap_store_pte(pde, (pd_entry_t)0);
263 i++;
264 vaddr += NBPDE;
265 }
266
267 PMAP_UPDATE_TLBS(grand, va_start, va_end);
268
269 PMAP_UNLOCK(grand);
270
271 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
272
273 return KERN_SUCCESS;
274 }
275
276 kern_return_t
277 pmap_unnest_options(
278 pmap_t grand,
279 addr64_t vaddr,
280 __unused uint64_t size,
281 __unused unsigned int options) {
282 return pmap_unnest(grand, vaddr, size);
283 }
284
285 /* Invoked by the Mach VM to determine the platform specific unnest region */
286
287 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
288 pd_entry_t *pdpte;
289 boolean_t rval = FALSE;
290
291 if (!cpu_64bit)
292 return rval;
293
294 PMAP_LOCK(p);
295
296 pdpte = pmap64_pdpt(p, *s);
297 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
298 *s &= ~(NBPDPT -1);
299 rval = TRUE;
300 }
301
302 pdpte = pmap64_pdpt(p, *e);
303 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
304 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
305 rval = TRUE;
306 }
307
308 PMAP_UNLOCK(p);
309
310 return rval;
311 }
312
313 /*
314 * pmap_find_phys returns the (4K) physical page number containing a
315 * given virtual address in a given pmap.
316 * Note that pmap_pte may return a pde if this virtual address is
317 * mapped by a large page and this is taken into account in order
318 * to return the correct page number in this case.
319 */
320 ppnum_t
321 pmap_find_phys(pmap_t pmap, addr64_t va)
322 {
323 pt_entry_t *ptp;
324 pd_entry_t *pdep;
325 ppnum_t ppn = 0;
326 pd_entry_t pde;
327 pt_entry_t pte;
328 boolean_t is_ept;
329
330 is_ept = is_ept_pmap(pmap);
331
332 mp_disable_preemption();
333
334 /* This refcount test is a band-aid--several infrastructural changes
335 * are necessary to eliminate invocation of this routine from arbitrary
336 * contexts.
337 */
338
339 if (!pmap->ref_count)
340 goto pfp_exit;
341
342 pdep = pmap_pde(pmap, va);
343
344 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
345 if (pde & PTE_PS) {
346 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
347 ppn += (ppnum_t) ptenum(va);
348 }
349 else {
350 ptp = pmap_pte(pmap, va);
351 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
352 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
353 }
354 }
355 }
356 pfp_exit:
357 mp_enable_preemption();
358
359 return ppn;
360 }
361
362 /*
363 * Update cache attributes for all extant managed mappings.
364 * Assumes PV for this page is locked, and that the page
365 * is managed. We assume that this physical page may be mapped in
366 * both EPT and normal Intel PTEs, so we convert the attributes
367 * to the corresponding format for each pmap.
368 *
369 * We assert that the passed set of attributes is a subset of the
370 * PHYS_CACHEABILITY_MASK.
371 */
372 void
373 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
374 pv_rooted_entry_t pv_h, pv_e;
375 pv_hashed_entry_t pvh_e, nexth;
376 vm_map_offset_t vaddr;
377 pmap_t pmap;
378 pt_entry_t *ptep;
379 boolean_t is_ept;
380 unsigned ept_attributes;
381
382 assert(IS_MANAGED_PAGE(pn));
383 assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
384
385 /* We don't support the PTA bit for EPT PTEs */
386 if (attributes & INTEL_PTE_NCACHE)
387 ept_attributes = INTEL_EPT_NCACHE;
388 else
389 ept_attributes = INTEL_EPT_WB;
390
391 pv_h = pai_to_pvh(pn);
392 /* TODO: translate the PHYS_* bits to PTE bits, while they're
393 * currently identical, they may not remain so
394 * Potential optimization (here and in page_protect),
395 * parallel shootdowns, check for redundant
396 * attribute modifications.
397 */
398
399 /*
400 * Alter attributes on all mappings
401 */
402 if (pv_h->pmap != PMAP_NULL) {
403 pv_e = pv_h;
404 pvh_e = (pv_hashed_entry_t)pv_e;
405
406 do {
407 pmap = pv_e->pmap;
408 vaddr = PVE_VA(pv_e);
409 ptep = pmap_pte(pmap, vaddr);
410
411 if (0 == ptep)
412 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
413
414 is_ept = is_ept_pmap(pmap);
415
416 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
417 if (!is_ept) {
418 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
419 } else {
420 pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
421 }
422 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
423 pvh_e = nexth;
424 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
425 }
426 }
427
428 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
429 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
430
431 if (dofilter) {
432 CPU_CR3_MARK_INACTIVE();
433 } else {
434 CPU_CR3_MARK_ACTIVE();
435 mfence();
436 if (current_cpu_datap()->cpu_tlb_invalid)
437 process_pmap_updates();
438 }
439 }
440
441
442 /*
443 * Insert the given physical page (p) at
444 * the specified virtual address (v) in the
445 * target physical map with the protection requested.
446 *
447 * If specified, the page will be wired down, meaning
448 * that the related pte cannot be reclaimed.
449 *
450 * NB: This is the only routine which MAY NOT lazy-evaluate
451 * or lose information. That is, this routine must actually
452 * insert this page into the given map NOW.
453 */
454
455 void
456 pmap_enter(
457 pmap_t pmap,
458 vm_map_offset_t vaddr,
459 ppnum_t pn,
460 vm_prot_t prot,
461 vm_prot_t fault_type,
462 unsigned int flags,
463 boolean_t wired)
464 {
465 (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
466 }
467
468
469 kern_return_t
470 pmap_enter_options(
471 pmap_t pmap,
472 vm_map_offset_t vaddr,
473 ppnum_t pn,
474 vm_prot_t prot,
475 __unused vm_prot_t fault_type,
476 unsigned int flags,
477 boolean_t wired,
478 unsigned int options,
479 void *arg)
480 {
481 pt_entry_t *pte;
482 pv_rooted_entry_t pv_h;
483 ppnum_t pai;
484 pv_hashed_entry_t pvh_e;
485 pv_hashed_entry_t pvh_new;
486 pt_entry_t template;
487 pmap_paddr_t old_pa;
488 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
489 boolean_t need_tlbflush = FALSE;
490 boolean_t set_NX;
491 char oattr;
492 boolean_t old_pa_locked;
493 /* 2MiB mappings are confined to x86_64 by VM */
494 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
495 vm_object_t delpage_pm_obj = NULL;
496 uint64_t delpage_pde_index = 0;
497 pt_entry_t old_pte;
498 kern_return_t kr_expand;
499 boolean_t is_ept;
500 boolean_t is_altacct;
501
502 pmap_intr_assert();
503
504 if (pmap == PMAP_NULL)
505 return KERN_INVALID_ARGUMENT;
506
507 is_ept = is_ept_pmap(pmap);
508
509 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
510 * unused value for that scenario.
511 */
512 assert(pn != vm_page_fictitious_addr);
513
514 if (pn == vm_page_guard_addr)
515 return KERN_INVALID_ARGUMENT;
516
517 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
518 pmap,
519 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
520 pn, prot);
521
522 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
523 set_NX = FALSE;
524 else
525 set_NX = TRUE;
526
527 if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
528 set_NX = FALSE;
529 }
530
531 /*
532 * Must allocate a new pvlist entry while we're unlocked;
533 * zalloc may cause pageout (which will lock the pmap system).
534 * If we determine we need a pvlist entry, we will unlock
535 * and allocate one. Then we will retry, throughing away
536 * the allocated entry later (if we no longer need it).
537 */
538
539 pvh_new = PV_HASHED_ENTRY_NULL;
540 Retry:
541 pvh_e = PV_HASHED_ENTRY_NULL;
542
543 PMAP_LOCK(pmap);
544
545 /*
546 * Expand pmap to include this pte. Assume that
547 * pmap is always expanded to include enough hardware
548 * pages to map one VM page.
549 */
550 if (superpage) {
551 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
552 /* need room for another pde entry */
553 PMAP_UNLOCK(pmap);
554 kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
555 if (kr_expand != KERN_SUCCESS)
556 return kr_expand;
557 PMAP_LOCK(pmap);
558 }
559 } else {
560 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
561 /*
562 * Must unlock to expand the pmap
563 * going to grow pde level page(s)
564 */
565 PMAP_UNLOCK(pmap);
566 kr_expand = pmap_expand(pmap, vaddr, options);
567 if (kr_expand != KERN_SUCCESS)
568 return kr_expand;
569 PMAP_LOCK(pmap);
570 }
571 }
572 if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
573 PMAP_UNLOCK(pmap);
574 return KERN_SUCCESS;
575 }
576
577 if (superpage && *pte && !(*pte & PTE_PS)) {
578 /*
579 * There is still an empty page table mapped that
580 * was used for a previous base page mapping.
581 * Remember the PDE and the PDE index, so that we
582 * can free the page at the end of this function.
583 */
584 delpage_pde_index = pdeidx(pmap, vaddr);
585 delpage_pm_obj = pmap->pm_obj;
586 *pte = 0;
587 }
588
589 old_pa = pte_to_pa(*pte);
590 pai = pa_index(old_pa);
591 old_pa_locked = FALSE;
592
593 if (old_pa == 0 &&
594 PTE_IS_COMPRESSED(*pte)) {
595 /*
596 * "pmap" should be locked at this point, so this should
597 * not race with another pmap_enter() or pmap_remove_range().
598 */
599 assert(pmap != kernel_pmap);
600
601 /* one less "compressed" */
602 OSAddAtomic64(-1, &pmap->stats.compressed);
603 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
604 PAGE_SIZE);
605 if (*pte & PTE_COMPRESSED_ALT) {
606 pmap_ledger_debit(
607 pmap,
608 task_ledgers.alternate_accounting_compressed,
609 PAGE_SIZE);
610 } else {
611 /* was part of the footprint */
612 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
613 PAGE_SIZE);
614 }
615 /* marker will be cleared below */
616 }
617
618 /*
619 * if we have a previous managed page, lock the pv entry now. after
620 * we lock it, check to see if someone beat us to the lock and if so
621 * drop the lock
622 */
623 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
624 LOCK_PVH(pai);
625 old_pa_locked = TRUE;
626 old_pa = pte_to_pa(*pte);
627 if (0 == old_pa) {
628 UNLOCK_PVH(pai); /* another path beat us to it */
629 old_pa_locked = FALSE;
630 }
631 }
632
633 /*
634 * Special case if the incoming physical page is already mapped
635 * at this address.
636 */
637 if (old_pa == pa) {
638 pt_entry_t old_attributes =
639 *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
640
641 /*
642 * May be changing its wired attribute or protection
643 */
644
645 template = pa_to_pte(pa);
646
647 /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
648 if (!is_ept) {
649 template |= INTEL_PTE_VALID;
650 } else {
651 template |= INTEL_EPT_IPTA;
652 }
653
654 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
655
656 /*
657 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
658 */
659 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
660 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
661 if (!(flags & VM_MEM_GUARDED))
662 template |= INTEL_PTE_PTA;
663 template |= INTEL_PTE_NCACHE;
664 }
665 if (pmap != kernel_pmap && !is_ept)
666 template |= INTEL_PTE_USER;
667
668 if (prot & VM_PROT_READ)
669 template |= PTE_READ(is_ept);
670
671 if (prot & VM_PROT_WRITE) {
672 template |= PTE_WRITE(is_ept);
673 if (is_ept && !pmap_ept_support_ad) {
674 template |= PTE_MOD(is_ept);
675 if (old_pa_locked) {
676 assert(IS_MANAGED_PAGE(pai));
677 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
678 }
679 }
680 }
681 if (prot & VM_PROT_EXECUTE) {
682 assert(set_NX == 0);
683 template = pte_set_ex(template, is_ept);
684 }
685
686 if (set_NX)
687 template = pte_remove_ex(template, is_ept);
688
689 if (wired) {
690 template |= PTE_WIRED;
691 if (!iswired(old_attributes)) {
692 OSAddAtomic(+1, &pmap->stats.wired_count);
693 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
694 }
695 } else {
696 if (iswired(old_attributes)) {
697 assert(pmap->stats.wired_count >= 1);
698 OSAddAtomic(-1, &pmap->stats.wired_count);
699 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
700 }
701 }
702
703 if (superpage) /* this path can not be used */
704 template |= PTE_PS; /* to change the page size! */
705
706 if (old_attributes == template)
707 goto dont_update_pte;
708
709 /* Determine delta, PV locked */
710 need_tlbflush =
711 ((old_attributes ^ template) != PTE_WIRED);
712
713 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
714 if ((old_attributes ^ template) == PTE_WRITE(is_ept))
715 need_tlbflush = FALSE;
716 }
717
718 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
719 if (is_ept && !pmap_ept_support_ad) {
720 template |= PTE_REF(is_ept);
721 if (old_pa_locked) {
722 assert(IS_MANAGED_PAGE(pai));
723 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
724 }
725 }
726
727 /* store modified PTE and preserve RC bits */
728 pt_entry_t npte, opte;;
729 do {
730 opte = *pte;
731 npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
732 } while (!pmap_cmpx_pte(pte, opte, npte));
733 dont_update_pte:
734 if (old_pa_locked) {
735 UNLOCK_PVH(pai);
736 old_pa_locked = FALSE;
737 }
738 goto Done;
739 }
740
741 /*
742 * Outline of code from here:
743 * 1) If va was mapped, update TLBs, remove the mapping
744 * and remove old pvlist entry.
745 * 2) Add pvlist entry for new mapping
746 * 3) Enter new mapping.
747 *
748 * If the old physical page is not managed step 1) is skipped
749 * (except for updating the TLBs), and the mapping is
750 * overwritten at step 3). If the new physical page is not
751 * managed, step 2) is skipped.
752 */
753
754 if (old_pa != (pmap_paddr_t) 0) {
755 boolean_t was_altacct;
756
757 /*
758 * Don't do anything to pages outside valid memory here.
759 * Instead convince the code that enters a new mapping
760 * to overwrite the old one.
761 */
762
763 /* invalidate the PTE */
764 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
765 /* propagate invalidate everywhere */
766 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
767 /* remember reference and change */
768 old_pte = *pte;
769 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
770 /* completely invalidate the PTE */
771 pmap_store_pte(pte, 0);
772
773 if (IS_MANAGED_PAGE(pai)) {
774 /*
775 * Remove the mapping from the pvlist for
776 * this physical page.
777 * We'll end up with either a rooted pv or a
778 * hashed pv
779 */
780 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
781 }
782
783 if (IS_MANAGED_PAGE(pai)) {
784 pmap_assert(old_pa_locked == TRUE);
785 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
786 assert(pmap->stats.resident_count >= 1);
787 OSAddAtomic(-1, &pmap->stats.resident_count);
788 if (pmap != kernel_pmap) {
789 /* update pmap stats */
790 if (IS_REUSABLE_PAGE(pai)) {
791 PMAP_STATS_ASSERTF(
792 (pmap->stats.reusable > 0,
793 "reusable %d",
794 pmap->stats.reusable));
795 OSAddAtomic(-1, &pmap->stats.reusable);
796 } else if (IS_INTERNAL_PAGE(pai)) {
797 PMAP_STATS_ASSERTF(
798 (pmap->stats.internal > 0,
799 "internal %d",
800 pmap->stats.internal));
801 OSAddAtomic(-1, &pmap->stats.internal);
802 } else {
803 PMAP_STATS_ASSERTF(
804 (pmap->stats.external > 0,
805 "external %d",
806 pmap->stats.external));
807 OSAddAtomic(-1, &pmap->stats.external);
808 }
809
810 /* update ledgers */
811 if (was_altacct) {
812 assert(IS_INTERNAL_PAGE(pai));
813 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
814 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
815 } else if (IS_REUSABLE_PAGE(pai)) {
816 assert(!was_altacct);
817 assert(IS_INTERNAL_PAGE(pai));
818 /* was already not in phys_footprint */
819 } else if (IS_INTERNAL_PAGE(pai)) {
820 assert(!was_altacct);
821 assert(!IS_REUSABLE_PAGE(pai));
822 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
823 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
824 } else {
825 /* not an internal page */
826 }
827 }
828 if (iswired(*pte)) {
829 assert(pmap->stats.wired_count >= 1);
830 OSAddAtomic(-1, &pmap->stats.wired_count);
831 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
832 PAGE_SIZE);
833 }
834
835 if (!is_ept) {
836 pmap_phys_attributes[pai] |= oattr;
837 } else {
838 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
839 }
840
841 } else {
842
843 /*
844 * old_pa is not managed.
845 * Do removal part of accounting.
846 */
847
848 if (pmap != kernel_pmap) {
849 #if 00
850 assert(pmap->stats.device > 0);
851 OSAddAtomic(-1, &pmap->stats.device);
852 #endif
853 }
854 if (iswired(*pte)) {
855 assert(pmap->stats.wired_count >= 1);
856 OSAddAtomic(-1, &pmap->stats.wired_count);
857 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
858 }
859 }
860 }
861
862 /*
863 * if we had a previously managed paged locked, unlock it now
864 */
865 if (old_pa_locked) {
866 UNLOCK_PVH(pai);
867 old_pa_locked = FALSE;
868 }
869
870 pai = pa_index(pa); /* now working with new incoming phys page */
871 if (IS_MANAGED_PAGE(pai)) {
872
873 /*
874 * Step 2) Enter the mapping in the PV list for this
875 * physical page.
876 */
877 pv_h = pai_to_pvh(pai);
878
879 LOCK_PVH(pai);
880
881 if (pv_h->pmap == PMAP_NULL) {
882 /*
883 * No mappings yet, use rooted pv
884 */
885 pv_h->va_and_flags = vaddr;
886 pv_h->pmap = pmap;
887 queue_init(&pv_h->qlink);
888
889 if (options & PMAP_OPTIONS_INTERNAL) {
890 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
891 } else {
892 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
893 }
894 if (options & PMAP_OPTIONS_REUSABLE) {
895 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
896 } else {
897 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
898 }
899 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
900 IS_INTERNAL_PAGE(pai)) {
901 pv_h->va_and_flags |= PVE_IS_ALTACCT;
902 is_altacct = TRUE;
903 } else {
904 pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
905 is_altacct = FALSE;
906 }
907 } else {
908 /*
909 * Add new pv_hashed_entry after header.
910 */
911 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
912 pvh_e = pvh_new;
913 pvh_new = PV_HASHED_ENTRY_NULL;
914 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
915 PV_HASHED_ALLOC(&pvh_e);
916 if (PV_HASHED_ENTRY_NULL == pvh_e) {
917 /*
918 * the pv list is empty. if we are on
919 * the kernel pmap we'll use one of
920 * the special private kernel pv_e's,
921 * else, we need to unlock
922 * everything, zalloc a pv_e, and
923 * restart bringing in the pv_e with
924 * us.
925 */
926 if (kernel_pmap == pmap) {
927 PV_HASHED_KERN_ALLOC(&pvh_e);
928 } else {
929 UNLOCK_PVH(pai);
930 PMAP_UNLOCK(pmap);
931 pmap_pv_throttle(pmap);
932 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
933 goto Retry;
934 }
935 }
936 }
937
938 if (PV_HASHED_ENTRY_NULL == pvh_e)
939 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
940
941 pvh_e->va_and_flags = vaddr;
942 pvh_e->pmap = pmap;
943 pvh_e->ppn = pn;
944 if ((options & PMAP_OPTIONS_ALT_ACCT) &&
945 IS_INTERNAL_PAGE(pai)) {
946 pvh_e->va_and_flags |= PVE_IS_ALTACCT;
947 is_altacct = TRUE;
948 } else {
949 pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
950 is_altacct = FALSE;
951 }
952 pv_hash_add(pvh_e, pv_h);
953
954 /*
955 * Remember that we used the pvlist entry.
956 */
957 pvh_e = PV_HASHED_ENTRY_NULL;
958 }
959
960 /*
961 * only count the mapping
962 * for 'managed memory'
963 */
964 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
965 OSAddAtomic(+1, &pmap->stats.resident_count);
966 if (pmap->stats.resident_count > pmap->stats.resident_max) {
967 pmap->stats.resident_max = pmap->stats.resident_count;
968 }
969 if (pmap != kernel_pmap) {
970 /* update pmap stats */
971 if (IS_REUSABLE_PAGE(pai)) {
972 OSAddAtomic(+1, &pmap->stats.reusable);
973 PMAP_STATS_PEAK(pmap->stats.reusable);
974 } else if (IS_INTERNAL_PAGE(pai)) {
975 OSAddAtomic(+1, &pmap->stats.internal);
976 PMAP_STATS_PEAK(pmap->stats.internal);
977 } else {
978 OSAddAtomic(+1, &pmap->stats.external);
979 PMAP_STATS_PEAK(pmap->stats.external);
980 }
981
982 /* update ledgers */
983 if (is_altacct) {
984 /* internal but also alternate accounting */
985 assert(IS_INTERNAL_PAGE(pai));
986 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
987 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
988 /* alternate accounting, so not in footprint */
989 } else if (IS_REUSABLE_PAGE(pai)) {
990 assert(!is_altacct);
991 assert(IS_INTERNAL_PAGE(pai));
992 /* internal but reusable: not in footprint */
993 } else if (IS_INTERNAL_PAGE(pai)) {
994 assert(!is_altacct);
995 assert(!IS_REUSABLE_PAGE(pai));
996 /* internal: add to footprint */
997 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
998 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
999 } else {
1000 /* not internal: not in footprint */
1001 }
1002 }
1003 } else if (last_managed_page == 0) {
1004 /* Account for early mappings created before "managed pages"
1005 * are determined. Consider consulting the available DRAM map.
1006 */
1007 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1008 OSAddAtomic(+1, &pmap->stats.resident_count);
1009 if (pmap != kernel_pmap) {
1010 #if 00
1011 OSAddAtomic(+1, &pmap->stats.device);
1012 PMAP_STATS_PEAK(pmap->stats.device);
1013 #endif
1014 }
1015 }
1016 /*
1017 * Step 3) Enter the mapping.
1018 *
1019 * Build a template to speed up entering -
1020 * only the pfn changes.
1021 */
1022 template = pa_to_pte(pa);
1023
1024 if (!is_ept) {
1025 template |= INTEL_PTE_VALID;
1026 } else {
1027 template |= INTEL_EPT_IPTA;
1028 }
1029
1030
1031 /*
1032 * DRK: It may be worth asserting on cache attribute flags that diverge
1033 * from the existing physical page attributes.
1034 */
1035
1036 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
1037
1038 /*
1039 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1040 */
1041 if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
1042 if (!(flags & VM_MEM_GUARDED))
1043 template |= INTEL_PTE_PTA;
1044 template |= INTEL_PTE_NCACHE;
1045 }
1046 if (pmap != kernel_pmap && !is_ept)
1047 template |= INTEL_PTE_USER;
1048 if (prot & VM_PROT_READ)
1049 template |= PTE_READ(is_ept);
1050 if (prot & VM_PROT_WRITE) {
1051 template |= PTE_WRITE(is_ept);
1052 if (is_ept && !pmap_ept_support_ad) {
1053 template |= PTE_MOD(is_ept);
1054 if (IS_MANAGED_PAGE(pai))
1055 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
1056 }
1057 }
1058 if (prot & VM_PROT_EXECUTE) {
1059 assert(set_NX == 0);
1060 template = pte_set_ex(template, is_ept);
1061 }
1062
1063 if (set_NX)
1064 template = pte_remove_ex(template, is_ept);
1065 if (wired) {
1066 template |= INTEL_PTE_WIRED;
1067 OSAddAtomic(+1, & pmap->stats.wired_count);
1068 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1069 }
1070 if (superpage)
1071 template |= INTEL_PTE_PS;
1072
1073 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1074 if (is_ept && !pmap_ept_support_ad) {
1075 template |= PTE_REF(is_ept);
1076 if (IS_MANAGED_PAGE(pai))
1077 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
1078 }
1079
1080 pmap_store_pte(pte, template);
1081
1082 /*
1083 * if this was a managed page we delayed unlocking the pv until here
1084 * to prevent pmap_page_protect et al from finding it until the pte
1085 * has been stored
1086 */
1087 if (IS_MANAGED_PAGE(pai)) {
1088 UNLOCK_PVH(pai);
1089 }
1090 Done:
1091 if (need_tlbflush == TRUE) {
1092 if (options & PMAP_OPTIONS_NOFLUSH)
1093 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1094 else
1095 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1096 }
1097 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1098 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1099 }
1100 if (pvh_new != PV_HASHED_ENTRY_NULL) {
1101 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1102 }
1103 PMAP_UNLOCK(pmap);
1104
1105 if (delpage_pm_obj) {
1106 vm_page_t m;
1107
1108 vm_object_lock(delpage_pm_obj);
1109 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1110 if (m == VM_PAGE_NULL)
1111 panic("pmap_enter: pte page not in object");
1112 VM_PAGE_FREE(m);
1113 vm_object_unlock(delpage_pm_obj);
1114 OSAddAtomic(-1, &inuse_ptepages_count);
1115 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1116 }
1117
1118 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1119 return KERN_SUCCESS;
1120 }
1121
1122 /*
1123 * Remove a range of hardware page-table entries.
1124 * The entries given are the first (inclusive)
1125 * and last (exclusive) entries for the VM pages.
1126 * The virtual address is the va for the first pte.
1127 *
1128 * The pmap must be locked.
1129 * If the pmap is not the kernel pmap, the range must lie
1130 * entirely within one pte-page. This is NOT checked.
1131 * Assumes that the pte-page exists.
1132 */
1133
1134 void
1135 pmap_remove_range(
1136 pmap_t pmap,
1137 vm_map_offset_t start_vaddr,
1138 pt_entry_t *spte,
1139 pt_entry_t *epte)
1140 {
1141 pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1142 PMAP_OPTIONS_REMOVE);
1143 }
1144
1145 void
1146 pmap_remove_range_options(
1147 pmap_t pmap,
1148 vm_map_offset_t start_vaddr,
1149 pt_entry_t *spte,
1150 pt_entry_t *epte,
1151 int options)
1152 {
1153 pt_entry_t *cpte;
1154 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1155 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1156 pv_hashed_entry_t pvh_e;
1157 int pvh_cnt = 0;
1158 int num_removed, num_unwired, num_found, num_invalid;
1159 int stats_external, stats_internal, stats_reusable;
1160 uint64_t stats_compressed;
1161 int ledgers_internal, ledgers_alt_internal;
1162 uint64_t ledgers_compressed, ledgers_alt_compressed;
1163 ppnum_t pai;
1164 pmap_paddr_t pa;
1165 vm_map_offset_t vaddr;
1166 boolean_t is_ept = is_ept_pmap(pmap);
1167 boolean_t was_altacct;
1168
1169 num_removed = 0;
1170 num_unwired = 0;
1171 num_found = 0;
1172 num_invalid = 0;
1173 stats_external = 0;
1174 stats_internal = 0;
1175 stats_reusable = 0;
1176 stats_compressed = 0;
1177 ledgers_internal = 0;
1178 ledgers_compressed = 0;
1179 ledgers_alt_internal = 0;
1180 ledgers_alt_compressed = 0;
1181 /* invalidate the PTEs first to "freeze" them */
1182 for (cpte = spte, vaddr = start_vaddr;
1183 cpte < epte;
1184 cpte++, vaddr += PAGE_SIZE_64) {
1185 pt_entry_t p = *cpte;
1186
1187 pa = pte_to_pa(p);
1188 if (pa == 0) {
1189 if ((options & PMAP_OPTIONS_REMOVE) &&
1190 (PTE_IS_COMPRESSED(p))) {
1191 assert(pmap != kernel_pmap);
1192 /* one less "compressed"... */
1193 stats_compressed++;
1194 ledgers_compressed++;
1195 if (p & PTE_COMPRESSED_ALT) {
1196 /* ... but it used to be "ALTACCT" */
1197 ledgers_alt_compressed++;
1198 }
1199 /* clear marker(s) */
1200 /* XXX probably does not need to be atomic! */
1201 pmap_update_pte(cpte, INTEL_PTE_COMPRESSED_MASK, 0);
1202 }
1203 continue;
1204 }
1205 num_found++;
1206
1207 if (iswired(p))
1208 num_unwired++;
1209
1210 pai = pa_index(pa);
1211
1212 if (!IS_MANAGED_PAGE(pai)) {
1213 /*
1214 * Outside range of managed physical memory.
1215 * Just remove the mappings.
1216 */
1217 pmap_store_pte(cpte, 0);
1218 continue;
1219 }
1220
1221 if ((p & PTE_VALID_MASK(is_ept)) == 0)
1222 num_invalid++;
1223
1224 /* invalidate the PTE */
1225 pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
1226 }
1227
1228 if (num_found == 0) {
1229 /* nothing was changed: we're done */
1230 goto update_counts;
1231 }
1232
1233 /* propagate the invalidates to other CPUs */
1234
1235 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1236
1237 for (cpte = spte, vaddr = start_vaddr;
1238 cpte < epte;
1239 cpte++, vaddr += PAGE_SIZE_64) {
1240
1241 pa = pte_to_pa(*cpte);
1242 if (pa == 0) {
1243 check_pte_for_compressed_marker:
1244 /*
1245 * This PTE could have been replaced with a
1246 * "compressed" marker after our first "freeze"
1247 * loop above, so check again.
1248 */
1249 if ((options & PMAP_OPTIONS_REMOVE) &&
1250 (PTE_IS_COMPRESSED(*cpte))) {
1251 assert(pmap != kernel_pmap);
1252 /* one less "compressed"... */
1253 stats_compressed++;
1254 ledgers_compressed++;
1255 if (*cpte & PTE_COMPRESSED_ALT) {
1256 /* ... but it used to be "ALTACCT" */
1257 ledgers_alt_compressed++;
1258 }
1259 pmap_store_pte(cpte, 0);
1260 }
1261 continue;
1262 }
1263
1264 pai = pa_index(pa);
1265
1266 LOCK_PVH(pai);
1267
1268 pa = pte_to_pa(*cpte);
1269 if (pa == 0) {
1270 UNLOCK_PVH(pai);
1271 goto check_pte_for_compressed_marker;
1272 }
1273
1274 /*
1275 * Remove the mapping from the pvlist for this physical page.
1276 */
1277 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);
1278
1279 num_removed++;
1280 /* update pmap stats */
1281 if (IS_REUSABLE_PAGE(pai)) {
1282 stats_reusable++;
1283 } else if (IS_INTERNAL_PAGE(pai)) {
1284 stats_internal++;
1285 } else {
1286 stats_external++;
1287 }
1288 /* update ledgers */
1289 if (was_altacct) {
1290 /* internal and alternate accounting */
1291 assert(IS_INTERNAL_PAGE(pai));
1292 ledgers_internal++;
1293 ledgers_alt_internal++;
1294 } else if (IS_REUSABLE_PAGE(pai)) {
1295 /* internal but reusable */
1296 assert(!was_altacct);
1297 assert(IS_INTERNAL_PAGE(pai));
1298 } else if (IS_INTERNAL_PAGE(pai)) {
1299 /* internal */
1300 assert(!was_altacct);
1301 assert(!IS_REUSABLE_PAGE(pai));
1302 ledgers_internal++;
1303 } else {
1304 /* not internal */
1305 }
1306
1307 /*
1308 * Get the modify and reference bits, then
1309 * nuke the entry in the page table
1310 */
1311 /* remember reference and change */
1312 if (!is_ept) {
1313 pmap_phys_attributes[pai] |=
1314 *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
1315 } else {
1316 pmap_phys_attributes[pai] |=
1317 ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1318 }
1319
1320 /* completely invalidate the PTE */
1321 pmap_store_pte(cpte, 0);
1322
1323 UNLOCK_PVH(pai);
1324
1325 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1326 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1327 pvh_eh = pvh_e;
1328
1329 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1330 pvh_et = pvh_e;
1331 }
1332 pvh_cnt++;
1333 }
1334 } /* for loop */
1335
1336 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1337 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1338 }
1339 update_counts:
1340 /*
1341 * Update the counts
1342 */
1343 #if TESTING
1344 if (pmap->stats.resident_count < num_removed)
1345 panic("pmap_remove_range: resident_count");
1346 #endif
1347 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1348 PMAP_STATS_ASSERTF((pmap->stats.resident_count >= num_removed,
1349 "pmap=%p num_removed=%d stats.resident_count=%d",
1350 pmap, num_removed, pmap->stats.resident_count));
1351 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1352
1353 if (pmap != kernel_pmap) {
1354 PMAP_STATS_ASSERTF((pmap->stats.external >= stats_external,
1355 "pmap=%p stats_external=%d stats.external=%d",
1356 pmap, stats_external, pmap->stats.external));
1357 PMAP_STATS_ASSERTF((pmap->stats.internal >= stats_internal,
1358 "pmap=%p stats_internal=%d stats.internal=%d",
1359 pmap, stats_internal, pmap->stats.internal));
1360 PMAP_STATS_ASSERTF((pmap->stats.reusable >= stats_reusable,
1361 "pmap=%p stats_reusable=%d stats.reusable=%d",
1362 pmap, stats_reusable, pmap->stats.reusable));
1363 PMAP_STATS_ASSERTF((pmap->stats.compressed >= stats_compressed,
1364 "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
1365 pmap, stats_compressed, pmap->stats.compressed));
1366
1367 /* update pmap stats */
1368 if (stats_external) {
1369 OSAddAtomic(-stats_external, &pmap->stats.external);
1370 }
1371 if (stats_internal) {
1372 OSAddAtomic(-stats_internal, &pmap->stats.internal);
1373 }
1374 if (stats_reusable)
1375 OSAddAtomic(-stats_reusable, &pmap->stats.reusable);
1376 if (stats_compressed)
1377 OSAddAtomic64(-stats_compressed, &pmap->stats.compressed);
1378 /* update ledgers */
1379 if (ledgers_internal) {
1380 pmap_ledger_debit(pmap,
1381 task_ledgers.internal,
1382 machine_ptob(ledgers_internal));
1383 }
1384 if (ledgers_compressed) {
1385 pmap_ledger_debit(pmap,
1386 task_ledgers.internal_compressed,
1387 machine_ptob(ledgers_compressed));
1388 }
1389 if (ledgers_alt_internal) {
1390 pmap_ledger_debit(pmap,
1391 task_ledgers.alternate_accounting,
1392 machine_ptob(ledgers_alt_internal));
1393 }
1394 if (ledgers_alt_compressed) {
1395 pmap_ledger_debit(pmap,
1396 task_ledgers.alternate_accounting_compressed,
1397 machine_ptob(ledgers_alt_compressed));
1398 }
1399 pmap_ledger_debit(pmap,
1400 task_ledgers.phys_footprint,
1401 machine_ptob((ledgers_internal -
1402 ledgers_alt_internal) +
1403 (ledgers_compressed -
1404 ledgers_alt_compressed)));
1405 }
1406
1407 #if TESTING
1408 if (pmap->stats.wired_count < num_unwired)
1409 panic("pmap_remove_range: wired_count");
1410 #endif
1411 PMAP_STATS_ASSERTF((pmap->stats.wired_count >= num_unwired,
1412 "pmap=%p num_unwired=%d stats.wired_count=%d",
1413 pmap, num_unwired, pmap->stats.wired_count));
1414 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
1415 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1416
1417 return;
1418 }
1419
1420
1421 /*
1422 * Remove the given range of addresses
1423 * from the specified map.
1424 *
1425 * It is assumed that the start and end are properly
1426 * rounded to the hardware page size.
1427 */
1428 void
1429 pmap_remove(
1430 pmap_t map,
1431 addr64_t s64,
1432 addr64_t e64)
1433 {
1434 pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1435 }
1436
1437 void
1438 pmap_remove_options(
1439 pmap_t map,
1440 addr64_t s64,
1441 addr64_t e64,
1442 int options)
1443 {
1444 pt_entry_t *pde;
1445 pt_entry_t *spte, *epte;
1446 addr64_t l64;
1447 uint64_t deadline;
1448 boolean_t is_ept;
1449
1450 pmap_intr_assert();
1451
1452 if (map == PMAP_NULL || s64 == e64)
1453 return;
1454
1455 is_ept = is_ept_pmap(map);
1456
1457 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1458 map,
1459 (uint32_t) (s64 >> 32), s64,
1460 (uint32_t) (e64 >> 32), e64);
1461
1462
1463 PMAP_LOCK(map);
1464
1465 #if 0
1466 /*
1467 * Check that address range in the kernel does not overlap the stacks.
1468 * We initialize local static min/max variables once to avoid making
1469 * 2 function calls for every remove. Note also that these functions
1470 * both return 0 before kernel stacks have been initialized, and hence
1471 * the panic is not triggered in this case.
1472 */
1473 if (map == kernel_pmap) {
1474 static vm_offset_t kernel_stack_min = 0;
1475 static vm_offset_t kernel_stack_max = 0;
1476
1477 if (kernel_stack_min == 0) {
1478 kernel_stack_min = min_valid_stack_address();
1479 kernel_stack_max = max_valid_stack_address();
1480 }
1481 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1482 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1483 panic("pmap_remove() attempted in kernel stack");
1484 }
1485 #else
1486
1487 /*
1488 * The values of kernel_stack_min and kernel_stack_max are no longer
1489 * relevant now that we allocate kernel stacks in the kernel map,
1490 * so the old code above no longer applies. If we wanted to check that
1491 * we weren't removing a mapping of a page in a kernel stack we'd
1492 * mark the PTE with an unused bit and check that here.
1493 */
1494
1495 #endif
1496
1497 deadline = rdtsc64() + max_preemption_latency_tsc;
1498
1499 while (s64 < e64) {
1500 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1501 if (l64 > e64)
1502 l64 = e64;
1503 pde = pmap_pde(map, s64);
1504
1505 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1506 if (*pde & PTE_PS) {
1507 /*
1508 * If we're removing a superpage, pmap_remove_range()
1509 * must work on level 2 instead of level 1; and we're
1510 * only passing a single level 2 entry instead of a
1511 * level 1 range.
1512 */
1513 spte = pde;
1514 epte = spte+1; /* excluded */
1515 } else {
1516 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1517 spte = &spte[ptenum(s64)];
1518 epte = &spte[intel_btop(l64 - s64)];
1519 }
1520 pmap_remove_range_options(map, s64, spte, epte,
1521 options);
1522 }
1523 s64 = l64;
1524
1525 if (s64 < e64 && rdtsc64() >= deadline) {
1526 PMAP_UNLOCK(map)
1527 /* TODO: Rapid release/reacquisition can defeat
1528 * the "backoff" intent here; either consider a
1529 * fair spinlock, or a scheme whereby each lock
1530 * attempt marks the processor as within a spinlock
1531 * acquisition, and scan CPUs here to determine
1532 * if a backoff is necessary, to avoid sacrificing
1533 * performance in the common case.
1534 */
1535 PMAP_LOCK(map)
1536 deadline = rdtsc64() + max_preemption_latency_tsc;
1537 }
1538 }
1539
1540 PMAP_UNLOCK(map);
1541
1542 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1543 map, 0, 0, 0, 0);
1544
1545 }
1546
1547 void
1548 pmap_page_protect(
1549 ppnum_t pn,
1550 vm_prot_t prot)
1551 {
1552 pmap_page_protect_options(pn, prot, 0, NULL);
1553 }
1554
1555 /*
1556 * Routine: pmap_page_protect_options
1557 *
1558 * Function:
1559 * Lower the permission for all mappings to a given
1560 * page.
1561 */
1562 void
1563 pmap_page_protect_options(
1564 ppnum_t pn,
1565 vm_prot_t prot,
1566 unsigned int options,
1567 void *arg)
1568 {
1569 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1570 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1571 pv_hashed_entry_t nexth;
1572 int pvh_cnt = 0;
1573 pv_rooted_entry_t pv_h;
1574 pv_rooted_entry_t pv_e;
1575 pv_hashed_entry_t pvh_e;
1576 pt_entry_t *pte;
1577 int pai;
1578 pmap_t pmap;
1579 boolean_t remove;
1580 pt_entry_t new_pte_value;
1581 boolean_t is_ept;
1582
1583 pmap_intr_assert();
1584 assert(pn != vm_page_fictitious_addr);
1585 if (pn == vm_page_guard_addr)
1586 return;
1587
1588 pai = ppn_to_pai(pn);
1589
1590 if (!IS_MANAGED_PAGE(pai)) {
1591 /*
1592 * Not a managed page.
1593 */
1594 return;
1595 }
1596 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1597 pn, prot, 0, 0, 0);
1598
1599 /*
1600 * Determine the new protection.
1601 */
1602 switch (prot) {
1603 case VM_PROT_READ:
1604 case VM_PROT_READ | VM_PROT_EXECUTE:
1605 remove = FALSE;
1606 break;
1607 case VM_PROT_ALL:
1608 return; /* nothing to do */
1609 default:
1610 remove = TRUE;
1611 break;
1612 }
1613
1614 pv_h = pai_to_pvh(pai);
1615
1616 LOCK_PVH(pai);
1617
1618
1619 /*
1620 * Walk down PV list, if any, changing or removing all mappings.
1621 */
1622 if (pv_h->pmap == PMAP_NULL)
1623 goto done;
1624
1625 pv_e = pv_h;
1626 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1627
1628 do {
1629 vm_map_offset_t vaddr;
1630
1631 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1632 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1633 /* page was modified, so it will be compressed */
1634 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1635 options |= PMAP_OPTIONS_COMPRESSOR;
1636 }
1637
1638 pmap = pv_e->pmap;
1639 is_ept = is_ept_pmap(pmap);
1640 vaddr = PVE_VA(pv_e);
1641 pte = pmap_pte(pmap, vaddr);
1642
1643 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1644 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1645
1646 if (0 == pte) {
1647 panic("pmap_page_protect() "
1648 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1649 pmap, pn, vaddr);
1650 }
1651 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1652
1653 /*
1654 * Remove the mapping if new protection is NONE
1655 */
1656 if (remove) {
1657
1658 /* Remove per-pmap wired count */
1659 if (iswired(*pte)) {
1660 OSAddAtomic(-1, &pmap->stats.wired_count);
1661 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1662 }
1663
1664 if (pmap != kernel_pmap &&
1665 (options & PMAP_OPTIONS_COMPRESSOR) &&
1666 IS_INTERNAL_PAGE(pai)) {
1667 assert(!PTE_IS_COMPRESSED(*pte));
1668 /* mark this PTE as having been "compressed" */
1669 new_pte_value = PTE_COMPRESSED;
1670 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1671 new_pte_value |= PTE_COMPRESSED_ALT;
1672 }
1673 } else {
1674 new_pte_value = 0;
1675 }
1676
1677 if (options & PMAP_OPTIONS_NOREFMOD) {
1678 pmap_store_pte(pte, new_pte_value);
1679
1680 if (options & PMAP_OPTIONS_NOFLUSH)
1681 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1682 else
1683 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1684 } else {
1685 /*
1686 * Remove the mapping, collecting dirty bits.
1687 */
1688 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
1689
1690 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1691 if (!is_ept) {
1692 pmap_phys_attributes[pai] |=
1693 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1694 } else {
1695 pmap_phys_attributes[pai] |=
1696 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1697 }
1698 if ((options &
1699 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1700 IS_INTERNAL_PAGE(pai) &&
1701 (pmap_phys_attributes[pai] &
1702 PHYS_MODIFIED)) {
1703 /*
1704 * Page is actually "modified" and
1705 * will be compressed. Start
1706 * accounting for it as "compressed".
1707 */
1708 assert(!(options & PMAP_OPTIONS_COMPRESSOR));
1709 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1710 options |= PMAP_OPTIONS_COMPRESSOR;
1711 assert(new_pte_value == 0);
1712 if (pmap != kernel_pmap) {
1713 new_pte_value = PTE_COMPRESSED;
1714 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1715 new_pte_value |= PTE_COMPRESSED_ALT;
1716 }
1717 }
1718 }
1719 pmap_store_pte(pte, new_pte_value);
1720 }
1721
1722 #if TESTING
1723 if (pmap->stats.resident_count < 1)
1724 panic("pmap_page_protect: resident_count");
1725 #endif
1726 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1727 assert(pmap->stats.resident_count >= 1);
1728 OSAddAtomic(-1, &pmap->stats.resident_count);
1729
1730 /*
1731 * We only ever compress internal pages.
1732 */
1733 if (options & PMAP_OPTIONS_COMPRESSOR) {
1734 assert(IS_INTERNAL_PAGE(pai));
1735 }
1736 if (pmap != kernel_pmap) {
1737 /* update pmap stats */
1738 if (IS_REUSABLE_PAGE(pai)) {
1739 assert(pmap->stats.reusable > 0);
1740 OSAddAtomic(-1, &pmap->stats.reusable);
1741 } else if (IS_INTERNAL_PAGE(pai)) {
1742 assert(pmap->stats.internal > 0);
1743 OSAddAtomic(-1, &pmap->stats.internal);
1744 } else {
1745 assert(pmap->stats.external > 0);
1746 OSAddAtomic(-1, &pmap->stats.external);
1747 }
1748 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
1749 IS_INTERNAL_PAGE(pai)) {
1750 /* adjust "compressed" stats */
1751 OSAddAtomic64(+1, &pmap->stats.compressed);
1752 PMAP_STATS_PEAK(pmap->stats.compressed);
1753 pmap->stats.compressed_lifetime++;
1754 }
1755
1756 /* update ledgers */
1757 if (IS_ALTACCT_PAGE(pai, pv_e)) {
1758 assert(IS_INTERNAL_PAGE(pai));
1759 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1760 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
1761 if (options & PMAP_OPTIONS_COMPRESSOR) {
1762 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1763 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
1764 }
1765 } else if (IS_REUSABLE_PAGE(pai)) {
1766 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1767 assert(IS_INTERNAL_PAGE(pai));
1768 if (options & PMAP_OPTIONS_COMPRESSOR) {
1769 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1770 /* was not in footprint, but is now */
1771 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1772 }
1773 } else if (IS_INTERNAL_PAGE(pai)) {
1774 assert(!IS_ALTACCT_PAGE(pai, pv_e));
1775 assert(!IS_REUSABLE_PAGE(pai));
1776 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
1777 /*
1778 * Update all stats related to physical
1779 * footprint, which only deals with
1780 * internal pages.
1781 */
1782 if (options & PMAP_OPTIONS_COMPRESSOR) {
1783 /*
1784 * This removal is only being
1785 * done so we can send this page
1786 * to the compressor; therefore
1787 * it mustn't affect total task
1788 * footprint.
1789 */
1790 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1791 } else {
1792 /*
1793 * This internal page isn't
1794 * going to the compressor,
1795 * so adjust stats to keep
1796 * phys_footprint up to date.
1797 */
1798 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1799 }
1800 }
1801 }
1802
1803 /*
1804 * Deal with the pv_rooted_entry.
1805 */
1806
1807 if (pv_e == pv_h) {
1808 /*
1809 * Fix up head later.
1810 */
1811 pv_h->pmap = PMAP_NULL;
1812 } else {
1813 /*
1814 * Delete this entry.
1815 */
1816 pv_hash_remove(pvh_e);
1817 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1818 pvh_eh = pvh_e;
1819
1820 if (pvh_et == PV_HASHED_ENTRY_NULL)
1821 pvh_et = pvh_e;
1822 pvh_cnt++;
1823 }
1824 } else {
1825 /*
1826 * Write-protect, after opportunistic refmod collect
1827 */
1828 if (!is_ept) {
1829 pmap_phys_attributes[pai] |=
1830 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1831 } else {
1832 pmap_phys_attributes[pai] |=
1833 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1834 }
1835 pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
1836
1837 if (options & PMAP_OPTIONS_NOFLUSH)
1838 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1839 else
1840 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1841 }
1842 pvh_e = nexth;
1843 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1844
1845
1846 /*
1847 * If pv_head mapping was removed, fix it up.
1848 */
1849 if (pv_h->pmap == PMAP_NULL) {
1850 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1851
1852 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1853 pv_hash_remove(pvh_e);
1854 pv_h->pmap = pvh_e->pmap;
1855 pv_h->va_and_flags = pvh_e->va_and_flags;
1856 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1857 pvh_eh = pvh_e;
1858
1859 if (pvh_et == PV_HASHED_ENTRY_NULL)
1860 pvh_et = pvh_e;
1861 pvh_cnt++;
1862 }
1863 }
1864 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1865 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1866 }
1867 done:
1868 UNLOCK_PVH(pai);
1869
1870 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1871 0, 0, 0, 0, 0);
1872 }
1873
1874
1875 /*
1876 * Clear specified attribute bits.
1877 */
1878 void
1879 phys_attribute_clear(
1880 ppnum_t pn,
1881 int bits,
1882 unsigned int options,
1883 void *arg)
1884 {
1885 pv_rooted_entry_t pv_h;
1886 pv_hashed_entry_t pv_e;
1887 pt_entry_t *pte;
1888 int pai;
1889 pmap_t pmap;
1890 char attributes = 0;
1891 boolean_t is_internal, is_reusable, is_altacct, is_ept;
1892 int ept_bits_to_clear;
1893 boolean_t ept_keep_global_mod = FALSE;
1894
1895 if ((bits & PHYS_MODIFIED) &&
1896 (options & PMAP_OPTIONS_NOFLUSH) &&
1897 arg == NULL) {
1898 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1899 "should not clear 'modified' without flushing TLBs\n",
1900 pn, bits, options, arg);
1901 }
1902
1903 /* We only support converting MOD and REF bits for EPT PTEs in this function */
1904 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
1905
1906 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
1907
1908 pmap_intr_assert();
1909 assert(pn != vm_page_fictitious_addr);
1910 if (pn == vm_page_guard_addr)
1911 return;
1912
1913 pai = ppn_to_pai(pn);
1914
1915 if (!IS_MANAGED_PAGE(pai)) {
1916 /*
1917 * Not a managed page.
1918 */
1919 return;
1920 }
1921
1922 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1923 pn, bits, 0, 0, 0);
1924
1925 pv_h = pai_to_pvh(pai);
1926
1927 LOCK_PVH(pai);
1928
1929
1930 /*
1931 * Walk down PV list, clearing all modify or reference bits.
1932 * We do not have to lock the pv_list because we have
1933 * the per-pmap lock
1934 */
1935 if (pv_h->pmap != PMAP_NULL) {
1936 /*
1937 * There are some mappings.
1938 */
1939
1940 is_internal = IS_INTERNAL_PAGE(pai);
1941 is_reusable = IS_REUSABLE_PAGE(pai);
1942
1943 pv_e = (pv_hashed_entry_t)pv_h;
1944
1945 do {
1946 vm_map_offset_t va;
1947 char pte_bits;
1948
1949 pmap = pv_e->pmap;
1950 is_ept = is_ept_pmap(pmap);
1951 is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
1952 va = PVE_VA(pv_e);
1953 pte_bits = 0;
1954
1955 if (bits) {
1956 pte = pmap_pte(pmap, va);
1957 /* grab ref/mod bits from this PTE */
1958 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
1959 /* propagate to page's global attributes */
1960 if (!is_ept) {
1961 attributes |= pte_bits;
1962 } else {
1963 attributes |= ept_refmod_to_physmap(pte_bits);
1964 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
1965 ept_keep_global_mod = TRUE;
1966 }
1967 }
1968 /* which bits to clear for this PTE? */
1969 if (!is_ept) {
1970 pte_bits &= bits;
1971 } else {
1972 pte_bits &= ept_bits_to_clear;
1973 }
1974 }
1975
1976 /*
1977 * Clear modify and/or reference bits.
1978 */
1979 if (pte_bits) {
1980 pmap_update_pte(pte, bits, 0);
1981
1982 /* Ensure all processors using this translation
1983 * invalidate this TLB entry. The invalidation
1984 * *must* follow the PTE update, to ensure that
1985 * the TLB shadow of the 'D' bit (in particular)
1986 * is synchronized with the updated PTE.
1987 */
1988 if (! (options & PMAP_OPTIONS_NOFLUSH)) {
1989 /* flush TLBS now */
1990 PMAP_UPDATE_TLBS(pmap,
1991 va,
1992 va + PAGE_SIZE);
1993 } else if (arg) {
1994 /* delayed TLB flush: add "pmap" info */
1995 PMAP_UPDATE_TLBS_DELAYED(
1996 pmap,
1997 va,
1998 va + PAGE_SIZE,
1999 (pmap_flush_context *)arg);
2000 } else {
2001 /* no TLB flushing at all */
2002 }
2003 }
2004
2005 /* update pmap "reusable" stats */
2006 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
2007 is_reusable &&
2008 pmap != kernel_pmap) {
2009 /* one less "reusable" */
2010 assert(pmap->stats.reusable > 0);
2011 OSAddAtomic(-1, &pmap->stats.reusable);
2012 if (is_internal) {
2013 /* one more "internal" */
2014 OSAddAtomic(+1, &pmap->stats.internal);
2015 PMAP_STATS_PEAK(pmap->stats.internal);
2016 assert(pmap->stats.internal > 0);
2017 if (is_altacct) {
2018 /* no impact on ledgers */
2019 } else {
2020 pmap_ledger_credit(pmap,
2021 task_ledgers.internal,
2022 PAGE_SIZE);
2023 pmap_ledger_credit(
2024 pmap,
2025 task_ledgers.phys_footprint,
2026 PAGE_SIZE);
2027 }
2028 } else {
2029 /* one more "external" */
2030 OSAddAtomic(+1, &pmap->stats.external);
2031 PMAP_STATS_PEAK(pmap->stats.external);
2032 assert(pmap->stats.external > 0);
2033 }
2034 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
2035 !is_reusable &&
2036 pmap != kernel_pmap) {
2037 /* one more "reusable" */
2038 OSAddAtomic(+1, &pmap->stats.reusable);
2039 PMAP_STATS_PEAK(pmap->stats.reusable);
2040 assert(pmap->stats.reusable > 0);
2041 if (is_internal) {
2042 /* one less "internal" */
2043 assert(pmap->stats.internal > 0);
2044 OSAddAtomic(-1, &pmap->stats.internal);
2045 if (is_altacct) {
2046 /* no impact on footprint */
2047 } else {
2048 pmap_ledger_debit(pmap,
2049 task_ledgers.internal,
2050 PAGE_SIZE);
2051 pmap_ledger_debit(
2052 pmap,
2053 task_ledgers.phys_footprint,
2054 PAGE_SIZE);
2055 }
2056 } else {
2057 /* one less "external" */
2058 assert(pmap->stats.external > 0);
2059 OSAddAtomic(-1, &pmap->stats.external);
2060 }
2061 }
2062
2063 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2064
2065 } while (pv_e != (pv_hashed_entry_t)pv_h);
2066 }
2067 /* Opportunistic refmod collection, annulled
2068 * if both REF and MOD are being cleared.
2069 */
2070
2071 pmap_phys_attributes[pai] |= attributes;
2072
2073 if (ept_keep_global_mod) {
2074 /*
2075 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2076 * requesting that we clear the modified bit for a phys page, we need
2077 * to ensure that there are no EPT mappings for the page with the
2078 * modified bit set. If there are, we cannot clear the global modified bit.
2079 */
2080 bits &= ~PHYS_MODIFIED;
2081 }
2082 pmap_phys_attributes[pai] &= ~(bits);
2083
2084 /* update this page's "reusable" status */
2085 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
2086 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
2087 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
2088 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
2089 }
2090
2091 UNLOCK_PVH(pai);
2092
2093 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
2094 0, 0, 0, 0, 0);
2095 }
2096
2097 /*
2098 * Check specified attribute bits.
2099 */
2100 int
2101 phys_attribute_test(
2102 ppnum_t pn,
2103 int bits)
2104 {
2105 pv_rooted_entry_t pv_h;
2106 pv_hashed_entry_t pv_e;
2107 pt_entry_t *pte;
2108 int pai;
2109 pmap_t pmap;
2110 int attributes = 0;
2111 boolean_t is_ept;
2112
2113 pmap_intr_assert();
2114 assert(pn != vm_page_fictitious_addr);
2115 assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
2116 if (pn == vm_page_guard_addr)
2117 return 0;
2118
2119 pai = ppn_to_pai(pn);
2120
2121 if (!IS_MANAGED_PAGE(pai)) {
2122 /*
2123 * Not a managed page.
2124 */
2125 return 0;
2126 }
2127
2128 /*
2129 * Fast check... if bits already collected
2130 * no need to take any locks...
2131 * if not set, we need to recheck after taking
2132 * the lock in case they got pulled in while
2133 * we were waiting for the lock
2134 */
2135 if ((pmap_phys_attributes[pai] & bits) == bits)
2136 return bits;
2137
2138 pv_h = pai_to_pvh(pai);
2139
2140 LOCK_PVH(pai);
2141
2142 attributes = pmap_phys_attributes[pai] & bits;
2143
2144
2145 /*
2146 * Walk down PV list, checking the mappings until we
2147 * reach the end or we've found the desired attributes.
2148 */
2149 if (attributes != bits &&
2150 pv_h->pmap != PMAP_NULL) {
2151 /*
2152 * There are some mappings.
2153 */
2154 pv_e = (pv_hashed_entry_t)pv_h;
2155 do {
2156 vm_map_offset_t va;
2157
2158 pmap = pv_e->pmap;
2159 is_ept = is_ept_pmap(pmap);
2160 va = PVE_VA(pv_e);
2161 /*
2162 * pick up modify and/or reference bits from mapping
2163 */
2164
2165 pte = pmap_pte(pmap, va);
2166 if (!is_ept) {
2167 attributes |= (int)(*pte & bits);
2168 } else {
2169 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
2170
2171 }
2172
2173 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2174
2175 } while ((attributes != bits) &&
2176 (pv_e != (pv_hashed_entry_t)pv_h));
2177 }
2178 pmap_phys_attributes[pai] |= attributes;
2179
2180 UNLOCK_PVH(pai);
2181 return (attributes);
2182 }
2183
2184 /*
2185 * Routine: pmap_change_wiring
2186 * Function: Change the wiring attribute for a map/virtual-address
2187 * pair.
2188 * In/out conditions:
2189 * The mapping must already exist in the pmap.
2190 */
2191 void
2192 pmap_change_wiring(
2193 pmap_t map,
2194 vm_map_offset_t vaddr,
2195 boolean_t wired)
2196 {
2197 pt_entry_t *pte;
2198
2199 PMAP_LOCK(map);
2200
2201 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
2202 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
2203 map, vaddr, wired);
2204
2205 if (wired && !iswired(*pte)) {
2206 /*
2207 * wiring down mapping
2208 */
2209 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
2210 OSAddAtomic(+1, &map->stats.wired_count);
2211 pmap_update_pte(pte, 0, PTE_WIRED);
2212 }
2213 else if (!wired && iswired(*pte)) {
2214 /*
2215 * unwiring mapping
2216 */
2217 assert(map->stats.wired_count >= 1);
2218 OSAddAtomic(-1, &map->stats.wired_count);
2219 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
2220 pmap_update_pte(pte, PTE_WIRED, 0);
2221 }
2222
2223 PMAP_UNLOCK(map);
2224 }
2225
2226 /*
2227 * "Backdoor" direct map routine for early mappings.
2228 * Useful for mapping memory outside the range
2229 * Sets A, D and NC if requested
2230 */
2231
2232 vm_offset_t
2233 pmap_map_bd(
2234 vm_offset_t virt,
2235 vm_map_offset_t start_addr,
2236 vm_map_offset_t end_addr,
2237 vm_prot_t prot,
2238 unsigned int flags)
2239 {
2240 pt_entry_t template;
2241 pt_entry_t *pte;
2242 spl_t spl;
2243 vm_offset_t base = virt;
2244 template = pa_to_pte(start_addr)
2245 | INTEL_PTE_REF
2246 | INTEL_PTE_MOD
2247 | INTEL_PTE_WIRED
2248 | INTEL_PTE_VALID;
2249
2250 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
2251 template |= INTEL_PTE_NCACHE;
2252 if (!(flags & (VM_MEM_GUARDED)))
2253 template |= INTEL_PTE_PTA;
2254 }
2255
2256 #if defined(__x86_64__)
2257 if ((prot & VM_PROT_EXECUTE) == 0)
2258 template |= INTEL_PTE_NX;
2259 #endif
2260
2261 if (prot & VM_PROT_WRITE)
2262 template |= INTEL_PTE_WRITE;
2263
2264 while (start_addr < end_addr) {
2265 spl = splhigh();
2266 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2267 if (pte == PT_ENTRY_NULL) {
2268 panic("pmap_map_bd: Invalid kernel address\n");
2269 }
2270 pmap_store_pte(pte, template);
2271 splx(spl);
2272 pte_increment_pa(template);
2273 virt += PAGE_SIZE;
2274 start_addr += PAGE_SIZE;
2275 }
2276 flush_tlb_raw();
2277 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2278 return(virt);
2279 }
2280
2281 mach_vm_size_t
2282 pmap_query_resident(
2283 pmap_t pmap,
2284 addr64_t s64,
2285 addr64_t e64,
2286 mach_vm_size_t *compressed_bytes_p)
2287 {
2288 pt_entry_t *pde;
2289 pt_entry_t *spte, *epte;
2290 addr64_t l64;
2291 uint64_t deadline;
2292 mach_vm_size_t resident_bytes;
2293 mach_vm_size_t compressed_bytes;
2294 boolean_t is_ept;
2295
2296 pmap_intr_assert();
2297
2298 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
2299 if (compressed_bytes_p) {
2300 *compressed_bytes_p = 0;
2301 }
2302 return 0;
2303 }
2304
2305 is_ept = is_ept_pmap(pmap);
2306
2307 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
2308 pmap,
2309 (uint32_t) (s64 >> 32), s64,
2310 (uint32_t) (e64 >> 32), e64);
2311
2312 resident_bytes = 0;
2313 compressed_bytes = 0;
2314
2315 PMAP_LOCK(pmap);
2316
2317 deadline = rdtsc64() + max_preemption_latency_tsc;
2318
2319 while (s64 < e64) {
2320 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
2321 if (l64 > e64)
2322 l64 = e64;
2323 pde = pmap_pde(pmap, s64);
2324
2325 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
2326 if (*pde & PTE_PS) {
2327 /* superpage: not supported */
2328 } else {
2329 spte = pmap_pte(pmap,
2330 (s64 & ~(pde_mapped_size - 1)));
2331 spte = &spte[ptenum(s64)];
2332 epte = &spte[intel_btop(l64 - s64)];
2333
2334 for (; spte < epte; spte++) {
2335 if (pte_to_pa(*spte) != 0) {
2336 resident_bytes += PAGE_SIZE;
2337 } else if (*spte & PTE_COMPRESSED) {
2338 compressed_bytes += PAGE_SIZE;
2339 }
2340 }
2341
2342 }
2343 }
2344 s64 = l64;
2345
2346 if (s64 < e64 && rdtsc64() >= deadline) {
2347 PMAP_UNLOCK(pmap);
2348 PMAP_LOCK(pmap);
2349 deadline = rdtsc64() + max_preemption_latency_tsc;
2350 }
2351 }
2352
2353 PMAP_UNLOCK(pmap);
2354
2355 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
2356 pmap, 0, 0, 0, 0);
2357
2358 if (compressed_bytes_p) {
2359 *compressed_bytes_p = compressed_bytes;
2360 }
2361 return resident_bytes;
2362 }
2363
2364 kern_return_t
2365 pmap_query_page_info(
2366 pmap_t pmap,
2367 vm_map_offset_t va,
2368 int *disp_p)
2369 {
2370 int disp;
2371 boolean_t is_ept;
2372 pmap_paddr_t pa;
2373 ppnum_t pai;
2374 pd_entry_t *pde;
2375 pt_entry_t *pte;
2376
2377 pmap_intr_assert();
2378 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
2379 *disp_p = 0;
2380 return KERN_INVALID_ARGUMENT;
2381 }
2382
2383 disp = 0;
2384 is_ept = is_ept_pmap(pmap);
2385
2386 PMAP_LOCK(pmap);
2387
2388 pde = pmap_pde(pmap, va);
2389 if (!pde ||
2390 !(*pde & PTE_VALID_MASK(is_ept)) ||
2391 (*pde & PTE_PS)) {
2392 goto done;
2393 }
2394
2395 pte = pmap_pte(pmap, va);
2396 if (pte == PT_ENTRY_NULL) {
2397 goto done;
2398 }
2399
2400 pa = pte_to_pa(*pte);
2401 if (pa == 0) {
2402 if (PTE_IS_COMPRESSED(*pte)) {
2403 disp |= PMAP_QUERY_PAGE_COMPRESSED;
2404 if (*pte & PTE_COMPRESSED_ALT) {
2405 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
2406 }
2407 }
2408 } else {
2409 disp |= PMAP_QUERY_PAGE_PRESENT;
2410 pai = pa_index(pa);
2411 if (!IS_MANAGED_PAGE(pai)) {
2412 } else if (pmap_pv_is_altacct(pmap, va, pai)) {
2413 assert(IS_INTERNAL_PAGE(pai));
2414 disp |= PMAP_QUERY_PAGE_INTERNAL;
2415 disp |= PMAP_QUERY_PAGE_ALTACCT;
2416 } else if (IS_REUSABLE_PAGE(pai)) {
2417 disp |= PMAP_QUERY_PAGE_REUSABLE;
2418 } else if (IS_INTERNAL_PAGE(pai)) {
2419 disp |= PMAP_QUERY_PAGE_INTERNAL;
2420 }
2421 }
2422
2423 done:
2424 PMAP_UNLOCK(pmap);
2425 *disp_p = disp;
2426 return KERN_SUCCESS;
2427 }
2428
#if DEBUG || DEVELOPMENT
/*
 *	Routine:	kernel_pmap_lock
 *	Function:	Take the kernel pmap's lock via PMAP_LOCK.
 *			Built only for DEBUG and DEVELOPMENT kernels.
 */
void
kernel_pmap_lock(void)
{
	PMAP_LOCK(kernel_pmap);
}

/*
 *	Routine:	kernel_pmap_unlock
 *	Function:	Release the kernel pmap's lock via PMAP_UNLOCK;
 *			the counterpart of kernel_pmap_lock().
 */
void
kernel_pmap_unlock(void)
{
	PMAP_UNLOCK(kernel_pmap);
}
#endif /* DEBUG || DEVELOPMENT */