/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_assert.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

36void pmap_remove_range(
37 pmap_t pmap,
38 vm_map_offset_t va,
39 pt_entry_t *spte,
40 pt_entry_t *epte);
41
42void pmap_remove_range_options(
43 pmap_t pmap,
44 vm_map_offset_t va,
45 pt_entry_t *spte,
46 pt_entry_t *epte,
47 int options);
48
49void pmap_reusable_range(
50 pmap_t pmap,
51 vm_map_offset_t va,
52 pt_entry_t *spte,
53 pt_entry_t *epte,
54 boolean_t reusable);
55
56uint32_t pmap_update_clear_pte_count;
57
58/*
59 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
60 * on a NBPDE boundary.
61 */
62
63/* These symbols may be referenced directly by VM */
64uint64_t pmap_nesting_size_min = NBPDE;
65uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
66
/*
 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 * grand    = the pmap into which subord will be nested
 * subord   = the pmap that is inserted into grand
 * va_start = start of the range in the grand pmap to be nested
 * nstart   = start of the corresponding range in the nested (subord) pmap
 * size     = size of the nested area (up to 16TB)
 *
 * Inserts a pmap into another. This is used to implement shared segments.
 *
 * Note that we depend upon higher-level VM locks to ensure that things don't change while
 * we are doing this. For example, the VM should not be doing any pmap enters while it is nesting
 * or performing two nests at once.
 */
82
83/*
84 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
85 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
86 * container and the "grand" parent. A minor optimization to consider for the
87 * future: make the "subord" truly a container rather than a full-fledged
88 * pagetable hierarchy which can be unnecessarily sparse (DRK).
89 */
90
91kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
92 vm_map_offset_t vaddr, nvaddr;
93 pd_entry_t *pde,*npde;
94 unsigned int i;
95 uint64_t num_pde;
96
97 assert(!is_ept_pmap(grand));
98 assert(!is_ept_pmap(subord));
99
100 if ((size & (pmap_nesting_size_min-1)) ||
101 (va_start & (pmap_nesting_size_min-1)) ||
102 (nstart & (pmap_nesting_size_min-1)) ||
103 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
104 return KERN_INVALID_VALUE;
105
106 if(size == 0) {
107 panic("pmap_nest: size is invalid - %016llX\n", size);
108 }
109
110 if (va_start != nstart)
111 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
112
113 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
114 (uintptr_t) grand, (uintptr_t) subord,
115 (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
116
117 nvaddr = (vm_map_offset_t)nstart;
118 num_pde = size >> PDESHIFT;
119
120 PMAP_LOCK(subord);
121
122 subord->pm_shared = TRUE;
123
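	/*
	 * First pass: make sure the nested (subord) pmap has valid page tables
	 * covering the entire range, expanding it as needed, and tag 1GiB-aligned
	 * chunks at the PDPT level with INTEL_PDPTE_NESTED where possible.
	 */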
124 for (i = 0; i < num_pde;) {
125 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
126
127 npde = pmap64_pdpt(subord, nvaddr);
128
129 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
130 PMAP_UNLOCK(subord);
316670eb 131 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
132 PMAP_LOCK(subord);
133 npde = pmap64_pdpt(subord, nvaddr);
134 }
135 *npde |= INTEL_PDPTE_NESTED;
136 nvaddr += NBPDPT;
137 i += (uint32_t)NPDEPG;
138 }
139 else {
140 npde = pmap_pde(subord, nvaddr);
141
142 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
143 PMAP_UNLOCK(subord);
316670eb 144 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
145 PMAP_LOCK(subord);
146 npde = pmap_pde(subord, nvaddr);
147 }
148 nvaddr += NBPDE;
149 i++;
150 }
151 }
152
153 PMAP_UNLOCK(subord);
154
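	/*
	 * Second pass: copy the subord pmap's PDPT or PDE entries into the grand
	 * pmap, so both pmaps share the same lower-level page tables across the
	 * nested range.
	 */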
155 vaddr = (vm_map_offset_t)va_start;
156
157 PMAP_LOCK(grand);
158
159 for (i = 0;i < num_pde;) {
160 pd_entry_t tpde;
161
162 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
163 npde = pmap64_pdpt(subord, vaddr);
164 if (npde == 0)
165 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
166 tpde = *npde;
167 pde = pmap64_pdpt(grand, vaddr);
168 if (0 == pde) {
169 PMAP_UNLOCK(grand);
316670eb 170 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
171 PMAP_LOCK(grand);
172 pde = pmap64_pdpt(grand, vaddr);
173 }
174 if (pde == 0)
175 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
176 pmap_store_pte(pde, tpde);
177 vaddr += NBPDPT;
178 i += (uint32_t) NPDEPG;
179 }
180 else {
181 npde = pmap_pde(subord, nstart);
182 if (npde == 0)
183 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
184 tpde = *npde;
185 nstart += NBPDE;
186 pde = pmap_pde(grand, vaddr);
187 if ((0 == pde) && cpu_64bit) {
188 PMAP_UNLOCK(grand);
316670eb 189 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
190 PMAP_LOCK(grand);
191 pde = pmap_pde(grand, vaddr);
192 }
193
194 if (pde == 0)
195 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
196 vaddr += NBPDE;
197 pmap_store_pte(pde, tpde);
198 i++;
199 }
200 }
201
202 PMAP_UNLOCK(grand);
203
204 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
205
206 return KERN_SUCCESS;
207}
208
209/*
210 * kern_return_t pmap_unnest(grand, vaddr)
211 *
212 * grand = the pmap that we will un-nest subord from
213 * vaddr = start of range in pmap to be unnested
214 *
215 * Removes a pmap from another. This is used to implement shared segments.
216 */
217
218kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
219
220 pd_entry_t *pde;
221 unsigned int i;
222 uint64_t num_pde;
223 addr64_t va_start, va_end;
224 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
225
226 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
227 (uintptr_t) grand,
228 (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
229
230 if ((size & (pmap_nesting_size_min-1)) ||
231 (vaddr & (pmap_nesting_size_min-1))) {
232 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
233 grand, vaddr, size);
234 }
235
236 assert(!is_ept_pmap(grand));
237
238 /* align everything to PDE boundaries */
239 va_start = vaddr & ~(NBPDE-1);
240 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
241 size = va_end - va_start;
242
243 PMAP_LOCK(grand);
244
245 num_pde = size >> PDESHIFT;
246 vaddr = va_start;
247
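	/*
	 * Walk the range, clearing a whole nested PDPT entry (1GiB) at a time
	 * where one is present, and individual PDEs (2MiB) otherwise; the backing
	 * page tables themselves still belong to the nested pmap.
	 */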
248 for (i = 0; i < num_pde; ) {
249 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
250 npdpt = pdptnum(grand, vaddr);
251 pde = pmap64_pdpt(grand, vaddr);
252 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
253 pmap_store_pte(pde, (pd_entry_t)0);
254 i += (uint32_t) NPDEPG;
255 vaddr += NBPDPT;
256 continue;
257 }
258 }
259 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
260 if (pde == 0)
261 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
262 pmap_store_pte(pde, (pd_entry_t)0);
263 i++;
264 vaddr += NBPDE;
265 }
266
267 PMAP_UPDATE_TLBS(grand, va_start, va_end);
268
269 PMAP_UNLOCK(grand);
270
271 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
272
273 return KERN_SUCCESS;
274}
275
276kern_return_t
277pmap_unnest_options(
278 pmap_t grand,
279 addr64_t vaddr,
280 __unused uint64_t size,
281 __unused unsigned int options) {
282 return pmap_unnest(grand, vaddr, size);
283}
284
285/* Invoked by the Mach VM to determine the platform specific unnest region */
286
287boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
288 pd_entry_t *pdpte;
289 boolean_t rval = FALSE;
290
291 if (!cpu_64bit)
292 return rval;
293
294 PMAP_LOCK(p);
295
296 pdpte = pmap64_pdpt(p, *s);
297 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
298 *s &= ~(NBPDPT -1);
299 rval = TRUE;
300 }
301
302 pdpte = pmap64_pdpt(p, *e);
303 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
304 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
305 rval = TRUE;
306 }
307
308 PMAP_UNLOCK(p);
309
310 return rval;
311}
312
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page; this is taken into account in order
 * to return the correct page number in this case.
 */
320ppnum_t
321pmap_find_phys(pmap_t pmap, addr64_t va)
322{
323 pt_entry_t *ptp;
324 pd_entry_t *pdep;
325 ppnum_t ppn = 0;
326 pd_entry_t pde;
327 pt_entry_t pte;
328 boolean_t is_ept;
329
330 is_ept = is_ept_pmap(pmap);
331
332 mp_disable_preemption();
333
334 /* This refcount test is a band-aid--several infrastructural changes
335 * are necessary to eliminate invocation of this routine from arbitrary
336 * contexts.
337 */
338
339 if (!pmap->ref_count)
340 goto pfp_exit;
341
342 pdep = pmap_pde(pmap, va);
343
344 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
345 if (pde & PTE_PS) {
346 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
347 ppn += (ppnum_t) ptenum(va);
348 }
349 else {
350 ptp = pmap_pte(pmap, va);
3e170ce0 351 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
352 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
353 }
354 }
355 }
356pfp_exit:
357 mp_enable_preemption();
358
359 return ppn;
360}
361
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
372void
373pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
374 pv_rooted_entry_t pv_h, pv_e;
375 pv_hashed_entry_t pvh_e, nexth;
376 vm_map_offset_t vaddr;
377 pmap_t pmap;
378 pt_entry_t *ptep;
379 boolean_t is_ept;
380 unsigned ept_attributes;
381
382 assert(IS_MANAGED_PAGE(pn));
383 assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
384
385 /* We don't support the PTA bit for EPT PTEs */
386 if (attributes & INTEL_PTE_NCACHE)
387 ept_attributes = INTEL_EPT_NCACHE;
388 else
389 ept_attributes = INTEL_EPT_WB;
390
391 pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits; while they're
	 * currently identical, they may not remain so.
	 * Potential optimizations (here and in page_protect):
	 * parallel shootdowns, checking for redundant
	 * attribute modifications.
	 */
398
399 /*
400 * Alter attributes on all mappings
401 */
402 if (pv_h->pmap != PMAP_NULL) {
403 pv_e = pv_h;
404 pvh_e = (pv_hashed_entry_t)pv_e;
405
406 do {
407 pmap = pv_e->pmap;
408 vaddr = pv_e->va;
409 ptep = pmap_pte(pmap, vaddr);
3e170ce0 410
411 if (0 == ptep)
412 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
413
414 is_ept = is_ept_pmap(pmap);
415
6d2010ae 416 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
417 if (!is_ept) {
418 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
419 } else {
420 pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
421 }
422 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
423 pvh_e = nexth;
424 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
425 }
426}
427
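/*
 * Temporarily mark this CPU's CR3 as inactive so that it is skipped by TLB
 * coherency (shootdown) interrupts; when filtering is switched off again,
 * re-mark it active and process any pmap updates that arrived in the interim.
 */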
428void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
429 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
430
431 if (dofilter) {
432 CPU_CR3_MARK_INACTIVE();
433 } else {
434 CPU_CR3_MARK_ACTIVE();
39236c6e 435 mfence();
436 if (current_cpu_datap()->cpu_tlb_invalid)
437 process_pmap_updates();
438 }
439}
440
441
442/*
443 * Insert the given physical page (p) at
444 * the specified virtual address (v) in the
445 * target physical map with the protection requested.
446 *
447 * If specified, the page will be wired down, meaning
448 * that the related pte cannot be reclaimed.
449 *
450 * NB: This is the only routine which MAY NOT lazy-evaluate
451 * or lose information. That is, this routine must actually
452 * insert this page into the given map NOW.
453 */
316670eb 454
455void
456pmap_enter(
457 register pmap_t pmap,
458 vm_map_offset_t vaddr,
459 ppnum_t pn,
460 vm_prot_t prot,
316670eb 461 vm_prot_t fault_type,
462 unsigned int flags,
463 boolean_t wired)
316670eb 464{
39236c6e 465 (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
466}
467
39236c6e 468
469kern_return_t
470pmap_enter_options(
471 register pmap_t pmap,
472 vm_map_offset_t vaddr,
473 ppnum_t pn,
474 vm_prot_t prot,
475 __unused vm_prot_t fault_type,
476 unsigned int flags,
477 boolean_t wired,
478 unsigned int options,
479 void *arg)
480{
481 pt_entry_t *pte;
482 pv_rooted_entry_t pv_h;
316670eb 483 ppnum_t pai;
b7266188
A
484 pv_hashed_entry_t pvh_e;
485 pv_hashed_entry_t pvh_new;
486 pt_entry_t template;
487 pmap_paddr_t old_pa;
488 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
489 boolean_t need_tlbflush = FALSE;
490 boolean_t set_NX;
491 char oattr;
492 boolean_t old_pa_locked;
493 /* 2MiB mappings are confined to x86_64 by VM */
494 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
495 vm_object_t delpage_pm_obj = NULL;
22ba694c 496 uint64_t delpage_pde_index = 0;
b7266188 497 pt_entry_t old_pte;
316670eb 498 kern_return_t kr_expand;
3e170ce0 499 boolean_t is_ept;
500
501 pmap_intr_assert();
502
503 if (pmap == PMAP_NULL)
504 return KERN_INVALID_ARGUMENT;
505
506 is_ept = is_ept_pmap(pmap);
507
	/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
	 * unused value for that scenario.
	 */
511 assert(pn != vm_page_fictitious_addr);
512
b7266188 513 if (pn == vm_page_guard_addr)
316670eb 514 return KERN_INVALID_ARGUMENT;
515
516 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
517 pmap,
518 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
519 pn, prot);
520
521 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
522 set_NX = FALSE;
523 else
524 set_NX = TRUE;
525
526 if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
527 set_NX = FALSE;
528 }
529
	/*
	 * Must allocate a new pvlist entry while we're unlocked;
	 * zalloc may cause pageout (which will lock the pmap system).
	 * If we determine we need a pvlist entry, we will unlock
	 * and allocate one. Then we will retry, throwing away
	 * the allocated entry later (if we no longer need it).
	 */
537
538 pvh_new = PV_HASHED_ENTRY_NULL;
539Retry:
540 pvh_e = PV_HASHED_ENTRY_NULL;
541
542 PMAP_LOCK(pmap);
543
544 /*
545 * Expand pmap to include this pte. Assume that
546 * pmap is always expanded to include enough hardware
547 * pages to map one VM page.
548 */
549 if(superpage) {
550 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
551 /* need room for another pde entry */
552 PMAP_UNLOCK(pmap);
553 kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
554 if (kr_expand != KERN_SUCCESS)
555 return kr_expand;
556 PMAP_LOCK(pmap);
557 }
558 } else {
559 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
560 /*
561 * Must unlock to expand the pmap
562 * going to grow pde level page(s)
563 */
564 PMAP_UNLOCK(pmap);
565 kr_expand = pmap_expand(pmap, vaddr, options);
566 if (kr_expand != KERN_SUCCESS)
567 return kr_expand;
568 PMAP_LOCK(pmap);
569 }
570 }
571 if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
572 PMAP_UNLOCK(pmap);
573 return KERN_SUCCESS;
574 }
b7266188 575
3e170ce0 576 if (superpage && *pte && !(*pte & PTE_PS)) {
577 /*
578 * There is still an empty page table mapped that
579 * was used for a previous base page mapping.
580 * Remember the PDE and the PDE index, so that we
581 * can free the page at the end of this function.
582 */
22ba694c 583 delpage_pde_index = pdeidx(pmap, vaddr);
584 delpage_pm_obj = pmap->pm_obj;
585 *pte = 0;
586 }
587
588 old_pa = pte_to_pa(*pte);
589 pai = pa_index(old_pa);
590 old_pa_locked = FALSE;
591
39236c6e 592 if (old_pa == 0 &&
3e170ce0 593 (*pte & PTE_COMPRESSED)) {
594 /* one less "compressed" */
595 OSAddAtomic64(-1, &pmap->stats.compressed);
596 /* marker will be cleared below */
597 }
598
599 /*
600 * if we have a previous managed page, lock the pv entry now. after
601 * we lock it, check to see if someone beat us to the lock and if so
602 * drop the lock
603 */
604 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
605 LOCK_PVH(pai);
606 old_pa_locked = TRUE;
607 old_pa = pte_to_pa(*pte);
608 if (0 == old_pa) {
609 UNLOCK_PVH(pai); /* another path beat us to it */
610 old_pa_locked = FALSE;
611 }
612 }
613
614 /*
615 * Special case if the incoming physical page is already mapped
616 * at this address.
617 */
618 if (old_pa == pa) {
6d2010ae 619 pt_entry_t old_attributes =
3e170ce0 620 *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
621
622 /*
623 * May be changing its wired attribute or protection
624 */
625
626 template = pa_to_pte(pa);
627
628 /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
629 if (!is_ept) {
630 template |= INTEL_PTE_VALID;
631 } else {
632 template |= INTEL_EPT_IPTA;
633 }
634
635 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
b7266188 636
637 /*
638 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
639 */
640 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
641 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
642 if (!(flags & VM_MEM_GUARDED))
643 template |= INTEL_PTE_PTA;
644 template |= INTEL_PTE_NCACHE;
645 }
3e170ce0 646 if (pmap != kernel_pmap && !is_ept)
b7266188 647 template |= INTEL_PTE_USER;
648
649 if (prot & VM_PROT_READ)
650 template |= PTE_READ(is_ept);
651
39236c6e 652 if (prot & VM_PROT_WRITE) {
653 template |= PTE_WRITE(is_ept);
654 if (is_ept && !pmap_ept_support_ad) {
655 template |= PTE_MOD(is_ept);
656 if (old_pa_locked) {
657 assert(IS_MANAGED_PAGE(pai));
658 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
659 }
660 }
661 }
662 if (prot & VM_PROT_EXECUTE) {
663 assert(set_NX == 0);
664 template = pte_set_ex(template, is_ept);
39236c6e 665 }
666
667 if (set_NX)
3e170ce0 668 template = pte_remove_ex(template, is_ept);
669
670 if (wired) {
3e170ce0 671 template |= PTE_WIRED;
672 if (!iswired(old_attributes)) {
673 OSAddAtomic(+1, &pmap->stats.wired_count);
674 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
675 }
b7266188 676 } else {
6d2010ae 677 if (iswired(old_attributes)) {
b7266188 678 assert(pmap->stats.wired_count >= 1);
679 OSAddAtomic(-1, &pmap->stats.wired_count);
680 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
681 }
682 }
3e170ce0 683
b7266188 684 if (superpage) /* this path can not be used */
3e170ce0 685 template |= PTE_PS; /* to change the page size! */
686
687 if (old_attributes == template)
688 goto dont_update_pte;
689
690 /* Determine delta, PV locked */
691 need_tlbflush =
3e170ce0 692 ((old_attributes ^ template) != PTE_WIRED);
39236c6e 693
694 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
695 if ((old_attributes ^ template) == PTE_WRITE(is_ept))
696 need_tlbflush = FALSE;
697 }
b7266188 698
699 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
700 if (is_ept && !pmap_ept_support_ad) {
701 template |= PTE_REF(is_ept);
702 if (old_pa_locked) {
703 assert(IS_MANAGED_PAGE(pai));
704 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
705 }
706 }
707
		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;
		do {
			opte = *pte;
			npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
		} while (!pmap_cmpx_pte(pte, opte, npte));
15129b1c 714dont_update_pte:
715 if (old_pa_locked) {
716 UNLOCK_PVH(pai);
717 old_pa_locked = FALSE;
718 }
719 goto Done;
720 }
721
722 /*
723 * Outline of code from here:
724 * 1) If va was mapped, update TLBs, remove the mapping
725 * and remove old pvlist entry.
726 * 2) Add pvlist entry for new mapping
727 * 3) Enter new mapping.
728 *
729 * If the old physical page is not managed step 1) is skipped
730 * (except for updating the TLBs), and the mapping is
731 * overwritten at step 3). If the new physical page is not
732 * managed, step 2) is skipped.
733 */
734
735 if (old_pa != (pmap_paddr_t) 0) {
736
737 /*
738 * Don't do anything to pages outside valid memory here.
739 * Instead convince the code that enters a new mapping
740 * to overwrite the old one.
741 */
742
743 /* invalidate the PTE */
3e170ce0 744 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
745 /* propagate invalidate everywhere */
746 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
747 /* remember reference and change */
748 old_pte = *pte;
3e170ce0 749 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
750 /* completely invalidate the PTE */
751 pmap_store_pte(pte, 0);
752
753 if (IS_MANAGED_PAGE(pai)) {
6d2010ae 754 pmap_assert(old_pa_locked == TRUE);
316670eb 755 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
39236c6e 756 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
b7266188 757 assert(pmap->stats.resident_count >= 1);
316670eb 758 OSAddAtomic(-1, &pmap->stats.resident_count);
759 if (pmap != kernel_pmap) {
760 if (IS_REUSABLE_PAGE(pai)) {
761 assert(pmap->stats.reusable > 0);
762 OSAddAtomic(-1, &pmap->stats.reusable);
763 } else if (IS_INTERNAL_PAGE(pai)) {
764 assert(pmap->stats.internal > 0);
765 OSAddAtomic(-1, &pmap->stats.internal);
766 } else {
767 assert(pmap->stats.external > 0);
768 OSAddAtomic(-1, &pmap->stats.external);
769 }
770 }
b7266188 771 if (iswired(*pte)) {
b7266188 772 assert(pmap->stats.wired_count >= 1);
773 OSAddAtomic(-1, &pmap->stats.wired_count);
774 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
775 PAGE_SIZE);
b7266188 776 }
777
778 if (!is_ept) {
779 pmap_phys_attributes[pai] |= oattr;
780 } else {
781 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
782 }
783
784 /*
785 * Remove the mapping from the pvlist for
786 * this physical page.
787 * We'll end up with either a rooted pv or a
788 * hashed pv
789 */
790 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
791
792 } else {
793
794 /*
795 * old_pa is not managed.
796 * Do removal part of accounting.
797 */
798
799 if (pmap != kernel_pmap) {
800#if 00
801 assert(pmap->stats.device > 0);
802 OSAddAtomic(-1, &pmap->stats.device);
803#endif
804 }
805 if (iswired(*pte)) {
806 assert(pmap->stats.wired_count >= 1);
807 OSAddAtomic(-1, &pmap->stats.wired_count);
808 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
809 }
810 }
811 }
812
	/*
	 * if we had a previously managed page locked, unlock it now
	 */
816 if (old_pa_locked) {
817 UNLOCK_PVH(pai);
818 old_pa_locked = FALSE;
819 }
820
821 pai = pa_index(pa); /* now working with new incoming phys page */
822 if (IS_MANAGED_PAGE(pai)) {
823
824 /*
825 * Step 2) Enter the mapping in the PV list for this
826 * physical page.
827 */
828 pv_h = pai_to_pvh(pai);
829
830 LOCK_PVH(pai);
831
832 if (pv_h->pmap == PMAP_NULL) {
833 /*
834 * No mappings yet, use rooted pv
835 */
836 pv_h->va = vaddr;
837 pv_h->pmap = pmap;
838 queue_init(&pv_h->qlink);
839
840 if (options & PMAP_OPTIONS_INTERNAL) {
841 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
842 } else {
843 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
844 }
845 if (options & PMAP_OPTIONS_REUSABLE) {
846 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
847 } else {
848 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
849 }
850 } else {
851 /*
852 * Add new pv_hashed_entry after header.
853 */
854 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
855 pvh_e = pvh_new;
856 pvh_new = PV_HASHED_ENTRY_NULL;
857 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
6d2010ae 858 PV_HASHED_ALLOC(&pvh_e);
859 if (PV_HASHED_ENTRY_NULL == pvh_e) {
860 /*
861 * the pv list is empty. if we are on
862 * the kernel pmap we'll use one of
863 * the special private kernel pv_e's,
864 * else, we need to unlock
865 * everything, zalloc a pv_e, and
866 * restart bringing in the pv_e with
867 * us.
868 */
869 if (kernel_pmap == pmap) {
6d2010ae 870 PV_HASHED_KERN_ALLOC(&pvh_e);
871 } else {
872 UNLOCK_PVH(pai);
873 PMAP_UNLOCK(pmap);
6d2010ae 874 pmap_pv_throttle(pmap);
875 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
876 goto Retry;
877 }
878 }
879 }
880
881 if (PV_HASHED_ENTRY_NULL == pvh_e)
882 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
883
884 pvh_e->va = vaddr;
885 pvh_e->pmap = pmap;
886 pvh_e->ppn = pn;
887 pv_hash_add(pvh_e, pv_h);
888
889 /*
890 * Remember that we used the pvlist entry.
891 */
892 pvh_e = PV_HASHED_ENTRY_NULL;
893 }
894
895 /*
896 * only count the mapping
897 * for 'managed memory'
898 */
316670eb 899 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
39236c6e 900 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6d2010ae 901 OSAddAtomic(+1, &pmap->stats.resident_count);
902 if (pmap->stats.resident_count > pmap->stats.resident_max) {
903 pmap->stats.resident_max = pmap->stats.resident_count;
904 }
905 if (pmap != kernel_pmap) {
906 if (IS_REUSABLE_PAGE(pai)) {
907 OSAddAtomic(+1, &pmap->stats.reusable);
908 PMAP_STATS_PEAK(pmap->stats.reusable);
909 } else if (IS_INTERNAL_PAGE(pai)) {
910 OSAddAtomic(+1, &pmap->stats.internal);
911 PMAP_STATS_PEAK(pmap->stats.internal);
912 } else {
913 OSAddAtomic(+1, &pmap->stats.external);
914 PMAP_STATS_PEAK(pmap->stats.external);
915 }
916 }
917 } else if (last_managed_page == 0) {
918 /* Account for early mappings created before "managed pages"
919 * are determined. Consider consulting the available DRAM map.
920 */
316670eb 921 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
39236c6e 922 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
060df5ea 923 OSAddAtomic(+1, &pmap->stats.resident_count);
924 if (pmap != kernel_pmap) {
925#if 00
926 OSAddAtomic(+1, &pmap->stats.device);
927 PMAP_STATS_PEAK(pmap->stats.device);
928#endif
929 }
930 }
931 /*
932 * Step 3) Enter the mapping.
933 *
934 * Build a template to speed up entering -
935 * only the pfn changes.
936 */
937 template = pa_to_pte(pa);
938
939 if (!is_ept) {
940 template |= INTEL_PTE_VALID;
941 } else {
942 template |= INTEL_EPT_IPTA;
943 }
944
945
946 /*
947 * DRK: It may be worth asserting on cache attribute flags that diverge
948 * from the existing physical page attributes.
949 */
b7266188 950
951 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
952
953 /*
954 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
955 */
956 if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
957 if (!(flags & VM_MEM_GUARDED))
958 template |= INTEL_PTE_PTA;
959 template |= INTEL_PTE_NCACHE;
960 }
3e170ce0 961 if (pmap != kernel_pmap && !is_ept)
b7266188 962 template |= INTEL_PTE_USER;
963 if (prot & VM_PROT_READ)
964 template |= PTE_READ(is_ept);
965 if (prot & VM_PROT_WRITE) {
966 template |= PTE_WRITE(is_ept);
967 if (is_ept && !pmap_ept_support_ad) {
968 template |= PTE_MOD(is_ept);
969 if (IS_MANAGED_PAGE(pai))
970 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
971 }
972 }
973 if (prot & VM_PROT_EXECUTE) {
974 assert(set_NX == 0);
975 template = pte_set_ex(template, is_ept);
976 }
977
b7266188 978 if (set_NX)
3e170ce0 979 template = pte_remove_ex(template, is_ept);
980 if (wired) {
981 template |= INTEL_PTE_WIRED;
982 OSAddAtomic(+1, & pmap->stats.wired_count);
316670eb 983 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
984 }
985 if (superpage)
986 template |= INTEL_PTE_PS;
987
988 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
989 if (is_ept && !pmap_ept_support_ad) {
990 template |= PTE_REF(is_ept);
991 if (IS_MANAGED_PAGE(pai))
992 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
993 }
994
995 pmap_store_pte(pte, template);
996
997 /*
998 * if this was a managed page we delayed unlocking the pv until here
999 * to prevent pmap_page_protect et al from finding it until the pte
1000 * has been stored
1001 */
1002 if (IS_MANAGED_PAGE(pai)) {
1003 UNLOCK_PVH(pai);
1004 }
1005Done:
1006 if (need_tlbflush == TRUE) {
1007 if (options & PMAP_OPTIONS_NOFLUSH)
1008 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1009 else
1010 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1011 }
1012 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1013 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1014 }
1015 if (pvh_new != PV_HASHED_ENTRY_NULL) {
1016 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1017 }
1018 PMAP_UNLOCK(pmap);
1019
1020 if (delpage_pm_obj) {
1021 vm_page_t m;
1022
1023 vm_object_lock(delpage_pm_obj);
22ba694c 1024 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1025 if (m == VM_PAGE_NULL)
1026 panic("pmap_enter: pte page not in object");
1027 VM_PAGE_FREE(m);
3e170ce0 1028 vm_object_unlock(delpage_pm_obj);
b7266188 1029 OSAddAtomic(-1, &inuse_ptepages_count);
316670eb 1030 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1031 }
1032
1033 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
316670eb 1034 return KERN_SUCCESS;
1035}
1036
1037/*
1038 * Remove a range of hardware page-table entries.
1039 * The entries given are the first (inclusive)
1040 * and last (exclusive) entries for the VM pages.
1041 * The virtual address is the va for the first pte.
1042 *
1043 * The pmap must be locked.
1044 * If the pmap is not the kernel pmap, the range must lie
1045 * entirely within one pte-page. This is NOT checked.
1046 * Assumes that the pte-page exists.
1047 */
1048
1049void
1050pmap_remove_range(
1051 pmap_t pmap,
1052 vm_map_offset_t start_vaddr,
1053 pt_entry_t *spte,
1054 pt_entry_t *epte)
39236c6e 1055{
1056 pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1057 PMAP_OPTIONS_REMOVE);
1058}
1059
1060void
1061pmap_remove_range_options(
1062 pmap_t pmap,
1063 vm_map_offset_t start_vaddr,
1064 pt_entry_t *spte,
1065 pt_entry_t *epte,
1066 int options)
1067{
1068 pt_entry_t *cpte;
1069 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1070 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1071 pv_hashed_entry_t pvh_e;
1072 int pvh_cnt = 0;
1073 int num_removed, num_unwired, num_found, num_invalid;
1074 int num_device, num_external, num_internal, num_reusable;
1075 uint64_t num_compressed;
316670eb 1076 ppnum_t pai;
1077 pmap_paddr_t pa;
1078 vm_map_offset_t vaddr;
3e170ce0 1079 boolean_t is_ept = is_ept_pmap(pmap);
1080
1081 num_removed = 0;
1082 num_unwired = 0;
1083 num_found = 0;
1084 num_invalid = 0;
1085 num_device = 0;
1086 num_external = 0;
1087 num_internal = 0;
1088 num_reusable = 0;
1089 num_compressed = 0;
1090 /* invalidate the PTEs first to "freeze" them */
1091 for (cpte = spte, vaddr = start_vaddr;
1092 cpte < epte;
1093 cpte++, vaddr += PAGE_SIZE_64) {
1094 pt_entry_t p = *cpte;
1095
1096 pa = pte_to_pa(p);
1097 if (pa == 0) {
1098 if (pmap != kernel_pmap &&
1099 (options & PMAP_OPTIONS_REMOVE) &&
3e170ce0 1100 (p & PTE_COMPRESSED)) {
1101 /* one less "compressed" */
1102 num_compressed++;
1103 /* clear marker */
1104 /* XXX probably does not need to be atomic! */
3e170ce0 1105 pmap_update_pte(cpte, PTE_COMPRESSED, 0);
39236c6e 1106 }
b7266188 1107 continue;
39236c6e 1108 }
1109 num_found++;
1110
1111 if (iswired(p))
1112 num_unwired++;
1113
1114 pai = pa_index(pa);
1115
1116 if (!IS_MANAGED_PAGE(pai)) {
1117 /*
1118 * Outside range of managed physical memory.
1119 * Just remove the mappings.
1120 */
1121 pmap_store_pte(cpte, 0);
39236c6e 1122 num_device++;
1123 continue;
1124 }
1125
3e170ce0 1126 if ((p & PTE_VALID_MASK(is_ept)) == 0)
1127 num_invalid++;
1128
316670eb 1129 /* invalidate the PTE */
3e170ce0 1130 pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
1131 }
1132
1133 if (num_found == 0) {
1134 /* nothing was changed: we're done */
1135 goto update_counts;
1136 }
1137
1138 /* propagate the invalidates to other CPUs */
1139
1140 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1141
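	/*
	 * Second pass: with the invalidations visible to all CPUs, collect the
	 * ref/mod bits into pmap_phys_attributes[], update the per-type counts,
	 * unlink the pv entries for managed pages, and clear the PTEs completely.
	 */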
1142 for (cpte = spte, vaddr = start_vaddr;
1143 cpte < epte;
1144 cpte++, vaddr += PAGE_SIZE_64) {
1145
1146 pa = pte_to_pa(*cpte);
1147 if (pa == 0)
1148 continue;
1149
1150 pai = pa_index(pa);
1151
1152 LOCK_PVH(pai);
1153
1154 pa = pte_to_pa(*cpte);
1155 if (pa == 0) {
1156 UNLOCK_PVH(pai);
1157 continue;
1158 }
1159 num_removed++;
1160 if (IS_REUSABLE_PAGE(pai)) {
1161 num_reusable++;
1162 } else if (IS_INTERNAL_PAGE(pai)) {
1163 num_internal++;
1164 } else {
1165 num_external++;
1166 }
1167
1168 /*
1169 * Get the modify and reference bits, then
1170 * nuke the entry in the page table
1171 */
1172 /* remember reference and change */
1173 pmap_phys_attributes[pai] |=
1174 (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
1175
1176 /*
1177 * Remove the mapping from the pvlist for this physical page.
1178 */
1179 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
1180
1181 /* completely invalidate the PTE */
1182 pmap_store_pte(cpte, 0);
1183
1184 UNLOCK_PVH(pai);
1185
1186 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1187 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1188 pvh_eh = pvh_e;
1189
1190 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1191 pvh_et = pvh_e;
1192 }
1193 pvh_cnt++;
1194 }
1195 } /* for loop */
1196
1197 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1198 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1199 }
1200update_counts:
1201 /*
1202 * Update the counts
1203 */
1204#if TESTING
1205 if (pmap->stats.resident_count < num_removed)
1206 panic("pmap_remove_range: resident_count");
1207#endif
316670eb 1208 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
39236c6e 1209 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
1210 assert(pmap->stats.resident_count >= num_removed);
1211 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1212
1213 if (pmap != kernel_pmap) {
1214#if 00
1215 assert(pmap->stats.device >= num_device);
1216 if (num_device)
1217 OSAddAtomic(-num_device, &pmap->stats.device);
1218#endif /* 00 */
1219 assert(pmap->stats.external >= num_external);
1220 if (num_external)
1221 OSAddAtomic(-num_external, &pmap->stats.external);
1222 assert(pmap->stats.internal >= num_internal);
1223 if (num_internal)
1224 OSAddAtomic(-num_internal, &pmap->stats.internal);
1225 assert(pmap->stats.reusable >= num_reusable);
1226 if (num_reusable)
1227 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
1228 assert(pmap->stats.compressed >= num_compressed);
1229 if (num_compressed)
1230 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
1231 }
1232
1233#if TESTING
1234 if (pmap->stats.wired_count < num_unwired)
1235 panic("pmap_remove_range: wired_count");
1236#endif
1237 assert(pmap->stats.wired_count >= num_unwired);
1238 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
316670eb 1239 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1240
1241 return;
1242}
1243
1244
1245/*
1246 * Remove the given range of addresses
1247 * from the specified map.
1248 *
1249 * It is assumed that the start and end are properly
1250 * rounded to the hardware page size.
1251 */
1252void
1253pmap_remove(
1254 pmap_t map,
1255 addr64_t s64,
1256 addr64_t e64)
39236c6e 1257{
3e170ce0 1258 pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1259}
1260
1261void
1262pmap_remove_options(
1263 pmap_t map,
1264 addr64_t s64,
1265 addr64_t e64,
1266 int options)
1267{
1268 pt_entry_t *pde;
1269 pt_entry_t *spte, *epte;
1270 addr64_t l64;
1271 uint64_t deadline;
3e170ce0 1272 boolean_t is_ept;
1273
1274 pmap_intr_assert();
1275
1276 if (map == PMAP_NULL || s64 == e64)
1277 return;
1278
1279 is_ept = is_ept_pmap(map);
1280
1281 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1282 map,
1283 (uint32_t) (s64 >> 32), s64,
1284 (uint32_t) (e64 >> 32), e64);
1285
1286
1287 PMAP_LOCK(map);
1288
1289#if 0
1290 /*
1291 * Check that address range in the kernel does not overlap the stacks.
1292 * We initialize local static min/max variables once to avoid making
1293 * 2 function calls for every remove. Note also that these functions
1294 * both return 0 before kernel stacks have been initialized, and hence
1295 * the panic is not triggered in this case.
1296 */
1297 if (map == kernel_pmap) {
1298 static vm_offset_t kernel_stack_min = 0;
1299 static vm_offset_t kernel_stack_max = 0;
1300
1301 if (kernel_stack_min == 0) {
1302 kernel_stack_min = min_valid_stack_address();
1303 kernel_stack_max = max_valid_stack_address();
1304 }
1305 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1306 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1307 panic("pmap_remove() attempted in kernel stack");
1308 }
1309#else
1310
1311 /*
1312 * The values of kernel_stack_min and kernel_stack_max are no longer
1313 * relevant now that we allocate kernel stacks in the kernel map,
1314 * so the old code above no longer applies. If we wanted to check that
1315 * we weren't removing a mapping of a page in a kernel stack we'd
1316 * mark the PTE with an unused bit and check that here.
1317 */
1318
1319#endif
1320
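	/*
	 * Bound the time the pmap lock is held: work one PDE-mapped chunk at a
	 * time and briefly drop the lock whenever the TSC deadline passes, so
	 * that pending higher-priority work can run.
	 */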
1321 deadline = rdtsc64() + max_preemption_latency_tsc;
1322
1323 while (s64 < e64) {
1324 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1325 if (l64 > e64)
1326 l64 = e64;
1327 pde = pmap_pde(map, s64);
1328
1329 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1330 if (*pde & PTE_PS) {
1331 /*
1332 * If we're removing a superpage, pmap_remove_range()
1333 * must work on level 2 instead of level 1; and we're
1334 * only passing a single level 2 entry instead of a
1335 * level 1 range.
1336 */
1337 spte = pde;
1338 epte = spte+1; /* excluded */
1339 } else {
1340 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1341 spte = &spte[ptenum(s64)];
1342 epte = &spte[intel_btop(l64 - s64)];
1343 }
1344 pmap_remove_range_options(map, s64, spte, epte,
1345 options);
1346 }
1347 s64 = l64;
1348
1349 if (s64 < e64 && rdtsc64() >= deadline) {
1350 PMAP_UNLOCK(map)
1351 /* TODO: Rapid release/reacquisition can defeat
1352 * the "backoff" intent here; either consider a
1353 * fair spinlock, or a scheme whereby each lock
1354 * attempt marks the processor as within a spinlock
1355 * acquisition, and scan CPUs here to determine
1356 * if a backoff is necessary, to avoid sacrificing
1357 * performance in the common case.
1358 */
1359 PMAP_LOCK(map)
1360 deadline = rdtsc64() + max_preemption_latency_tsc;
1361 }
1362 }
1363
1364 PMAP_UNLOCK(map);
1365
1366 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1367 map, 0, 0, 0, 0);
1368
1369}
1370
1371void
1372pmap_page_protect(
1373 ppnum_t pn,
1374 vm_prot_t prot)
1375{
1376 pmap_page_protect_options(pn, prot, 0, NULL);
1377}
1378
b7266188 1379/*
39236c6e 1380 * Routine: pmap_page_protect_options
1381 *
1382 * Function:
1383 * Lower the permission for all mappings to a given
1384 * page.
1385 */
1386void
39236c6e 1387pmap_page_protect_options(
b7266188 1388 ppnum_t pn,
1389 vm_prot_t prot,
1390 unsigned int options,
1391 void *arg)
1392{
1393 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1394 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1395 pv_hashed_entry_t nexth;
1396 int pvh_cnt = 0;
1397 pv_rooted_entry_t pv_h;
1398 pv_rooted_entry_t pv_e;
1399 pv_hashed_entry_t pvh_e;
1400 pt_entry_t *pte;
1401 int pai;
1402 pmap_t pmap;
1403 boolean_t remove;
39236c6e 1404 pt_entry_t new_pte_value;
3e170ce0 1405 boolean_t is_ept;
1406
1407 pmap_intr_assert();
1408 assert(pn != vm_page_fictitious_addr);
1409 if (pn == vm_page_guard_addr)
1410 return;
1411
1412 pai = ppn_to_pai(pn);
1413
1414 if (!IS_MANAGED_PAGE(pai)) {
1415 /*
1416 * Not a managed page.
1417 */
1418 return;
1419 }
1420 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1421 pn, prot, 0, 0, 0);
1422
1423 /*
1424 * Determine the new protection.
1425 */
1426 switch (prot) {
1427 case VM_PROT_READ:
1428 case VM_PROT_READ | VM_PROT_EXECUTE:
1429 remove = FALSE;
1430 break;
1431 case VM_PROT_ALL:
1432 return; /* nothing to do */
1433 default:
1434 remove = TRUE;
1435 break;
1436 }
1437
1438 pv_h = pai_to_pvh(pai);
1439
1440 LOCK_PVH(pai);
1441
1442
1443 /*
1444 * Walk down PV list, if any, changing or removing all mappings.
1445 */
1446 if (pv_h->pmap == PMAP_NULL)
1447 goto done;
1448
1449 pv_e = pv_h;
1450 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1451
1452 do {
1453 vm_map_offset_t vaddr;
1454
1455 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1456 (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1457 /* page was modified, so it will be compressed */
1458 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1459 options |= PMAP_OPTIONS_COMPRESSOR;
1460 }
1461
b7266188 1462 pmap = pv_e->pmap;
3e170ce0 1463 is_ept = is_ept_pmap(pmap);
1464 vaddr = pv_e->va;
1465 pte = pmap_pte(pmap, vaddr);
1466
1467 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1468 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1469
1470 if (0 == pte) {
1471 panic("pmap_page_protect() "
1472 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1473 pmap, pn, vaddr);
1474 }
1475 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1476
1477 /*
1478 * Remove the mapping if new protection is NONE
b7266188 1479 */
6d2010ae 1480 if (remove) {
1481
1482 /* Remove per-pmap wired count */
1483 if (iswired(*pte)) {
1484 OSAddAtomic(-1, &pmap->stats.wired_count);
316670eb 1485 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1486 }
1487
1488 if (pmap != kernel_pmap &&
1489 (options & PMAP_OPTIONS_COMPRESSOR) &&
1490 IS_INTERNAL_PAGE(pai)) {
39236c6e 1491 /* mark this PTE as having been "reclaimed" */
3e170ce0 1492 new_pte_value = PTE_COMPRESSED;
1493 } else {
1494 new_pte_value = 0;
1495 }
1496
1497 if (options & PMAP_OPTIONS_NOREFMOD) {
1498 pmap_store_pte(pte, new_pte_value);
b7266188 1499
1500 if (options & PMAP_OPTIONS_NOFLUSH)
1501 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1502 else
1503 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1504 } else {
1505 /*
1506 * Remove the mapping, collecting dirty bits.
1507 */
3e170ce0 1508 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
1509
1510 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1511 if ((options &
1512 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1513 ! (pmap_phys_attributes[pai] &
1514 PHYS_MODIFIED) &&
1515 (*pte & PHYS_MODIFIED)) {
1516 /*
1517 * Page is actually "modified" and
1518 * will be compressed. Start
1519 * accounting for it as "compressed".
1520 */
1521 options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1522 options |= PMAP_OPTIONS_COMPRESSOR;
1523 new_pte_value = PTE_COMPRESSED;
1524 }
1525 if (!is_ept) {
1526 pmap_phys_attributes[pai] |=
1527 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1528 } else {
1529 pmap_phys_attributes[pai] |=
1530 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1531 }
1532 pmap_store_pte(pte, new_pte_value);
1533 }
1534
1535 if (new_pte_value == PTE_COMPRESSED) {
1536 /* one more "compressed" page */
1537 OSAddAtomic64(+1, &pmap->stats.compressed);
1538 PMAP_STATS_PEAK(pmap->stats.compressed);
1539 pmap->stats.compressed_lifetime++;
1540 }
1541
1542#if TESTING
1543 if (pmap->stats.resident_count < 1)
1544 panic("pmap_page_protect: resident_count");
1545#endif
316670eb 1546 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1547 assert(pmap->stats.resident_count >= 1);
1548 OSAddAtomic(-1, &pmap->stats.resident_count);
1549 if (options & PMAP_OPTIONS_COMPRESSOR) {
1550 /*
1551 * This removal is only being done so we can send this page to
1552 * the compressor; therefore it mustn't affect total task footprint.
1553 */
fe8ab488 1554 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1555 } else {
1556 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1557 }
1558
1559 if (pmap != kernel_pmap) {
1560 if (IS_REUSABLE_PAGE(pai)) {
1561 assert(pmap->stats.reusable > 0);
1562 OSAddAtomic(-1, &pmap->stats.reusable);
1563 } else if (IS_INTERNAL_PAGE(pai)) {
1564 assert(pmap->stats.internal > 0);
1565 OSAddAtomic(-1, &pmap->stats.internal);
1566 } else {
1567 assert(pmap->stats.external > 0);
1568 OSAddAtomic(-1, &pmap->stats.external);
1569 }
1570 }
1571
1572 /*
1573 * Deal with the pv_rooted_entry.
1574 */
1575
1576 if (pv_e == pv_h) {
1577 /*
1578 * Fix up head later.
1579 */
1580 pv_h->pmap = PMAP_NULL;
1581 } else {
1582 /*
1583 * Delete this entry.
1584 */
1585 pv_hash_remove(pvh_e);
1586 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1587 pvh_eh = pvh_e;
1588
1589 if (pvh_et == PV_HASHED_ENTRY_NULL)
1590 pvh_et = pvh_e;
1591 pvh_cnt++;
1592 }
1593 } else {
1594 /*
6d2010ae 1595 * Write-protect, after opportunistic refmod collect
b7266188 1596 */
1597 if (!is_ept) {
1598 pmap_phys_attributes[pai] |=
1599 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1600 } else {
1601 pmap_phys_attributes[pai] |=
1602 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1603 }
1604 pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
1605
1606 if (options & PMAP_OPTIONS_NOFLUSH)
1607 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1608 else
1609 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1610 }
1611 pvh_e = nexth;
1612 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1613
1614
1615 /*
1616 * If pv_head mapping was removed, fix it up.
1617 */
1618 if (pv_h->pmap == PMAP_NULL) {
1619 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1620
1621 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1622 pv_hash_remove(pvh_e);
1623 pv_h->pmap = pvh_e->pmap;
1624 pv_h->va = pvh_e->va;
1625 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1626 pvh_eh = pvh_e;
1627
1628 if (pvh_et == PV_HASHED_ENTRY_NULL)
1629 pvh_et = pvh_e;
1630 pvh_cnt++;
1631 }
1632 }
1633 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1634 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1635 }
1636done:
1637 UNLOCK_PVH(pai);
1638
1639 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1640 0, 0, 0, 0, 0);
1641}
1642
39236c6e 1643
1644/*
1645 * Clear specified attribute bits.
1646 */
b7266188 1647void
1648phys_attribute_clear(
1649 ppnum_t pn,
39236c6e
A
1650 int bits,
1651 unsigned int options,
1652 void *arg)
b7266188 1653{
1654 pv_rooted_entry_t pv_h;
1655 pv_hashed_entry_t pv_e;
1656 pt_entry_t *pte;
1657 int pai;
1658 pmap_t pmap;
1659 char attributes = 0;
1660 boolean_t is_internal, is_reusable, is_ept;
1661 int ept_bits_to_clear;
1662 boolean_t ept_keep_global_mod = FALSE;
1663
1664 if ((bits & PHYS_MODIFIED) &&
1665 (options & PMAP_OPTIONS_NOFLUSH) &&
1666 arg == NULL) {
1667 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1668 "should not clear 'modified' without flushing TLBs\n",
1669 pn, bits, options, arg);
1670 }
1671
1672 /* We only support converting MOD and REF bits for EPT PTEs in this function */
1673 assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
1674
1675 ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
1676
1677 pmap_intr_assert();
1678 assert(pn != vm_page_fictitious_addr);
1679 if (pn == vm_page_guard_addr)
1680 return;
b7266188 1681
6d2010ae 1682 pai = ppn_to_pai(pn);
b7266188 1683
1684 if (!IS_MANAGED_PAGE(pai)) {
1685 /*
1686 * Not a managed page.
1687 */
1688 return;
b7266188 1689 }
b7266188 1690
1691 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1692 pn, bits, 0, 0, 0);
b7266188 1693
6d2010ae 1694 pv_h = pai_to_pvh(pai);
b7266188 1695
6d2010ae 1696 LOCK_PVH(pai);
b7266188 1697
3e170ce0 1698
1699 /*
1700 * Walk down PV list, clearing all modify or reference bits.
1701 * We do not have to lock the pv_list because we have
316670eb 1702 * the per-pmap lock
1703 */
1704 if (pv_h->pmap != PMAP_NULL) {
1705 /*
1706 * There are some mappings.
1707 */
b7266188 1708
1709 is_internal = IS_INTERNAL_PAGE(pai);
1710 is_reusable = IS_REUSABLE_PAGE(pai);
1711
6d2010ae 1712 pv_e = (pv_hashed_entry_t)pv_h;
b7266188 1713
1714 do {
1715 vm_map_offset_t va;
fe8ab488 1716 char pte_bits;
b7266188 1717
6d2010ae 1718 pmap = pv_e->pmap;
3e170ce0 1719 is_ept = is_ept_pmap(pmap);
6d2010ae 1720 va = pv_e->va;
1721 pte_bits = 0;
1722
1723 if (bits) {
1724 pte = pmap_pte(pmap, va);
1725 /* grab ref/mod bits from this PTE */
3e170ce0 1726 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
fe8ab488 1727 /* propagate to page's global attributes */
1728 if (!is_ept) {
1729 attributes |= pte_bits;
1730 } else {
1731 attributes |= ept_refmod_to_physmap(pte_bits);
1732 if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
1733 ept_keep_global_mod = TRUE;
1734 }
1735 }
fe8ab488 1736 /* which bits to clear for this PTE? */
1737 if (!is_ept) {
1738 pte_bits &= bits;
1739 } else {
1740 pte_bits &= ept_bits_to_clear;
1741 }
fe8ab488 1742 }
b7266188 1743
1744 /*
1745 * Clear modify and/or reference bits.
1746 */
fe8ab488
A
1747 if (pte_bits) {
1748 pmap_update_pte(pte, bits, 0);
1749
1750 /* Ensure all processors using this translation
1751 * invalidate this TLB entry. The invalidation
1752 * *must* follow the PTE update, to ensure that
1753 * the TLB shadow of the 'D' bit (in particular)
1754 * is synchronized with the updated PTE.
1755 */
1756 if (! (options & PMAP_OPTIONS_NOFLUSH)) {
1757 /* flush TLBS now */
1758 PMAP_UPDATE_TLBS(pmap,
1759 va,
1760 va + PAGE_SIZE);
1761 } else if (arg) {
1762 /* delayed TLB flush: add "pmap" info */
1763 PMAP_UPDATE_TLBS_DELAYED(
1764 pmap,
1765 va,
1766 va + PAGE_SIZE,
1767 (pmap_flush_context *)arg);
1768 } else {
1769 /* no TLB flushing at all */
1770 }
1771 }
1772
1773 /* update pmap "reusable" stats */
1774 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
1775 is_reusable &&
1776 pmap != kernel_pmap) {
1777 /* one less "reusable" */
1778 assert(pmap->stats.reusable > 0);
1779 OSAddAtomic(-1, &pmap->stats.reusable);
1780 if (is_internal) {
1781 /* one more "internal" */
1782 OSAddAtomic(+1, &pmap->stats.internal);
1783 PMAP_STATS_PEAK(pmap->stats.internal);
1784 } else {
1785 /* one more "external" */
1786 OSAddAtomic(+1, &pmap->stats.external);
1787 PMAP_STATS_PEAK(pmap->stats.external);
1788 }
1789 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
1790 !is_reusable &&
1791 pmap != kernel_pmap) {
1792 /* one more "reusable" */
1793 OSAddAtomic(+1, &pmap->stats.reusable);
1794 PMAP_STATS_PEAK(pmap->stats.reusable);
1795 if (is_internal) {
1796 /* one less "internal" */
1797 assert(pmap->stats.internal > 0);
1798 OSAddAtomic(-1, &pmap->stats.internal);
1799 } else {
1800 /* one less "external" */
1801 assert(pmap->stats.external > 0);
1802 OSAddAtomic(-1, &pmap->stats.external);
1803 }
1804 }
b7266188 1805
1806 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1807
1808 } while (pv_e != (pv_hashed_entry_t)pv_h);
b7266188 1809 }
1810 /* Opportunistic refmod collection, annulled
1811 * if both REF and MOD are being cleared.
1812 */
b7266188 1813
6d2010ae 1814 pmap_phys_attributes[pai] |= attributes;
1815
1816 if (ept_keep_global_mod) {
1817 /*
1818 * If the hardware doesn't support AD bits for EPT PTEs and someone is
1819 * requesting that we clear the modified bit for a phys page, we need
1820 * to ensure that there are no EPT mappings for the page with the
1821 * modified bit set. If there are, we cannot clear the global modified bit.
1822 */
1823 bits &= ~PHYS_MODIFIED;
1824 }
1825 pmap_phys_attributes[pai] &= ~(bits);
b7266188 1826
fe8ab488
A
1827 /* update this page's "reusable" status */
1828 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
1829 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1830 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
1831 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1832 }
1833
6d2010ae 1834 UNLOCK_PVH(pai);
b7266188 1835
1836 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
1837 0, 0, 0, 0, 0);
b7266188
A
1838}
1839
1840/*
1841 * Check specified attribute bits.
1842 */
1843int
1844phys_attribute_test(
1845 ppnum_t pn,
1846 int bits)
0b4c1975 1847{
1848 pv_rooted_entry_t pv_h;
1849 pv_hashed_entry_t pv_e;
1850 pt_entry_t *pte;
1851 int pai;
1852 pmap_t pmap;
1853 int attributes = 0;
3e170ce0 1854 boolean_t is_ept;
1855
1856 pmap_intr_assert();
1857 assert(pn != vm_page_fictitious_addr);
3e170ce0 1858 assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
1859 if (pn == vm_page_guard_addr)
1860 return 0;
1861
1862 pai = ppn_to_pai(pn);
1863
1864 if (!IS_MANAGED_PAGE(pai)) {
1865 /*
1866 * Not a managed page.
1867 */
1868 return 0;
1869 }
0b4c1975 1870
1871 /*
1872 * Fast check... if bits already collected
1873 * no need to take any locks...
1874 * if not set, we need to recheck after taking
1875 * the lock in case they got pulled in while
1876 * we were waiting for the lock
1877 */
1878 if ((pmap_phys_attributes[pai] & bits) == bits)
1879 return bits;
0b4c1975 1880
6d2010ae 1881 pv_h = pai_to_pvh(pai);
0b4c1975 1882
6d2010ae 1883 LOCK_PVH(pai);
0b4c1975 1884
6d2010ae 1885 attributes = pmap_phys_attributes[pai] & bits;
0b4c1975 1886
0b4c1975 1887
1888 /*
1889 * Walk down PV list, checking the mappings until we
1890 * reach the end or we've found the desired attributes.
1891 */
1892 if (attributes != bits &&
1893 pv_h->pmap != PMAP_NULL) {
1894 /*
1895 * There are some mappings.
1896 */
1897 pv_e = (pv_hashed_entry_t)pv_h;
1898 do {
1899 vm_map_offset_t va;
0b4c1975 1900
6d2010ae 1901 pmap = pv_e->pmap;
3e170ce0 1902 is_ept = is_ept_pmap(pmap);
1903 va = pv_e->va;
1904 /*
1905 * pick up modify and/or reference bits from mapping
1906 */
0b4c1975 1907
6d2010ae 1908 pte = pmap_pte(pmap, va);
1909 if (!is_ept) {
1910 attributes |= (int)(*pte & bits);
1911 } else {
1912 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
1913
1914 }
1915
1916 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1917
1918 } while ((attributes != bits) &&
1919 (pv_e != (pv_hashed_entry_t)pv_h));
0b4c1975 1920 }
6d2010ae 1921 pmap_phys_attributes[pai] |= attributes;
0b4c1975 1922
1923 UNLOCK_PVH(pai);
1924 return (attributes);
1925}
0b4c1975 1926
1927/*
1928 * Routine: pmap_change_wiring
1929 * Function: Change the wiring attribute for a map/virtual-address
1930 * pair.
1931 * In/out conditions:
1932 * The mapping must already exist in the pmap.
1933 */
0b4c1975 1934void
1935pmap_change_wiring(
1936 pmap_t map,
1937 vm_map_offset_t vaddr,
1938 boolean_t wired)
0b4c1975 1939{
6d2010ae 1940 pt_entry_t *pte;
0b4c1975 1941
6d2010ae 1942 PMAP_LOCK(map);
0b4c1975 1943
6d2010ae 1944 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1945 panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
1946 map, vaddr, wired);
0b4c1975 1947
1948 if (wired && !iswired(*pte)) {
1949 /*
1950 * wiring down mapping
1951 */
316670eb 1952 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
6d2010ae 1953 OSAddAtomic(+1, &map->stats.wired_count);
3e170ce0 1954 pmap_update_pte(pte, 0, PTE_WIRED);
0b4c1975 1955 }
1956 else if (!wired && iswired(*pte)) {
1957 /*
1958 * unwiring mapping
1959 */
1960 assert(map->stats.wired_count >= 1);
1961 OSAddAtomic(-1, &map->stats.wired_count);
316670eb 1962 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
3e170ce0 1963 pmap_update_pte(pte, PTE_WIRED, 0);
060df5ea 1964 }
060df5ea 1965
1966 PMAP_UNLOCK(map);
1967}
1968
1969/*
1970 * "Backdoor" direct map routine for early mappings.
1971 * Useful for mapping memory outside the range
1972 * Sets A, D and NC if requested
1973 */
1974
1975vm_offset_t
1976pmap_map_bd(
1977 vm_offset_t virt,
1978 vm_map_offset_t start_addr,
1979 vm_map_offset_t end_addr,
1980 vm_prot_t prot,
1981 unsigned int flags)
1982{
1983 pt_entry_t template;
1984 pt_entry_t *pte;
1985 spl_t spl;
1986 vm_offset_t base = virt;
1987 template = pa_to_pte(start_addr)
1988 | INTEL_PTE_REF
1989 | INTEL_PTE_MOD
1990 | INTEL_PTE_WIRED
1991 | INTEL_PTE_VALID;
1992
1993 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
1994 template |= INTEL_PTE_NCACHE;
1995 if (!(flags & (VM_MEM_GUARDED)))
1996 template |= INTEL_PTE_PTA;
1997 }
1998
1999#if defined(__x86_64__)
2000 if ((prot & VM_PROT_EXECUTE) == 0)
2001 template |= INTEL_PTE_NX;
2002#endif
2003
2004 if (prot & VM_PROT_WRITE)
2005 template |= INTEL_PTE_WRITE;
2006
2007 while (start_addr < end_addr) {
2008 spl = splhigh();
2009 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2010 if (pte == PT_ENTRY_NULL) {
2011 panic("pmap_map_bd: Invalid kernel address\n");
2012 }
2013 pmap_store_pte(pte, template);
2014 splx(spl);
2015 pte_increment_pa(template);
2016 virt += PAGE_SIZE;
2017 start_addr += PAGE_SIZE;
2018 }
2019 flush_tlb_raw();
2020 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2021 return(virt);
2022}
39236c6e 2023
4bd07ac2 2024mach_vm_size_t
2025pmap_query_resident(
2026 pmap_t pmap,
2027 addr64_t s64,
3e170ce0 2028 addr64_t e64,
4bd07ac2 2029 mach_vm_size_t *compressed_bytes_p)
39236c6e
A
2030{
2031 pt_entry_t *pde;
2032 pt_entry_t *spte, *epte;
2033 addr64_t l64;
2034 uint64_t deadline;
2035 mach_vm_size_t resident_bytes;
2036 mach_vm_size_t compressed_bytes;
3e170ce0 2037 boolean_t is_ept;
2038
2039 pmap_intr_assert();
2040
3e170ce0 2041 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
2042 if (compressed_bytes_p) {
2043 *compressed_bytes_p = 0;
3e170ce0 2044 }
39236c6e 2045 return 0;
2046 }
2047
2048 is_ept = is_ept_pmap(pmap);
2049
2050 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
2051 pmap,
2052 (uint32_t) (s64 >> 32), s64,
2053 (uint32_t) (e64 >> 32), e64);
2054
2055 resident_bytes = 0;
2056 compressed_bytes = 0;
2057
2058 PMAP_LOCK(pmap);
2059
2060 deadline = rdtsc64() + max_preemption_latency_tsc;
2061
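	/*
	 * Walk the range one PDE-mapped chunk at a time, counting resident and
	 * compressed pages; periodically drop and retake the pmap lock to bound
	 * the time spent holding it.
	 */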
2062 while (s64 < e64) {
2063 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
2064 if (l64 > e64)
2065 l64 = e64;
2066 pde = pmap_pde(pmap, s64);
2067
2068 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
2069 if (*pde & PTE_PS) {
2070 /* superpage: not supported */
2071 } else {
2072 spte = pmap_pte(pmap,
2073 (s64 & ~(pde_mapped_size - 1)));
2074 spte = &spte[ptenum(s64)];
2075 epte = &spte[intel_btop(l64 - s64)];
2076
2077 for (; spte < epte; spte++) {
2078 if (pte_to_pa(*spte) != 0) {
4bd07ac2 2079 resident_bytes += PAGE_SIZE;
3e170ce0 2080 } else if (*spte & PTE_COMPRESSED) {
4bd07ac2 2081 compressed_bytes += PAGE_SIZE;
2082 }
2083 }
2084
2085 }
2086 }
2087 s64 = l64;
2088
2089 if (s64 < e64 && rdtsc64() >= deadline) {
2090 PMAP_UNLOCK(pmap);
2091 PMAP_LOCK(pmap);
2092 deadline = rdtsc64() + max_preemption_latency_tsc;
2093 }
2094 }
2095
2096 PMAP_UNLOCK(pmap);
2097
2098 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
2099 pmap, 0, 0, 0, 0);
2100
2101 if (compressed_bytes_p) {
2102 *compressed_bytes_p = compressed_bytes;
3e170ce0 2103 }
4bd07ac2 2104 return resident_bytes;
39236c6e 2105}
2106
2107#if MACH_ASSERT
2108void
2109pmap_set_process(
2110 __unused pmap_t pmap,
2111 __unused int pid,
2112 __unused char *procname)
2113{
2114}
2115#endif /* MACH_ASSERT */