1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach_assert.h>
30
31#include <vm/pmap.h>
32#include <vm/vm_map.h>
33#include <kern/ledger.h>
34#include <i386/pmap_internal.h>
35
36void pmap_remove_range(
37 pmap_t pmap,
38 vm_map_offset_t va,
39 pt_entry_t *spte,
40 pt_entry_t *epte);
41
42void pmap_remove_range_options(
43 pmap_t pmap,
44 vm_map_offset_t va,
45 pt_entry_t *spte,
46 pt_entry_t *epte,
47 int options);
48
49void pmap_reusable_range(
50 pmap_t pmap,
51 vm_map_offset_t va,
52 pt_entry_t *spte,
53 pt_entry_t *epte,
54 boolean_t reusable);
55
56uint32_t pmap_update_clear_pte_count;
57
58/*
59 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
60 * on a NBPDE boundary.
61 */
62
63/* These symbols may be referenced directly by VM */
64uint64_t pmap_nesting_size_min = NBPDE;
65uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
66
67/*
68 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
69 *
70 * grand = the pmap that we will nest subord into
71 * subord = the pmap that is nested into grand
72 * va_start = start of the range in the grand pmap to be nested
73 * nstart = start of the corresponding range in the nested (subord) pmap
74 * size = size of the nested area (up to 16TB)
75 *
76 * Inserts a pmap into another. This is used to implement shared segments.
77 *
78 * Note that we depend upon higher-level VM locks to ensure that things don't change while
79 * we are doing this. For example, the VM should not be doing any pmap enters while it is
80 * nesting, nor should it perform two nests at once.
81 */
82
83/*
84 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
85 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
86 * container and the "grand" parent. A minor optimization to consider for the
87 * future: make the "subord" truly a container rather than a full-fledged
88 * pagetable hierarchy which can be unnecessarily sparse (DRK).
89 */
90
91kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
92 vm_map_offset_t vaddr, nvaddr;
93 pd_entry_t *pde, *npde;
94 unsigned int i;
95 uint64_t num_pde;
96
97 if ((size & (pmap_nesting_size_min-1)) ||
98 (va_start & (pmap_nesting_size_min-1)) ||
99 (nstart & (pmap_nesting_size_min-1)) ||
100 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
101 return KERN_INVALID_VALUE;
102
103 if (size == 0) {
104 panic("pmap_nest: size is invalid - %016llX\n", size);
105 }
106
107 if (va_start != nstart)
108 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
109
110 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
111 (uintptr_t) grand, (uintptr_t) subord,
112 (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
113
114 nvaddr = (vm_map_offset_t)nstart;
115 num_pde = size >> PDESHIFT;
116
117 PMAP_LOCK(subord);
118
119 subord->pm_shared = TRUE;
120
121 for (i = 0; i < num_pde;) {
122 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
123
124 npde = pmap64_pdpt(subord, nvaddr);
125
126 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
127 PMAP_UNLOCK(subord);
128 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
129 PMAP_LOCK(subord);
130 npde = pmap64_pdpt(subord, nvaddr);
131 }
132 *npde |= INTEL_PDPTE_NESTED;
133 nvaddr += NBPDPT;
134 i += (uint32_t)NPDEPG;
135 }
136 else {
137 npde = pmap_pde(subord, nvaddr);
138
139 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
140 PMAP_UNLOCK(subord);
141 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
142 PMAP_LOCK(subord);
143 npde = pmap_pde(subord, nvaddr);
144 }
145 nvaddr += NBPDE;
146 i++;
147 }
148 }
149
150 PMAP_UNLOCK(subord);
151
152 vaddr = (vm_map_offset_t)va_start;
153
154 PMAP_LOCK(grand);
155
156 for (i = 0; i < num_pde;) {
157 pd_entry_t tpde;
158
159 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
160 npde = pmap64_pdpt(subord, vaddr);
161 if (npde == 0)
162 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
163 tpde = *npde;
164 pde = pmap64_pdpt(grand, vaddr);
165 if (0 == pde) {
166 PMAP_UNLOCK(grand);
167 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
168 PMAP_LOCK(grand);
169 pde = pmap64_pdpt(grand, vaddr);
170 }
171 if (pde == 0)
172 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
173 pmap_store_pte(pde, tpde);
174 vaddr += NBPDPT;
175 i += (uint32_t) NPDEPG;
176 }
177 else {
178 npde = pmap_pde(subord, nstart);
179 if (npde == 0)
180 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
181 tpde = *npde;
182 nstart += NBPDE;
183 pde = pmap_pde(grand, vaddr);
184 if ((0 == pde) && cpu_64bit) {
185 PMAP_UNLOCK(grand);
186 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
187 PMAP_LOCK(grand);
188 pde = pmap_pde(grand, vaddr);
189 }
190
191 if (pde == 0)
192 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
193 vaddr += NBPDE;
194 pmap_store_pte(pde, tpde);
195 i++;
196 }
197 }
198
199 PMAP_UNLOCK(grand);
200
201 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
202
203 return KERN_SUCCESS;
204}
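/*
 * Illustrative sketch, not part of the original file: how a caller in the
 * Mach VM layer might invoke pmap_nest() for a shared region, honoring the
 * alignment and identical-offset constraints documented above. The helper
 * name, the pmap arguments and the 512MB size are hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_nest_shared_region(pmap_t grand, pmap_t subord, addr64_t base)
{
	uint64_t size = 512ULL * 1024 * 1024;	/* hypothetical nested size */

	/* Both the base address and the size must be NBPDE (2MiB) aligned... */
	assert((base & (pmap_nesting_size_min - 1)) == 0);
	assert((size & (pmap_nesting_size_min - 1)) == 0);

	/* ...and the current implementation requires va_start == nstart. */
	return pmap_nest(grand, subord, base, base, size);
}
#endif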
205
206/*
207 * kern_return_t pmap_unnest(grand, vaddr, size)
208 *
209 * grand = the pmap from which the nested pmap will be removed
210 * vaddr = start of the range to be unnested; size = size of that range
211 *
212 * Removes a pmap from another. This is used to implement shared segments.
213 */
214
215kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
216
217 pd_entry_t *pde;
218 unsigned int i;
219 uint64_t num_pde;
220 addr64_t va_start, va_end;
221 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
222
223 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
224 (uintptr_t) grand,
225 (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
226
227 if ((size & (pmap_nesting_size_min-1)) ||
228 (vaddr & (pmap_nesting_size_min-1))) {
229 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
230 grand, vaddr, size);
231 }
232
233 /* align everything to PDE boundaries */
234 va_start = vaddr & ~(NBPDE-1);
235 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
236 size = va_end - va_start;
237
238 PMAP_LOCK(grand);
239
240 num_pde = size >> PDESHIFT;
241 vaddr = va_start;
242
243 for (i = 0; i < num_pde; ) {
244 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
245 npdpt = pdptnum(grand, vaddr);
246 pde = pmap64_pdpt(grand, vaddr);
247 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
248 pmap_store_pte(pde, (pd_entry_t)0);
249 i += (uint32_t) NPDEPG;
250 vaddr += NBPDPT;
251 continue;
252 }
253 }
254 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
255 if (pde == 0)
256 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
257 pmap_store_pte(pde, (pd_entry_t)0);
258 i++;
259 vaddr += NBPDE;
260 }
261
262 PMAP_UPDATE_TLBS(grand, va_start, va_end);
263
264 PMAP_UNLOCK(grand);
265
266 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
267
268 return KERN_SUCCESS;
269}
270
271/* Invoked by the Mach VM to determine the platform specific unnest region */
272
273boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
274 pd_entry_t *pdpte;
275 boolean_t rval = FALSE;
276
277 if (!cpu_64bit)
278 return rval;
279
280 PMAP_LOCK(p);
281
282 pdpte = pmap64_pdpt(p, *s);
283 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
284 *s &= ~(NBPDPT -1);
285 rval = TRUE;
286 }
287
288 pdpte = pmap64_pdpt(p, *e);
289 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
290 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
291 rval = TRUE;
292 }
293
294 PMAP_UNLOCK(p);
295
296 return rval;
297}
298
299/*
300 * pmap_find_phys returns the (4K) physical page number containing a
301 * given virtual address in a given pmap.
302 * Note that pmap_pte may return a pde if this virtual address is
303 * mapped by a large page and this is taken into account in order
304 * to return the correct page number in this case.
305 */
306ppnum_t
307pmap_find_phys(pmap_t pmap, addr64_t va)
308{
309 pt_entry_t *ptp;
310 pd_entry_t *pdep;
311 ppnum_t ppn = 0;
312 pd_entry_t pde;
313 pt_entry_t pte;
314
315 mp_disable_preemption();
316
317 /* This refcount test is a band-aid--several infrastructural changes
318 * are necessary to eliminate invocation of this routine from arbitrary
319 * contexts.
320 */
321
322 if (!pmap->ref_count)
323 goto pfp_exit;
324
325 pdep = pmap_pde(pmap, va);
326
327 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
328 if (pde & INTEL_PTE_PS) {
329 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
330 ppn += (ppnum_t) ptenum(va);
331 }
332 else {
333 ptp = pmap_pte(pmap, va);
334 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
335 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
336 }
337 }
338 }
339pfp_exit:
340 mp_enable_preemption();
341
342 return ppn;
343}
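/*
 * Illustrative sketch, not part of the original file: translating a kernel
 * virtual address to a physical address with pmap_find_phys(). The helper
 * name is hypothetical; i386_ptob() and PAGE_MASK follow the conventions
 * used elsewhere in this file. A zero frame number is treated as "unmapped".
 */
#if 0	/* example only */
static pmap_paddr_t
example_kva_to_pa(addr64_t va)
{
	ppnum_t ppn = pmap_find_phys(kernel_pmap, va);

	if (ppn == 0)
		return 0;	/* not currently mapped */

	/* Combine the 4K frame number with the offset within the page. */
	return ((pmap_paddr_t)i386_ptob(ppn)) | (va & PAGE_MASK);
}
#endif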
344
345/*
346 * Update cache attributes for all extant managed mappings.
347 * Assumes PV for this page is locked, and that the page
348 * is managed.
349 */
350
351void
352pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
353 pv_rooted_entry_t pv_h, pv_e;
354 pv_hashed_entry_t pvh_e, nexth;
355 vm_map_offset_t vaddr;
356 pmap_t pmap;
357 pt_entry_t *ptep;
358
359 assert(IS_MANAGED_PAGE(pn));
360
361 pv_h = pai_to_pvh(pn);
362 /* TODO: translate the PHYS_* bits to PTE bits; while they're
363 * currently identical, they may not remain so.
364 * Potential optimizations (here and in page_protect):
365 * parallel shootdowns, and checking for redundant
366 * attribute modifications.
367 */
368
369 /*
370 * Alter attributes on all mappings
371 */
372 if (pv_h->pmap != PMAP_NULL) {
373 pv_e = pv_h;
374 pvh_e = (pv_hashed_entry_t)pv_e;
375
376 do {
377 pmap = pv_e->pmap;
378 vaddr = pv_e->va;
379 ptep = pmap_pte(pmap, vaddr);
380
381 if (0 == ptep)
382 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
383
384 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
385 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
386 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
387 pvh_e = nexth;
388 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
389 }
390}
391
392void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
393 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
394
395 if (dofilter) {
396 CPU_CR3_MARK_INACTIVE();
397 } else {
398 CPU_CR3_MARK_ACTIVE();
399 mfence();
400 if (current_cpu_datap()->cpu_tlb_invalid)
401 process_pmap_updates();
402 }
403}
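/*
 * Illustrative sketch, not part of the original file: the intended bracketed
 * use of x86_filter_TLB_coherency_interrupts(). The critical section body is
 * hypothetical; preemption is disabled to satisfy the assertion above.
 */
#if 0	/* example only */
	mp_disable_preemption();
	x86_filter_TLB_coherency_interrupts(TRUE);	/* mark this CPU's CR3 inactive,
							 * so remote shootdowns need not
							 * wait for it */
	/* ... window during which stale TLB entries on this CPU are tolerable ... */
	x86_filter_TLB_coherency_interrupts(FALSE);	/* re-activate CR3 and process
							 * any deferred invalidations */
	mp_enable_preemption();
#endif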
404
405
406/*
407 * Insert the given physical page (p) at
408 * the specified virtual address (v) in the
409 * target physical map with the protection requested.
410 *
411 * If specified, the page will be wired down, meaning
412 * that the related pte cannot be reclaimed.
413 *
414 * NB: This is the only routine which MAY NOT lazy-evaluate
415 * or lose information. That is, this routine must actually
416 * insert this page into the given map NOW.
417 */
418
419void
420pmap_enter(
421 register pmap_t pmap,
422 vm_map_offset_t vaddr,
423 ppnum_t pn,
424 vm_prot_t prot,
425 vm_prot_t fault_type,
426 unsigned int flags,
427 boolean_t wired)
428{
429 (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
430}
431
432
433kern_return_t
434pmap_enter_options(
435 register pmap_t pmap,
436 vm_map_offset_t vaddr,
437 ppnum_t pn,
438 vm_prot_t prot,
439 __unused vm_prot_t fault_type,
440 unsigned int flags,
441 boolean_t wired,
442 unsigned int options,
443 void *arg)
444{
445 pt_entry_t *pte;
446 pv_rooted_entry_t pv_h;
447 ppnum_t pai;
448 pv_hashed_entry_t pvh_e;
449 pv_hashed_entry_t pvh_new;
450 pt_entry_t template;
451 pmap_paddr_t old_pa;
452 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
453 boolean_t need_tlbflush = FALSE;
454 boolean_t set_NX;
455 char oattr;
456 boolean_t old_pa_locked;
457 /* 2MiB mappings are confined to x86_64 by VM */
458 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
459 vm_object_t delpage_pm_obj = NULL;
460 uint64_t delpage_pde_index = 0;
461 pt_entry_t old_pte;
462 kern_return_t kr_expand;
463
464 pmap_intr_assert();
465
466 if (pmap == PMAP_NULL)
467 return KERN_INVALID_ARGUMENT;
468
469 /* N.B. We can be supplied a zero page frame in the NOENTER case; it is an
470 * unused value in that scenario.
471 */
472 assert(pn != vm_page_fictitious_addr);
473
474 if (pn == vm_page_guard_addr)
475 return KERN_INVALID_ARGUMENT;
476
477 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
478 pmap,
479 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
480 pn, prot);
481
482 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
483 set_NX = FALSE;
484 else
485 set_NX = TRUE;
486
487 if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
488 set_NX = FALSE;
489 }
490
491 /*
492 * Must allocate a new pvlist entry while we're unlocked;
493 * zalloc may cause pageout (which will lock the pmap system).
494 * If we determine we need a pvlist entry, we will unlock
495 * and allocate one. Then we will retry, throwing away
496 * the allocated entry later (if we no longer need it).
497 */
498
499 pvh_new = PV_HASHED_ENTRY_NULL;
500Retry:
501 pvh_e = PV_HASHED_ENTRY_NULL;
502
503 PMAP_LOCK(pmap);
504
505 /*
506 * Expand pmap to include this pte. Assume that
507 * pmap is always expanded to include enough hardware
508 * pages to map one VM page.
509 */
510 if (superpage) {
511 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
512 /* need room for another pde entry */
513 PMAP_UNLOCK(pmap);
514 kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
515 if (kr_expand != KERN_SUCCESS)
516 return kr_expand;
517 PMAP_LOCK(pmap);
518 }
519 } else {
520 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
521 /*
522 * Must unlock to expand the pmap;
523 * we are going to grow PDE-level page(s).
524 */
525 PMAP_UNLOCK(pmap);
526 kr_expand = pmap_expand(pmap, vaddr, options);
527 if (kr_expand != KERN_SUCCESS)
528 return kr_expand;
529 PMAP_LOCK(pmap);
530 }
531 }
532 if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
533 PMAP_UNLOCK(pmap);
534 return KERN_SUCCESS;
535 }
536
537 if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
538 /*
539 * There is still an empty page table mapped that
540 * was used for a previous base page mapping.
541 * Remember the PDE and the PDE index, so that we
542 * can free the page at the end of this function.
543 */
544 delpage_pde_index = pdeidx(pmap, vaddr);
545 delpage_pm_obj = pmap->pm_obj;
546 *pte = 0;
547 }
548
549 old_pa = pte_to_pa(*pte);
550 pai = pa_index(old_pa);
551 old_pa_locked = FALSE;
552
553 if (old_pa == 0 &&
554 (*pte & INTEL_PTE_COMPRESSED)) {
555 /* one less "compressed" */
556 OSAddAtomic64(-1, &pmap->stats.compressed);
557 /* marker will be cleared below */
558 }
559
560 /*
561 * If we have a previous managed page, lock the pv entry now. After
562 * we lock it, check to see if someone beat us to the lock and, if so,
563 * drop the lock.
564 */
565 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
566 LOCK_PVH(pai);
567 old_pa_locked = TRUE;
568 old_pa = pte_to_pa(*pte);
569 if (0 == old_pa) {
570 UNLOCK_PVH(pai); /* another path beat us to it */
571 old_pa_locked = FALSE;
572 }
573 }
574
575 /*
576 * Special case if the incoming physical page is already mapped
577 * at this address.
578 */
579 if (old_pa == pa) {
580 pt_entry_t old_attributes =
581 *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
582
583 /*
584 * May be changing its wired attribute or protection
585 */
586
587 template = pa_to_pte(pa) | INTEL_PTE_VALID;
588 template |= pmap_get_cache_attributes(pa_index(pa));
589
590 if (VM_MEM_NOT_CACHEABLE ==
591 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
592 if (!(flags & VM_MEM_GUARDED))
593 template |= INTEL_PTE_PTA;
594 template |= INTEL_PTE_NCACHE;
595 }
596 if (pmap != kernel_pmap)
597 template |= INTEL_PTE_USER;
598 if (prot & VM_PROT_WRITE) {
599 template |= INTEL_PTE_WRITE;
600 }
601
602 if (set_NX)
603 template |= INTEL_PTE_NX;
604
605 if (wired) {
606 template |= INTEL_PTE_WIRED;
607 if (!iswired(old_attributes)) {
608 OSAddAtomic(+1, &pmap->stats.wired_count);
609 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
610 }
611 } else {
612 if (iswired(old_attributes)) {
613 assert(pmap->stats.wired_count >= 1);
614 OSAddAtomic(-1, &pmap->stats.wired_count);
615 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
616 }
617 }
618 if (superpage) /* this path cannot be used */
619 template |= INTEL_PTE_PS; /* to change the page size! */
620
621 if (old_attributes == template)
622 goto dont_update_pte;
623
624 /* Determine delta, PV locked */
625 need_tlbflush =
626 ((old_attributes ^ template) != INTEL_PTE_WIRED);
627
628 if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
629 if ((old_attributes ^ template) == INTEL_PTE_WRITE)
630 need_tlbflush = FALSE;
631 }
632
633 /* store modified PTE and preserve RC bits */
634 pt_entry_t npte, opte;
635 do {
636 opte = *pte;
637 npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
638 } while (!pmap_cmpx_pte(pte, opte, npte));
639dont_update_pte:
640 if (old_pa_locked) {
641 UNLOCK_PVH(pai);
642 old_pa_locked = FALSE;
643 }
644 goto Done;
645 }
646
647 /*
648 * Outline of code from here:
649 * 1) If va was mapped, update TLBs, remove the mapping
650 * and remove old pvlist entry.
651 * 2) Add pvlist entry for new mapping
652 * 3) Enter new mapping.
653 *
654 * If the old physical page is not managed step 1) is skipped
655 * (except for updating the TLBs), and the mapping is
656 * overwritten at step 3). If the new physical page is not
657 * managed, step 2) is skipped.
658 */
659
660 if (old_pa != (pmap_paddr_t) 0) {
661
662 /*
663 * Don't do anything to pages outside valid memory here.
664 * Instead convince the code that enters a new mapping
665 * to overwrite the old one.
666 */
667
668 /* invalidate the PTE */
669 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
670 /* propagate invalidate everywhere */
671 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
672 /* remember reference and change */
673 old_pte = *pte;
674 oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
675 /* completely invalidate the PTE */
676 pmap_store_pte(pte, 0);
677
678 if (IS_MANAGED_PAGE(pai)) {
679 pmap_assert(old_pa_locked == TRUE);
680 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
681 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
682 assert(pmap->stats.resident_count >= 1);
683 OSAddAtomic(-1, &pmap->stats.resident_count);
684 if (pmap != kernel_pmap) {
685 if (IS_REUSABLE_PAGE(pai)) {
686 assert(pmap->stats.reusable > 0);
687 OSAddAtomic(-1, &pmap->stats.reusable);
688 } else if (IS_INTERNAL_PAGE(pai)) {
689 assert(pmap->stats.internal > 0);
690 OSAddAtomic(-1, &pmap->stats.internal);
691 } else {
692 assert(pmap->stats.external > 0);
693 OSAddAtomic(-1, &pmap->stats.external);
694 }
695 }
696 if (iswired(*pte)) {
697 assert(pmap->stats.wired_count >= 1);
698 OSAddAtomic(-1, &pmap->stats.wired_count);
699 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
700 PAGE_SIZE);
701 }
702 pmap_phys_attributes[pai] |= oattr;
703
704 /*
705 * Remove the mapping from the pvlist for
706 * this physical page.
707 * We'll end up with either a rooted pv or a
708 * hashed pv
709 */
710 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
711
712 } else {
713
714 /*
715 * old_pa is not managed.
716 * Do removal part of accounting.
717 */
718
719 if (pmap != kernel_pmap) {
720#if 00
721 assert(pmap->stats.device > 0);
722 OSAddAtomic(-1, &pmap->stats.device);
723#endif
724 }
725 if (iswired(*pte)) {
726 assert(pmap->stats.wired_count >= 1);
727 OSAddAtomic(-1, &pmap->stats.wired_count);
728 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
729 }
730 }
731 }
732
733 /*
734 * If we had a previously managed page locked, unlock it now.
735 */
736 if (old_pa_locked) {
737 UNLOCK_PVH(pai);
738 old_pa_locked = FALSE;
739 }
740
741 pai = pa_index(pa); /* now working with new incoming phys page */
742 if (IS_MANAGED_PAGE(pai)) {
743
744 /*
745 * Step 2) Enter the mapping in the PV list for this
746 * physical page.
747 */
748 pv_h = pai_to_pvh(pai);
749
750 LOCK_PVH(pai);
751
752 if (pv_h->pmap == PMAP_NULL) {
753 /*
754 * No mappings yet, use rooted pv
755 */
756 pv_h->va = vaddr;
757 pv_h->pmap = pmap;
758 queue_init(&pv_h->qlink);
759
760 if (options & PMAP_OPTIONS_INTERNAL) {
761 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
762 } else {
763 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
764 }
765 if (options & PMAP_OPTIONS_REUSABLE) {
766 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
767 } else {
768 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
769 }
770 } else {
771 /*
772 * Add new pv_hashed_entry after header.
773 */
774 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
775 pvh_e = pvh_new;
776 pvh_new = PV_HASHED_ENTRY_NULL;
777 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
778 PV_HASHED_ALLOC(&pvh_e);
779 if (PV_HASHED_ENTRY_NULL == pvh_e) {
780 /*
781 * the pv list is empty. if we are on
782 * the kernel pmap we'll use one of
783 * the special private kernel pv_e's,
784 * else, we need to unlock
785 * everything, zalloc a pv_e, and
786 * restart bringing in the pv_e with
787 * us.
788 */
789 if (kernel_pmap == pmap) {
790 PV_HASHED_KERN_ALLOC(&pvh_e);
791 } else {
792 UNLOCK_PVH(pai);
793 PMAP_UNLOCK(pmap);
794 pmap_pv_throttle(pmap);
795 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
796 goto Retry;
797 }
798 }
799 }
800
801 if (PV_HASHED_ENTRY_NULL == pvh_e)
802 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
803
804 pvh_e->va = vaddr;
805 pvh_e->pmap = pmap;
806 pvh_e->ppn = pn;
807 pv_hash_add(pvh_e, pv_h);
808
809 /*
810 * Remember that we used the pvlist entry.
811 */
812 pvh_e = PV_HASHED_ENTRY_NULL;
813 }
814
815 /*
816 * only count the mapping
817 * for 'managed memory'
818 */
819 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
820 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
821 OSAddAtomic(+1, &pmap->stats.resident_count);
822 if (pmap->stats.resident_count > pmap->stats.resident_max) {
823 pmap->stats.resident_max = pmap->stats.resident_count;
824 }
825 if (pmap != kernel_pmap) {
826 if (IS_REUSABLE_PAGE(pai)) {
827 OSAddAtomic(+1, &pmap->stats.reusable);
828 PMAP_STATS_PEAK(pmap->stats.reusable);
829 } else if (IS_INTERNAL_PAGE(pai)) {
830 OSAddAtomic(+1, &pmap->stats.internal);
831 PMAP_STATS_PEAK(pmap->stats.internal);
832 } else {
833 OSAddAtomic(+1, &pmap->stats.external);
834 PMAP_STATS_PEAK(pmap->stats.external);
835 }
836 }
837 } else if (last_managed_page == 0) {
838 /* Account for early mappings created before "managed pages"
839 * are determined. Consider consulting the available DRAM map.
840 */
841 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
842 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
843 OSAddAtomic(+1, &pmap->stats.resident_count);
844 if (pmap != kernel_pmap) {
845#if 00
846 OSAddAtomic(+1, &pmap->stats.device);
847 PMAP_STATS_PEAK(pmap->stats.device);
848#endif
849 }
850 }
851 /*
852 * Step 3) Enter the mapping.
853 *
854 * Build a template to speed up entering -
855 * only the pfn changes.
856 */
857 template = pa_to_pte(pa) | INTEL_PTE_VALID;
858 /*
859 * DRK: It may be worth asserting on cache attribute flags that diverge
860 * from the existing physical page attributes.
861 */
862
863 template |= pmap_get_cache_attributes(pa_index(pa));
864
865 if (flags & VM_MEM_NOT_CACHEABLE) {
866 if (!(flags & VM_MEM_GUARDED))
867 template |= INTEL_PTE_PTA;
868 template |= INTEL_PTE_NCACHE;
869 }
870 if (pmap != kernel_pmap)
871 template |= INTEL_PTE_USER;
872 if (prot & VM_PROT_WRITE)
873 template |= INTEL_PTE_WRITE;
874 if (set_NX)
875 template |= INTEL_PTE_NX;
876 if (wired) {
877 template |= INTEL_PTE_WIRED;
878 OSAddAtomic(+1, & pmap->stats.wired_count);
879 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
880 }
881 if (superpage)
882 template |= INTEL_PTE_PS;
883 pmap_store_pte(pte, template);
884
885 /*
886 * if this was a managed page we delayed unlocking the pv until here
887 * to prevent pmap_page_protect et al from finding it until the pte
888 * has been stored
889 */
890 if (IS_MANAGED_PAGE(pai)) {
891 UNLOCK_PVH(pai);
892 }
893Done:
894 if (need_tlbflush == TRUE) {
895 if (options & PMAP_OPTIONS_NOFLUSH)
896 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
897 else
898 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
899 }
900 if (pvh_e != PV_HASHED_ENTRY_NULL) {
901 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
902 }
903 if (pvh_new != PV_HASHED_ENTRY_NULL) {
904 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
905 }
906 PMAP_UNLOCK(pmap);
907
908 if (delpage_pm_obj) {
909 vm_page_t m;
910
911 vm_object_lock(delpage_pm_obj);
912 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
913 if (m == VM_PAGE_NULL)
914 panic("pmap_enter: pte page not in object");
915 vm_object_unlock(delpage_pm_obj);
916 VM_PAGE_FREE(m);
917 OSAddAtomic(-1, &inuse_ptepages_count);
918 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
919 }
920
921 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
922 return KERN_SUCCESS;
923}
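/*
 * Illustrative sketch, not part of the original file: a minimal
 * pmap_enter_options() call that wires a single page read/write with default
 * cache attributes. The virtual address and page frame number are
 * hypothetical.
 */
#if 0	/* example only */
	kern_return_t kr;

	kr = pmap_enter_options(kernel_pmap,
	    (vm_map_offset_t)0xffffff8012345000ULL,	/* hypothetical, page-aligned VA */
	    (ppnum_t)0x1234,				/* hypothetical page frame */
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_NONE,				/* fault_type is unused here */
	    0,						/* flags: default cacheability */
	    TRUE,					/* wired */
	    PMAP_EXPAND_OPTIONS_NONE,
	    NULL);
	assert(kr == KERN_SUCCESS);
#endif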
924
925/*
926 * Remove a range of hardware page-table entries.
927 * The entries given are the first (inclusive)
928 * and last (exclusive) entries for the VM pages.
929 * The virtual address is the va for the first pte.
930 *
931 * The pmap must be locked.
932 * If the pmap is not the kernel pmap, the range must lie
933 * entirely within one pte-page. This is NOT checked.
934 * Assumes that the pte-page exists.
935 */
936
937void
938pmap_remove_range(
939 pmap_t pmap,
940 vm_map_offset_t start_vaddr,
941 pt_entry_t *spte,
942 pt_entry_t *epte)
943{
944 pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
945}
946
947void
948pmap_remove_range_options(
949 pmap_t pmap,
950 vm_map_offset_t start_vaddr,
951 pt_entry_t *spte,
952 pt_entry_t *epte,
953 int options)
954{
955 pt_entry_t *cpte;
956 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
957 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
958 pv_hashed_entry_t pvh_e;
959 int pvh_cnt = 0;
960 int num_removed, num_unwired, num_found, num_invalid;
961 int num_device, num_external, num_internal, num_reusable;
962 uint64_t num_compressed;
963 ppnum_t pai;
964 pmap_paddr_t pa;
965 vm_map_offset_t vaddr;
966
967 num_removed = 0;
968 num_unwired = 0;
969 num_found = 0;
970 num_invalid = 0;
971 num_device = 0;
972 num_external = 0;
973 num_internal = 0;
974 num_reusable = 0;
975 num_compressed = 0;
976 /* invalidate the PTEs first to "freeze" them */
977 for (cpte = spte, vaddr = start_vaddr;
978 cpte < epte;
979 cpte++, vaddr += PAGE_SIZE_64) {
980 pt_entry_t p = *cpte;
981
982 pa = pte_to_pa(p);
983 if (pa == 0) {
984 if (pmap != kernel_pmap &&
985 (options & PMAP_OPTIONS_REMOVE) &&
986 (p & INTEL_PTE_COMPRESSED)) {
987 /* one less "compressed" */
988 num_compressed++;
989 /* clear marker */
990 /* XXX probably does not need to be atomic! */
991 pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
992 }
993 continue;
994 }
995 num_found++;
996
997 if (iswired(p))
998 num_unwired++;
999
1000 pai = pa_index(pa);
1001
1002 if (!IS_MANAGED_PAGE(pai)) {
1003 /*
1004 * Outside range of managed physical memory.
1005 * Just remove the mappings.
1006 */
1007 pmap_store_pte(cpte, 0);
1008 num_device++;
1009 continue;
1010 }
1011
1012 if ((p & INTEL_PTE_VALID) == 0)
1013 num_invalid++;
1014
1015 /* invalidate the PTE */
1016 pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
1017 }
1018
1019 if (num_found == 0) {
1020 /* nothing was changed: we're done */
1021 goto update_counts;
1022 }
1023
1024 /* propagate the invalidates to other CPUs */
1025
1026 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1027
1028 for (cpte = spte, vaddr = start_vaddr;
1029 cpte < epte;
1030 cpte++, vaddr += PAGE_SIZE_64) {
1031
1032 pa = pte_to_pa(*cpte);
1033 if (pa == 0)
1034 continue;
1035
1036 pai = pa_index(pa);
1037
1038 LOCK_PVH(pai);
1039
1040 pa = pte_to_pa(*cpte);
1041 if (pa == 0) {
1042 UNLOCK_PVH(pai);
1043 continue;
1044 }
1045 num_removed++;
1046 if (IS_REUSABLE_PAGE(pai)) {
1047 num_reusable++;
1048 } else if (IS_INTERNAL_PAGE(pai)) {
1049 num_internal++;
1050 } else {
1051 num_external++;
1052 }
1053
1054 /*
1055 * Get the modify and reference bits, then
1056 * nuke the entry in the page table
1057 */
1058 /* remember reference and change */
1059 pmap_phys_attributes[pai] |=
1060 (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
1061
1062 /*
1063 * Remove the mapping from the pvlist for this physical page.
1064 */
1065 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
1066
1067 /* completely invalidate the PTE */
1068 pmap_store_pte(cpte, 0);
1069
1070 UNLOCK_PVH(pai);
1071
1072 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1073 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1074 pvh_eh = pvh_e;
1075
1076 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1077 pvh_et = pvh_e;
1078 }
1079 pvh_cnt++;
1080 }
1081 } /* for loop */
1082
1083 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1084 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1085 }
1086update_counts:
1087 /*
1088 * Update the counts
1089 */
1090#if TESTING
1091 if (pmap->stats.resident_count < num_removed)
1092 panic("pmap_remove_range: resident_count");
1093#endif
1094 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1095 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
1096 assert(pmap->stats.resident_count >= num_removed);
1097 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1098
1099 if (pmap != kernel_pmap) {
1100#if 00
1101 assert(pmap->stats.device >= num_device);
1102 if (num_device)
1103 OSAddAtomic(-num_device, &pmap->stats.device);
1104#endif /* 00 */
1105 assert(pmap->stats.external >= num_external);
1106 if (num_external)
1107 OSAddAtomic(-num_external, &pmap->stats.external);
1108 assert(pmap->stats.internal >= num_internal);
1109 if (num_internal)
1110 OSAddAtomic(-num_internal, &pmap->stats.internal);
1111 assert(pmap->stats.reusable >= num_reusable);
1112 if (num_reusable)
1113 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
1114 assert(pmap->stats.compressed >= num_compressed);
1115 if (num_compressed)
1116 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
1117 }
1118
1119#if TESTING
1120 if (pmap->stats.wired_count < num_unwired)
1121 panic("pmap_remove_range: wired_count");
1122#endif
1123 assert(pmap->stats.wired_count >= num_unwired);
1124 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
1125 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1126
1127 return;
1128}
1129
1130
1131/*
1132 * Remove the given range of addresses
1133 * from the specified map.
1134 *
1135 * It is assumed that the start and end are properly
1136 * rounded to the hardware page size.
1137 */
1138void
1139pmap_remove(
1140 pmap_t map,
1141 addr64_t s64,
1142 addr64_t e64)
1143{
1144 pmap_remove_options(map, s64, e64, 0);
1145}
1146
1147void
1148pmap_remove_options(
1149 pmap_t map,
1150 addr64_t s64,
1151 addr64_t e64,
1152 int options)
1153{
1154 pt_entry_t *pde;
1155 pt_entry_t *spte, *epte;
1156 addr64_t l64;
1157 uint64_t deadline;
1158
1159 pmap_intr_assert();
1160
1161 if (map == PMAP_NULL || s64 == e64)
1162 return;
1163
1164 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1165 map,
1166 (uint32_t) (s64 >> 32), s64,
1167 (uint32_t) (e64 >> 32), e64);
1168
1169
1170 PMAP_LOCK(map);
1171
1172#if 0
1173 /*
1174 * Check that address range in the kernel does not overlap the stacks.
1175 * We initialize local static min/max variables once to avoid making
1176 * 2 function calls for every remove. Note also that these functions
1177 * both return 0 before kernel stacks have been initialized, and hence
1178 * the panic is not triggered in this case.
1179 */
1180 if (map == kernel_pmap) {
1181 static vm_offset_t kernel_stack_min = 0;
1182 static vm_offset_t kernel_stack_max = 0;
1183
1184 if (kernel_stack_min == 0) {
1185 kernel_stack_min = min_valid_stack_address();
1186 kernel_stack_max = max_valid_stack_address();
1187 }
1188 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1189 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1190 panic("pmap_remove() attempted in kernel stack");
1191 }
1192#else
1193
1194 /*
1195 * The values of kernel_stack_min and kernel_stack_max are no longer
1196 * relevant now that we allocate kernel stacks in the kernel map,
1197 * so the old code above no longer applies. If we wanted to check that
1198 * we weren't removing a mapping of a page in a kernel stack we'd
1199 * mark the PTE with an unused bit and check that here.
1200 */
1201
1202#endif
1203
1204 deadline = rdtsc64() + max_preemption_latency_tsc;
1205
1206 while (s64 < e64) {
1207 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1208 if (l64 > e64)
1209 l64 = e64;
1210 pde = pmap_pde(map, s64);
1211
1212 if (pde && (*pde & INTEL_PTE_VALID)) {
1213 if (*pde & INTEL_PTE_PS) {
1214 /*
1215 * If we're removing a superpage, pmap_remove_range()
1216 * must work on level 2 instead of level 1; and we're
1217 * only passing a single level 2 entry instead of a
1218 * level 1 range.
1219 */
1220 spte = pde;
1221 epte = spte+1; /* excluded */
1222 } else {
1223 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1224 spte = &spte[ptenum(s64)];
1225 epte = &spte[intel_btop(l64 - s64)];
1226 }
1227 pmap_remove_range_options(map, s64, spte, epte,
1228 options);
1229 }
1230 s64 = l64;
1231
1232 if (s64 < e64 && rdtsc64() >= deadline) {
1233 PMAP_UNLOCK(map)
1234 /* TODO: Rapid release/reacquisition can defeat
1235 * the "backoff" intent here; either consider a
1236 * fair spinlock, or a scheme whereby each lock
1237 * attempt marks the processor as within a spinlock
1238 * acquisition, and scan CPUs here to determine
1239 * if a backoff is necessary, to avoid sacrificing
1240 * performance in the common case.
1241 */
1242 PMAP_LOCK(map)
1243 deadline = rdtsc64() + max_preemption_latency_tsc;
1244 }
1245 }
1246
1247 PMAP_UNLOCK(map);
1248
1249 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1250 map, 0, 0, 0, 0);
1251
1252}
1253
1254void
1255pmap_page_protect(
1256 ppnum_t pn,
1257 vm_prot_t prot)
1258{
1259 pmap_page_protect_options(pn, prot, 0, NULL);
1260}
1261
1262/*
1263 * Routine: pmap_page_protect_options
1264 *
1265 * Function:
1266 * Lower the permission for all mappings to a given
1267 * page.
1268 */
1269void
1270pmap_page_protect_options(
1271 ppnum_t pn,
1272 vm_prot_t prot,
1273 unsigned int options,
1274 void *arg)
1275{
1276 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1277 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1278 pv_hashed_entry_t nexth;
1279 int pvh_cnt = 0;
1280 pv_rooted_entry_t pv_h;
1281 pv_rooted_entry_t pv_e;
1282 pv_hashed_entry_t pvh_e;
1283 pt_entry_t *pte;
1284 int pai;
1285 pmap_t pmap;
1286 boolean_t remove;
1287 pt_entry_t new_pte_value;
1288
1289 pmap_intr_assert();
1290 assert(pn != vm_page_fictitious_addr);
1291 if (pn == vm_page_guard_addr)
1292 return;
1293
1294 pai = ppn_to_pai(pn);
1295
1296 if (!IS_MANAGED_PAGE(pai)) {
1297 /*
1298 * Not a managed page.
1299 */
1300 return;
1301 }
1302 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1303 pn, prot, 0, 0, 0);
1304
1305 /*
1306 * Determine the new protection.
1307 */
1308 switch (prot) {
1309 case VM_PROT_READ:
1310 case VM_PROT_READ | VM_PROT_EXECUTE:
1311 remove = FALSE;
1312 break;
1313 case VM_PROT_ALL:
1314 return; /* nothing to do */
1315 default:
1316 remove = TRUE;
1317 break;
1318 }
1319
1320 pv_h = pai_to_pvh(pai);
1321
1322 LOCK_PVH(pai);
1323
1324
1325 /*
1326 * Walk down PV list, if any, changing or removing all mappings.
1327 */
1328 if (pv_h->pmap == PMAP_NULL)
1329 goto done;
1330
1331 pv_e = pv_h;
1332 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1333
1334 do {
1335 vm_map_offset_t vaddr;
1336
1337 pmap = pv_e->pmap;
1338 vaddr = pv_e->va;
1339 pte = pmap_pte(pmap, vaddr);
1340
1341 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1342 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1343
1344 if (0 == pte) {
1345 panic("pmap_page_protect() "
1346 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1347 pmap, pn, vaddr);
1348 }
1349 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1350
1351 /*
1352 * Remove the mapping if new protection is NONE
1353 */
1354 if (remove) {
1355
1356 /* Remove per-pmap wired count */
1357 if (iswired(*pte)) {
1358 OSAddAtomic(-1, &pmap->stats.wired_count);
1359 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1360 }
1361
1362 if (pmap != kernel_pmap &&
1363 (options & PMAP_OPTIONS_COMPRESSOR) &&
1364 IS_INTERNAL_PAGE(pai)) {
1365 /* adjust "reclaimed" stats */
1366 OSAddAtomic64(+1, &pmap->stats.compressed);
1367 PMAP_STATS_PEAK(pmap->stats.compressed);
1368 pmap->stats.compressed_lifetime++;
1369 /* mark this PTE as having been "reclaimed" */
1370 new_pte_value = INTEL_PTE_COMPRESSED;
1371 } else {
1372 new_pte_value = 0;
1373 }
1374
1375 if (options & PMAP_OPTIONS_NOREFMOD) {
1376 pmap_store_pte(pte, new_pte_value);
1377
1378 if (options & PMAP_OPTIONS_NOFLUSH)
1379 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1380 else
1381 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1382 } else {
1383 /*
1384 * Remove the mapping, collecting dirty bits.
1385 */
1386 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
1387
1388 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1389 pmap_phys_attributes[pai] |=
1390 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1391 pmap_store_pte(pte, new_pte_value);
1392 }
1393#if TESTING
1394 if (pmap->stats.resident_count < 1)
1395 panic("pmap_page_protect: resident_count");
1396#endif
1397 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1398 assert(pmap->stats.resident_count >= 1);
1399 OSAddAtomic(-1, &pmap->stats.resident_count);
1400 if (options & PMAP_OPTIONS_COMPRESSOR) {
1401 /*
1402 * This removal is only being done so we can send this page to
1403 * the compressor; therefore it mustn't affect total task footprint.
1404 */
1405 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1406 } else {
1407 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1408 }
1409
1410 if (pmap != kernel_pmap) {
1411 if (IS_REUSABLE_PAGE(pai)) {
1412 assert(pmap->stats.reusable > 0);
1413 OSAddAtomic(-1, &pmap->stats.reusable);
1414 } else if (IS_INTERNAL_PAGE(pai)) {
1415 assert(pmap->stats.internal > 0);
1416 OSAddAtomic(-1, &pmap->stats.internal);
1417 } else {
1418 assert(pmap->stats.external > 0);
1419 OSAddAtomic(-1, &pmap->stats.external);
1420 }
1421 }
1422
1423 /*
1424 * Deal with the pv_rooted_entry.
1425 */
1426
1427 if (pv_e == pv_h) {
1428 /*
1429 * Fix up head later.
1430 */
1431 pv_h->pmap = PMAP_NULL;
1432 } else {
1433 /*
1434 * Delete this entry.
1435 */
1436 pv_hash_remove(pvh_e);
1437 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1438 pvh_eh = pvh_e;
1439
1440 if (pvh_et == PV_HASHED_ENTRY_NULL)
1441 pvh_et = pvh_e;
1442 pvh_cnt++;
1443 }
1444 } else {
1445 /*
1446 * Write-protect, after opportunistic refmod collect
1447 */
1448 pmap_phys_attributes[pai] |=
1449 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1450 pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
1451
1452 if (options & PMAP_OPTIONS_NOFLUSH)
1453 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1454 else
1455 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1456 }
1457 pvh_e = nexth;
1458 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1459
1460
1461 /*
1462 * If pv_head mapping was removed, fix it up.
1463 */
1464 if (pv_h->pmap == PMAP_NULL) {
1465 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1466
1467 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1468 pv_hash_remove(pvh_e);
1469 pv_h->pmap = pvh_e->pmap;
1470 pv_h->va = pvh_e->va;
1471 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1472 pvh_eh = pvh_e;
1473
1474 if (pvh_et == PV_HASHED_ENTRY_NULL)
1475 pvh_et = pvh_e;
1476 pvh_cnt++;
1477 }
1478 }
1479 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1480 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1481 }
1482done:
1483 UNLOCK_PVH(pai);
1484
1485 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1486 0, 0, 0, 0, 0);
1487}
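/*
 * Illustrative sketch, not part of the original file: using the wrappers
 * above to first write-protect and then remove every mapping of a physical
 * page. The page frame number is hypothetical.
 */
#if 0	/* example only */
	ppnum_t pn = (ppnum_t)0x1234;		/* hypothetical page frame */

	pmap_page_protect(pn, VM_PROT_READ);	/* downgrade all mappings to read-only */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove all mappings */
#endif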
1488
1489
1490/*
1491 * Clear specified attribute bits.
1492 */
1493void
1494phys_attribute_clear(
1495 ppnum_t pn,
1496 int bits,
1497 unsigned int options,
1498 void *arg)
1499{
1500 pv_rooted_entry_t pv_h;
1501 pv_hashed_entry_t pv_e;
1502 pt_entry_t *pte;
1503 int pai;
1504 pmap_t pmap;
1505 char attributes = 0;
1506 boolean_t is_internal, is_reusable;
1507
1508 if ((bits & PHYS_MODIFIED) &&
1509 (options & PMAP_OPTIONS_NOFLUSH) &&
1510 arg == NULL) {
1511 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1512 "should not clear 'modified' without flushing TLBs\n",
1513 pn, bits, options, arg);
1514 }
1515
1516 pmap_intr_assert();
1517 assert(pn != vm_page_fictitious_addr);
1518 if (pn == vm_page_guard_addr)
1519 return;
1520
1521 pai = ppn_to_pai(pn);
1522
1523 if (!IS_MANAGED_PAGE(pai)) {
1524 /*
1525 * Not a managed page.
1526 */
1527 return;
1528 }
1529
1530 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1531 pn, bits, 0, 0, 0);
1532
1533 pv_h = pai_to_pvh(pai);
1534
1535 LOCK_PVH(pai);
1536
1537 /*
1538 * Walk down PV list, clearing all modify or reference bits.
1539 * We do not have to lock the pv_list because we have
1540 * the per-pmap lock
1541 */
1542 if (pv_h->pmap != PMAP_NULL) {
1543 /*
1544 * There are some mappings.
1545 */
1546
1547 is_internal = IS_INTERNAL_PAGE(pai);
1548 is_reusable = IS_REUSABLE_PAGE(pai);
1549
1550 pv_e = (pv_hashed_entry_t)pv_h;
1551
1552 do {
1553 vm_map_offset_t va;
1554 char pte_bits;
1555
1556 pmap = pv_e->pmap;
1557 va = pv_e->va;
1558 pte_bits = 0;
1559
1560 if (bits) {
1561 pte = pmap_pte(pmap, va);
1562 /* grab ref/mod bits from this PTE */
1563 pte_bits = (*pte & (PHYS_MODIFIED |
1564 PHYS_REFERENCED));
1565 /* propagate to page's global attributes */
1566 attributes |= pte_bits;
1567 /* which bits to clear for this PTE? */
1568 pte_bits &= bits;
1569 }
1570
1571 /*
1572 * Clear modify and/or reference bits.
1573 */
1574 if (pte_bits) {
1575 pmap_update_pte(pte, bits, 0);
1576
1577 /* Ensure all processors using this translation
1578 * invalidate this TLB entry. The invalidation
1579 * *must* follow the PTE update, to ensure that
1580 * the TLB shadow of the 'D' bit (in particular)
1581 * is synchronized with the updated PTE.
1582 */
1583 if (! (options & PMAP_OPTIONS_NOFLUSH)) {
1584 /* flush TLBS now */
1585 PMAP_UPDATE_TLBS(pmap,
1586 va,
1587 va + PAGE_SIZE);
1588 } else if (arg) {
1589 /* delayed TLB flush: add "pmap" info */
1590 PMAP_UPDATE_TLBS_DELAYED(
1591 pmap,
1592 va,
1593 va + PAGE_SIZE,
1594 (pmap_flush_context *)arg);
1595 } else {
1596 /* no TLB flushing at all */
1597 }
1598 }
1599
1600 /* update pmap "reusable" stats */
1601 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
1602 is_reusable &&
1603 pmap != kernel_pmap) {
1604 /* one less "reusable" */
1605 assert(pmap->stats.reusable > 0);
1606 OSAddAtomic(-1, &pmap->stats.reusable);
1607 if (is_internal) {
1608 /* one more "internal" */
1609 OSAddAtomic(+1, &pmap->stats.internal);
1610 PMAP_STATS_PEAK(pmap->stats.internal);
1611 } else {
1612 /* one more "external" */
1613 OSAddAtomic(+1, &pmap->stats.external);
1614 PMAP_STATS_PEAK(pmap->stats.external);
1615 }
1616 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
1617 !is_reusable &&
1618 pmap != kernel_pmap) {
1619 /* one more "reusable" */
1620 OSAddAtomic(+1, &pmap->stats.reusable);
1621 PMAP_STATS_PEAK(pmap->stats.reusable);
1622 if (is_internal) {
1623 /* one less "internal" */
1624 assert(pmap->stats.internal > 0);
1625 OSAddAtomic(-1, &pmap->stats.internal);
1626 } else {
1627 /* one less "external" */
1628 assert(pmap->stats.external > 0);
1629 OSAddAtomic(-1, &pmap->stats.external);
1630 }
1631 }
1632
1633 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1634
1635 } while (pv_e != (pv_hashed_entry_t)pv_h);
1636 }
1637 /* Opportunistic refmod collection, annulled
1638 * if both REF and MOD are being cleared.
1639 */
1640
1641 pmap_phys_attributes[pai] |= attributes;
1642 pmap_phys_attributes[pai] &= (~bits);
1643
1644 /* update this page's "reusable" status */
1645 if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
1646 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1647 } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
1648 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1649 }
1650
1651 UNLOCK_PVH(pai);
1652
1653 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
1654 0, 0, 0, 0, 0);
1655}
1656
1657/*
1658 * Check specified attribute bits.
1659 */
1660int
1661phys_attribute_test(
1662 ppnum_t pn,
1663 int bits)
1664{
1665 pv_rooted_entry_t pv_h;
1666 pv_hashed_entry_t pv_e;
1667 pt_entry_t *pte;
1668 int pai;
1669 pmap_t pmap;
1670 int attributes = 0;
1671
1672 pmap_intr_assert();
1673 assert(pn != vm_page_fictitious_addr);
1674 if (pn == vm_page_guard_addr)
1675 return 0;
1676
1677 pai = ppn_to_pai(pn);
1678
1679 if (!IS_MANAGED_PAGE(pai)) {
1680 /*
1681 * Not a managed page.
1682 */
1683 return 0;
1684 }
1685
1686 /*
1687 * Fast check... if bits already collected
1688 * no need to take any locks...
1689 * if not set, we need to recheck after taking
1690 * the lock in case they got pulled in while
1691 * we were waiting for the lock
1692 */
1693 if ((pmap_phys_attributes[pai] & bits) == bits)
1694 return bits;
1695
1696 pv_h = pai_to_pvh(pai);
1697
1698 LOCK_PVH(pai);
1699
1700 attributes = pmap_phys_attributes[pai] & bits;
1701
1702
1703 /*
1704 * Walk down PV list, checking the mappings until we
1705 * reach the end or we've found the desired attributes.
1706 */
1707 if (attributes != bits &&
1708 pv_h->pmap != PMAP_NULL) {
1709 /*
1710 * There are some mappings.
1711 */
1712 pv_e = (pv_hashed_entry_t)pv_h;
1713 do {
1714 vm_map_offset_t va;
1715
1716 pmap = pv_e->pmap;
1717 va = pv_e->va;
1718 /*
1719 * pick up modify and/or reference bits from mapping
1720 */
1721
1722 pte = pmap_pte(pmap, va);
1723 attributes |= (int)(*pte & bits);
1724
1725 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1726
1727 } while ((attributes != bits) &&
1728 (pv_e != (pv_hashed_entry_t)pv_h));
1729 }
1730 pmap_phys_attributes[pai] |= attributes;
1731
1732 UNLOCK_PVH(pai);
1733 return (attributes);
1734}
1735
1736/*
1737 * Routine: pmap_change_wiring
1738 * Function: Change the wiring attribute for a map/virtual-address
1739 * pair.
1740 * In/out conditions:
1741 * The mapping must already exist in the pmap.
1742 */
1743void
1744pmap_change_wiring(
1745 pmap_t map,
1746 vm_map_offset_t vaddr,
1747 boolean_t wired)
1748{
1749 pt_entry_t *pte;
1750
1751 PMAP_LOCK(map);
1752
1753 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1754 panic("pmap_change_wiring: pte missing");
1755
1756 if (wired && !iswired(*pte)) {
1757 /*
1758 * wiring down mapping
1759 */
1760 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
1761 OSAddAtomic(+1, &map->stats.wired_count);
1762 pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
1763 }
1764 else if (!wired && iswired(*pte)) {
1765 /*
1766 * unwiring mapping
1767 */
1768 assert(map->stats.wired_count >= 1);
1769 OSAddAtomic(-1, &map->stats.wired_count);
1770 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1771 pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
1772 }
1773
1774 PMAP_UNLOCK(map);
1775}
1776
1777/*
1778 * "Backdoor" direct map routine for early mappings.
1779 * Useful for mapping memory outside the range
1780 * Sets A, D and NC if requested
1781 */
1782
1783vm_offset_t
1784pmap_map_bd(
1785 vm_offset_t virt,
1786 vm_map_offset_t start_addr,
1787 vm_map_offset_t end_addr,
1788 vm_prot_t prot,
1789 unsigned int flags)
1790{
1791 pt_entry_t template;
1792 pt_entry_t *pte;
1793 spl_t spl;
1794 vm_offset_t base = virt;
1795 template = pa_to_pte(start_addr)
1796 | INTEL_PTE_REF
1797 | INTEL_PTE_MOD
1798 | INTEL_PTE_WIRED
1799 | INTEL_PTE_VALID;
1800
1801 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
1802 template |= INTEL_PTE_NCACHE;
1803 if (!(flags & (VM_MEM_GUARDED)))
1804 template |= INTEL_PTE_PTA;
1805 }
1806
1807#if defined(__x86_64__)
1808 if ((prot & VM_PROT_EXECUTE) == 0)
1809 template |= INTEL_PTE_NX;
1810#endif
1811
1812 if (prot & VM_PROT_WRITE)
1813 template |= INTEL_PTE_WRITE;
1814
1815 while (start_addr < end_addr) {
1816 spl = splhigh();
1817 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
1818 if (pte == PT_ENTRY_NULL) {
1819 panic("pmap_map_bd: Invalid kernel address\n");
1820 }
1821 pmap_store_pte(pte, template);
1822 splx(spl);
1823 pte_increment_pa(template);
1824 virt += PAGE_SIZE;
1825 start_addr += PAGE_SIZE;
1826 }
1827 flush_tlb_raw();
1828 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
1829 return(virt);
1830}
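/*
 * Illustrative sketch, not part of the original file: using pmap_map_bd()
 * during early boot to wire an uncached device window into a previously
 * reserved kernel virtual range. The virtual and physical addresses are
 * hypothetical.
 */
#if 0	/* example only */
	vm_offset_t io_window_va = example_reserved_va;	/* hypothetical reserved VA */
	vm_offset_t next_va;

	next_va = pmap_map_bd(io_window_va,
	    (vm_map_offset_t)0xfed00000ULL,		/* hypothetical device base */
	    (vm_map_offset_t)0xfed00000ULL + PAGE_SIZE,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_MEM_NOT_CACHEABLE);
#endif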
1831
1832unsigned int
1833pmap_query_resident(
1834 pmap_t pmap,
1835 addr64_t s64,
1836 addr64_t e64)
1837{
1838 pt_entry_t *pde;
1839 pt_entry_t *spte, *epte;
1840 addr64_t l64;
1841 uint64_t deadline;
1842 unsigned int result;
1843
1844 pmap_intr_assert();
1845
1846 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
1847 return 0;
1848
1849 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
1850 pmap,
1851 (uint32_t) (s64 >> 32), s64,
1852 (uint32_t) (e64 >> 32), e64);
1853
1854 result = 0;
1855
1856 PMAP_LOCK(pmap);
1857
1858 deadline = rdtsc64() + max_preemption_latency_tsc;
1859
1860 while (s64 < e64) {
1861 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1862 if (l64 > e64)
1863 l64 = e64;
1864 pde = pmap_pde(pmap, s64);
1865
1866 if (pde && (*pde & INTEL_PTE_VALID)) {
1867 if (*pde & INTEL_PTE_PS) {
1868 /* superpage: not supported */
1869 } else {
1870 spte = pmap_pte(pmap,
1871 (s64 & ~(pde_mapped_size - 1)));
1872 spte = &spte[ptenum(s64)];
1873 epte = &spte[intel_btop(l64 - s64)];
1874
1875 for (; spte < epte; spte++) {
1876 if (pte_to_pa(*spte) != 0) {
1877 result++;
1878 }
1879 }
1880
1881 }
1882 }
1883 s64 = l64;
1884
1885 if (s64 < e64 && rdtsc64() >= deadline) {
1886 PMAP_UNLOCK(pmap);
1887 PMAP_LOCK(pmap);
1888 deadline = rdtsc64() + max_preemption_latency_tsc;
1889 }
1890 }
1891
1892 PMAP_UNLOCK(pmap);
1893
1894 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
1895 pmap, 0, 0, 0, 0);
1896
1897 return result;
1898}
1899
1900#if MACH_ASSERT
1901void
1902pmap_set_process(
1903 __unused pmap_t pmap,
1904 __unused int pid,
1905 __unused char *procname)
1906{
1907}
1908#endif /* MACH_ASSERT */