apple/xnu (blob cc584a9a11b85a3b198788c2da2129031e798d1f): osfmk/i386/pmap_x86_common.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <vm/pmap.h>
29 #include <vm/vm_map.h>
30 #include <kern/ledger.h>
31 #include <i386/pmap_internal.h>
32
33 void pmap_remove_range(
34 pmap_t pmap,
35 vm_map_offset_t va,
36 pt_entry_t *spte,
37 pt_entry_t *epte);
38
39 void pmap_remove_range_options(
40 pmap_t pmap,
41 vm_map_offset_t va,
42 pt_entry_t *spte,
43 pt_entry_t *epte,
44 int options);
45
46 void pmap_reusable_range(
47 pmap_t pmap,
48 vm_map_offset_t va,
49 pt_entry_t *spte,
50 pt_entry_t *epte,
51 boolean_t reusable);
52
53 uint32_t pmap_update_clear_pte_count;
54
55 /*
56 * The Intel platform can nest at the PDE level, i.e. NBPDE (2MB) at a time,
57 * on an NBPDE boundary.
58 */
59
60 /* These symbols may be referenced directly by VM */
61 uint64_t pmap_nesting_size_min = NBPDE;
62 uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
63
64 /*
65 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
66 *
67 * grand = the pmap that we will nest subord into
68 * subord = the pmap that is nested into grand
69 * va_start = start of the range in grand at which subord is inserted
70 * nstart = start of the corresponding range in the nested (subord) pmap
71 * size = size of the nested area (up to 16TB)
72 *
73 * Inserts a pmap into another. This is used to implement shared segments.
74 *
75 * Note that we depend upon higher-level VM locks to ensure that things don't change while
76 * we are doing this. For example, the VM should not be doing any pmap enters while it is
77 * nesting, nor performing two nests at once.
78 */
79
80 /*
81 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
82 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
83 * container and the "grand" parent. A minor optimization to consider for the
84 * future: make the "subord" truly a container rather than a full-fledged
85 * pagetable hierarchy which can be unnecessarily sparse (DRK).
86 */
87
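/*
 * Illustrative call (hypothetical pmaps and range, not taken from this file):
 * the VM layer might nest a shared-region pmap into a task pmap as follows.
 * Both addresses and the size must be aligned to pmap_nesting_size_min
 * (NBPDE), and va_start must equal nstart:
 *
 *	kr = pmap_nest(task_pmap, shared_region_pmap,
 *	               va_start, va_start, nested_size);
 */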
88 kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
89 vm_map_offset_t vaddr, nvaddr;
90 pd_entry_t *pde,*npde;
91 unsigned int i;
92 uint64_t num_pde;
93
94 if ((size & (pmap_nesting_size_min-1)) ||
95 (va_start & (pmap_nesting_size_min-1)) ||
96 (nstart & (pmap_nesting_size_min-1)) ||
97 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
98 return KERN_INVALID_VALUE;
99
100 if(size == 0) {
101 panic("pmap_nest: size is invalid - %016llX\n", size);
102 }
103
104 if (va_start != nstart)
105 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
106
107 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
108 (uintptr_t) grand, (uintptr_t) subord,
109 (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
110
111 nvaddr = (vm_map_offset_t)nstart;
112 num_pde = size >> PDESHIFT;
113
114 PMAP_LOCK(subord);
115
116 subord->pm_shared = TRUE;
117
118 for (i = 0; i < num_pde;) {
119 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
120
121 npde = pmap64_pdpt(subord, nvaddr);
122
123 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
124 PMAP_UNLOCK(subord);
125 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
126 PMAP_LOCK(subord);
127 npde = pmap64_pdpt(subord, nvaddr);
128 }
129 *npde |= INTEL_PDPTE_NESTED;
130 nvaddr += NBPDPT;
131 i += (uint32_t)NPDEPG;
132 }
133 else {
134 npde = pmap_pde(subord, nvaddr);
135
136 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
137 PMAP_UNLOCK(subord);
138 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
139 PMAP_LOCK(subord);
140 npde = pmap_pde(subord, nvaddr);
141 }
142 nvaddr += NBPDE;
143 i++;
144 }
145 }
146
147 PMAP_UNLOCK(subord);
148
149 vaddr = (vm_map_offset_t)va_start;
150
151 PMAP_LOCK(grand);
152
153 for (i = 0;i < num_pde;) {
154 pd_entry_t tpde;
155
156 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
157 npde = pmap64_pdpt(subord, vaddr);
158 if (npde == 0)
159 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
160 tpde = *npde;
161 pde = pmap64_pdpt(grand, vaddr);
162 if (0 == pde) {
163 PMAP_UNLOCK(grand);
164 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
165 PMAP_LOCK(grand);
166 pde = pmap64_pdpt(grand, vaddr);
167 }
168 if (pde == 0)
169 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
170 pmap_store_pte(pde, tpde);
171 vaddr += NBPDPT;
172 i += (uint32_t) NPDEPG;
173 }
174 else {
175 npde = pmap_pde(subord, nstart);
176 if (npde == 0)
177 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
178 tpde = *npde;
179 nstart += NBPDE;
180 pde = pmap_pde(grand, vaddr);
181 if ((0 == pde) && cpu_64bit) {
182 PMAP_UNLOCK(grand);
183 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
184 PMAP_LOCK(grand);
185 pde = pmap_pde(grand, vaddr);
186 }
187
188 if (pde == 0)
189 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
190 vaddr += NBPDE;
191 pmap_store_pte(pde, tpde);
192 i++;
193 }
194 }
195
196 PMAP_UNLOCK(grand);
197
198 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
199
200 return KERN_SUCCESS;
201 }
202
203 /*
204 * kern_return_t pmap_unnest(grand, vaddr)
205 *
206 * grand = the pmap that we will un-nest subord from
207 * vaddr = start of range in pmap to be unnested
208 *
209 * Removes a pmap from another. This is used to implement shared segments.
210 */
211
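/*
 * Illustrative call (hypothetical values): undoing a nesting such as the one
 * sketched above, with the same alignment requirements on vaddr and size:
 *
 *	kr = pmap_unnest(task_pmap, va_start, nested_size);
 */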
212 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
213
214 pd_entry_t *pde;
215 unsigned int i;
216 uint64_t num_pde;
217 addr64_t va_start, va_end;
218 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
219
220 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
221 (uintptr_t) grand,
222 (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
223
224 if ((size & (pmap_nesting_size_min-1)) ||
225 (vaddr & (pmap_nesting_size_min-1))) {
226 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
227 grand, vaddr, size);
228 }
229
230 /* align everything to PDE boundaries */
231 va_start = vaddr & ~(NBPDE-1);
232 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
233 size = va_end - va_start;
234
235 PMAP_LOCK(grand);
236
237 num_pde = size >> PDESHIFT;
238 vaddr = va_start;
239
240 for (i = 0; i < num_pde; ) {
241 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
242 npdpt = pdptnum(grand, vaddr);
243 pde = pmap64_pdpt(grand, vaddr);
244 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
245 pmap_store_pte(pde, (pd_entry_t)0);
246 i += (uint32_t) NPDEPG;
247 vaddr += NBPDPT;
248 continue;
249 }
250 }
251 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
252 if (pde == 0)
253 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
254 pmap_store_pte(pde, (pd_entry_t)0);
255 i++;
256 vaddr += NBPDE;
257 }
258
259 PMAP_UPDATE_TLBS(grand, va_start, va_end);
260
261 PMAP_UNLOCK(grand);
262
263 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
264
265 return KERN_SUCCESS;
266 }
267
268 /* Invoked by the Mach VM to determine the platform-specific unnest region */
269
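/*
 * If either end of [*s, *e) falls within a PDPT entry marked
 * INTEL_PDPTE_NESTED, the range is widened outward to the enclosing NBPDPT
 * (1GiB) boundaries and TRUE is returned so that whole nested PDPT entries
 * are unnested.
 */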
270 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
271 pd_entry_t *pdpte;
272 boolean_t rval = FALSE;
273
274 if (!cpu_64bit)
275 return rval;
276
277 PMAP_LOCK(p);
278
279 pdpte = pmap64_pdpt(p, *s);
280 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
281 *s &= ~(NBPDPT -1);
282 rval = TRUE;
283 }
284
285 pdpte = pmap64_pdpt(p, *e);
286 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
287 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
288 rval = TRUE;
289 }
290
291 PMAP_UNLOCK(p);
292
293 return rval;
294 }
295
296 /*
297 * pmap_find_phys returns the (4K) physical page number containing a
298 * given virtual address in a given pmap.
299 * Note that pmap_pte may return a PDE if this virtual address is
300 * mapped by a large page; this is taken into account so that the
301 * correct page number is returned in that case.
302 */
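/*
 * Illustrative use (hypothetical variables): translating a kernel virtual
 * address to its physical page number; a return value of 0 means there is
 * no valid translation:
 *
 *	ppnum_t ppn = pmap_find_phys(kernel_pmap, (addr64_t)kva);
 */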
303 ppnum_t
304 pmap_find_phys(pmap_t pmap, addr64_t va)
305 {
306 pt_entry_t *ptp;
307 pd_entry_t *pdep;
308 ppnum_t ppn = 0;
309 pd_entry_t pde;
310 pt_entry_t pte;
311
312 mp_disable_preemption();
313
314 /* This refcount test is a band-aid--several infrastructural changes
315 * are necessary to eliminate invocation of this routine from arbitrary
316 * contexts.
317 */
318
319 if (!pmap->ref_count)
320 goto pfp_exit;
321
322 pdep = pmap_pde(pmap, va);
323
324 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
325 if (pde & INTEL_PTE_PS) {
326 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
327 ppn += (ppnum_t) ptenum(va);
328 }
329 else {
330 ptp = pmap_pte(pmap, va);
331 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
332 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
333 }
334 }
335 }
336 pfp_exit:
337 mp_enable_preemption();
338
339 return ppn;
340 }
341
342 /*
343 * Update cache attributes for all extant managed mappings.
344 * Assumes PV for this page is locked, and that the page
345 * is managed.
346 */
347
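/*
 * The update walks the PV list rooted at pai_to_pvh(pn), rewrites the
 * cacheability bits (PHYS_CACHEABILITY_MASK) in each mapping's PTE, and
 * issues a TLB shootdown for each mapping.
 */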
348 void
349 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
350 pv_rooted_entry_t pv_h, pv_e;
351 pv_hashed_entry_t pvh_e, nexth;
352 vm_map_offset_t vaddr;
353 pmap_t pmap;
354 pt_entry_t *ptep;
355
356 assert(IS_MANAGED_PAGE(pn));
357
358 pv_h = pai_to_pvh(pn);
359 /* TODO: translate the PHYS_* bits to PTE bits; while they're
360 * currently identical, they may not remain so.
361 * Potential optimizations (here and in page_protect):
362 * parallel shootdowns, and checking for redundant
363 * attribute modifications.
364 */
365
366 /*
367 * Alter attributes on all mappings
368 */
369 if (pv_h->pmap != PMAP_NULL) {
370 pv_e = pv_h;
371 pvh_e = (pv_hashed_entry_t)pv_e;
372
373 do {
374 pmap = pv_e->pmap;
375 vaddr = pv_e->va;
376 ptep = pmap_pte(pmap, vaddr);
377
378 if (0 == ptep)
379 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
380
381 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
382 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
383 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
384 pvh_e = nexth;
385 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
386 }
387 }
388
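/*
 * With dofilter TRUE, the current CPU's CR3 is marked inactive so that TLB
 * coherency (shootdown) interrupts are filtered for it; with FALSE, CR3 is
 * marked active again and any invalidation posted while filtered is
 * processed immediately.
 */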
389 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
390 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
391
392 if (dofilter) {
393 CPU_CR3_MARK_INACTIVE();
394 } else {
395 CPU_CR3_MARK_ACTIVE();
396 mfence();
397 if (current_cpu_datap()->cpu_tlb_invalid)
398 process_pmap_updates();
399 }
400 }
401
402
403 /*
404 * Insert the given physical page (p) at
405 * the specified virtual address (v) in the
406 * target physical map with the protection requested.
407 *
408 * If specified, the page will be wired down, meaning
409 * that the related pte cannot be reclaimed.
410 *
411 * NB: This is the only routine which MAY NOT lazy-evaluate
412 * or lose information. That is, this routine must actually
413 * insert this page into the given map NOW.
414 */
415
416 void
417 pmap_enter(
418 register pmap_t pmap,
419 vm_map_offset_t vaddr,
420 ppnum_t pn,
421 vm_prot_t prot,
422 vm_prot_t fault_type,
423 unsigned int flags,
424 boolean_t wired)
425 {
426 (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
427 }
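/*
 * Illustrative call (hypothetical values): entering a single writable, wired
 * kernel mapping through the wrapper above:
 *
 *	pmap_enter(kernel_pmap, vaddr, pn,
 *	           VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
 */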
428
429
430 kern_return_t
431 pmap_enter_options(
432 register pmap_t pmap,
433 vm_map_offset_t vaddr,
434 ppnum_t pn,
435 vm_prot_t prot,
436 __unused vm_prot_t fault_type,
437 unsigned int flags,
438 boolean_t wired,
439 unsigned int options,
440 void *arg)
441 {
442 pt_entry_t *pte;
443 pv_rooted_entry_t pv_h;
444 ppnum_t pai;
445 pv_hashed_entry_t pvh_e;
446 pv_hashed_entry_t pvh_new;
447 pt_entry_t template;
448 pmap_paddr_t old_pa;
449 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
450 boolean_t need_tlbflush = FALSE;
451 boolean_t set_NX;
452 char oattr;
453 boolean_t old_pa_locked;
454 /* 2MiB mappings are confined to x86_64 by VM */
455 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
456 vm_object_t delpage_pm_obj = NULL;
457 int delpage_pde_index = 0;
458 pt_entry_t old_pte;
459 kern_return_t kr_expand;
460
461 pmap_intr_assert();
462
463 if (pmap == PMAP_NULL)
464 return KERN_INVALID_ARGUMENT;
465
466 /* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
467 * unused value in that scenario.
468 */
469 assert(pn != vm_page_fictitious_addr);
470
471 if (pn == vm_page_guard_addr)
472 return KERN_INVALID_ARGUMENT;
473
474 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
475 pmap,
476 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
477 pn, prot);
478
479 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
480 set_NX = FALSE;
481 else
482 set_NX = TRUE;
483
484 if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
485 set_NX = FALSE;
486 }
487
488 /*
489 * Must allocate a new pvlist entry while we're unlocked;
490 * zalloc may cause pageout (which will lock the pmap system).
491 * If we determine we need a pvlist entry, we will unlock
492 * and allocate one. Then we will retry, throwing away
493 * the allocated entry later (if we no longer need it).
494 */
495
496 pvh_new = PV_HASHED_ENTRY_NULL;
497 Retry:
498 pvh_e = PV_HASHED_ENTRY_NULL;
499
500 PMAP_LOCK(pmap);
501
502 /*
503 * Expand pmap to include this pte. Assume that
504 * pmap is always expanded to include enough hardware
505 * pages to map one VM page.
506 */
507 if(superpage) {
508 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
509 /* need room for another pde entry */
510 PMAP_UNLOCK(pmap);
511 kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
512 if (kr_expand != KERN_SUCCESS)
513 return kr_expand;
514 PMAP_LOCK(pmap);
515 }
516 } else {
517 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
518 /*
519 * Must unlock to expand the pmap
520 * going to grow pde level page(s)
521 */
522 PMAP_UNLOCK(pmap);
523 kr_expand = pmap_expand(pmap, vaddr, options);
524 if (kr_expand != KERN_SUCCESS)
525 return kr_expand;
526 PMAP_LOCK(pmap);
527 }
528 }
529 if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
530 PMAP_UNLOCK(pmap);
531 return KERN_SUCCESS;
532 }
533
534 if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
535 /*
536 * There is still an empty page table mapped that
537 * was used for a previous base page mapping.
538 * Remember the PDE and the PDE index, so that we
539 * can free the page at the end of this function.
540 */
541 delpage_pde_index = (int)pdeidx(pmap, vaddr);
542 delpage_pm_obj = pmap->pm_obj;
543 *pte = 0;
544 }
545
546 old_pa = pte_to_pa(*pte);
547 pai = pa_index(old_pa);
548 old_pa_locked = FALSE;
549
550 if (old_pa == 0 &&
551 (*pte & INTEL_PTE_COMPRESSED)) {
552 /* one less "compressed" */
553 OSAddAtomic64(-1, &pmap->stats.compressed);
554 /* marker will be cleared below */
555 }
556
557 /*
558 * If we have a previous managed page, lock the pv entry now. After
559 * we lock it, check whether someone beat us to the lock and, if so,
560 * drop the lock.
561 */
562 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
563 LOCK_PVH(pai);
564 old_pa_locked = TRUE;
565 old_pa = pte_to_pa(*pte);
566 if (0 == old_pa) {
567 UNLOCK_PVH(pai); /* another path beat us to it */
568 old_pa_locked = FALSE;
569 }
570 }
571
572 /*
573 * Special case if the incoming physical page is already mapped
574 * at this address.
575 */
576 if (old_pa == pa) {
577 pt_entry_t old_attributes =
578 *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
579
580 /*
581 * May be changing its wired attribute or protection
582 */
583
584 template = pa_to_pte(pa) | INTEL_PTE_VALID;
585 template |= pmap_get_cache_attributes(pa_index(pa));
586
587 if (VM_MEM_NOT_CACHEABLE ==
588 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
589 if (!(flags & VM_MEM_GUARDED))
590 template |= INTEL_PTE_PTA;
591 template |= INTEL_PTE_NCACHE;
592 }
593 if (pmap != kernel_pmap)
594 template |= INTEL_PTE_USER;
595 if (prot & VM_PROT_WRITE) {
596 template |= INTEL_PTE_WRITE;
597 }
598
599 if (set_NX)
600 template |= INTEL_PTE_NX;
601
602 if (wired) {
603 template |= INTEL_PTE_WIRED;
604 if (!iswired(old_attributes)) {
605 OSAddAtomic(+1, &pmap->stats.wired_count);
606 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
607 }
608 } else {
609 if (iswired(old_attributes)) {
610 assert(pmap->stats.wired_count >= 1);
611 OSAddAtomic(-1, &pmap->stats.wired_count);
612 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
613 }
614 }
615 if (superpage) /* this path cannot be used */
616 template |= INTEL_PTE_PS; /* to change the page size! */
617
618 if (old_attributes == template)
619 goto dont_update_pte;
620
621 /* Determine delta, PV locked */
622 need_tlbflush =
623 ((old_attributes ^ template) != INTEL_PTE_WIRED);
624
625 if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
626 if ((old_attributes ^ template) == INTEL_PTE_WRITE)
627 need_tlbflush = FALSE;
628 }
629
630 /* store modified PTE and preserve RC bits */
631 pt_entry_t npte, opte;
632 do {
633 opte = *pte;
634 npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
635 } while (!pmap_cmpx_pte(pte, opte, npte));
636 dont_update_pte:
637 if (old_pa_locked) {
638 UNLOCK_PVH(pai);
639 old_pa_locked = FALSE;
640 }
641 goto Done;
642 }
643
644 /*
645 * Outline of code from here:
646 * 1) If va was mapped, update TLBs, remove the mapping
647 * and remove old pvlist entry.
648 * 2) Add pvlist entry for new mapping
649 * 3) Enter new mapping.
650 *
651 * If the old physical page is not managed step 1) is skipped
652 * (except for updating the TLBs), and the mapping is
653 * overwritten at step 3). If the new physical page is not
654 * managed, step 2) is skipped.
655 */
656
657 if (old_pa != (pmap_paddr_t) 0) {
658
659 /*
660 * Don't do anything to pages outside valid memory here.
661 * Instead convince the code that enters a new mapping
662 * to overwrite the old one.
663 */
664
665 /* invalidate the PTE */
666 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
667 /* propagate invalidate everywhere */
668 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
669 /* remember reference and change */
670 old_pte = *pte;
671 oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
672 /* completely invalidate the PTE */
673 pmap_store_pte(pte, 0);
674
675 if (IS_MANAGED_PAGE(pai)) {
676 pmap_assert(old_pa_locked == TRUE);
677 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
678 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
679 assert(pmap->stats.resident_count >= 1);
680 OSAddAtomic(-1, &pmap->stats.resident_count);
681 if (pmap != kernel_pmap) {
682 if (IS_REUSABLE_PAGE(pai)) {
683 assert(pmap->stats.reusable > 0);
684 OSAddAtomic(-1, &pmap->stats.reusable);
685 } else if (IS_INTERNAL_PAGE(pai)) {
686 assert(pmap->stats.internal > 0);
687 OSAddAtomic(-1, &pmap->stats.internal);
688 } else {
689 assert(pmap->stats.external > 0);
690 OSAddAtomic(-1, &pmap->stats.external);
691 }
692 }
693 if (iswired(*pte)) {
694 assert(pmap->stats.wired_count >= 1);
695 OSAddAtomic(-1, &pmap->stats.wired_count);
696 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
697 PAGE_SIZE);
698 }
699 pmap_phys_attributes[pai] |= oattr;
700
701 /*
702 * Remove the mapping from the pvlist for
703 * this physical page.
704 * We'll end up with either a rooted pv or a
705 * hashed pv
706 */
707 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
708
709 } else {
710
711 /*
712 * old_pa is not managed.
713 * Do removal part of accounting.
714 */
715
716 if (pmap != kernel_pmap) {
717 #if 00
718 assert(pmap->stats.device > 0);
719 OSAddAtomic(-1, &pmap->stats.device);
720 #endif
721 }
722 if (iswired(*pte)) {
723 assert(pmap->stats.wired_count >= 1);
724 OSAddAtomic(-1, &pmap->stats.wired_count);
725 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
726 }
727 }
728 }
729
730 /*
731 * if we had a previously managed page locked, unlock it now
732 */
733 if (old_pa_locked) {
734 UNLOCK_PVH(pai);
735 old_pa_locked = FALSE;
736 }
737
738 pai = pa_index(pa); /* now working with new incoming phys page */
739 if (IS_MANAGED_PAGE(pai)) {
740
741 /*
742 * Step 2) Enter the mapping in the PV list for this
743 * physical page.
744 */
745 pv_h = pai_to_pvh(pai);
746
747 LOCK_PVH(pai);
748
749 if (pv_h->pmap == PMAP_NULL) {
750 /*
751 * No mappings yet, use rooted pv
752 */
753 pv_h->va = vaddr;
754 pv_h->pmap = pmap;
755 queue_init(&pv_h->qlink);
756
757 if (options & PMAP_OPTIONS_INTERNAL) {
758 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
759 } else {
760 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
761 }
762 if (options & PMAP_OPTIONS_REUSABLE) {
763 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
764 } else {
765 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
766 }
767 } else {
768 /*
769 * Add new pv_hashed_entry after header.
770 */
771 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
772 pvh_e = pvh_new;
773 pvh_new = PV_HASHED_ENTRY_NULL;
774 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
775 PV_HASHED_ALLOC(&pvh_e);
776 if (PV_HASHED_ENTRY_NULL == pvh_e) {
777 /*
778 * The pv free list is empty. If we are on
779 * the kernel pmap we'll use one of
780 * the special private kernel pv_e's;
781 * otherwise, we need to unlock
782 * everything, zalloc a pv_e, and
783 * restart, bringing the new pv_e in
784 * with us.
785 */
786 if (kernel_pmap == pmap) {
787 PV_HASHED_KERN_ALLOC(&pvh_e);
788 } else {
789 UNLOCK_PVH(pai);
790 PMAP_UNLOCK(pmap);
791 pmap_pv_throttle(pmap);
792 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
793 goto Retry;
794 }
795 }
796 }
797
798 if (PV_HASHED_ENTRY_NULL == pvh_e)
799 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
800
801 pvh_e->va = vaddr;
802 pvh_e->pmap = pmap;
803 pvh_e->ppn = pn;
804 pv_hash_add(pvh_e, pv_h);
805
806 /*
807 * Remember that we used the pvlist entry.
808 */
809 pvh_e = PV_HASHED_ENTRY_NULL;
810 }
811
812 /*
813 * only count the mapping
814 * for 'managed memory'
815 */
816 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
817 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
818 OSAddAtomic(+1, &pmap->stats.resident_count);
819 if (pmap->stats.resident_count > pmap->stats.resident_max) {
820 pmap->stats.resident_max = pmap->stats.resident_count;
821 }
822 if (pmap != kernel_pmap) {
823 if (IS_REUSABLE_PAGE(pai)) {
824 OSAddAtomic(+1, &pmap->stats.reusable);
825 PMAP_STATS_PEAK(pmap->stats.reusable);
826 } else if (IS_INTERNAL_PAGE(pai)) {
827 OSAddAtomic(+1, &pmap->stats.internal);
828 PMAP_STATS_PEAK(pmap->stats.internal);
829 } else {
830 OSAddAtomic(+1, &pmap->stats.external);
831 PMAP_STATS_PEAK(pmap->stats.external);
832 }
833 }
834 } else if (last_managed_page == 0) {
835 /* Account for early mappings created before "managed pages"
836 * are determined. Consider consulting the available DRAM map.
837 */
838 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
839 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
840 OSAddAtomic(+1, &pmap->stats.resident_count);
841 if (pmap != kernel_pmap) {
842 #if 00
843 OSAddAtomic(+1, &pmap->stats.device);
844 PMAP_STATS_PEAK(pmap->stats.device);
845 #endif
846 }
847 }
848 /*
849 * Step 3) Enter the mapping.
850 *
851 * Build a template to speed up entering -
852 * only the pfn changes.
853 */
854 template = pa_to_pte(pa) | INTEL_PTE_VALID;
855 /*
856 * DRK: It may be worth asserting on cache attribute flags that diverge
857 * from the existing physical page attributes.
858 */
859
860 template |= pmap_get_cache_attributes(pa_index(pa));
861
862 if (flags & VM_MEM_NOT_CACHEABLE) {
863 if (!(flags & VM_MEM_GUARDED))
864 template |= INTEL_PTE_PTA;
865 template |= INTEL_PTE_NCACHE;
866 }
867 if (pmap != kernel_pmap)
868 template |= INTEL_PTE_USER;
869 if (prot & VM_PROT_WRITE)
870 template |= INTEL_PTE_WRITE;
871 if (set_NX)
872 template |= INTEL_PTE_NX;
873 if (wired) {
874 template |= INTEL_PTE_WIRED;
875 OSAddAtomic(+1, & pmap->stats.wired_count);
876 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
877 }
878 if (superpage)
879 template |= INTEL_PTE_PS;
880 pmap_store_pte(pte, template);
881
882 /*
883 * if this was a managed page we delayed unlocking the pv until here
884 * to prevent pmap_page_protect et al from finding it until the pte
885 * has been stored
886 */
887 if (IS_MANAGED_PAGE(pai)) {
888 UNLOCK_PVH(pai);
889 }
890 Done:
891 if (need_tlbflush == TRUE) {
892 if (options & PMAP_OPTIONS_NOFLUSH)
893 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
894 else
895 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
896 }
897 if (pvh_e != PV_HASHED_ENTRY_NULL) {
898 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
899 }
900 if (pvh_new != PV_HASHED_ENTRY_NULL) {
901 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
902 }
903 PMAP_UNLOCK(pmap);
904
905 if (delpage_pm_obj) {
906 vm_page_t m;
907
908 vm_object_lock(delpage_pm_obj);
909 m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
910 if (m == VM_PAGE_NULL)
911 panic("pmap_enter: pte page not in object");
912 vm_object_unlock(delpage_pm_obj);
913 VM_PAGE_FREE(m);
914 OSAddAtomic(-1, &inuse_ptepages_count);
915 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
916 }
917
918 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
919 return KERN_SUCCESS;
920 }
921
922 /*
923 * Remove a range of hardware page-table entries.
924 * The entries given are the first (inclusive)
925 * and last (exclusive) entries for the VM pages.
926 * The virtual address is the va for the first pte.
927 *
928 * The pmap must be locked.
929 * If the pmap is not the kernel pmap, the range must lie
930 * entirely within one pte-page. This is NOT checked.
931 * Assumes that the pte-page exists.
932 */
933
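/*
 * pmap_remove_range_options() below proceeds in two passes: the first pass
 * clears INTEL_PTE_VALID in each PTE to "freeze" it and is followed by a
 * single TLB shootdown for the whole range; the second pass collects the
 * referenced/modified bits, removes the PV list entries, and zeroes the
 * PTEs.
 */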
934 void
935 pmap_remove_range(
936 pmap_t pmap,
937 vm_map_offset_t start_vaddr,
938 pt_entry_t *spte,
939 pt_entry_t *epte)
940 {
941 pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
942 }
943
944 void
945 pmap_remove_range_options(
946 pmap_t pmap,
947 vm_map_offset_t start_vaddr,
948 pt_entry_t *spte,
949 pt_entry_t *epte,
950 int options)
951 {
952 pt_entry_t *cpte;
953 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
954 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
955 pv_hashed_entry_t pvh_e;
956 int pvh_cnt = 0;
957 int num_removed, num_unwired, num_found, num_invalid;
958 int num_device, num_external, num_internal, num_reusable;
959 uint64_t num_compressed;
960 ppnum_t pai;
961 pmap_paddr_t pa;
962 vm_map_offset_t vaddr;
963
964 num_removed = 0;
965 num_unwired = 0;
966 num_found = 0;
967 num_invalid = 0;
968 num_device = 0;
969 num_external = 0;
970 num_internal = 0;
971 num_reusable = 0;
972 num_compressed = 0;
973 /* invalidate the PTEs first to "freeze" them */
974 for (cpte = spte, vaddr = start_vaddr;
975 cpte < epte;
976 cpte++, vaddr += PAGE_SIZE_64) {
977 pt_entry_t p = *cpte;
978
979 pa = pte_to_pa(p);
980 if (pa == 0) {
981 if (pmap != kernel_pmap &&
982 (options & PMAP_OPTIONS_REMOVE) &&
983 (p & INTEL_PTE_COMPRESSED)) {
984 /* one less "compressed" */
985 num_compressed++;
986 /* clear marker */
987 /* XXX probably does not need to be atomic! */
988 pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
989 }
990 continue;
991 }
992 num_found++;
993
994 if (iswired(p))
995 num_unwired++;
996
997 pai = pa_index(pa);
998
999 if (!IS_MANAGED_PAGE(pai)) {
1000 /*
1001 * Outside range of managed physical memory.
1002 * Just remove the mappings.
1003 */
1004 pmap_store_pte(cpte, 0);
1005 num_device++;
1006 continue;
1007 }
1008
1009 if ((p & INTEL_PTE_VALID) == 0)
1010 num_invalid++;
1011
1012 /* invalidate the PTE */
1013 pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
1014 }
1015
1016 if (num_found == 0) {
1017 /* nothing was changed: we're done */
1018 goto update_counts;
1019 }
1020
1021 /* propagate the invalidates to other CPUs */
1022
1023 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1024
1025 for (cpte = spte, vaddr = start_vaddr;
1026 cpte < epte;
1027 cpte++, vaddr += PAGE_SIZE_64) {
1028
1029 pa = pte_to_pa(*cpte);
1030 if (pa == 0)
1031 continue;
1032
1033 pai = pa_index(pa);
1034
1035 LOCK_PVH(pai);
1036
1037 pa = pte_to_pa(*cpte);
1038 if (pa == 0) {
1039 UNLOCK_PVH(pai);
1040 continue;
1041 }
1042 num_removed++;
1043 if (IS_REUSABLE_PAGE(pai)) {
1044 num_reusable++;
1045 } else if (IS_INTERNAL_PAGE(pai)) {
1046 num_internal++;
1047 } else {
1048 num_external++;
1049 }
1050
1051 /*
1052 * Get the modify and reference bits, then
1053 * nuke the entry in the page table
1054 */
1055 /* remember reference and change */
1056 pmap_phys_attributes[pai] |=
1057 (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
1058
1059 /*
1060 * Remove the mapping from the pvlist for this physical page.
1061 */
1062 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
1063
1064 /* completely invalidate the PTE */
1065 pmap_store_pte(cpte, 0);
1066
1067 UNLOCK_PVH(pai);
1068
1069 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1070 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1071 pvh_eh = pvh_e;
1072
1073 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1074 pvh_et = pvh_e;
1075 }
1076 pvh_cnt++;
1077 }
1078 } /* for loop */
1079
1080 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1081 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1082 }
1083 update_counts:
1084 /*
1085 * Update the counts
1086 */
1087 #if TESTING
1088 if (pmap->stats.resident_count < num_removed)
1089 panic("pmap_remove_range: resident_count");
1090 #endif
1091 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1092 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
1093 assert(pmap->stats.resident_count >= num_removed);
1094 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1095
1096 if (pmap != kernel_pmap) {
1097 #if 00
1098 assert(pmap->stats.device >= num_device);
1099 if (num_device)
1100 OSAddAtomic(-num_device, &pmap->stats.device);
1101 #endif /* 00 */
1102 assert(pmap->stats.external >= num_external);
1103 if (num_external)
1104 OSAddAtomic(-num_external, &pmap->stats.external);
1105 assert(pmap->stats.internal >= num_internal);
1106 if (num_internal)
1107 OSAddAtomic(-num_internal, &pmap->stats.internal);
1108 assert(pmap->stats.reusable >= num_reusable);
1109 if (num_reusable)
1110 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
1111 assert(pmap->stats.compressed >= num_compressed);
1112 if (num_compressed)
1113 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
1114 }
1115
1116 #if TESTING
1117 if (pmap->stats.wired_count < num_unwired)
1118 panic("pmap_remove_range: wired_count");
1119 #endif
1120 assert(pmap->stats.wired_count >= num_unwired);
1121 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
1122 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1123
1124 return;
1125 }
1126
1127
1128 /*
1129 * Remove the given range of addresses
1130 * from the specified map.
1131 *
1132 * It is assumed that the start and end are properly
1133 * rounded to the hardware page size.
1134 */
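/*
 * Illustrative call (hypothetical, page-aligned bounds): removing all
 * mappings in a range from a user pmap:
 *
 *	pmap_remove(user_pmap, start, end);
 */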
1135 void
1136 pmap_remove(
1137 pmap_t map,
1138 addr64_t s64,
1139 addr64_t e64)
1140 {
1141 pmap_remove_options(map, s64, e64, 0);
1142 }
1143
1144 void
1145 pmap_remove_options(
1146 pmap_t map,
1147 addr64_t s64,
1148 addr64_t e64,
1149 int options)
1150 {
1151 pt_entry_t *pde;
1152 pt_entry_t *spte, *epte;
1153 addr64_t l64;
1154 uint64_t deadline;
1155
1156 pmap_intr_assert();
1157
1158 if (map == PMAP_NULL || s64 == e64)
1159 return;
1160
1161 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1162 map,
1163 (uint32_t) (s64 >> 32), s64,
1164 (uint32_t) (e64 >> 32), e64);
1165
1166
1167 PMAP_LOCK(map);
1168
1169 #if 0
1170 /*
1171 * Check that address range in the kernel does not overlap the stacks.
1172 * We initialize local static min/max variables once to avoid making
1173 * 2 function calls for every remove. Note also that these functions
1174 * both return 0 before kernel stacks have been initialized, and hence
1175 * the panic is not triggered in this case.
1176 */
1177 if (map == kernel_pmap) {
1178 static vm_offset_t kernel_stack_min = 0;
1179 static vm_offset_t kernel_stack_max = 0;
1180
1181 if (kernel_stack_min == 0) {
1182 kernel_stack_min = min_valid_stack_address();
1183 kernel_stack_max = max_valid_stack_address();
1184 }
1185 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1186 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1187 panic("pmap_remove() attempted in kernel stack");
1188 }
1189 #else
1190
1191 /*
1192 * The values of kernel_stack_min and kernel_stack_max are no longer
1193 * relevant now that we allocate kernel stacks in the kernel map,
1194 * so the old code above no longer applies. If we wanted to check that
1195 * we weren't removing a mapping of a page in a kernel stack we'd
1196 * mark the PTE with an unused bit and check that here.
1197 */
1198
1199 #endif
1200
1201 deadline = rdtsc64() + max_preemption_latency_tsc;
1202
1203 while (s64 < e64) {
1204 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1205 if (l64 > e64)
1206 l64 = e64;
1207 pde = pmap_pde(map, s64);
1208
1209 if (pde && (*pde & INTEL_PTE_VALID)) {
1210 if (*pde & INTEL_PTE_PS) {
1211 /*
1212 * If we're removing a superpage, pmap_remove_range()
1213 * must work on level 2 instead of level 1; and we're
1214 * only passing a single level 2 entry instead of a
1215 * level 1 range.
1216 */
1217 spte = pde;
1218 epte = spte+1; /* excluded */
1219 } else {
1220 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1221 spte = &spte[ptenum(s64)];
1222 epte = &spte[intel_btop(l64 - s64)];
1223 }
1224 pmap_remove_range_options(map, s64, spte, epte,
1225 options);
1226 }
1227 s64 = l64;
1228
1229 if (s64 < e64 && rdtsc64() >= deadline) {
1230 PMAP_UNLOCK(map);
1231 PMAP_LOCK(map);
1232 deadline = rdtsc64() + max_preemption_latency_tsc;
1233 }
1234 }
1235
1236 PMAP_UNLOCK(map);
1237
1238 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1239 map, 0, 0, 0, 0);
1240
1241 }
1242
1243 void
1244 pmap_page_protect(
1245 ppnum_t pn,
1246 vm_prot_t prot)
1247 {
1248 pmap_page_protect_options(pn, prot, 0, NULL);
1249 }
1250
1251 /*
1252 * Routine: pmap_page_protect_options
1253 *
1254 * Function:
1255 * Lower the permission for all mappings to a given
1256 * page.
1257 */
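/*
 * Illustrative calls (hypothetical page number): per the protection switch
 * below, VM_PROT_READ write-protects every mapping of the page, while
 * VM_PROT_NONE removes every mapping:
 *
 *	pmap_page_protect(pn, VM_PROT_READ);
 *	pmap_page_protect(pn, VM_PROT_NONE);
 */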
1258 void
1259 pmap_page_protect_options(
1260 ppnum_t pn,
1261 vm_prot_t prot,
1262 unsigned int options,
1263 void *arg)
1264 {
1265 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1266 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1267 pv_hashed_entry_t nexth;
1268 int pvh_cnt = 0;
1269 pv_rooted_entry_t pv_h;
1270 pv_rooted_entry_t pv_e;
1271 pv_hashed_entry_t pvh_e;
1272 pt_entry_t *pte;
1273 int pai;
1274 pmap_t pmap;
1275 boolean_t remove;
1276 pt_entry_t new_pte_value;
1277
1278 pmap_intr_assert();
1279 assert(pn != vm_page_fictitious_addr);
1280 if (pn == vm_page_guard_addr)
1281 return;
1282
1283 pai = ppn_to_pai(pn);
1284
1285 if (!IS_MANAGED_PAGE(pai)) {
1286 /*
1287 * Not a managed page.
1288 */
1289 return;
1290 }
1291 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1292 pn, prot, 0, 0, 0);
1293
1294 /*
1295 * Determine the new protection.
1296 */
1297 switch (prot) {
1298 case VM_PROT_READ:
1299 case VM_PROT_READ | VM_PROT_EXECUTE:
1300 remove = FALSE;
1301 break;
1302 case VM_PROT_ALL:
1303 return; /* nothing to do */
1304 default:
1305 remove = TRUE;
1306 break;
1307 }
1308
1309 pv_h = pai_to_pvh(pai);
1310
1311 LOCK_PVH(pai);
1312
1313
1314 /*
1315 * Walk down PV list, if any, changing or removing all mappings.
1316 */
1317 if (pv_h->pmap == PMAP_NULL)
1318 goto done;
1319
1320 pv_e = pv_h;
1321 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1322
1323 do {
1324 vm_map_offset_t vaddr;
1325
1326 pmap = pv_e->pmap;
1327 vaddr = pv_e->va;
1328 pte = pmap_pte(pmap, vaddr);
1329
1330 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1331 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1332
1333 if (0 == pte) {
1334 panic("pmap_page_protect() "
1335 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1336 pmap, pn, vaddr);
1337 }
1338 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1339
1340 /*
1341 * Remove the mapping if new protection is NONE
1342 */
1343 if (remove) {
1344
1345 /* Remove per-pmap wired count */
1346 if (iswired(*pte)) {
1347 OSAddAtomic(-1, &pmap->stats.wired_count);
1348 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1349 }
1350
1351 if (pmap != kernel_pmap &&
1352 (options & PMAP_OPTIONS_COMPRESSOR) &&
1353 IS_INTERNAL_PAGE(pai)) {
1354 /* adjust "reclaimed" stats */
1355 OSAddAtomic64(+1, &pmap->stats.compressed);
1356 PMAP_STATS_PEAK(pmap->stats.compressed);
1357 pmap->stats.compressed_lifetime++;
1358 /* mark this PTE as having been "reclaimed" */
1359 new_pte_value = INTEL_PTE_COMPRESSED;
1360 } else {
1361 new_pte_value = 0;
1362 }
1363
1364 if (options & PMAP_OPTIONS_NOREFMOD) {
1365 pmap_store_pte(pte, new_pte_value);
1366
1367 if (options & PMAP_OPTIONS_NOFLUSH)
1368 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1369 else
1370 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1371 } else {
1372 /*
1373 * Remove the mapping, collecting dirty bits.
1374 */
1375 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
1376
1377 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1378 pmap_phys_attributes[pai] |=
1379 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1380 pmap_store_pte(pte, new_pte_value);
1381 }
1382 #if TESTING
1383 if (pmap->stats.resident_count < 1)
1384 panic("pmap_page_protect: resident_count");
1385 #endif
1386 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1387 assert(pmap->stats.resident_count >= 1);
1388 OSAddAtomic(-1, &pmap->stats.resident_count);
1389 if (options & PMAP_OPTIONS_COMPRESSOR) {
1390 /*
1391 * This removal is only being done so we can send this page to
1392 * the compressor; therefore it mustn't affect total task footprint.
1393 */
1394 pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE);
1395 } else {
1396 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1397 }
1398
1399 if (pmap != kernel_pmap) {
1400 if (IS_REUSABLE_PAGE(pai)) {
1401 assert(pmap->stats.reusable > 0);
1402 OSAddAtomic(-1, &pmap->stats.reusable);
1403 } else if (IS_INTERNAL_PAGE(pai)) {
1404 assert(pmap->stats.internal > 0);
1405 OSAddAtomic(-1, &pmap->stats.internal);
1406 } else {
1407 assert(pmap->stats.external > 0);
1408 OSAddAtomic(-1, &pmap->stats.external);
1409 }
1410 }
1411
1412 /*
1413 * Deal with the pv_rooted_entry.
1414 */
1415
1416 if (pv_e == pv_h) {
1417 /*
1418 * Fix up head later.
1419 */
1420 pv_h->pmap = PMAP_NULL;
1421 } else {
1422 /*
1423 * Delete this entry.
1424 */
1425 pv_hash_remove(pvh_e);
1426 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1427 pvh_eh = pvh_e;
1428
1429 if (pvh_et == PV_HASHED_ENTRY_NULL)
1430 pvh_et = pvh_e;
1431 pvh_cnt++;
1432 }
1433 } else {
1434 /*
1435 * Write-protect, after opportunistic refmod collect
1436 */
1437 pmap_phys_attributes[pai] |=
1438 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1439 pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
1440
1441 if (options & PMAP_OPTIONS_NOFLUSH)
1442 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1443 else
1444 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1445 }
1446 pvh_e = nexth;
1447 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1448
1449
1450 /*
1451 * If pv_head mapping was removed, fix it up.
1452 */
1453 if (pv_h->pmap == PMAP_NULL) {
1454 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1455
1456 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1457 pv_hash_remove(pvh_e);
1458 pv_h->pmap = pvh_e->pmap;
1459 pv_h->va = pvh_e->va;
1460 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1461 pvh_eh = pvh_e;
1462
1463 if (pvh_et == PV_HASHED_ENTRY_NULL)
1464 pvh_et = pvh_e;
1465 pvh_cnt++;
1466 }
1467 }
1468 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1469 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1470 }
1471 done:
1472 UNLOCK_PVH(pai);
1473
1474 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1475 0, 0, 0, 0, 0);
1476 }
1477
1478
1479 /*
1480 * Clear specified attribute bits.
1481 */
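/*
 * Illustrative call (hypothetical page number): clearing the modified bit on
 * every mapping of a physical page:
 *
 *	phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
 */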
1482 void
1483 phys_attribute_clear(
1484 ppnum_t pn,
1485 int bits,
1486 unsigned int options,
1487 void *arg)
1488 {
1489 pv_rooted_entry_t pv_h;
1490 pv_hashed_entry_t pv_e;
1491 pt_entry_t *pte;
1492 int pai;
1493 pmap_t pmap;
1494 char attributes = 0;
1495
1496 pmap_intr_assert();
1497 assert(pn != vm_page_fictitious_addr);
1498 if (pn == vm_page_guard_addr)
1499 return;
1500
1501 pai = ppn_to_pai(pn);
1502
1503 if (!IS_MANAGED_PAGE(pai)) {
1504 /*
1505 * Not a managed page.
1506 */
1507 return;
1508 }
1509
1510 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1511 pn, bits, 0, 0, 0);
1512
1513 pv_h = pai_to_pvh(pai);
1514
1515 LOCK_PVH(pai);
1516
1517 /*
1518 * Walk down PV list, clearing all modify or reference bits.
1519 * We do not have to lock the pv_list separately because we hold
1520 * the PV lock for this page (LOCK_PVH above).
1521 */
1522 if (pv_h->pmap != PMAP_NULL) {
1523 /*
1524 * There are some mappings.
1525 */
1526
1527 pv_e = (pv_hashed_entry_t)pv_h;
1528
1529 do {
1530 vm_map_offset_t va;
1531
1532 pmap = pv_e->pmap;
1533 va = pv_e->va;
1534
1535 /*
1536 * Clear modify and/or reference bits.
1537 */
1538 pte = pmap_pte(pmap, va);
1539 attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1540 pmap_update_pte(pte, bits, 0);
1541 /* Ensure all processors using this translation
1542 * invalidate this TLB entry. The invalidation *must*
1543 * follow the PTE update, to ensure that the TLB
1544 * shadow of the 'D' bit (in particular) is
1545 * synchronized with the updated PTE.
1546 */
1547 if (options & PMAP_OPTIONS_NOFLUSH) {
1548 if (arg)
1549 PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg);
1550 } else
1551 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1552
1553 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1554
1555 } while (pv_e != (pv_hashed_entry_t)pv_h);
1556 }
1557 /* Opportunistic refmod collection, annulled
1558 * if both REF and MOD are being cleared.
1559 */
1560
1561 pmap_phys_attributes[pai] |= attributes;
1562 pmap_phys_attributes[pai] &= (~bits);
1563
1564 UNLOCK_PVH(pai);
1565
1566 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
1567 0, 0, 0, 0, 0);
1568 }
1569
1570 /*
1571 * Check specified attribute bits.
1572 */
1573 int
1574 phys_attribute_test(
1575 ppnum_t pn,
1576 int bits)
1577 {
1578 pv_rooted_entry_t pv_h;
1579 pv_hashed_entry_t pv_e;
1580 pt_entry_t *pte;
1581 int pai;
1582 pmap_t pmap;
1583 int attributes = 0;
1584
1585 pmap_intr_assert();
1586 assert(pn != vm_page_fictitious_addr);
1587 if (pn == vm_page_guard_addr)
1588 return 0;
1589
1590 pai = ppn_to_pai(pn);
1591
1592 if (!IS_MANAGED_PAGE(pai)) {
1593 /*
1594 * Not a managed page.
1595 */
1596 return 0;
1597 }
1598
1599 /*
1600 * Fast check... if bits already collected
1601 * no need to take any locks...
1602 * if not set, we need to recheck after taking
1603 * the lock in case they got pulled in while
1604 * we were waiting for the lock
1605 */
1606 if ((pmap_phys_attributes[pai] & bits) == bits)
1607 return bits;
1608
1609 pv_h = pai_to_pvh(pai);
1610
1611 LOCK_PVH(pai);
1612
1613 attributes = pmap_phys_attributes[pai] & bits;
1614
1615
1616 /*
1617 * Walk down PV list, checking the mappings until we
1618 * reach the end or we've found the desired attributes.
1619 */
1620 if (attributes != bits &&
1621 pv_h->pmap != PMAP_NULL) {
1622 /*
1623 * There are some mappings.
1624 */
1625 pv_e = (pv_hashed_entry_t)pv_h;
1626 do {
1627 vm_map_offset_t va;
1628
1629 pmap = pv_e->pmap;
1630 va = pv_e->va;
1631 /*
1632 * pick up modify and/or reference bits from mapping
1633 */
1634
1635 pte = pmap_pte(pmap, va);
1636 attributes |= (int)(*pte & bits);
1637
1638 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1639
1640 } while ((attributes != bits) &&
1641 (pv_e != (pv_hashed_entry_t)pv_h));
1642 }
1643 pmap_phys_attributes[pai] |= attributes;
1644
1645 UNLOCK_PVH(pai);
1646 return (attributes);
1647 }
1648
1649 /*
1650 * Routine: pmap_change_wiring
1651 * Function: Change the wiring attribute for a map/virtual-address
1652 * pair.
1653 * In/out conditions:
1654 * The mapping must already exist in the pmap.
1655 */
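/*
 * Illustrative call (hypothetical values): unwiring an existing mapping:
 *
 *	pmap_change_wiring(map, vaddr, FALSE);
 */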
1656 void
1657 pmap_change_wiring(
1658 pmap_t map,
1659 vm_map_offset_t vaddr,
1660 boolean_t wired)
1661 {
1662 pt_entry_t *pte;
1663
1664 PMAP_LOCK(map);
1665
1666 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1667 panic("pmap_change_wiring: pte missing");
1668
1669 if (wired && !iswired(*pte)) {
1670 /*
1671 * wiring down mapping
1672 */
1673 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
1674 OSAddAtomic(+1, &map->stats.wired_count);
1675 pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
1676 }
1677 else if (!wired && iswired(*pte)) {
1678 /*
1679 * unwiring mapping
1680 */
1681 assert(map->stats.wired_count >= 1);
1682 OSAddAtomic(-1, &map->stats.wired_count);
1683 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1684 pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
1685 }
1686
1687 PMAP_UNLOCK(map);
1688 }
1689
1690 /*
1691 * "Backdoor" direct map routine for early mappings.
1692 * Useful for mapping memory outside the range
1693 * Sets A, D and NC if requested
1694 */
1695
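/*
 * Illustrative call (hypothetical addresses): creating an early, uncached
 * mapping of a device range; the return value is the virtual address just
 * past the new mappings:
 *
 *	next_virt = pmap_map_bd(virt, phys_start, phys_end,
 *	                        VM_PROT_READ | VM_PROT_WRITE,
 *	                        VM_MEM_NOT_CACHEABLE);
 */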
1696 vm_offset_t
1697 pmap_map_bd(
1698 vm_offset_t virt,
1699 vm_map_offset_t start_addr,
1700 vm_map_offset_t end_addr,
1701 vm_prot_t prot,
1702 unsigned int flags)
1703 {
1704 pt_entry_t template;
1705 pt_entry_t *pte;
1706 spl_t spl;
1707 vm_offset_t base = virt;
1708 template = pa_to_pte(start_addr)
1709 | INTEL_PTE_REF
1710 | INTEL_PTE_MOD
1711 | INTEL_PTE_WIRED
1712 | INTEL_PTE_VALID;
1713
1714 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
1715 template |= INTEL_PTE_NCACHE;
1716 if (!(flags & (VM_MEM_GUARDED)))
1717 template |= INTEL_PTE_PTA;
1718 }
1719
1720 #if defined(__x86_64__)
1721 if ((prot & VM_PROT_EXECUTE) == 0)
1722 template |= INTEL_PTE_NX;
1723 #endif
1724
1725 if (prot & VM_PROT_WRITE)
1726 template |= INTEL_PTE_WRITE;
1727
1728 while (start_addr < end_addr) {
1729 spl = splhigh();
1730 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
1731 if (pte == PT_ENTRY_NULL) {
1732 panic("pmap_map_bd: Invalid kernel address\n");
1733 }
1734 pmap_store_pte(pte, template);
1735 splx(spl);
1736 pte_increment_pa(template);
1737 virt += PAGE_SIZE;
1738 start_addr += PAGE_SIZE;
1739 }
1740 flush_tlb_raw();
1741 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
1742 return(virt);
1743 }
1744
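/*
 * pmap_reusable() walks [s64, e64) in PDE-sized chunks and, for each mapped
 * page, sets or clears PHYS_REUSABLE via pmap_reusable_range(), adjusting
 * the pmap's reusable/internal/external counters; superpage mappings are
 * skipped.
 */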
1745 void
1746 pmap_reusable(
1747 pmap_t pmap,
1748 addr64_t s64,
1749 addr64_t e64,
1750 boolean_t reusable)
1751 {
1752 pt_entry_t *pde;
1753 pt_entry_t *spte, *epte;
1754 addr64_t l64;
1755 uint64_t deadline;
1756
1757 pmap_intr_assert();
1758
1759 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
1760 return;
1761
1762 PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START,
1763 pmap,
1764 (uint32_t) (s64 >> 32), s64,
1765 (uint32_t) (e64 >> 32), e64);
1766
1767 PMAP_LOCK(pmap);
1768
1769 deadline = rdtsc64() + max_preemption_latency_tsc;
1770
1771 while (s64 < e64) {
1772 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1773 if (l64 > e64)
1774 l64 = e64;
1775 pde = pmap_pde(pmap, s64);
1776
1777 if (pde && (*pde & INTEL_PTE_VALID)) {
1778 if (*pde & INTEL_PTE_PS) {
1779 /* superpage: not supported */
1780 } else {
1781 spte = pmap_pte(pmap,
1782 (s64 & ~(pde_mapped_size - 1)));
1783 spte = &spte[ptenum(s64)];
1784 epte = &spte[intel_btop(l64 - s64)];
1785 pmap_reusable_range(pmap, s64, spte, epte,
1786 reusable);
1787 }
1788 }
1789 s64 = l64;
1790
1791 if (s64 < e64 && rdtsc64() >= deadline) {
1792 PMAP_UNLOCK(pmap);
1793 PMAP_LOCK(pmap);
1794 deadline = rdtsc64() + max_preemption_latency_tsc;
1795 }
1796 }
1797
1798 PMAP_UNLOCK(pmap);
1799
1800 PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END,
1801 pmap, reusable, 0, 0, 0);
1802 }
1803
1804 void
1805 pmap_reusable_range(
1806 pmap_t pmap,
1807 vm_map_offset_t start_vaddr,
1808 pt_entry_t *spte,
1809 pt_entry_t *epte,
1810 boolean_t reusable)
1811 {
1812 pt_entry_t *cpte;
1813 int num_external, num_internal, num_reusable;
1814 ppnum_t pai;
1815 pmap_paddr_t pa;
1816 vm_map_offset_t vaddr;
1817
1818 num_external = 0;
1819 num_internal = 0;
1820 num_reusable = 0;
1821
1822 for (cpte = spte, vaddr = start_vaddr;
1823 cpte < epte;
1824 cpte++, vaddr += PAGE_SIZE_64) {
1825
1826 pa = pte_to_pa(*cpte);
1827 if (pa == 0)
1828 continue;
1829
1830 pai = pa_index(pa);
1831
1832 LOCK_PVH(pai);
1833
1834 pa = pte_to_pa(*cpte);
1835 if (pa == 0) {
1836 UNLOCK_PVH(pai);
1837 continue;
1838 }
1839 if (reusable) {
1840 /* we want to set "reusable" */
1841 if (IS_REUSABLE_PAGE(pai)) {
1842 /* already reusable: no change */
1843 } else {
1844 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1845 /* one more "reusable" */
1846 num_reusable++;
1847 if (IS_INTERNAL_PAGE(pai)) {
1848 /* one less "internal" */
1849 num_internal--;
1850 } else {
1851 /* one less "external" */
1852 num_external--;
1853 }
1854 }
1855 } else {
1856 /* we want to clear "reusable" */
1857 if (IS_REUSABLE_PAGE(pai)) {
1858 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1859 /* one less "reusable" */
1860 num_reusable--;
1861 if (IS_INTERNAL_PAGE(pai)) {
1862 /* one more "internal" */
1863 num_internal++;
1864 } else {
1865 /* one more "external" */
1866 num_external++;
1867 }
1868 } else {
1869 /* already not reusable: no change */
1870 }
1871 }
1872
1873 UNLOCK_PVH(pai);
1874
1875 } /* for loop */
1876
1877 /*
1878 * Update the counts
1879 */
1880 if (pmap != kernel_pmap) {
1881 if (num_external) {
1882 OSAddAtomic(num_external, &pmap->stats.external);
1883 PMAP_STATS_PEAK(pmap->stats.external);
1884 }
1885 assert(pmap->stats.external >= 0);
1886 if (num_internal) {
1887 OSAddAtomic(num_internal, &pmap->stats.internal);
1888 PMAP_STATS_PEAK(pmap->stats.internal);
1889 }
1890 assert(pmap->stats.internal >= 0);
1891 if (num_reusable) {
1892 OSAddAtomic(num_reusable, &pmap->stats.reusable);
1893 PMAP_STATS_PEAK(pmap->stats.reusable);
1894 }
1895 assert(pmap->stats.reusable >= 0);
1896 }
1897
1898 return;
1899 }
1900
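/*
 * pmap_query_resident() returns the number of PTEs with a non-zero physical
 * address in [s64, e64) for a user pmap; superpage mappings are not counted.
 */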
1901 unsigned int
1902 pmap_query_resident(
1903 pmap_t pmap,
1904 addr64_t s64,
1905 addr64_t e64)
1906 {
1907 pt_entry_t *pde;
1908 pt_entry_t *spte, *epte;
1909 addr64_t l64;
1910 uint64_t deadline;
1911 unsigned int result;
1912
1913 pmap_intr_assert();
1914
1915 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
1916 return 0;
1917
1918 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
1919 pmap,
1920 (uint32_t) (s64 >> 32), s64,
1921 (uint32_t) (e64 >> 32), e64);
1922
1923 result = 0;
1924
1925 PMAP_LOCK(pmap);
1926
1927 deadline = rdtsc64() + max_preemption_latency_tsc;
1928
1929 while (s64 < e64) {
1930 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1931 if (l64 > e64)
1932 l64 = e64;
1933 pde = pmap_pde(pmap, s64);
1934
1935 if (pde && (*pde & INTEL_PTE_VALID)) {
1936 if (*pde & INTEL_PTE_PS) {
1937 /* superpage: not supported */
1938 } else {
1939 spte = pmap_pte(pmap,
1940 (s64 & ~(pde_mapped_size - 1)));
1941 spte = &spte[ptenum(s64)];
1942 epte = &spte[intel_btop(l64 - s64)];
1943
1944 for (; spte < epte; spte++) {
1945 if (pte_to_pa(*spte) != 0) {
1946 result++;
1947 }
1948 }
1949
1950 }
1951 }
1952 s64 = l64;
1953
1954 if (s64 < e64 && rdtsc64() >= deadline) {
1955 PMAP_UNLOCK(pmap);
1956 PMAP_LOCK(pmap);
1957 deadline = rdtsc64() + max_preemption_latency_tsc;
1958 }
1959 }
1960
1961 PMAP_UNLOCK(pmap);
1962
1963 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
1964 pmap, 0, 0, 0, 0);
1965
1966 return result;
1967 }