/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, i.e. NBPDE (2MB) at a time,
 * on an NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;

/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap into which subord will be nested
 *	subord   = the pmap that is inserted into grand
 *	va_start = start of the range in grand to be nested
 *	nstart   = start of the corresponding range in subord
 *	size     = size of the nested area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher-level VM locks to ensure that things don't change while
 *	we are doing this.  For example, VM should not perform any pmap enters while it is nesting,
 *	nor perform two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */

kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;

	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;

	if(size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart)
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (uintptr_t) grand, (uintptr_t) subord,
		   (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				PMAP_UNLOCK(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
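
/*
 * Illustrative sketch (not part of the original source, compiled out): how a
 * caller might satisfy the alignment contract checked at the top of
 * pmap_nest().  "grand_map" and "shared_map" are hypothetical pmap handles;
 * the base address and 512MB size are hypothetical values chosen to be
 * multiples of NBPDE, as required.
 */
#if 0
static void
pmap_nest_example(pmap_t grand_map, pmap_t shared_map)
{
	addr64_t	base = 0x00007FFF80000000ULL;	/* multiple of NBPDE (2MB) */
	uint64_t	size = 512ULL << 20;		/* multiple of NBPDE, well under 16TB */
	kern_return_t	kr;

	/* grand and subord offsets must match (va_start == nstart) */
	kr = pmap_nest(grand_map, shared_map, base, base, size);
	assert(kr == KERN_SUCCESS);
}
#endif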

/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand = the pmap from which we will un-nest a subordinate pmap
 *	vaddr = start of the range in grand to be unnested
 *	size  = size of the range to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {

	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (uintptr_t) grand,
		   (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		    grand, vaddr, size);
	}

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	PMAP_LOCK(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
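
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * tearing down the nesting established in the pmap_nest() example above.
 * The identifiers are hypothetical; pmap_unnest() itself re-aligns the
 * range to PDE boundaries before clearing the nested PDEs/PDPTEs.
 */
#if 0
static void
pmap_unnest_example(pmap_t grand_map)
{
	kern_return_t kr;

	kr = pmap_unnest(grand_map, 0x00007FFF80000000ULL, 512ULL << 20);
	assert(kr == KERN_SUCCESS);
}
#endif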

/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	if (!cpu_64bit)
		return rval;

	PMAP_LOCK(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT -1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT -1));
		rval = TRUE;
	}

	PMAP_UNLOCK(p);

	return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */

	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		}
		else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
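
/*
 * Illustrative sketch (not part of the original source, compiled out): for a
 * 2MB mapping the PDE supplies the base frame and the low bits of the virtual
 * page number select the 4K page within it, which is exactly what the
 * INTEL_PTE_PS branch above computes.  "some_map" and the address are
 * hypothetical.
 */
#if 0
static void
pmap_find_phys_example(pmap_t some_map)
{
	addr64_t va = 0x00007F0000201000ULL;
	ppnum_t ppn = pmap_find_phys(some_map, va);

	if (ppn != 0) {
		/* ppn is the 4K frame; full physical address = frame base + page offset */
		kprintf("va 0x%llx -> pa 0x%llx\n", va,
		    (uint64_t)i386_ptob(ppn) | (va & PAGE_MASK));
	}
}
#endif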

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t	pvh_e, nexth;
	vm_map_offset_t		vaddr;
	pmap_t			pmap;
	pt_entry_t		*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		__asm__ volatile("mfence");
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}


/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */

void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	vm_prot_t		fault_type,
	unsigned int		flags,
	boolean_t		wired)
{
	(void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE);
}

kern_return_t
pmap_enter_options(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	__unused vm_prot_t	fault_type,
	unsigned int		flags,
	boolean_t		wired,
	unsigned int		options)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	ppnum_t			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	int			delpage_pde_index = 0;
	pt_entry_t		old_pte;
	kern_return_t		kr_expand;

	pmap_intr_assert();

	if (pmap == PMAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
	 * unused value for that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);

	if (pn == vm_page_guard_addr)
		return KERN_INVALID_ARGUMENT;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if(superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap;
			 * we are going to grow pde-level page(s).
			 */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	}
	if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = (int)pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		*pte = 0;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 *	Special case if the incoming physical page is already mapped
	 *	at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 *	May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes))  {
				OSAddAtomic(+1, &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
		if (superpage)		/* this path can not be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */
		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;
		do {
			opte = *pte;
			npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
		} while (!pmap_cmpx_pte(pte, opte, npte));
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}

	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(pte, INTEL_PTE_VALID, 0);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte	= *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {

			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */

			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {

		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PMAP_UNLOCK(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */

	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}
	PMAP_UNLOCK(pmap);

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);
		VM_PAGE_FREE(m);
		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	return KERN_SUCCESS;
}
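
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * a typical wired kernel mapping entered through pmap_enter().  The virtual
 * address and page frame are hypothetical; the caller must be able to block,
 * since pmap_enter() may expand the pmap.
 */
#if 0
static void
pmap_enter_example(vm_map_offset_t kva, ppnum_t pn)
{
	pmap_enter(kernel_pmap,
		   kva,					/* page-aligned virtual address */
		   pn,					/* physical page number to map */
		   VM_PROT_READ | VM_PROT_WRITE,	/* protection */
		   VM_PROT_NONE,			/* fault type (unused here) */
		   0,					/* flags: default cacheability */
		   TRUE);				/* wired: PTE may not be reclaimed */
}
#endif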

/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	ppnum_t			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found   = 0;
	num_invalid = 0;
#if defined(__i386__)
	if (pmap != kernel_pmap &&
	    pmap->pm_task_map == TASK_MAP_32BIT &&
	    start_vaddr >= HIGH_MEM_BASE) {
		/*
		 * The range is in the "high_shared_pde" which is shared
		 * between the kernel and all 32-bit tasks.  It holds
		 * the 32-bit commpage but also the trampolines, GDT, etc...
		 * so we can't let user tasks remove anything from it.
		 */
		return;
	}
#endif
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0)
			continue;
		num_found++;

		if (iswired(p))
			num_unwired++;

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			continue;
		}

		if ((p & INTEL_PTE_VALID) == 0)
			num_invalid++;

		/* invalidate the PTE */
		pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
	pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

	return;
}


/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);


	PMAP_LOCK(map);

#if 0
	/*
	 * Check that address range in the kernel does not overlap the stacks.
	 * We initialize local static min/max variables once to avoid making
	 * 2 function calls for every remove. Note also that these functions
	 * both return 0 before kernel stacks have been initialized, and hence
	 * the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
#else

	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies.  If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */

#endif

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(map);
			PMAP_LOCK(map);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);

}
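
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * unmapping a page-aligned kernel virtual range.  The addresses are
 * hypothetical; pmap_remove() walks the range one PDE's worth of address
 * space at a time, periodically dropping the pmap lock to bound latency.
 */
#if 0
static void
pmap_remove_example(vm_map_offset_t kva, vm_size_t len)
{
	assert((kva & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
	pmap_remove(kernel_pmap, (addr64_t)kva, (addr64_t)(kva + len));
}
#endif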

/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	boolean_t		remove;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);


	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
				"pmap=%p pn=0x%x vaddr=0x%llx\n",
				pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {
			/*
			 * Remove the mapping, collecting dirty bits.
			 */
			pmap_update_pte(pte, INTEL_PTE_VALID, 0);

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}

			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(pte, 0);

#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);

			/*
			 * Deal with the pv_rooted_entry.
			 */

			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
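
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * the two common uses of pmap_page_protect().  VM_PROT_READ downgrades every
 * mapping of the page to read-only; VM_PROT_NONE removes all mappings.
 * "pn" is a hypothetical managed page number.
 */
#if 0
static void
pmap_page_protect_example(ppnum_t pn)
{
	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect all mappings */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove all mappings */
}
#endif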

/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the per-pmap lock
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */

		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t	va;

			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * Clear modify and/or reference bits.
			 */
			pte = pmap_pte(pmap, va);
			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, bits, 0);
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */

	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return 0;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check...  if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return bits;

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;


	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */

			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return (attributes);
}
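
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * querying the collected referenced/modified state of a managed page, in the
 * style of the VM layer's "is modified / is referenced" checks.  "pn" is a
 * hypothetical managed page number.
 */
#if 0
static boolean_t
page_is_dirty_example(ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED) != 0);
}
#endif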
0b4c1975 1439
6d2010ae
A
1440/*
1441 * Routine: pmap_change_wiring
1442 * Function: Change the wiring attribute for a map/virtual-address
1443 * pair.
1444 * In/out conditions:
1445 * The mapping must already exist in the pmap.
1446 */
0b4c1975 1447void
6d2010ae
A
1448pmap_change_wiring(
1449 pmap_t map,
1450 vm_map_offset_t vaddr,
1451 boolean_t wired)
0b4c1975 1452{
6d2010ae 1453 pt_entry_t *pte;
0b4c1975 1454
6d2010ae 1455 PMAP_LOCK(map);
0b4c1975 1456
6d2010ae
A
1457 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1458 panic("pmap_change_wiring: pte missing");
0b4c1975 1459
6d2010ae
A
1460 if (wired && !iswired(*pte)) {
1461 /*
1462 * wiring down mapping
1463 */
316670eb 1464 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
6d2010ae 1465 OSAddAtomic(+1, &map->stats.wired_count);
316670eb 1466 pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
0b4c1975 1467 }
6d2010ae
A
1468 else if (!wired && iswired(*pte)) {
1469 /*
1470 * unwiring mapping
1471 */
1472 assert(map->stats.wired_count >= 1);
1473 OSAddAtomic(-1, &map->stats.wired_count);
316670eb
A
1474 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1475 pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
060df5ea 1476 }
060df5ea 1477
6d2010ae
A
1478 PMAP_UNLOCK(map);
1479}
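
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * wiring and later unwiring an existing mapping, e.g. around a transient
 * need to keep a user page resident.  "user_map" and "uva" are hypothetical;
 * the mapping must already exist or pmap_change_wiring() panics.
 */
#if 0
static void
pmap_change_wiring_example(pmap_t user_map, vm_map_offset_t uva)
{
	pmap_change_wiring(user_map, uva, TRUE);	/* wire: PTE cannot be reclaimed */
	/* ... operate on the page ... */
	pmap_change_wiring(user_map, uva, FALSE);	/* unwire */
}
#endif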

/*
 * "Backdoor" direct map routine for early mappings.
 * Useful for mapping memory outside the range of managed physical memory.
 * Sets A, D and NC if requested
 */

vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;
	spl_t		spl;
	vm_offset_t	base = virt;
	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED)))
			template |= INTEL_PTE_PTA;
	}

#if defined(__x86_64__)
	if ((prot & VM_PROT_EXECUTE) == 0)
		template |= INTEL_PTE_NX;
#endif

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		spl = splhigh();
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		splx(spl);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	flush_tlb_raw();
	PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
	return(virt);
}
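
/*
 * Illustrative sketch (not part of the original source, compiled out):
 * mapping a hypothetical device MMIO window uncached during early boot.
 * The virtual and physical addresses are placeholders; the caller supplies
 * a kernel virtual range whose page tables already exist.
 */
#if 0
static vm_offset_t
pmap_map_bd_example(vm_offset_t kva, vm_map_offset_t mmio_base)
{
	return pmap_map_bd(kva,
			   mmio_base,
			   mmio_base + PAGE_SIZE,
			   VM_PROT_READ | VM_PROT_WRITE,
			   VM_MEM_NOT_CACHEABLE);
}
#endif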