apple/xnu (xnu-1504.3.12) - osfmk/i386/pmap_x86_common.c
1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#include <vm/pmap.h>
29#include <vm/vm_map.h>
30#include <i386/pmap_internal.h>
31
32
33void pmap_remove_range(
34 pmap_t pmap,
35 vm_map_offset_t va,
36 pt_entry_t *spte,
37 pt_entry_t *epte);
38
39pv_rooted_entry_t pv_head_table; /* array of entries, one per
40 * page */
41thread_call_t mapping_adjust_call;
42static thread_call_data_t mapping_adjust_call_data;
43uint32_t mappingrecurse = 0;
44
45pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
46uint32_t pmap_pagetable_corruption_incidents;
47uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
48uint64_t pmap_pagetable_corruption_interval_abstime;
49thread_call_t pmap_pagetable_corruption_log_call;
50static thread_call_data_t pmap_pagetable_corruption_log_call_data;
51boolean_t pmap_pagetable_corruption_timeout = FALSE;
52
53/*
54 * The Intel platform can nest at the PDE level, i.e. NBPDE (2MB) at a time,
55 * on an NBPDE boundary.
56 */
57
58/* These symbols may be referenced directly by VM */
59uint64_t pmap_nesting_size_min = NBPDE;
60uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
61
62/*
63 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
64 *
65 * grand = the pmap that subord will be nested into
66 * subord = the pmap to be nested into grand
67 * va_start = start of the range in grand to be inserted
68 * nstart = start of the corresponding range in the nested (subord) pmap
69 * size = size of the nested area (up to 16TB)
70 *
71 * Inserts a pmap into another. This is used to implement shared segments.
72 *
73 * Note that we depend upon higher-level VM locks to ensure that things don't change while
74 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting,
75 * nor performing two nests at once.
76 */
77
78/*
79 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
80 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
81 * container and the "grand" parent. A minor optimization to consider for the
82 * future: make the "subord" truly a container rather than a full-fledged
83 * pagetable hierarchy which can be unnecessarily sparse (DRK).
84 */
85
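/*
 * A minimal standalone sketch of the argument sanity checks performed at the
 * top of pmap_nest() below. SKETCH_NBPDE is assumed to be 2MB per the NBPDE
 * comment above; the real constant comes from the pmap headers, and the real
 * routine panics (rather than merely failing) when size is zero or when
 * va_start != nstart. Not part of the build.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_NBPDE	(2ULL * 1024 * 1024)	/* 2MB, assumed */

static int
nest_args_look_valid(uint64_t va_start, uint64_t nstart, uint64_t size)
{
	if ((size & (SKETCH_NBPDE - 1)) ||	/* size must be 2MB-aligned            */
	    (va_start & (SKETCH_NBPDE - 1)) ||	/* ...and so must both start addresses */
	    (nstart & (SKETCH_NBPDE - 1)))
		return 0;
	/*
	 * (size >> 28) > 65536  is equivalent to  size > 65536 * 2^28 = 2^44,
	 * i.e. the "Max size we can nest is 16TB" limit.
	 */
	if ((size >> 28) > 65536)
		return 0;
	return size != 0 && va_start == nstart;
}

int
main(void)
{
	/* 1GB shared region at a 2MB-aligned address: accepted */
	printf("%d\n", nest_args_look_valid(0x7fff00000000ULL, 0x7fff00000000ULL, 1ULL << 30));
	/* 32TB exceeds the 16TB cap: rejected */
	printf("%d\n", nest_args_look_valid(0, 0, 1ULL << 45));
	return 0;
}
#endif
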
86kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
87 vm_map_offset_t vaddr, nvaddr;
88 pd_entry_t *pde,*npde;
89 unsigned int i;
90 uint64_t num_pde;
91
92 if ((size & (pmap_nesting_size_min-1)) ||
93 (va_start & (pmap_nesting_size_min-1)) ||
94 (nstart & (pmap_nesting_size_min-1)) ||
95 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
96 return KERN_INVALID_VALUE;
97
98 if(size == 0) {
99 panic("pmap_nest: size is invalid - %016llX\n", size);
100 }
101
102 if (va_start != nstart)
103 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
104
105 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
106 (int) grand, (int) subord,
107 (int) (va_start>>32), (int) va_start, 0);
108
109 nvaddr = (vm_map_offset_t)nstart;
110 num_pde = size >> PDESHIFT;
111
112 PMAP_LOCK(subord);
113
114 subord->pm_shared = TRUE;
115
116 for (i = 0; i < num_pde;) {
117 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
118
119 npde = pmap64_pdpt(subord, nvaddr);
120
121 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
122 PMAP_UNLOCK(subord);
123 pmap_expand_pdpt(subord, nvaddr);
124 PMAP_LOCK(subord);
125 npde = pmap64_pdpt(subord, nvaddr);
126 }
127 *npde |= INTEL_PDPTE_NESTED;
128 nvaddr += NBPDPT;
129 i += (uint32_t)NPDEPG;
130 }
131 else {
132 npde = pmap_pde(subord, nvaddr);
133
134 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
135 PMAP_UNLOCK(subord);
136 pmap_expand(subord, nvaddr);
137 PMAP_LOCK(subord);
138 npde = pmap_pde(subord, nvaddr);
139 }
140 nvaddr += NBPDE;
141 i++;
142 }
143 }
144
145 PMAP_UNLOCK(subord);
146
147 vaddr = (vm_map_offset_t)va_start;
148
149 PMAP_LOCK(grand);
150
151 for (i = 0;i < num_pde;) {
152 pd_entry_t tpde;
153
154 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
155 npde = pmap64_pdpt(subord, vaddr);
156 if (npde == 0)
157 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
158 tpde = *npde;
159 pde = pmap64_pdpt(grand, vaddr);
160 if (0 == pde) {
161 PMAP_UNLOCK(grand);
162 pmap_expand_pml4(grand, vaddr);
163 PMAP_LOCK(grand);
164 pde = pmap64_pdpt(grand, vaddr);
165 }
166 if (pde == 0)
167 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
168 pmap_store_pte(pde, tpde);
169 vaddr += NBPDPT;
170 i += (uint32_t) NPDEPG;
171 }
172 else {
173 npde = pmap_pde(subord, nstart);
174 if (npde == 0)
175 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
176 tpde = *npde;
177 nstart += NBPDE;
178 pde = pmap_pde(grand, vaddr);
179 if ((0 == pde) && cpu_64bit) {
180 PMAP_UNLOCK(grand);
181 pmap_expand_pdpt(grand, vaddr);
182 PMAP_LOCK(grand);
183 pde = pmap_pde(grand, vaddr);
184 }
185
186 if (pde == 0)
187 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
188 vaddr += NBPDE;
189 pmap_store_pte(pde, tpde);
190 i++;
191 }
192 }
193
194 PMAP_UNLOCK(grand);
195
196 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
197
198 return KERN_SUCCESS;
199}
200
201/*
202 * kern_return_t pmap_unnest(grand, vaddr)
203 *
204 * grand = the pmap that we will un-nest subord from
205 * vaddr = start of range in pmap to be unnested
206 *
207 * Removes a pmap from another. This is used to implement shared segments.
208 */
209
210kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
211
212 pd_entry_t *pde;
213 unsigned int i;
214 uint64_t num_pde;
215 addr64_t va_start, va_end;
216 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
217
218 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
219 (int) grand,
220 (int) (vaddr>>32), (int) vaddr, 0, 0);
221
222 if ((size & (pmap_nesting_size_min-1)) ||
223 (vaddr & (pmap_nesting_size_min-1))) {
224 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
225 grand, vaddr, size);
226 }
227
228 /* align everything to PDE boundaries */
229 va_start = vaddr & ~(NBPDE-1);
230 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
231 size = va_end - va_start;
232
233 PMAP_LOCK(grand);
234
235 num_pde = size >> PDESHIFT;
236 vaddr = va_start;
237
238 for (i = 0; i < num_pde; ) {
239 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
240 npdpt = pdptnum(grand, vaddr);
241 pde = pmap64_pdpt(grand, vaddr);
242 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
243 pmap_store_pte(pde, (pd_entry_t)0);
244 i += (uint32_t) NPDEPG;
245 vaddr += NBPDPT;
246 continue;
247 }
248 }
249 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
250 if (pde == 0)
251 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
252 pmap_store_pte(pde, (pd_entry_t)0);
253 i++;
254 vaddr += NBPDE;
255 }
256
257 PMAP_UPDATE_TLBS(grand, va_start, va_end);
258
259 PMAP_UNLOCK(grand);
260
261 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
262
263 return KERN_SUCCESS;
264}
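
/*
 * A standalone sketch of the "align everything to PDE boundaries" rounding in
 * pmap_unnest() above: the start is rounded down and the end rounded up to
 * the next NBPDE boundary, so whole 2MB PDEs are always removed. SKETCH_NBPDE
 * is assumed to be 2MB; the printed example is purely illustrative.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_NBPDE	(2ULL * 1024 * 1024)	/* 2MB, assumed */

int
main(void)
{
	uint64_t vaddr = 0x100234000ULL;	/* not 2MB-aligned */
	uint64_t size  = 0x300000ULL;		/* 3MB             */

	uint64_t va_start = vaddr & ~(SKETCH_NBPDE - 1);	/* round start down */
	uint64_t va_end   = (vaddr + size + SKETCH_NBPDE - 1) &
	    ~(SKETCH_NBPDE - 1);				/* round end up     */

	/* 0x100200000 .. 0x100600000: the unaligned 3MB span covers two whole PDEs */
	printf("0x%llx .. 0x%llx (%llu PDEs)\n",
	    (unsigned long long)va_start, (unsigned long long)va_end,
	    (unsigned long long)((va_end - va_start) / SKETCH_NBPDE));
	return 0;
}
#endif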
265
266/* Invoked by the Mach VM to determine the platform-specific unnest region */
267
268boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
269 pd_entry_t *pdpte;
270 boolean_t rval = FALSE;
271
272 if (!cpu_64bit)
273 return rval;
274
275 PMAP_LOCK(p);
276
277 pdpte = pmap64_pdpt(p, *s);
278 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
279 *s &= ~(NBPDPT -1);
280 rval = TRUE;
281 }
282
283 pdpte = pmap64_pdpt(p, *e);
284 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
285 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
286 rval = TRUE;
287 }
288
289 PMAP_UNLOCK(p);
290
291 return rval;
292}
293
294/*
295 * pmap_find_phys returns the (4K) physical page number containing a
296 * given virtual address in a given pmap.
297 * Note that pmap_pte may return a pde if this virtual address is
298 * mapped by a large page; this is taken into account so that the
299 * correct page number is returned in that case.
300 */
301ppnum_t
302pmap_find_phys(pmap_t pmap, addr64_t va)
303{
304 pt_entry_t *ptp;
305 pd_entry_t *pdep;
306 ppnum_t ppn = 0;
307 pd_entry_t pde;
308 pt_entry_t pte;
309
310 mp_disable_preemption();
311
312 /* This refcount test is a band-aid--several infrastructural changes
313 * are necessary to eliminate invocation of this routine from arbitrary
314 * contexts.
315 */
316
317 if (!pmap->ref_count)
318 goto pfp_exit;
319
320 pdep = pmap_pde(pmap, va);
321
322 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
323 if (pde & INTEL_PTE_PS) {
324 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
325 ppn += (ppnum_t) ptenum(va);
326 }
327 else {
328 ptp = pmap_pte(pmap, va);
329 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
330 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
331 }
332 }
333 }
334pfp_exit:
335 mp_enable_preemption();
336
337 return ppn;
338}
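
/*
 * A standalone sketch of the 2MB large-page case in pmap_find_phys() above:
 * the PDE yields the physical base of the 2MB page, and the returned 4K page
 * number is that base's page number plus the index of the 4K page within the
 * 2MB page (the ptenum() analogue). The 12-bit page offset and 9-bit index
 * below are the usual x86 long-mode values and are assumptions of this
 * sketch, not values quoted from the pmap headers.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT	12
#define SKETCH_PTENUM(va)	(((va) >> SKETCH_PAGE_SHIFT) & 0x1FFULL)	/* ptenum() analogue    */
#define SKETCH_BTOP(pa)		((pa) >> SKETCH_PAGE_SHIFT)			/* i386_btop() analogue */

int
main(void)
{
	uint64_t pde_pa = 0x80000000ULL;	/* 2MB-aligned physical base taken from the PDE */
	uint64_t va     = 0x7f0000123456ULL;	/* a VA somewhere inside that 2MB mapping        */

	uint64_t ppn = SKETCH_BTOP(pde_pa) + SKETCH_PTENUM(va);
	printf("ppn = 0x%llx\n", (unsigned long long)ppn);	/* 0x80000 + 0x123 = 0x80123 */
	return 0;
}
#endif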
339
340/*
341 * Insert the given physical page (pn) at
342 * the specified virtual address (vaddr) in the
343 * target physical map with the protection requested.
344 *
345 * If specified, the page will be wired down, meaning
346 * that the related pte cannot be reclaimed.
347 *
348 * NB: This is the only routine which MAY NOT lazy-evaluate
349 * or lose information. That is, this routine must actually
350 * insert this page into the given map NOW.
351 */
352void
353pmap_enter(
354 register pmap_t pmap,
355 vm_map_offset_t vaddr,
356 ppnum_t pn,
357 vm_prot_t prot,
358 unsigned int flags,
359 boolean_t wired)
360{
361 pt_entry_t *pte;
362 pv_rooted_entry_t pv_h;
363 int pai;
364 pv_hashed_entry_t pvh_e;
365 pv_hashed_entry_t pvh_new;
366 pt_entry_t template;
367 pmap_paddr_t old_pa;
368 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
369 boolean_t need_tlbflush = FALSE;
370 boolean_t set_NX;
371 char oattr;
372 boolean_t old_pa_locked;
373 /* 2MiB mappings are confined to x86_64 by VM */
374 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
375 vm_object_t delpage_pm_obj = NULL;
376 int delpage_pde_index = 0;
377 pt_entry_t old_pte;
378
379 pmap_intr_assert();
380 assert(pn != vm_page_fictitious_addr);
381
382 if (pmap == PMAP_NULL)
383 return;
384 if (pn == vm_page_guard_addr)
385 return;
386
387 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
388 pmap,
389 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
390 pn, prot);
391
392 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
393 set_NX = FALSE;
394 else
395 set_NX = TRUE;
396
397 /*
398 * Must allocate a new pvlist entry while we're unlocked;
399 * zalloc may cause pageout (which will lock the pmap system).
400 * If we determine we need a pvlist entry, we will unlock
401 * and allocate one. Then we will retry, throwing away
402 * the allocated entry later (if we no longer need it).
403 */
404
405 pvh_new = PV_HASHED_ENTRY_NULL;
406Retry:
407 pvh_e = PV_HASHED_ENTRY_NULL;
408
409 PMAP_LOCK(pmap);
410
411 /*
412 * Expand pmap to include this pte. Assume that
413 * pmap is always expanded to include enough hardware
414 * pages to map one VM page.
415 */
416 if(superpage) {
417 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
418 /* need room for another pde entry */
419 PMAP_UNLOCK(pmap);
420 pmap_expand_pdpt(pmap, vaddr);
421 PMAP_LOCK(pmap);
422 }
423 } else {
424 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
425 /*
426 * Must unlock to expand the pmap;
427 * we're going to grow pde-level page(s).
428 */
429 PMAP_UNLOCK(pmap);
430 pmap_expand(pmap, vaddr);
431 PMAP_LOCK(pmap);
432 }
433 }
434
435 if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
436 /*
437 * There is still an empty page table mapped that
438 * was used for a previous base page mapping.
439 * Remember the PDE and the PDE index, so that we
440 * can free the page at the end of this function.
441 */
442 delpage_pde_index = (int)pdeidx(pmap, vaddr);
443 delpage_pm_obj = pmap->pm_obj;
444 *pte = 0;
445 }
446
447
448 old_pa = pte_to_pa(*pte);
449 pai = pa_index(old_pa);
450 old_pa_locked = FALSE;
451
452 /*
453 * if we have a previous managed page, lock the pv entry now. after
454 * we lock it, check to see if someone beat us to the lock and if so
455 * drop the lock
456 */
457 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
458 LOCK_PVH(pai);
459 old_pa_locked = TRUE;
460 old_pa = pte_to_pa(*pte);
461 if (0 == old_pa) {
462 UNLOCK_PVH(pai); /* another path beat us to it */
463 old_pa_locked = FALSE;
464 }
465 }
466
467 /*
468 * Special case if the incoming physical page is already mapped
469 * at this address.
470 */
471 if (old_pa == pa) {
472
473 /*
474 * May be changing its wired attribute or protection
475 */
476
477 template = pa_to_pte(pa) | INTEL_PTE_VALID;
478
479 if (VM_MEM_NOT_CACHEABLE ==
480 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
481 if (!(flags & VM_MEM_GUARDED))
482 template |= INTEL_PTE_PTA;
483 template |= INTEL_PTE_NCACHE;
484 }
485 if (pmap != kernel_pmap)
486 template |= INTEL_PTE_USER;
487 if (prot & VM_PROT_WRITE)
488 template |= INTEL_PTE_WRITE;
489
490 if (set_NX)
491 template |= INTEL_PTE_NX;
492
493 if (wired) {
494 template |= INTEL_PTE_WIRED;
495 if (!iswired(*pte))
496 OSAddAtomic(+1,
497 &pmap->stats.wired_count);
498 } else {
499 if (iswired(*pte)) {
500 assert(pmap->stats.wired_count >= 1);
501 OSAddAtomic(-1,
502 &pmap->stats.wired_count);
503 }
504 }
505 if (superpage) /* this path cannot be used */
506 template |= INTEL_PTE_PS; /* to change the page size! */
507
508 /* store modified PTE and preserve RC bits */
509 pmap_update_pte(pte, *pte,
510 template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
511 if (old_pa_locked) {
512 UNLOCK_PVH(pai);
513 old_pa_locked = FALSE;
514 }
515 need_tlbflush = TRUE;
516 goto Done;
517 }
518
519 /*
520 * Outline of code from here:
521 * 1) If va was mapped, update TLBs, remove the mapping
522 * and remove old pvlist entry.
523 * 2) Add pvlist entry for new mapping
524 * 3) Enter new mapping.
525 *
526 * If the old physical page is not managed step 1) is skipped
527 * (except for updating the TLBs), and the mapping is
528 * overwritten at step 3). If the new physical page is not
529 * managed, step 2) is skipped.
530 */
531
532 if (old_pa != (pmap_paddr_t) 0) {
533
534 /*
535 * Don't do anything to pages outside valid memory here.
536 * Instead convince the code that enters a new mapping
537 * to overwrite the old one.
538 */
539
540 /* invalidate the PTE */
541 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
542 /* propagate invalidate everywhere */
543 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
544 /* remember reference and change */
545 old_pte = *pte;
546 oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
547 /* completely invalidate the PTE */
548 pmap_store_pte(pte, 0);
549
550 if (IS_MANAGED_PAGE(pai)) {
551#if TESTING
552 if (pmap->stats.resident_count < 1)
553 panic("pmap_enter: resident_count");
554#endif
555 assert(pmap->stats.resident_count >= 1);
556 OSAddAtomic(-1,
557 &pmap->stats.resident_count);
558
559 if (iswired(*pte)) {
560#if TESTING
561 if (pmap->stats.wired_count < 1)
562 panic("pmap_enter: wired_count");
563#endif
564 assert(pmap->stats.wired_count >= 1);
565 OSAddAtomic(-1,
566 &pmap->stats.wired_count);
567 }
568 pmap_phys_attributes[pai] |= oattr;
569
570 /*
571 * Remove the mapping from the pvlist for
572 * this physical page.
573 * We'll end up with either a rooted pv or a
574 * hashed pv
575 */
576 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
577
578 } else {
579
580 /*
581 * old_pa is not managed.
582 * Do removal part of accounting.
583 */
584
585 if (iswired(*pte)) {
586 assert(pmap->stats.wired_count >= 1);
587 OSAddAtomic(-1,
588 &pmap->stats.wired_count);
589 }
590 }
591 }
592
593 /*
594 * if we had a previously managed page locked, unlock it now
595 */
596 if (old_pa_locked) {
597 UNLOCK_PVH(pai);
598 old_pa_locked = FALSE;
599 }
600
601 pai = pa_index(pa); /* now working with new incoming phys page */
602 if (IS_MANAGED_PAGE(pai)) {
603
604 /*
605 * Step 2) Enter the mapping in the PV list for this
606 * physical page.
607 */
608 pv_h = pai_to_pvh(pai);
609
610 LOCK_PVH(pai);
611
612 if (pv_h->pmap == PMAP_NULL) {
613 /*
614 * No mappings yet, use rooted pv
615 */
616 pv_h->va = vaddr;
617 pv_h->pmap = pmap;
618 queue_init(&pv_h->qlink);
619 } else {
620 /*
621 * Add new pv_hashed_entry after header.
622 */
623 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
624 pvh_e = pvh_new;
625 pvh_new = PV_HASHED_ENTRY_NULL;
626 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
627 PV_HASHED_ALLOC(pvh_e);
628 if (PV_HASHED_ENTRY_NULL == pvh_e) {
629 /*
630 * The pv free list is empty. If we are on
631 * the kernel pmap we'll use one of
632 * the special private kernel pv_e's;
633 * otherwise, we need to unlock
634 * everything, zalloc a pv_e, and
635 * restart, bringing the pv_e in with
636 * us.
637 */
638 if (kernel_pmap == pmap) {
639 PV_HASHED_KERN_ALLOC(pvh_e);
640 } else {
641 UNLOCK_PVH(pai);
642 PMAP_UNLOCK(pmap);
643 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
644 goto Retry;
645 }
646 }
647 }
648
649 if (PV_HASHED_ENTRY_NULL == pvh_e)
650 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
651
652 pvh_e->va = vaddr;
653 pvh_e->pmap = pmap;
654 pvh_e->ppn = pn;
655 pv_hash_add(pvh_e, pv_h);
656
657 /*
658 * Remember that we used the pvlist entry.
659 */
660 pvh_e = PV_HASHED_ENTRY_NULL;
661 }
662
663 /*
664 * only count the mapping
665 * for 'managed memory'
666 */
667 OSAddAtomic(+1, & pmap->stats.resident_count);
668 if (pmap->stats.resident_count > pmap->stats.resident_max) {
669 pmap->stats.resident_max = pmap->stats.resident_count;
670 }
671 }
672 /*
673 * Step 3) Enter the mapping.
674 *
675 * Build a template to speed up entering -
676 * only the pfn changes.
677 */
678 template = pa_to_pte(pa) | INTEL_PTE_VALID;
679
680 if (flags & VM_MEM_NOT_CACHEABLE) {
681 if (!(flags & VM_MEM_GUARDED))
682 template |= INTEL_PTE_PTA;
683 template |= INTEL_PTE_NCACHE;
684 }
685 if (pmap != kernel_pmap)
686 template |= INTEL_PTE_USER;
687 if (prot & VM_PROT_WRITE)
688 template |= INTEL_PTE_WRITE;
689 if (set_NX)
690 template |= INTEL_PTE_NX;
691 if (wired) {
692 template |= INTEL_PTE_WIRED;
693 OSAddAtomic(+1, & pmap->stats.wired_count);
694 }
695 if (superpage)
696 template |= INTEL_PTE_PS;
697 pmap_store_pte(pte, template);
698
699 /*
700 * If this was a managed page, we delayed unlocking the pv until here
701 * to prevent pmap_page_protect et al. from finding it until the pte
702 * has been stored.
703 */
704 if (IS_MANAGED_PAGE(pai)) {
705 UNLOCK_PVH(pai);
706 }
707Done:
708 if (need_tlbflush == TRUE)
709 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
710
711 if (pvh_e != PV_HASHED_ENTRY_NULL) {
712 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
713 }
714 if (pvh_new != PV_HASHED_ENTRY_NULL) {
715 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
716 }
717 PMAP_UNLOCK(pmap);
718
719 if (delpage_pm_obj) {
720 vm_page_t m;
721
722 vm_object_lock(delpage_pm_obj);
723 m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
724 if (m == VM_PAGE_NULL)
725 panic("pmap_enter: pte page not in object");
726 VM_PAGE_FREE(m);
727 OSAddAtomic(-1, &inuse_ptepages_count);
728 vm_object_unlock(delpage_pm_obj);
729 }
730
731 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
732}
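
/*
 * A condensed sketch of how pmap_enter() above assembles the PTE "template"
 * in Step 3: start from the physical address plus the valid bit, then OR in
 * user/write/NX/cacheability/wired/superpage bits according to the caller's
 * prot and flags. The SKETCH_* values use the architectural x86 bit positions
 * for the hardware bits and an arbitrary position for the software "wired"
 * bit; they stand in for the INTEL_PTE_* macros and are not quoted from the
 * pmap headers.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PTE_VALID	0x0000000000000001ULL	/* P                       */
#define SKETCH_PTE_WRITE	0x0000000000000002ULL	/* R/W                     */
#define SKETCH_PTE_USER		0x0000000000000004ULL	/* U/S                     */
#define SKETCH_PTE_NCACHE	0x0000000000000010ULL	/* PCD                     */
#define SKETCH_PTE_PS		0x0000000000000080ULL	/* 2MB page                */
#define SKETCH_PTE_WIRED	0x0000000000000200ULL	/* software bit (assumed)  */
#define SKETCH_PTE_NX		0x8000000000000000ULL	/* execute-disable         */
#define SKETCH_ADDR_MASK	0x000FFFFFFFFFF000ULL

static uint64_t
sketch_pte_template(uint64_t pa, bool user, bool writable, bool nx,
    bool uncacheable, bool wired, bool superpage)
{
	uint64_t t = (pa & SKETCH_ADDR_MASK) | SKETCH_PTE_VALID;

	if (user)		t |= SKETCH_PTE_USER;
	if (writable)		t |= SKETCH_PTE_WRITE;
	if (nx)			t |= SKETCH_PTE_NX;
	if (uncacheable)	t |= SKETCH_PTE_NCACHE;
	if (wired)		t |= SKETCH_PTE_WIRED;
	if (superpage)		t |= SKETCH_PTE_PS;
	return t;
}

int
main(void)
{
	/* user-writable, non-executable 4K mapping: low bits are VALID|WRITE|USER */
	uint64_t t = sketch_pte_template(0x80000000ULL, true, true, true,
	    false, false, false);
	printf("template low bits: 0x%llx\n", (unsigned long long)(t & 0xFFF));	/* 0x7 */
	return 0;
}
#endif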
733
734/*
735 * Remove a range of hardware page-table entries.
736 * The entries given are the first (inclusive)
737 * and last (exclusive) entries for the VM pages.
738 * The virtual address is the va for the first pte.
739 *
740 * The pmap must be locked.
741 * If the pmap is not the kernel pmap, the range must lie
742 * entirely within one pte-page. This is NOT checked.
743 * Assumes that the pte-page exists.
744 */
745
746void
747pmap_remove_range(
748 pmap_t pmap,
749 vm_map_offset_t start_vaddr,
750 pt_entry_t *spte,
751 pt_entry_t *epte)
752{
753 pt_entry_t *cpte;
754 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
755 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
756 pv_hashed_entry_t pvh_e;
757 int pvh_cnt = 0;
758 int num_removed, num_unwired, num_found, num_invalid;
759 int pai;
760 pmap_paddr_t pa;
761 vm_map_offset_t vaddr;
762
763 num_removed = 0;
764 num_unwired = 0;
765 num_found = 0;
766 num_invalid = 0;
767#if defined(__i386__)
768 if (pmap != kernel_pmap &&
769 pmap->pm_task_map == TASK_MAP_32BIT &&
770 start_vaddr >= HIGH_MEM_BASE) {
771 /*
772 * The range is in the "high_shared_pde" which is shared
773 * between the kernel and all 32-bit tasks. It holds
774 * the 32-bit commpage but also the trampolines, GDT, etc...
775 * so we can't let user tasks remove anything from it.
776 */
777 return;
778 }
779#endif
780 /* invalidate the PTEs first to "freeze" them */
781 for (cpte = spte, vaddr = start_vaddr;
782 cpte < epte;
783 cpte++, vaddr += PAGE_SIZE_64) {
784 pt_entry_t p = *cpte;
785
786 pa = pte_to_pa(p);
787 if (pa == 0)
788 continue;
789 num_found++;
790
791 if (iswired(p))
792 num_unwired++;
793
794 pai = pa_index(pa);
795
796 if (!IS_MANAGED_PAGE(pai)) {
797 /*
798 * Outside range of managed physical memory.
799 * Just remove the mappings.
800 */
801 pmap_store_pte(cpte, 0);
802 continue;
803 }
804
805 if ((p & INTEL_PTE_VALID) == 0)
806 num_invalid++;
807
808 /* invalidate the PTE */
809 pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
810 }
811
812 if (num_found == 0) {
813 /* nothing was changed: we're done */
814 goto update_counts;
815 }
816
817 /* propagate the invalidates to other CPUs */
818
819 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
820
821 for (cpte = spte, vaddr = start_vaddr;
822 cpte < epte;
823 cpte++, vaddr += PAGE_SIZE_64) {
824
825 pa = pte_to_pa(*cpte);
826 if (pa == 0)
827 continue;
828
829 pai = pa_index(pa);
830
831 LOCK_PVH(pai);
832
833 pa = pte_to_pa(*cpte);
834 if (pa == 0) {
835 UNLOCK_PVH(pai);
836 continue;
837 }
838 num_removed++;
839
840 /*
841 * Get the modify and reference bits, then
842 * nuke the entry in the page table
843 */
844 /* remember reference and change */
845 pmap_phys_attributes[pai] |=
846 (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
847
848 /*
849 * Remove the mapping from the pvlist for this physical page.
850 */
851 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
852
853 /* completely invalidate the PTE */
854 pmap_store_pte(cpte, 0);
855
856 UNLOCK_PVH(pai);
857
858 if (pvh_e != PV_HASHED_ENTRY_NULL) {
859 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
860 pvh_eh = pvh_e;
861
862 if (pvh_et == PV_HASHED_ENTRY_NULL) {
863 pvh_et = pvh_e;
864 }
865 pvh_cnt++;
866 }
867 } /* for loop */
868
869 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
870 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
871 }
872update_counts:
873 /*
874 * Update the counts
875 */
876#if TESTING
877 if (pmap->stats.resident_count < num_removed)
878 panic("pmap_remove_range: resident_count");
879#endif
880 assert(pmap->stats.resident_count >= num_removed);
881 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
882
883#if TESTING
884 if (pmap->stats.wired_count < num_unwired)
885 panic("pmap_remove_range: wired_count");
886#endif
887 assert(pmap->stats.wired_count >= num_unwired);
888 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
889
890 return;
891}
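
/*
 * The shape of pmap_remove_range() above, reduced to a standalone sketch:
 * pass 1 clears the valid bit of every live PTE ("freeze"), one TLB shootdown
 * is issued for the whole range, and only then does pass 2 harvest the
 * referenced/modified bits and zero the entries. Flushing once between the
 * passes, rather than per PTE, is the point illustrated; the types, the bit
 * positions and the flush hook are stand-ins, not kernel interfaces.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PTE_VALID	0x01ULL
#define SKETCH_PTE_REFMOD	0x60ULL		/* accessed|dirty, assumed positions */

static void sketch_tlb_shootdown(void) { /* stand-in for PMAP_UPDATE_TLBS() */ }

static uint64_t
sketch_remove_range(uint64_t *spte, uint64_t *epte)
{
	uint64_t *cpte, refmod = 0;

	for (cpte = spte; cpte < epte; cpte++)		/* pass 1: freeze         */
		if (*cpte != 0)
			*cpte &= ~SKETCH_PTE_VALID;

	sketch_tlb_shootdown();				/* one flush, whole range */

	for (cpte = spte; cpte < epte; cpte++) {	/* pass 2: reap           */
		if (*cpte == 0)
			continue;
		refmod |= (*cpte & SKETCH_PTE_REFMOD);	/* remember ref/change    */
		*cpte = 0;				/* completely invalidate  */
	}
	return refmod;
}

int
main(void)
{
	uint64_t ptes[4] = { 0x1001ULL | 0x60ULL, 0, 0x2001ULL, 0x3001ULL };

	printf("refmod bits: 0x%llx\n",
	    (unsigned long long)sketch_remove_range(ptes, ptes + 4));	/* 0x60 */
	return 0;
}
#endif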
892
893
894/*
895 * Remove the given range of addresses
896 * from the specified map.
897 *
898 * It is assumed that the start and end are properly
899 * rounded to the hardware page size.
900 */
901void
902pmap_remove(
903 pmap_t map,
904 addr64_t s64,
905 addr64_t e64)
906{
907 pt_entry_t *pde;
908 pt_entry_t *spte, *epte;
909 addr64_t l64;
910 uint64_t deadline;
911
912 pmap_intr_assert();
913
914 if (map == PMAP_NULL || s64 == e64)
915 return;
916
917 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
918 map,
919 (uint32_t) (s64 >> 32), s64,
920 (uint32_t) (e64 >> 32), e64);
921
922
923 PMAP_LOCK(map);
924
925#if 0
926 /*
927 * Check that address range in the kernel does not overlap the stacks.
928 * We initialize local static min/max variables once to avoid making
929 * 2 function calls for every remove. Note also that these functions
930 * both return 0 before kernel stacks have been initialized, and hence
931 * the panic is not triggered in this case.
932 */
933 if (map == kernel_pmap) {
934 static vm_offset_t kernel_stack_min = 0;
935 static vm_offset_t kernel_stack_max = 0;
936
937 if (kernel_stack_min == 0) {
938 kernel_stack_min = min_valid_stack_address();
939 kernel_stack_max = max_valid_stack_address();
940 }
941 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
942 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
943 panic("pmap_remove() attempted in kernel stack");
944 }
945#else
946
947 /*
948 * The values of kernel_stack_min and kernel_stack_max are no longer
949 * relevant now that we allocate kernel stacks in the kernel map,
950 * so the old code above no longer applies. If we wanted to check that
951 * we weren't removing a mapping of a page in a kernel stack we'd
952 * mark the PTE with an unused bit and check that here.
953 */
954
955#endif
956
957 deadline = rdtsc64() + max_preemption_latency_tsc;
958
959 while (s64 < e64) {
960 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
961 if (l64 > e64)
962 l64 = e64;
963 pde = pmap_pde(map, s64);
964
965 if (pde && (*pde & INTEL_PTE_VALID)) {
966 if (*pde & INTEL_PTE_PS) {
967 /*
968 * If we're removing a superpage, pmap_remove_range()
969 * must work on level 2 instead of level 1; and we're
970 * only passing a single level 2 entry instead of a
971 * level 1 range.
972 */
973 spte = pde;
974 epte = spte+1; /* excluded */
975 } else {
976 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
977 spte = &spte[ptenum(s64)];
978 epte = &spte[intel_btop(l64 - s64)];
979 }
980 pmap_remove_range(map, s64, spte, epte);
981 }
982 s64 = l64;
983
984 if (s64 < e64 && rdtsc64() >= deadline) {
985 PMAP_UNLOCK(map)
986 PMAP_LOCK(map)
987 deadline = rdtsc64() + max_preemption_latency_tsc;
988 }
989 }
990
991 PMAP_UNLOCK(map);
992
993 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
994 map, 0, 0, 0, 0);
995
996}
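
/*
 * A standalone sketch of the chunking loop in pmap_remove() above: each pass
 * advances s64 to the next pde_mapped_size boundary (clamped to e64), so each
 * pmap_remove_range() call stays within a single page-table page.
 * SKETCH_PDE_MAPPED_SIZE is assumed to be 2MB, the amount mapped by one PDE
 * with PAE/long-mode paging.
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PDE_MAPPED_SIZE	(2ULL * 1024 * 1024)	/* assumed */

int
main(void)
{
	uint64_t s64 = 0x100234000ULL;
	uint64_t e64 = 0x100634000ULL;

	while (s64 < e64) {
		uint64_t l64 = (s64 + SKETCH_PDE_MAPPED_SIZE) &
		    ~(SKETCH_PDE_MAPPED_SIZE - 1);
		if (l64 > e64)
			l64 = e64;
		/* one pmap_remove_range()-sized chunk: [s64, l64) */
		printf("chunk: 0x%llx .. 0x%llx\n",
		    (unsigned long long)s64, (unsigned long long)l64);
		s64 = l64;
	}
	return 0;
}
#endif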
997
998/*
999 * Routine: pmap_page_protect
1000 *
1001 * Function:
1002 * Lower the permission for all mappings to a given
1003 * page.
1004 */
1005void
1006pmap_page_protect(
1007 ppnum_t pn,
1008 vm_prot_t prot)
1009{
1010 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1011 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1012 pv_hashed_entry_t nexth;
1013 int pvh_cnt = 0;
1014 pv_rooted_entry_t pv_h;
1015 pv_rooted_entry_t pv_e;
1016 pv_hashed_entry_t pvh_e;
1017 pt_entry_t *pte;
1018 int pai;
1019 pmap_t pmap;
1020 boolean_t remove;
1021
1022 pmap_intr_assert();
1023 assert(pn != vm_page_fictitious_addr);
1024 if (pn == vm_page_guard_addr)
1025 return;
1026
1027 pai = ppn_to_pai(pn);
1028
1029 if (!IS_MANAGED_PAGE(pai)) {
1030 /*
1031 * Not a managed page.
1032 */
1033 return;
1034 }
1035 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1036 pn, prot, 0, 0, 0);
1037
1038 /*
1039 * Determine the new protection.
1040 */
1041 switch (prot) {
1042 case VM_PROT_READ:
1043 case VM_PROT_READ | VM_PROT_EXECUTE:
1044 remove = FALSE;
1045 break;
1046 case VM_PROT_ALL:
1047 return; /* nothing to do */
1048 default:
1049 remove = TRUE;
1050 break;
1051 }
1052
1053 pv_h = pai_to_pvh(pai);
1054
1055 LOCK_PVH(pai);
1056
1057
1058 /*
1059 * Walk down PV list, if any, changing or removing all mappings.
1060 */
1061 if (pv_h->pmap == PMAP_NULL)
1062 goto done;
1063
1064 pv_e = pv_h;
1065 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1066
1067 do {
1068 vm_map_offset_t vaddr;
1069
1070 pmap = pv_e->pmap;
1071 vaddr = pv_e->va;
1072 pte = pmap_pte(pmap, vaddr);
1073
1074#if DEBUG
1075 if (pa_index(pte_to_pa(*pte)) != pn)
1076 panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1077#endif
1078 if (0 == pte) {
1079 panic("pmap_page_protect() "
1080 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1081 pmap, pn, vaddr);
1082 }
1083 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1084
1085 /*
1086 * Remove the mapping if new protection is NONE
1087 * or if write-protecting a kernel mapping.
1088 */
1089 if (remove || pmap == kernel_pmap) {
1090 /*
1091 * Remove the mapping, collecting dirty bits.
1092 */
1093 pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
1094 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1095 pmap_phys_attributes[pai] |=
1096 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1097 pmap_store_pte(pte, 0);
1098
1099#if TESTING
1100 if (pmap->stats.resident_count < 1)
1101 panic("pmap_page_protect: resident_count");
1102#endif
1103 assert(pmap->stats.resident_count >= 1);
1104 OSAddAtomic(-1, &pmap->stats.resident_count);
1105
1106 /*
1107 * Deal with the pv_rooted_entry.
1108 */
1109
1110 if (pv_e == pv_h) {
1111 /*
1112 * Fix up head later.
1113 */
1114 pv_h->pmap = PMAP_NULL;
1115 } else {
1116 /*
1117 * Delete this entry.
1118 */
1119 pv_hash_remove(pvh_e);
1120 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1121 pvh_eh = pvh_e;
1122
1123 if (pvh_et == PV_HASHED_ENTRY_NULL)
1124 pvh_et = pvh_e;
1125 pvh_cnt++;
1126 }
1127 } else {
1128 /*
1129 * Write-protect.
1130 */
1131 pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
1132 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1133 }
1134 pvh_e = nexth;
1135 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1136
1137
1138 /*
1139 * If pv_head mapping was removed, fix it up.
1140 */
1141 if (pv_h->pmap == PMAP_NULL) {
1142 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1143
1144 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1145 pv_hash_remove(pvh_e);
1146 pv_h->pmap = pvh_e->pmap;
1147 pv_h->va = pvh_e->va;
1148 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1149 pvh_eh = pvh_e;
1150
1151 if (pvh_et == PV_HASHED_ENTRY_NULL)
1152 pvh_et = pvh_e;
1153 pvh_cnt++;
1154 }
1155 }
1156 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1157 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1158 }
1159done:
1160 UNLOCK_PVH(pai);
1161
1162 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1163 0, 0, 0, 0, 0);
1164}
1165
1166__private_extern__ void
1167pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
1168 if (pmap_pagetable_corruption_incidents > 0) {
1169 int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
1170 (*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
1171 for (i = 0; i < e; i++) {
1172 (*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
1173 }
1174 }
1175}
1176
1177void
1178mapping_free_prime(void)
1179{
1180 int i;
1181 pv_hashed_entry_t pvh_e;
1182 pv_hashed_entry_t pvh_eh;
1183 pv_hashed_entry_t pvh_et;
1184 int pv_cnt;
1185
1186 pv_cnt = 0;
1187 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1188 for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
1189 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1190
1191 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
1192 pvh_eh = pvh_e;
1193
1194 if (pvh_et == PV_HASHED_ENTRY_NULL)
1195 pvh_et = pvh_e;
1196 pv_cnt++;
1197 }
1198 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1199
1200 pv_cnt = 0;
1201 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1202 for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
1203 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1204
1205 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
1206 pvh_eh = pvh_e;
1207
1208 if (pvh_et == PV_HASHED_ENTRY_NULL)
1209 pvh_et = pvh_e;
1210 pv_cnt++;
1211 }
1212 PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1213
1214}
1215
1216static inline void
1217pmap_pagetable_corruption_log_setup(void) {
1218 if (pmap_pagetable_corruption_log_call == NULL) {
1219 nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
1220 thread_call_setup(&pmap_pagetable_corruption_log_call_data,
1221 (thread_call_func_t) pmap_pagetable_corruption_msg_log,
1222 (thread_call_param_t) &printf);
1223 pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
1224 }
1225}
1226
1227void
1228mapping_adjust(void)
1229{
1230 pv_hashed_entry_t pvh_e;
1231 pv_hashed_entry_t pvh_eh;
1232 pv_hashed_entry_t pvh_et;
1233 int pv_cnt;
1234 int i;
1235
1236 if (mapping_adjust_call == NULL) {
1237 thread_call_setup(&mapping_adjust_call_data,
1238 (thread_call_func_t) mapping_adjust,
1239 (thread_call_param_t) NULL);
1240 mapping_adjust_call = &mapping_adjust_call_data;
1241 }
1242
1243 pmap_pagetable_corruption_log_setup();
1244
1245 pv_cnt = 0;
1246 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1247 if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
1248 for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
1249 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1250
1251 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
1252 pvh_eh = pvh_e;
1253
1254 if (pvh_et == PV_HASHED_ENTRY_NULL)
1255 pvh_et = pvh_e;
1256 pv_cnt++;
1257 }
1258 PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1259 }
1260
1261 pv_cnt = 0;
1262 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1263 if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
1264 for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
1265 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1266
1267 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
1268 pvh_eh = pvh_e;
1269
1270 if (pvh_et == PV_HASHED_ENTRY_NULL)
1271 pvh_et = pvh_e;
1272 pv_cnt++;
1273 }
1274 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1275 }
1276 mappingrecurse = 0;
1277}
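
/*
 * The batching pattern shared by mapping_free_prime() and mapping_adjust()
 * above, in miniature: allocate a chunk of entries, thread them onto a local
 * singly-linked chain while tracking head, tail and count, then hand the
 * whole chain to the free list in a single splice. The types, the allocator
 * and sketch_free_list_splice() are stand-ins for the pv_hashed_entry
 * machinery, not kernel interfaces (the stand-in splice deliberately does
 * nothing, so this sketch leaks its allocations).
 */
#if 0	/* illustrative sketch, excluded from the build */
#include <stdlib.h>

struct sketch_pv {
	struct sketch_pv *next;
};

/* stand-in for PV_HASHED_FREE_LIST(head, tail, count) */
static void
sketch_free_list_splice(struct sketch_pv *head, struct sketch_pv *tail, int count)
{
	(void)head; (void)tail; (void)count;
}

static void
sketch_prime(int chunk)
{
	struct sketch_pv *head = NULL, *tail = NULL;
	int count = 0, i;

	for (i = 0; i < chunk; i++) {
		struct sketch_pv *e = malloc(sizeof(*e));	/* zalloc() analogue */
		if (e == NULL)
			break;
		e->next = head;		/* push onto the local chain    */
		head = e;
		if (tail == NULL)	/* first entry becomes the tail */
			tail = e;
		count++;
	}
	if (count != 0)
		sketch_free_list_splice(head, tail, count);
}

int
main(void)
{
	sketch_prime(16);
	return 0;
}
#endif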
1278