/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include <mach_ldebug.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>

#include <kern/lock.h>
#include <kern/spl.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <kern/xpr.h>

#include <i386/AT386/mp/mp_events.h>
/*
 *	Forward declarations for internal functions.
 */
extern void	pmap_remove_range(
			pmap_t		pmap,
			vm_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			vm_offset_t	phys,
			int		bits);

boolean_t	phys_attribute_test(
			vm_offset_t	phys,
			int		bits);

void		pmap_set_modify(vm_offset_t	phys);

void		phys_attribute_set(
			vm_offset_t	phys,
			int		bits);

#ifndef	set_dirbase
void		set_dirbase(vm_offset_t	dirbase);
#endif	/* set_dirbase */

#define	PA_TO_PTE(pa)	(pa_to_pte((pa) - VM_MIN_KERNEL_ADDRESS))
#define	iswired(pte)	((pte) & INTEL_PTE_WIRED)

pmap_t		real_pmap[NCPUS];

#define	WRITE_PTE(pte_p, pte_entry)	  *(pte_p) = (pte_entry);
#define	WRITE_PTE_FAST(pte_p, pte_entry)  *(pte_p) = (pte_entry);
/*
 *	Private data structures.
 */

/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_entry_t; the list is the pv_table.
 */

typedef struct pv_entry {
	struct pv_entry	*next;		/* next pv_entry */
	pmap_t		pmap;		/* pmap where mapping lies */
	vm_offset_t	va;		/* virtual address for mapping */
} *pv_entry_t;

#define	PV_ENTRY_NULL	((pv_entry_t) 0)

pv_entry_t	pv_head_table;		/* array of entries, one per page */

/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_list_zone if it becomes empty.
 */
pv_entry_t	pv_free_list;		/* free list at SPLVM */
decl_simple_lock_data(,pv_free_list_lock)
#define	PV_ALLOC(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	if ((pv_e = pv_free_list) != 0) { \
	    pv_free_list = pv_e->next; \
	} \
	simple_unlock(&pv_free_list_lock); \
}

#define	PV_FREE(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	pv_e->next = pv_free_list; \
	pv_free_list = pv_e; \
	simple_unlock(&pv_free_list_lock); \
}

zone_t		pv_list_zone;		/* zone of pv_entry structures */
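
/*
 * Illustration only: a minimal user-space sketch of the pv_entry free-list
 * idea above, assuming a plain pthread mutex stands in for
 * pv_free_list_lock and malloc stands in for the pv_list_zone refill.
 * The names pv_node, pv_pop and pv_push are hypothetical, not kernel API.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <pthread.h>
#include <stdlib.h>

struct pv_node {
	struct pv_node	*next;
	void		*pmap;		/* stand-in for pmap_t */
	unsigned long	va;
};

static struct pv_node	*pv_free;
static pthread_mutex_t	 pv_free_lock = PTHREAD_MUTEX_INITIALIZER;

static struct pv_node *
pv_pop(void)
{
	struct pv_node *e;

	pthread_mutex_lock(&pv_free_lock);
	if ((e = pv_free) != NULL)		/* PV_ALLOC fast path */
		pv_free = e->next;
	pthread_mutex_unlock(&pv_free_lock);
	if (e == NULL)
		e = malloc(sizeof(*e));		/* refill path: zalloc in the kernel */
	return e;
}

static void
pv_push(struct pv_node *e)			/* PV_FREE */
{
	pthread_mutex_lock(&pv_free_lock);
	e->next = pv_free;
	pv_free = e;
	pthread_mutex_unlock(&pv_free_lock);
}
#endif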
/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
vm_offset_t	vm_first_phys = (vm_offset_t) 0;
vm_offset_t	vm_last_phys  = (vm_offset_t) 0;
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

/*
 *	Index into pv_head table, its lock bits, and the modify/reference
 *	bits starting at vm_first_phys.
 */
#define pa_index(pa)	(atop(pa - vm_first_phys))

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char	*pmap_phys_attributes;

/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define	PHYS_NCACHE	INTEL_PTE_NCACHE

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))

/*
 *	We allocate page table pages directly from the VM system
 *	through this object.  It maps physical memory.
 */
vm_object_t	pmap_object = VM_OBJECT_NULL;
/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols:
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *	To protect against deadlock between these two cases, the pmap_lock
 *	is used.  There are three different locking protocols as a result:
 *
 *	1.  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
 *	    the pmap.
 *
 *	2.  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
 *	    lock on the pmap_lock (shared read), then lock the pmap
 *	    and finally the pv_lists as needed [i.e. pmap lock before
 *	    pv_list lock.]
 *
 *	3.  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
 *	    Get a write lock on the pmap_lock (exclusive write); this
 *	    also guarantees exclusive access to the pv_lists.  Lock the
 *	    pmaps as needed.
 *
 *	At no time may any routine hold more than one pmap lock or more than
 *	one pv_list lock.  Because interrupt level routines can allocate
 *	mbufs and cause pmap_enter's, the pmap_lock and the lock on the
 *	kernel_pmap can only be held at splhigh.
 */
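
/*
 * Illustration only: a user-space sketch of the three locking protocols
 * just described, assuming pthread primitives -- a rwlock models
 * pmap_system_lock and a mutex models each pmap's simple lock.  The
 * structure and helper names here are hypothetical, not the kernel's.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <pthread.h>

static pthread_rwlock_t	sys_lock = PTHREAD_RWLOCK_INITIALIZER;

struct toy_pmap {
	pthread_mutex_t	lock;
};

/* 1. pmap-only operation (e.g. pmap_extract): lock just the pmap. */
static void
op_pmap_only(struct toy_pmap *p)
{
	pthread_mutex_lock(&p->lock);
	/* ... read the page tables ... */
	pthread_mutex_unlock(&p->lock);
}

/* 2. pmap-based operation (e.g. pmap_enter): shared system lock, then pmap. */
static void
op_pmap_based(struct toy_pmap *p)
{
	pthread_rwlock_rdlock(&sys_lock);
	pthread_mutex_lock(&p->lock);
	/* ... pv_list locks, if needed, are always taken after the pmap ... */
	pthread_mutex_unlock(&p->lock);
	pthread_rwlock_unlock(&sys_lock);
}

/* 3. pv_list-based operation (e.g. pmap_page_protect): exclusive system lock. */
static void
op_pv_based(struct toy_pmap *pmaps[], int n)
{
	pthread_rwlock_wrlock(&sys_lock);	/* excludes all shared readers */
	for (int i = 0; i < n; i++) {
		pthread_mutex_lock(&pmaps[i]->lock);
		/* ... edit this pmap's mapping of the physical page ... */
		pthread_mutex_unlock(&pmaps[i]->lock);
	}
	pthread_rwlock_unlock(&sys_lock);
}
#endif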
#if	NCPUS > 1
/*
 *	We raise the interrupt level to splhigh, to block interprocessor
 *	interrupts during pmap operations.  We must take the CPU out of
 *	the cpus_active set while interrupts are blocked.
 */
#define SPLVM(spl)	{ \
	(spl) = splhigh(); \
	mp_disable_preemption(); \
	i_bit_clear(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
}

#define SPLX(spl)	{ \
	mp_disable_preemption(); \
	i_bit_set(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
	splx(spl); \
}

/*
 *	Lock on pmap system
 */

lock_t	pmap_system_lock;

#define PMAP_READ_LOCK(pmap, spl) {	\
	SPLVM(spl);			\
	lock_read(&pmap_system_lock);	\
	simple_lock(&(pmap)->lock);	\
}

#define PMAP_WRITE_LOCK(spl) {		\
	SPLVM(spl);			\
	lock_write(&pmap_system_lock);	\
}

#define PMAP_READ_UNLOCK(pmap, spl) {		\
	simple_unlock(&(pmap)->lock);		\
	lock_read_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_UNLOCK(spl) {		\
	lock_write_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_TO_READ_LOCK(pmap) {		\
	simple_lock(&(pmap)->lock);		\
	lock_write_to_read(&pmap_system_lock);	\
}

#define LOCK_PVH(index)		lock_pvh_pai(index)

#define UNLOCK_PVH(index)	unlock_pvh_pai(index)

#define PMAP_FLUSH_TLBS() {			\
	flush_tlb();				\
	i386_signal_cpus(MP_TLB_FLUSH);		\
}

#define	PMAP_RELOAD_TLBS() {			\
	i386_signal_cpus(MP_TLB_RELOAD);	\
	set_cr3(kernel_pmap->pdirbase);		\
}

#define PMAP_INVALIDATE_PAGE(map, addr) {	\
	if (map == kernel_pmap)			\
		invlpg((vm_offset_t) addr);	\
	else					\
		flush_tlb();			\
	i386_signal_cpus(MP_TLB_FLUSH);		\
}
#else	/* NCPUS > 1 */

#define SPLVM(spl)	{ (spl) = splhigh(); }
#define SPLX(spl)	splx (spl)

#define PMAP_READ_LOCK(pmap, spl)	SPLVM(spl)
#define PMAP_WRITE_LOCK(spl)		SPLVM(spl)
#define PMAP_READ_UNLOCK(pmap, spl)	SPLX(spl)
#define PMAP_WRITE_UNLOCK(spl)		SPLX(spl)
#define PMAP_WRITE_TO_READ_LOCK(pmap)

#if	MACH_RT
#define LOCK_PVH(index)		disable_preemption()
#define UNLOCK_PVH(index)	enable_preemption()
#else	/* MACH_RT */
#define LOCK_PVH(index)
#define UNLOCK_PVH(index)
#endif	/* MACH_RT */

#define PMAP_FLUSH_TLBS()		flush_tlb()
#define PMAP_RELOAD_TLBS()		set_cr3(kernel_pmap->pdirbase)
#define PMAP_INVALIDATE_PAGE(map, addr) {	\
	if (map == kernel_pmap)			\
		invlpg((vm_offset_t) addr);	\
	else					\
		flush_tlb();			\
}

#endif	/* NCPUS > 1 */
#define MAX_TBIS_SIZE	32		/* > this -> TBIA */ /* XXX */

#if	NCPUS > 1
/*
 *	Structures to keep track of pending TLB invalidations
 */
volatile boolean_t	cpu_update_needed[NCPUS];

#endif	/* NCPUS > 1 */

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_act()->map))
#define pmap_in_use(pmap, cpu)	(((pmap)->cpus_using & (1 << (cpu))) != 0)

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;
struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */
int		ptes_per_vm_page;	/* number of hardware ptes needed
					   to map one VM page. */
unsigned int	inuse_ptepages_count = 0;	/* debugging */

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern	vm_offset_t	hole_start, hole_end;

/*
 *	Page directory for kernel.
 */
pt_entry_t	*kpde = 0;	/* set by start.s - keep out of bss */

#if	DEBUG_ALIAS
#define PMAP_ALIAS_MAX 32
struct pmap_alias {
	vm_offset_t	rpc;
	pmap_t		pmap;
	vm_offset_t	va;
	int		cookie;
#define PMAP_ALIAS_COOKIE 0xdeadbeef
} pmap_aliasbuf[PMAP_ALIAS_MAX];
int pmap_alias_index = 0;
extern vm_offset_t get_rpc();

#endif	/* DEBUG_ALIAS */
/*
 *	Given an offset and a map, compute the address of the
 *	pte.  If the address is invalid with respect to the map
 *	then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 *	This is only used in machine-dependent code.
 */
pt_entry_t *
pmap_pte(
	register pmap_t		pmap,
	register vm_offset_t	addr)
{
	register pt_entry_t	*ptp;
	register pt_entry_t	pte;

	pte = pmap->dirbase[pdenum(pmap, addr)];
	if ((pte & INTEL_PTE_VALID) == 0)
		return(PT_ENTRY_NULL);
	ptp = (pt_entry_t *)ptetokv(pte);
	return(&ptp[ptenum(addr)]);
}

#define	pmap_pde(pmap, addr)	(&(pmap)->dirbase[pdenum(pmap, addr)])
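
/*
 * Illustration only: the index arithmetic behind pmap_pde()/pmap_pte() on
 * 32-bit x86 with 4KB pages -- bits 31..22 pick the page-directory entry,
 * bits 21..12 the page-table entry, bits 11..0 the byte offset.  The
 * constants are the standard hardware layout; the toy_* names are not
 * kernel identifiers.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdio.h>
#include <stdint.h>

#define TOY_PDESHIFT	22
#define TOY_PTESHIFT	12
#define TOY_PTEMASK	0x3ff

static unsigned toy_pdenum(uint32_t va) { return va >> TOY_PDESHIFT; }
static unsigned toy_ptenum(uint32_t va) { return (va >> TOY_PTESHIFT) & TOY_PTEMASK; }

int
main(void)
{
	uint32_t va = 0xc0123456;	/* a kernel-looking address */

	printf("va 0x%08x -> pde %u, pte %u, offset 0x%03x\n",
	       va, toy_pdenum(va), toy_ptenum(va), va & 0xfff);
	return 0;
}
#endif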
#define DEBUG_PTE_PAGE	0

#if	DEBUG_PTE_PAGE
void
ptep_check(
	ptep_t	ptep)
{
	register pt_entry_t	*pte, *epte;
	int			ctu, ctw;

	/* check the use and wired counts */
	if (ptep == PTE_PAGE_NULL)
		return;
	pte = pmap_pte(ptep->pmap, ptep->va);
	epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
	ctu = 0;
	ctw = 0;
	while (pte < epte) {
		if (pte->pfn != 0) {
			ctu++;
			if (pte->wired)
				ctw++;
		}
		pte += ptes_per_vm_page;
	}

	if (ctu != ptep->use_count || ctw != ptep->wired_count) {
		printf("use %d wired %d - actual use %d wired %d\n",
			ptep->use_count, ptep->wired_count, ctu, ctw);
		panic("pte count");
	}
}
#endif	/* DEBUG_PTE_PAGE */
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	register vm_offset_t	virt,
	register vm_offset_t	start,
	register vm_offset_t	end,
	register vm_prot_t	prot)
{
	register int		ps;

	ps = PAGE_SIZE;
	while (start < end) {
		pmap_enter(kernel_pmap, virt, start, prot, 0, FALSE);
		virt += ps;
		start += ps;
	}
	return(virt);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys) (i.e., devices).
 *	Otherwise like pmap_map.
 *	Sets no-cache, A, D.
 */
vm_offset_t
pmap_map_bd(
	register vm_offset_t	virt,
	register vm_offset_t	start,
	register vm_offset_t	end,
	vm_prot_t		prot)
{
	register pt_entry_t	template;
	register pt_entry_t	*pte;

	template = pa_to_pte(start)
		| INTEL_PTE_NCACHE
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;
	if (prot & VM_PROT_WRITE)
	    template |= INTEL_PTE_WRITE;

	while (start < end) {
		pte = pmap_pte(kernel_pmap, virt);
		if (pte == PT_ENTRY_NULL)
			panic("pmap_map_bd: Invalid kernel address\n");
		WRITE_PTE_FAST(pte, template)
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}

	flush_tlb();
	return(virt);
}
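
/*
 * Illustration only: the "template" trick used by pmap_map_bd() -- build
 * the flag bits once, then stamp consecutive ptes while advancing only the
 * physical frame.  The flag values follow the usual i386 PTE layout; the
 * array and TOY_* names are invented for the sketch.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>
#include <stdio.h>

#define TOY_PTE_VALID	0x001
#define TOY_PTE_WRITE	0x002
#define TOY_PTE_NCACHE	0x010
#define TOY_PGBYTES	4096u

int
main(void)
{
	uint32_t table[8];
	uint32_t pa = 0x10000000;	/* start of a device aperture */
	uint32_t template = pa | TOY_PTE_VALID | TOY_PTE_WRITE | TOY_PTE_NCACHE;

	for (int i = 0; i < 8; i++) {
		table[i] = template;		/* WRITE_PTE_FAST */
		template += TOY_PGBYTES;	/* pte_increment_pa */
	}
	printf("first 0x%08x last 0x%08x\n", table[0], table[7]);
	return 0;
}
#endif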
extern	char			*first_avail;
extern	vm_offset_t		virtual_avail, virtual_end;
extern	vm_offset_t		avail_start, avail_end, avail_next;

/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 *
 *	load_start:	PA where kernel was loaded
 *	avail_start	PA of first available physical page -
 *			after kernel page tables
 *	avail_end	PA of last available physical page
 *	virtual_avail	VA of first available page -
 *			after kernel page tables
 *	virtual_end	VA of last available page -
 *			end of kernel address space
 *
 *	&start_text	start of kernel text
 *	&etext		end of kernel text
 */
void
pmap_bootstrap(
	vm_offset_t	load_start)
{
	vm_offset_t	va, tva, paddr;
	pt_entry_t	template;
	pt_entry_t	*pde, *pte, *ptend;
	vm_size_t	morevm;		/* VM space for kernel map */

	/*
	 *	Set ptes_per_vm_page for general use.
	 */
	ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;

	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */

	kernel_pmap = &kernel_pmap_store;

#if	NCPUS > 1
	lock_init(&pmap_system_lock,
		  FALSE,		/* NOT a sleep lock */
		  ETAP_VM_PMAP_SYS,
		  ETAP_VM_PMAP_SYS_I);
#endif	/* NCPUS > 1 */

	simple_lock_init(&kernel_pmap->lock, ETAP_VM_PMAP_KERNEL);
	simple_lock_init(&pv_free_list_lock, ETAP_VM_PMAP_FREE);

	kernel_pmap->ref_count = 1;
	/*
	 *	The kernel page directory has been allocated;
	 *	its virtual address is in kpde.
	 *
	 *	Enough kernel page table pages have been allocated
	 *	to map low system memory, kernel text, kernel data/bss,
	 *	kdb's symbols, and the page directory and page tables.
	 *
	 *	No other physical memory has been allocated.
	 */

	/*
	 * Start mapping virtual memory to physical memory, 1-1,
	 * at end of mapped memory.
	 */

	virtual_avail = phystokv(avail_start);
	virtual_end = phystokv(avail_end);

	pde = kpde;
	pde += pdenum(kernel_pmap, virtual_avail);

	if (pte_to_pa(*pde) == 0) {
	    /* This pte has not been allocated */
	    pte = 0;
	    ptend = 0;
	}
	else {
	    pte = (pt_entry_t *)ptetokv(*pde);
						/* first pte of page */
	    ptend = pte+NPTES;			/* last pte of page */
	    pte += ptenum(virtual_avail);	/* point to pte that
						   maps first avail VA */
	    pde++;	/* point pde to first empty slot */
	}

	template = pa_to_pte(avail_start)
		| INTEL_PTE_VALID
		| INTEL_PTE_WRITE;
	for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) {
	    if (pte >= ptend) {
		pte = (pt_entry_t *)phystokv(virtual_avail);
		ptend = pte + NPTES;
		virtual_avail = (vm_offset_t)ptend;
		if (virtual_avail == hole_start)
		    virtual_avail = hole_end;
		*pde = PA_TO_PTE((vm_offset_t) pte)
			| INTEL_PTE_VALID
			| INTEL_PTE_WRITE;
		pde++;
	    }
	    WRITE_PTE_FAST(pte, template)
	    pte++;
	    pte_increment_pa(template);
	}

	avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS;
	avail_next = avail_start;
	/*
	 *	Figure out maximum kernel address.
	 *	Kernel virtual space is:
	 *		- at least three times physical memory
	 *		- at least VM_MIN_KERNEL_ADDRESS
	 *		- limited by VM_MAX_KERNEL_ADDRESS
	 */

	morevm = 3*avail_end;
	if (virtual_end + morevm > VM_MAX_KERNEL_ADDRESS)
	    morevm = VM_MAX_KERNEL_ADDRESS - virtual_end + 1;

	/*
	 *	startup requires additional virtual memory (for tables, buffers,
	 *	etc.).  The kd driver may also require some of that memory to
	 *	access the graphics board.
	 */
	*(int *)&template = 0;

	/*
	 *	Leave room for kernel-loaded servers, which have been linked at
	 *	addresses from VM_MIN_KERNEL_LOADED_ADDRESS to
	 *	VM_MAX_KERNEL_LOADED_ADDRESS.
	 */
	if (virtual_end + morevm < VM_MAX_KERNEL_LOADED_ADDRESS + 1)
		morevm = VM_MAX_KERNEL_LOADED_ADDRESS + 1 - virtual_end;

	virtual_end += morevm;
	for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) {
	    if (pte >= ptend) {
		pmap_next_page(&paddr);
		pte = (pt_entry_t *)phystokv(paddr);
		ptend = pte + NPTES;
		*pde = PA_TO_PTE((vm_offset_t) pte)
			| INTEL_PTE_VALID
			| INTEL_PTE_WRITE;
		pde++;
	    }
	    WRITE_PTE_FAST(pte, template)
	    pte++;
	}

	/* Push the virtual avail address above hole_end */
	if (virtual_avail < hole_end)
		virtual_avail = hole_end;

	virtual_end = va + morevm;
	/*
	 *	invalidate user virtual addresses
	 */
	memset(kpde,
	       0,
	       pdenum(kernel_pmap,VM_MIN_KERNEL_ADDRESS)*sizeof(pt_entry_t));
	kernel_pmap->dirbase = kpde;
	printf("Kernel virtual space from 0x%x to 0x%x.\n",
			VM_MIN_KERNEL_ADDRESS, virtual_end);

	avail_start = avail_next;
	printf("Available physical space from 0x%x to 0x%x\n",
			avail_start, avail_end);

	kernel_pmap->pdirbase = kvtophys((vm_offset_t)kernel_pmap->dirbase);

	if (cpuid_features() & CPUID_FEATURE_PAT)
	{
		uint64_t	pat;
		uint32_t	msr;

		msr = 0x277;		/* IA32_CR_PAT */
		asm volatile("rdmsr" : "=A" (pat) : "c" (msr));

		pat &= ~(0xfULL << 48);
		pat |= 0x01ULL << 48;

		asm volatile("wrmsr" :: "A" (pat), "c" (msr));
	}
}

void
pmap_virtual_space(
	vm_offset_t	*startp,
	vm_offset_t	*endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	register long		npages;
	vm_offset_t		addr;
	register vm_size_t	s;

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	npages = atop(avail_end - avail_start);
	s = (vm_size_t) (sizeof(struct pv_entry) * npages
				+ pv_lock_table_size(npages)
				+ npages);

	s = round_page(s);
	if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pmap_phys_attributes = (char *) addr;

	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	s = (vm_size_t) sizeof(struct pv_entry);
	pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */

	/*
	 *	Only now, when all of the data structures are allocated,
	 *	can we set vm_first_phys and vm_last_phys.  If we set them
	 *	too soon, the kmem_alloc_wired above will try to use these
	 *	data structures and blow up.
	 */

	vm_first_phys = avail_start;
	vm_last_phys = avail_end;
	pmap_initialized = TRUE;

	/*
	 *	Initialize pmap cache.
	 */
	pmap_cache_list = PMAP_NULL;
	pmap_cache_count = 0;
	simple_lock_init(&pmap_cache_lock, ETAP_VM_PMAP_CACHE);
}
#define	pmap_valid_page(x)	((avail_start <= x) && (x < avail_end))

#define	valid_page(x)	(pmap_initialized && pmap_valid_page(x))

boolean_t
pmap_verify_free(
	vm_offset_t	phys)
{
	pv_entry_t	pv_h;
	int		pai;
	spl_t		spl;
	boolean_t	result;

	assert(phys != vm_page_fictitious_addr);
	if (!pmap_initialized)
		return(TRUE);

	if (!pmap_valid_page(phys))
		return(FALSE);

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	result = (pv_h->pmap == PMAP_NULL);
	PMAP_WRITE_UNLOCK(spl);

	return(result);
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	vm_size_t	size)
{
	register pmap_t			p;
	register pmap_statistics_t	stats;

	/*
	 *	A software use-only map doesn't even need a map.
	 */

	if (size != 0) {
		return(PMAP_NULL);
	}

	/*
	 *	Try to get cached pmap, if this fails,
	 *	allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the page descriptor table from the pd_zone.
	 */

	simple_lock(&pmap_cache_lock);
	while ((p = pmap_cache_list) == PMAP_NULL) {

		vm_offset_t		dirbases;
		register int		i;

		simple_unlock(&pmap_cache_lock);

		/*
		 * XXX	NEEDS MP DOING ALLOC logic so that if multiple processors
		 * XXX	get here, only one allocates a chunk of pmaps.
		 * (for now we'll just let it go - safe but wasteful)
		 */

		/*
		 *	Allocate a chunk of pmaps.  Single kmem_alloc_wired
		 *	operation reduces kernel map fragmentation.
		 */

		if (kmem_alloc_wired(kernel_map, &dirbases,
				     pmap_alloc_chunk * INTEL_PGBYTES)
							!= KERN_SUCCESS)
			panic("pmap_create.1");

		for (i = pmap_alloc_chunk; i > 0 ; i--) {
			p = (pmap_t) zalloc(pmap_zone);
			if (p == PMAP_NULL)
				panic("pmap_create.2");

			/*
			 *	Initialize pmap.  Don't bother with
			 *	ref count as cache list is threaded
			 *	through it.  It'll be set on cache removal.
			 */
			p->dirbase = (pt_entry_t *) dirbases;
			dirbases += INTEL_PGBYTES;
			memcpy(p->dirbase, kpde, INTEL_PGBYTES);
			p->pdirbase = kvtophys((vm_offset_t)p->dirbase);

			simple_lock_init(&p->lock, ETAP_VM_PMAP);

			/*
			 *	Initialize statistics.
			 */
			stats = &p->stats;
			stats->resident_count = 0;
			stats->wired_count = 0;

			simple_lock(&pmap_cache_lock);
			p->ref_count = (int) pmap_cache_list;
			pmap_cache_list = p;
			pmap_cache_count++;
			simple_unlock(&pmap_cache_lock);
		}
		simple_lock(&pmap_cache_lock);
	}

	assert(p->stats.resident_count == 0);
	assert(p->stats.wired_count == 0);
	p->stats.resident_count = 0;
	p->stats.wired_count = 0;

	pmap_cache_list = (pmap_t) p->ref_count;
	p->ref_count = 1;
	pmap_cache_count--;
	simple_unlock(&pmap_cache_lock);

	return(p);
}
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */

void
pmap_destroy(
	register pmap_t	p)
{
	register pt_entry_t	*pdep;
	register vm_offset_t	pa;
	register int		c;
	spl_t			s;
	register vm_page_t	m;

	if (p == PMAP_NULL)
		return;

	SPLVM(s);
	simple_lock(&p->lock);
	c = --p->ref_count;
	if (c == 0) {
		register int	my_cpu;

		mp_disable_preemption();
		my_cpu = cpu_number();

		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */

		if (real_pmap[my_cpu] == p) {
			PMAP_CPU_CLR(p, my_cpu);
			real_pmap[my_cpu] = kernel_pmap;
			PMAP_RELOAD_TLBS();
		}
		mp_enable_preemption();
	}
	simple_unlock(&p->lock);
	SPLX(s);

	if (c != 0) {
	    return;	/* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	pdep = p->dirbase;
	while (pdep < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)]) {
	    if (*pdep & INTEL_PTE_VALID) {
		pa = pte_to_pa(*pdep);
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, pa);
		if (m == VM_PAGE_NULL)
		    panic("pmap_destroy: pte page not in object");
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		vm_object_unlock(pmap_object);
		vm_page_unlock_queues();

		/*
		 *	Clear pdes, this might be headed for the cache.
		 */
		c = ptes_per_vm_page;
		do {
		    *pdep = 0;
		    pdep++;
		} while (--c > 0);
	    }
	    else {
		pdep += ptes_per_vm_page;
	    }
	}
	assert(p->stats.resident_count == 0);
	assert(p->stats.wired_count == 0);

	/*
	 *	Add to cache if not already full
	 */
	simple_lock(&pmap_cache_lock);
	if (pmap_cache_count <= pmap_cache_max) {
		p->ref_count = (int) pmap_cache_list;
		pmap_cache_list = p;
		pmap_cache_count++;
		simple_unlock(&pmap_cache_lock);
	}
	else {
		simple_unlock(&pmap_cache_lock);
		kmem_free(kernel_map, (vm_offset_t)p->dirbase, INTEL_PGBYTES);
		zfree(pmap_zone, (vm_offset_t) p);
	}
}
/*
 *	Add a reference to the specified pmap.
 */

void
pmap_reference(
	register pmap_t	p)
{
	spl_t	s;

	if (p != PMAP_NULL) {
		SPLVM(s);
		simple_lock(&p->lock);
		p->ref_count++;
		simple_unlock(&p->lock);
		SPLX(s);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t			pmap,
	vm_offset_t		va,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	register pt_entry_t	*cpte;
	int			num_removed, num_unwired;
	int			pai;
	vm_offset_t		pa;

#if	DEBUG_PTE_PAGE
	if (pmap != kernel_pmap)
		ptep_check(get_pte_page(spte));
#endif	/* DEBUG_PTE_PAGE */
	num_removed = 0;
	num_unwired = 0;

	for (cpte = spte; cpte < epte;
	     cpte += ptes_per_vm_page, va += PAGE_SIZE) {

	    pa = pte_to_pa(*cpte);
	    if (pa == 0)
		continue;

	    num_removed++;
	    if (iswired(*cpte))
		num_unwired++;

	    if (!valid_page(pa)) {

		/*
		 *	Outside range of managed physical memory.
		 *	Just remove the mappings.
		 */
		register int		i = ptes_per_vm_page;
		register pt_entry_t	*lpte = cpte;

		do {
		    *lpte = 0;
		    lpte++;
		} while (--i > 0);
		continue;
	    }

	    pai = pa_index(pa);
	    LOCK_PVH(pai);

	    /*
	     *	Get the modify and reference bits.
	     */
	    {
		register int		i;
		register pt_entry_t	*lpte;

		i = ptes_per_vm_page;
		lpte = cpte;
		do {
		    pmap_phys_attributes[pai] |=
			*lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
		    *lpte = 0;
		    lpte++;
		} while (--i > 0);
	    }

	    /*
	     *	Remove the mapping from the pvlist for
	     *	this physical page.
	     */
	    {
		register pv_entry_t	pv_h, prev, cur;

		pv_h = pai_to_pvh(pai);
		if (pv_h->pmap == PMAP_NULL) {
		    panic("pmap_remove: null pv_list!");
		}
		if (pv_h->va == va && pv_h->pmap == pmap) {
		    /*
		     * Header is the pv_entry.  Copy the next one
		     * to header and free the next one (we cannot
		     * free the header)
		     */
		    cur = pv_h->next;
		    if (cur != PV_ENTRY_NULL) {
			*pv_h = *cur;
			PV_FREE(cur);
		    }
		    else {
			pv_h->pmap = PMAP_NULL;
		    }
		}
		else {
		    cur = pv_h;
		    do {
			prev = cur;
			if ((cur = prev->next) == PV_ENTRY_NULL) {
			    panic("pmap-remove: mapping not in pv_list!");
			}
		    } while (cur->va != va || cur->pmap != pmap);
		    prev->next = cur->next;
		    PV_FREE(cur);
		}
		UNLOCK_PVH(pai);
	    }
	}

	/*
	 *	Update the counts
	 */
	assert(pmap->stats.resident_count >= num_removed);
	pmap->stats.resident_count -= num_removed;
	assert(pmap->stats.wired_count >= num_unwired);
	pmap->stats.wired_count -= num_unwired;
}
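
/*
 * Illustration only: a flat-table model of pmap_remove_range() -- clear
 * each valid entry in [first, last), but first fold its modified and
 * referenced bits into a per-page attribute byte so that history is not
 * lost.  The TOY_* names and the single-level table are inventions of the
 * sketch, not kernel structures.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>

#define TOY_PTE_VALID	0x001
#define TOY_PTE_REF	0x020
#define TOY_PTE_MOD	0x040

static void
toy_remove_range(uint32_t *pte, int first, int last,
		 uint8_t *attrs /* indexed by physical frame */)
{
	for (int i = first; i < last; i++) {
		if (!(pte[i] & TOY_PTE_VALID))
			continue;
		uint32_t frame = pte[i] >> 12;
		/* copy the bits, PTE layout and attribute layout agree */
		attrs[frame] |= pte[i] & (TOY_PTE_REF | TOY_PTE_MOD);
		pte[i] = 0;		/* invalidate; caller flushes the TLB */
	}
}
#endif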
/*
 *	Remove phys addr if mapped in specified map
 *
 */
void
pmap_remove_some_phys(
	pmap_t		map,
	vm_offset_t	phys_addr)
{

/* Implement to support working set code */

}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */

void
pmap_remove(
	pmap_t		map,
	vm_offset_t	s,
	vm_offset_t	e)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	pde = pmap_pde(map, s);
	while (s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)ptetokv(*pde);
		spte = &spte[ptenum(s)];
		epte = &spte[intel_btop(l-s)];
		pmap_remove_range(map, s, spte, epte);
	    }
	    s = l;
	    pde++;
	}

	PMAP_READ_UNLOCK(map, spl);
}
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect(
	vm_offset_t	phys,
	vm_prot_t	prot)
{
	pv_entry_t		pv_h, prev;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	boolean_t		remove;

	assert(phys != vm_page_fictitious_addr);
	if (!valid_page(phys)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Determine the new protection.
	 */
	switch (prot) {
	    case VM_PROT_READ:
	    case VM_PROT_READ|VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	    case VM_PROT_ALL:
		return;	/* nothing to do */
	    default:
		remove = TRUE;
		break;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Walk down PV list, changing or removing all mappings.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {

	    prev = pv_e = pv_h;
	    do {
		pmap = pv_e->pmap;
		/*
		 *	Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    /* assert(*pte & INTEL_PTE_VALID); XXX */
		    /* assert(pte_to_phys(*pte) == phys); */

		    /*
		     * Invalidate TLBs for all CPUs using this mapping.
		     */
		    PMAP_INVALIDATE_PAGE(pmap, va);
		}

		/*
		 * Remove the mapping if new protection is NONE
		 * or if write-protecting a kernel mapping.
		 */
		if (remove || pmap == kernel_pmap) {
		    /*
		     * Remove the mapping, collecting any modify bits.
		     */
		    {
			register int	i = ptes_per_vm_page;

			do {
			    pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			    *pte++ = 0;
			} while (--i > 0);
		    }

		    assert(pmap->stats.resident_count >= 1);
		    pmap->stats.resident_count--;

		    /*
		     * Remove the pv_entry.
		     */
		    if (pv_e == pv_h) {
			/*
			 * Fix up head later.
			 */
			pv_h->pmap = PMAP_NULL;
		    }
		    else {
			/*
			 * Delete this entry.
			 */
			prev->next = pv_e->next;
			PV_FREE(pv_e);
		    }
		}
		else {
		    register int	i = ptes_per_vm_page;

		    do {
			*pte &= ~INTEL_PTE_WRITE;
			pte++;
		    } while (--i > 0);

		    prev = pv_e;
		}

		simple_unlock(&pmap->lock);

	    } while ((pv_e = prev->next) != PV_ENTRY_NULL);

	    /*
	     * If pv_head mapping was removed, fix it up.
	     */
	    if (pv_h->pmap == PMAP_NULL) {
		pv_e = pv_h->next;
		if (pv_e != PV_ENTRY_NULL) {
		    *pv_h = *pv_e;
		    PV_FREE(pv_e);
		}
	    }
	}

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
void
pmap_protect(
	pmap_t		map,
	vm_offset_t	s,
	vm_offset_t	e,
	vm_prot_t	prot)
{
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;
	spl_t			spl;

	if (map == PMAP_NULL)
		return;

	/*
	 *	Determine the new protection.
	 */
	switch (prot) {
	    case VM_PROT_READ:
	    case VM_PROT_READ|VM_PROT_EXECUTE:
		break;
	    case VM_PROT_READ|VM_PROT_WRITE:
	    case VM_PROT_ALL:
		return;	/* nothing to do */
	    default:
		pmap_remove(map, s, e);
		return;
	}

	/*
	 *	If write-protecting in the kernel pmap,
	 *	remove the mappings; the i386 ignores
	 *	the write-permission bit in kernel mode.
	 *
	 *	XXX should be #if'd for i386
	 */

	if (cpuid_family == CPUID_FAMILY_386)
	    if (map == kernel_pmap) {
		pmap_remove(map, s, e);
		return;
	    }

	SPLVM(spl);
	simple_lock(&map->lock);

	pde = pmap_pde(map, s);
	while (s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)ptetokv(*pde);
		spte = &spte[ptenum(s)];
		epte = &spte[intel_btop(l-s)];

		while (spte < epte) {
		    if (*spte & INTEL_PTE_VALID)
			*spte &= ~INTEL_PTE_WRITE;
		    spte++;
		}
	    }
	    s = l;
	    pde++;
	}

	PMAP_FLUSH_TLBS();

	simple_unlock(&map->lock);
	SPLX(spl);
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(
	register pmap_t		pmap,
	vm_offset_t		v,
	register vm_offset_t	pa,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	register pt_entry_t	*pte;
	register pv_entry_t	pv_h;
	register int		i, pai;
	pv_entry_t		pv_e;
	pt_entry_t		template;
	spl_t			spl;
	vm_offset_t		old_pa;

	XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
	    current_thread()->top_act,
	    pmap, v, pa, prot);

	assert(pa != vm_page_fictitious_addr);
	if (pmap_debug)
		printf("pmap(%x, %x)\n", v, pa);
	if (pmap == PMAP_NULL)
		return;

	if (cpuid_family == CPUID_FAMILY_386)
	if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
	    && !wired /* hack for io_wire */ ) {
	    /*
	     *	Because the 386 ignores write protection in kernel mode,
	     *	we cannot enter a read-only kernel mapping, and must
	     *	remove an existing mapping if changing it.
	     *
	     *	XXX should be #if'd for i386
	     */
	    PMAP_READ_LOCK(pmap, spl);

	    pte = pmap_pte(pmap, v);
	    if (pte != PT_ENTRY_NULL && pte_to_pa(*pte) != 0) {
		/*
		 *	Invalidate the translation buffer,
		 *	then remove the mapping.
		 */
		PMAP_INVALIDATE_PAGE(pmap, v);
		pmap_remove_range(pmap, v, pte,
				  pte + ptes_per_vm_page);
	    }
	    PMAP_READ_UNLOCK(pmap, spl);
	    return;
	}
	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
	pv_e = PV_ENTRY_NULL;
Retry:
	PMAP_READ_LOCK(pmap, spl);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */

	while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		/*
		 *	Must unlock to expand the pmap.
		 */
		PMAP_READ_UNLOCK(pmap, spl);
		pmap_expand(pmap, v);
		PMAP_READ_LOCK(pmap, spl);
	}
	/*
	 *	Special case if the physical page is already mapped
	 *	at this address.
	 */
	old_pa = pte_to_pa(*pte);
	if (old_pa == pa) {
	    /*
	     *	May be changing its wired attribute or protection
	     */

	    template = pa_to_pte(pa) | INTEL_PTE_VALID;

	    if(flags & VM_MEM_NOT_CACHEABLE) {
		if(!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	    }

	    if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	    if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	    if (wired) {
		template |= INTEL_PTE_WIRED;
		if (!iswired(*pte))
		    pmap->stats.wired_count++;
	    }
	    else {
		if (iswired(*pte)) {
		    assert(pmap->stats.wired_count >= 1);
		    pmap->stats.wired_count--;
		}
	    }

	    PMAP_INVALIDATE_PAGE(pmap, v);

	    i = ptes_per_vm_page;
	    do {
		if (*pte & INTEL_PTE_MOD)
		    template |= INTEL_PTE_MOD;
		WRITE_PTE(pte, template)
		pte++;
		pte_increment_pa(template);
	    } while (--i > 0);

	    goto Done;
	}
	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	SHARING_FAULTS complicates this slightly in that it cannot
	 *	replace the mapping, but must remove it (because adding the
	 *	pvlist entry for the new mapping may remove others), and
	 *	hence always enters the new mapping at step 3)
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */
	if (old_pa != (vm_offset_t) 0) {

	    PMAP_INVALIDATE_PAGE(pmap, v);

#if	DEBUG_PTE_PAGE
	    if (pmap != kernel_pmap)
		ptep_check(get_pte_page(pte));
#endif	/* DEBUG_PTE_PAGE */

	    /*
	     *	Don't do anything to pages outside valid memory here.
	     *	Instead convince the code that enters a new mapping
	     *	to overwrite the old one.
	     */

	    if (valid_page(old_pa)) {

		pai = pa_index(old_pa);
		LOCK_PVH(pai);

		assert(pmap->stats.resident_count >= 1);
		pmap->stats.resident_count--;
		if (iswired(*pte)) {
		    assert(pmap->stats.wired_count >= 1);
		    pmap->stats.wired_count--;
		}
		i = ptes_per_vm_page;
		do {
		    pmap_phys_attributes[pai] |=
			*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
		    WRITE_PTE(pte, 0)
		    pte++;
		    pte_increment_pa(template);
		} while (--i > 0);

		/*
		 * Put pte back to beginning of page since it'll be
		 * used later to enter the new page.
		 */
		pte -= ptes_per_vm_page;

		/*
		 *	Remove the mapping from the pvlist for
		 *	this physical page.
		 */
		{
		    register pv_entry_t	prev, cur;

		    pv_h = pai_to_pvh(pai);
		    if (pv_h->pmap == PMAP_NULL) {
			panic("pmap_enter: null pv_list!");
		    }
		    if (pv_h->va == v && pv_h->pmap == pmap) {
			/*
			 * Header is the pv_entry.  Copy the next one
			 * to header and free the next one (we cannot
			 * free the header)
			 */
			cur = pv_h->next;
			if (cur != PV_ENTRY_NULL) {
			    *pv_h = *cur;
			    pv_e = cur;
			}
			else {
			    pv_h->pmap = PMAP_NULL;
			}
		    }
		    else {
			cur = pv_h;
			do {
			    prev = cur;
			    if ((cur = prev->next) == PV_ENTRY_NULL) {
				panic("pmap_enter: mapping not in pv_list!");
			    }
			} while (cur->va != v || cur->pmap != pmap);
			prev->next = cur->next;
			pv_e = cur;
		    }
		}
		UNLOCK_PVH(pai);
	    }
	    else {

		/*
		 *	old_pa is not managed.  Pretend it's zero so code
		 *	at Step 3) will enter new mapping (overwriting old
		 *	one).  Do removal part of accounting.
		 */
		old_pa = (vm_offset_t) 0;
		assert(pmap->stats.resident_count >= 1);
		pmap->stats.resident_count--;
		if (iswired(*pte)) {
		    assert(pmap->stats.wired_count >= 1);
		    pmap->stats.wired_count--;
		}
	    }
	}
	if (valid_page(pa)) {

	    /*
	     *	Step 2) Enter the mapping in the PV list for this
	     *	physical page.
	     */

	    pai = pa_index(pa);

#if	SHARING_FAULTS
RetryPvList:
	    /*
	     * We can return here from the sharing fault code below
	     * in case we removed the only entry on the pv list and thus
	     * must enter the new one in the list header.
	     */
#endif	/* SHARING_FAULTS */
	    LOCK_PVH(pai);
	    pv_h = pai_to_pvh(pai);

	    if (pv_h->pmap == PMAP_NULL) {
		pv_h->va = v;
		pv_h->pmap = pmap;
		pv_h->next = PV_ENTRY_NULL;
	    }
	    else {
#if	DEBUG
		{
		    /*
		     * check that this mapping is not already there
		     * or there is no alias for this mapping in the same map
		     */
		    pv_entry_t	e = pv_h;
		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap && e->va == v)
			    panic("pmap_enter: already in pv_list");
			e = e->next;
		    }
		}
#endif	/* DEBUG */
#if	SHARING_FAULTS
		{
		    /*
		     * do sharing faults.
		     * if we find an entry on this pv list in the same address
		     * space, remove it.  we know there will not be more
		     * than one.
		     */
		    pv_entry_t	e = pv_h;
		    pt_entry_t	*opte;

		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap) {
			    /*
			     *	Remove it, drop pv list lock first.
			     */
			    UNLOCK_PVH(pai);

			    opte = pmap_pte(pmap, e->va);
			    assert(opte != PT_ENTRY_NULL);
			    /*
			     *	Invalidate the translation buffer,
			     *	then remove the mapping.
			     */
			    PMAP_INVALIDATE_PAGE(pmap, e->va);
			    pmap_remove_range(pmap, e->va, opte,
					      opte + ptes_per_vm_page);
			    /*
			     * We could have removed the head entry,
			     * so there could be no more entries
			     * and so we have to use the pv head entry.
			     * so, go back to the top and try the entry
			     * again.
			     */
			    goto RetryPvList;
			}
			e = e->next;
		    }

		    /*
		     * check that this mapping is not already there
		     */
		    e = pv_h;
		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap)
			    panic("pmap_enter: alias in pv_list");
			e = e->next;
		    }
		}
#endif	/* SHARING_FAULTS */
#if	DEBUG_ALIAS
		{
		    /*
		     * check for aliases within the same address space.
		     */
		    pv_entry_t	e = pv_h;
		    vm_offset_t	rpc = get_rpc();

		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap) {
			    /*
			     * log this entry in the alias ring buffer
			     * if it's not there already.
			     */
			    struct pmap_alias	*pma;
			    int			ii, logit;

			    logit = TRUE;
			    for (ii = 0; ii < pmap_alias_index; ii++) {
				if (pmap_aliasbuf[ii].rpc == rpc) {
				    /* found it in the log already */
				    logit = FALSE;
				    break;
				}
			    }
			    if (logit) {
				pma = &pmap_aliasbuf[pmap_alias_index];
				pma->pmap = pmap;
				pma->va = v;
				pma->rpc = rpc;
				pma->cookie = PMAP_ALIAS_COOKIE;
				if (++pmap_alias_index >= PMAP_ALIAS_MAX)
				    panic("pmap_enter: exhausted alias log");
			    }
			}
			e = e->next;
		    }
		}
#endif	/* DEBUG_ALIAS */
		/*
		 *	Add new pv_entry after header.
		 */
		if (pv_e == PV_ENTRY_NULL) {
		    PV_ALLOC(pv_e);
		    if (pv_e == PV_ENTRY_NULL) {
			UNLOCK_PVH(pai);
			PMAP_READ_UNLOCK(pmap, spl);

			pv_e = (pv_entry_t) zalloc(pv_list_zone);
			goto Retry;
		    }
		}
		pv_e->va = v;
		pv_e->pmap = pmap;
		pv_e->next = pv_h->next;
		pv_h->next = pv_e;
		/*
		 *	Remember that we used the pvlist entry.
		 */
		pv_e = PV_ENTRY_NULL;
	    }
	    UNLOCK_PVH(pai);
	}
	/*
	 *	Step 3) Enter and count the mapping.
	 */

	pmap->stats.resident_count++;

	/*
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;

	if(flags & VM_MEM_NOT_CACHEABLE) {
		if(!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}

	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		pmap->stats.wired_count++;
	}
	i = ptes_per_vm_page;
	do {
		WRITE_PTE(pte, template)
		pte++;
		pte_increment_pa(template);
	} while (--i > 0);
Done:
	if (pv_e != PV_ENTRY_NULL) {
		PV_FREE(pv_e);
	}

	PMAP_READ_UNLOCK(pmap, spl);
}
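
/*
 * Illustration only: a condensed model of the pmap_enter() flow above --
 * if the va already maps the same physical frame, only the flag template
 * is rebuilt; otherwise the old reverse-map (pv) entry is dropped and a
 * new one recorded before the pte is written.  Every type and helper here
 * is a toy invented for the sketch, not a kernel structure.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>

struct toy_mapping {		/* one slot of a per-frame reverse map */
	int		used;
	uint32_t	va;
};

static void
toy_enter(uint32_t *pte_slot, struct toy_mapping *pv, uint32_t va,
	  uint32_t pa, uint32_t prot_bits)
{
	uint32_t old_pa = *pte_slot & ~0xfffu;

	if (*pte_slot && old_pa == pa) {
		/* Case 1: same frame -- just rebuild the flags. */
		*pte_slot = pa | prot_bits;
		return;
	}
	if (*pte_slot) {
		/* Case 2a: different frame -- retire the old pv entry. */
		pv[old_pa >> 12].used = 0;
	}
	/* Case 2b: record the new reverse mapping, then write the pte. */
	pv[pa >> 12].used = 1;
	pv[pa >> 12].va = va;
	*pte_slot = pa | prot_bits;
}
#endif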
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	register pmap_t	map,
	vm_offset_t	v,
	boolean_t	wired)
{
	register pt_entry_t	*pte;
	register int		i;
	spl_t			spl;

	/*
	 *	We must grab the pmap system lock because we may
	 *	change a pte_page queue.
	 */
	PMAP_READ_LOCK(map, spl);

	if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
	    /*
	     *	wiring down mapping
	     */
	    map->stats.wired_count++;
	    i = ptes_per_vm_page;
	    do {
		*pte++ |= INTEL_PTE_WIRED;
	    } while (--i > 0);
	}
	else if (!wired && iswired(*pte)) {
	    assert(map->stats.wired_count >= 1);
	    map->stats.wired_count--;
	    i = ptes_per_vm_page;
	    do {
		*pte++ &= ~INTEL_PTE_WIRED;
	    } while (--i > 0);
	}

	PMAP_READ_UNLOCK(map, spl);
}
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */

vm_offset_t
pmap_extract(
	register pmap_t	pmap,
	vm_offset_t	va)
{
	register pt_entry_t	*pte;
	register vm_offset_t	pa;
	spl_t			spl;

	SPLVM(spl);
	simple_lock(&pmap->lock);
	if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
	    pa = (vm_offset_t) 0;
	else if (!(*pte & INTEL_PTE_VALID))
	    pa = (vm_offset_t) 0;
	else
	    pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
	simple_unlock(&pmap->lock);
	SPLX(spl);
	return(pa);
}
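
/*
 * Illustration only: the essence of pmap_extract() on a toy, single-level
 * table -- look the va up and, if the entry is valid, splice the page
 * offset back in; return 0 for "not mapped" as the kernel routine does.
 * The toy_* names are not kernel identifiers.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>

#define TOY_PTE_VALID	0x001

static uint32_t
toy_extract(const uint32_t *pte, uint32_t va)
{
	uint32_t e = pte[va >> 12];

	if (!(e & TOY_PTE_VALID))
		return 0;
	return (e & ~0xfffu) | (va & 0xfffu);
}
#endif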
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
void
pmap_expand(
	register pmap_t		map,
	register vm_offset_t	v)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register vm_offset_t	pa;
	register int		i;
	spl_t			spl;

	if (map == kernel_pmap)
	    panic("pmap_expand");

	/*
	 *	We cannot allocate the pmap_object in pmap_init,
	 *	because it is called before the zone package is up.
	 *	Allocate it now if it is missing.
	 */
	if (pmap_object == VM_OBJECT_NULL)
	    pmap_object = vm_object_allocate(avail_end);

	/*
	 *	Allocate a VM page for the level 2 page table entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	Map the page to its physical address so that it
	 *	can be found later.
	 */
	pa = m->phys_addr;
	vm_object_lock(pmap_object);
	vm_page_insert(m, pmap_object, pa);
	vm_page_lock_queues();
	vm_page_wire(m);
	inuse_ptepages_count++;
	vm_object_unlock(pmap_object);
	vm_page_unlock_queues();

	memset((void *)phystokv(pa), 0, PAGE_SIZE);

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, v) != PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(pmap_object);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		vm_page_unlock_queues();
		vm_object_unlock(pmap_object);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	i = ptes_per_vm_page;
	pdp = &map->dirbase[pdenum(map, v) & ~(i-1)];
	do {
	    *pdp = pa_to_pte(pa)
		| INTEL_PTE_VALID
		| INTEL_PTE_USER
		| INTEL_PTE_WRITE;
	    pdp++;
	    pa += INTEL_PGBYTES;
	} while (--i > 0);

	PMAP_READ_UNLOCK(map, spl);
	return;
}
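
/*
 * Illustration only: the shape of pmap_expand() -- allocate a second-level
 * table outside the lock, then re-check under the lock whether another
 * thread already installed one, and free the extra if so.  A toy directory
 * of pointers and pthreads stand in for the kernel structures; the names
 * are invented for the sketch.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>

#define TOY_NPTES 1024

static uint32_t		*toy_dir[1024];
static pthread_mutex_t	 toy_lock = PTHREAD_MUTEX_INITIALIZER;

static uint32_t *
toy_expand(unsigned pdenum)
{
	uint32_t *newtab = calloc(TOY_NPTES, sizeof(uint32_t));

	pthread_mutex_lock(&toy_lock);
	if (toy_dir[pdenum] != NULL) {	/* someone else expanded us first */
		pthread_mutex_unlock(&toy_lock);
		free(newtab);
		return toy_dir[pdenum];
	}
	toy_dir[pdenum] = newtab;
	pthread_mutex_unlock(&toy_lock);
	return newtab;
}
#endif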
/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(
	pmap_t		dst_pmap,
	pmap_t		src_pmap,
	vm_offset_t	dst_addr,
	vm_size_t	len,
	vm_offset_t	src_addr)
{
#ifdef	lint
	dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
#endif	/* lint */
}
/*
 *	pmap_sync_caches_phys(ppnum_t pa)
 *
 *	Invalidates all of the instruction cache on a physical page and
 *	pushes any dirty data from the data cache for the same physical page
 */

void
pmap_sync_caches_phys(ppnum_t pa)
{
	if (!(cpuid_features() & CPUID_FEATURE_SS))
		__asm__ volatile("wbinvd");
	return;
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	vm_offset_t		pa;
	int			wired;
	spl_t			spl;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_READ_LOCK(p, spl);

	for (pdp = p->dirbase;
	     pdp < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)];
	     pdp += ptes_per_vm_page)
	{
	    if (*pdp & INTEL_PTE_VALID) {
	      if (*pdp & INTEL_PTE_REF) {
		*pdp &= ~INTEL_PTE_REF;
	      }
	      else {
		pa = pte_to_pa(*pdp);
		ptp = (pt_entry_t *)phystokv(pa);
		eptp = ptp + NPTES*ptes_per_vm_page;

		/*
		 * If the pte page has any wired mappings, we cannot
		 * free it.
		 */
		wired = 0;
		{
		    register pt_entry_t *ptep;
		    for (ptep = ptp; ptep < eptp; ptep++) {
			if (iswired(*ptep)) {
			    wired = 1;
			    break;
			}
		    }
		}
		if (!wired) {
		    /*
		     * Remove the virtual addresses mapped by this pte page.
		     */
		    pmap_remove_range(p,
				pdetova(pdp - p->dirbase),
				ptp,
				eptp);

		    /*
		     * Invalidate the page directory pointer.
		     */
		    {
			register int		i = ptes_per_vm_page;
			register pt_entry_t	*pdep = pdp;

			do {
			    *pdep++ = 0;
			} while (--i > 0);
		    }

		    PMAP_READ_UNLOCK(p, spl);

		    /*
		     * And free the pte page itself.
		     */
		    {
			register vm_page_t m;

			vm_object_lock(pmap_object);
			m = vm_page_lookup(pmap_object, pa);
			if (m == VM_PAGE_NULL)
			    panic("pmap_collect: pte page not in object");
			vm_page_lock_queues();
			vm_page_free(m);
			inuse_ptepages_count--;
			vm_page_unlock_queues();
			vm_object_unlock(pmap_object);
		    }

		    PMAP_READ_LOCK(p, spl);
		}
	      }
	    }
	}
	PMAP_READ_UNLOCK(p, spl);
	return;
}
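
/*
 * Illustration only: the reclamation test at the heart of pmap_collect()
 * -- a second-level table may be freed only if none of its entries is
 * wired, and the directory slot is cleared before the table page is
 * returned (the kernel also tears down any remaining mappings first).
 * The toy_* names and pointer-based directory are invented for the sketch.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>
#include <stdlib.h>

#define TOY_NPTES	1024
#define TOY_PTE_WIRED	0x200

static int
toy_collect_one(uint32_t **dir_slot)
{
	uint32_t *tab = *dir_slot;

	if (tab == NULL)
		return 0;
	for (int i = 0; i < TOY_NPTES; i++)
		if (tab[i] & TOY_PTE_WIRED)
			return 0;		/* cannot reclaim */
	*dir_slot = NULL;			/* invalidate the pde first */
	free(tab);				/* then free the pte page */
	return 1;
}
#endif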
/*
 *	Routine:	pmap_kernel
 *	Function:
 *		Returns the physical map handle for the kernel.
 */
pmap_t
pmap_kernel(void)
{
	return (kernel_pmap);
}
/*
 *	pmap_zero_page zeros the specified (machine independent) page.
 *	See machine/phys.c or machine/phys.s for implementation.
 */
void
pmap_zero_page(
	register vm_offset_t	phys)
{
	register int	i;

	assert(phys != vm_page_fictitious_addr);
	i = PAGE_SIZE / INTEL_PGBYTES;
	phys = intel_pfn(phys);

	while (i--)
		zero_phys(phys++);
}
/*
 *	pmap_copy_page copies the specified (machine independent) page.
 *	See machine/phys.c or machine/phys.s for implementation.
 */
void
pmap_copy_page(
	vm_offset_t	src,
	vm_offset_t	dst)
{
	int	i;

	assert(src != vm_page_fictitious_addr);
	assert(dst != vm_page_fictitious_addr);
	i = PAGE_SIZE / INTEL_PGBYTES;

	while (i--) {
		copy_phys(intel_pfn(src), intel_pfn(dst));
		src += INTEL_PGBYTES;
		dst += INTEL_PGBYTES;
	}
}
/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	pmap_t		pmap,
	vm_offset_t	start,
	vm_offset_t	end,
	boolean_t	pageable)
{
#ifdef	lint
	pmap++; start++; end++; pageable++;
#endif	/* lint */
}
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	vm_offset_t	phys,
	int		bits)
{
	pv_entry_t		pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;

	assert(phys != vm_page_fictitious_addr);
	if (!valid_page(phys)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Walk down PV list, clearing all modify or reference bits.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     *	There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 *	Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */

		    /*
		     * Invalidate TLBs for all CPUs using this mapping.
		     */
		    PMAP_INVALIDATE_PAGE(pmap, va);
		}

		/*
		 * Clear modify or reference bits.
		 */
		{
		    register int	i = ptes_per_vm_page;

		    do {
			*pte++ &= ~bits;
		    } while (--i > 0);
		}
		simple_unlock(&pmap->lock);
	    }
	}

	pmap_phys_attributes[pai] &= ~bits;

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Check specified attribute bits.
 */
boolean_t
phys_attribute_test(
	vm_offset_t	phys,
	int		bits)
{
	pv_entry_t		pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;

	assert(phys != vm_page_fictitious_addr);
	if (!valid_page(phys)) {
	    /*
	     *	Not a managed page.
	     */
	    return (FALSE);
	}

	/*
	 *	Lock the pmap system first, since we will be checking
	 *	several pmaps.
	 */

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	if (pmap_phys_attributes[pai] & bits) {
	    PMAP_WRITE_UNLOCK(spl);
	    return (TRUE);
	}

	/*
	 *	Walk down PV list, checking all mappings.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     *	There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 *	Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */
		}

		/*
		 * Check modify or reference bits.
		 */
		{
		    register int	i = ptes_per_vm_page;

		    do {
			if (*pte++ & bits) {
			    simple_unlock(&pmap->lock);
			    PMAP_WRITE_UNLOCK(spl);
			    return (TRUE);
			}
		    } while (--i > 0);
		}
		simple_unlock(&pmap->lock);
	    }
	}
	PMAP_WRITE_UNLOCK(spl);
	return (FALSE);
}
/*
 *	Set specified attribute bits.
 */
void
phys_attribute_set(
	vm_offset_t	phys,
	int		bits)
{
	spl_t		spl;

	assert(phys != vm_page_fictitious_addr);
	if (!valid_page(phys)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system and set the requested bits in
	 *	the phys attributes array.  Don't need to bother with
	 *	ptes because the test routine looks here first.
	 */

	PMAP_WRITE_LOCK(spl);
	pmap_phys_attributes[pa_index(phys)] |= bits;
	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Set the modify bit on the specified physical page.
 */

void pmap_set_modify(
	register vm_offset_t	phys)
{
	phys_attribute_set(phys, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */

void
pmap_clear_modify(
	register vm_offset_t	phys)
{
	phys_attribute_clear(phys, PHYS_MODIFIED);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */

boolean_t
pmap_is_modified(
	register vm_offset_t	phys)
{
	return (phys_attribute_test(phys, PHYS_MODIFIED));
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */

void
pmap_clear_reference(
	vm_offset_t	phys)
{
	phys_attribute_clear(phys, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */

boolean_t
pmap_is_referenced(
	vm_offset_t	phys)
{
	return (phys_attribute_test(phys, PHYS_REFERENCED));
}
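
/*
 * Illustration only: the fast-path/slow-path split used by
 * phys_attribute_test() -- consult the cached per-page attribute byte
 * first, and only if it is clear look for the bit in a live pte.  The
 * single-mapping model and toy_* names are inventions of the sketch.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdint.h>

#define TOY_PTE_MOD	0x040

static int
toy_is_modified(uint8_t attr, const uint32_t *pte_for_page)
{
	if (attr & TOY_PTE_MOD)			/* already recorded */
		return 1;
	if (pte_for_page && (*pte_for_page & TOY_PTE_MOD))
		return 1;			/* live mapping carries the bit */
	return 0;
}
#endif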
/*
 *	Set the modify bit on the specified range
 *	of this map as requested.
 *
 *	This optimization stands only if each time the dirty bit
 *	in vm_page_t is tested, it is also tested in the pmap.
 */
void
pmap_modify_pages(
	pmap_t		map,
	vm_offset_t	s,
	vm_offset_t	e)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	pde = pmap_pde(map, s);
	while (s && s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)ptetokv(*pde);
		if (l) {
		    spte = &spte[ptenum(s)];
		    epte = &spte[intel_btop(l-s)];
		} else {
		    epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
		    spte = &spte[ptenum(s)];
		}
		while (spte < epte) {
		    if (*spte & INTEL_PTE_VALID) {
			*spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
		    }
		    spte++;
		}
	    }
	    s = l;
	    pde++;
	}
	PMAP_FLUSH_TLBS();
	PMAP_READ_UNLOCK(map, spl);
}
void
invalidate_icache(vm_offset_t addr, unsigned cnt, int phys)
{
	return;
}

void
flush_dcache(vm_offset_t addr, unsigned count, int phys)
{
	return;
}
#if	NCPUS > 1

void
pmap_wait_for_clear(void)
{
	register int		my_cpu;
	spl_t			s;
	register pmap_t		my_pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();

	my_pmap = real_pmap[my_cpu];

	if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
		my_pmap = kernel_pmap;

	/*
	 *	Raise spl to splhigh (above splip) to block out pmap_extract
	 *	from IO code (which would put this cpu back in the active
	 *	set).
	 */
	s = splhigh();

	/*
	 *	Wait for any pmap updates in progress, on either user
	 *	or kernel pmap.
	 */
	while (*(volatile hw_lock_t)&my_pmap->lock.interlock ||
	       *(volatile hw_lock_t)&kernel_pmap->lock.interlock) {
		continue;
	}

	splx(s);
	mp_enable_preemption();
}

void
pmap_flush_tlb_interrupt(void) {
	pmap_wait_for_clear();

	flush_tlb();
}

void
pmap_reload_tlb_interrupt(void) {
	pmap_wait_for_clear();

	set_cr3(kernel_pmap->pdirbase);
}

#endif	/* NCPUS > 1 */
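
/*
 * Illustration only: a user-space cartoon of the TLB shootdown handshake
 * behind the interrupt handlers above -- the initiator posts a flush
 * request to every other "cpu" and each target flushes and acknowledges.
 * C11 atomics stand in for the interprocessor interrupt and the
 * cpus_active bookkeeping; nothing here is kernel API.
 */
#if 0	/* illustrative sketch -- not part of the kernel build */
#include <stdatomic.h>

#define TOY_NCPUS 4

static atomic_int flush_needed[TOY_NCPUS];
static atomic_int acks;

static void
toy_signal_flush(int me)
{
	atomic_store(&acks, 0);
	for (int cpu = 0; cpu < TOY_NCPUS; cpu++)
		if (cpu != me)
			atomic_store(&flush_needed[cpu], 1);	/* "send IPI" */
	while (atomic_load(&acks) < TOY_NCPUS - 1)
		;	/* wait for every target to acknowledge */
}

static void
toy_flush_interrupt(int me)
{
	if (atomic_exchange(&flush_needed[me], 0)) {
		/* flush_tlb() would go here */
		atomic_fetch_add(&acks, 1);
	}
}
#endif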
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(vm_offset_t pa);

void
db_show_page(vm_offset_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
		printf("modified, ");
	if (attr & PHYS_REFERENCED)
		printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
		printf(" mapped at\n");
	else
		printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
		if (pv_h->pmap)
			printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
	vm_offset_t	vaddr)
{
	db_printf("0x%x", kvtophys(vaddr));
}

/*
 *	Walk the page tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	int		x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
		dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
		db_printf("need a dirbase...\n");
		return;
	}
	dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDES; y++, pdep++) {
		if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
			continue;
		}
		pdecnt++;
		ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
		db_printf("dir[%4d]: 0x%x\n", y, *pdep);
		for (x = 0; x < NPTES; x++, ptep++) {
			if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
				continue;
			}
			ptecnt++;
			db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
				x,
				*ptep,
				(y << 22) | (x << 12),
				*ptep & ~INTEL_OFFMASK);
		}
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}
#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	register pmap_t		pmap,
	register vm_offset_t	*listp,
	register int		space)
{
	return 0;
}
#endif	/* MACH_VM_DEBUG */
/*
 *	BSD support routine to reassign virtual addresses.
 */
void
pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
{
	spl_t		spl;
	pt_entry_t	*pte, saved_pte;

	/* Lock the kernel map */

	while (size > 0) {
		PMAP_READ_LOCK(kernel_pmap, spl);
		pte = pmap_pte(kernel_pmap, from);
		if (pte == NULL)
			panic("pmap_pagemove from pte NULL");
		saved_pte = *pte;
		PMAP_READ_UNLOCK(kernel_pmap, spl);

		pmap_enter(kernel_pmap, to, i386_trunc_page(*pte),
			VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);

		pmap_remove(kernel_pmap, from, from+PAGE_SIZE);

		PMAP_READ_LOCK(kernel_pmap, spl);
		pte = pmap_pte(kernel_pmap, to);
		if (pte == NULL)
			panic("pmap_pagemove 'to' pte NULL");

		*pte = saved_pte;
		PMAP_READ_UNLOCK(kernel_pmap, spl);

		from += PAGE_SIZE;
		to += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/* Get the processors to update the TLBs */
	PMAP_FLUSH_TLBS();
}
kern_return_t bmapvideo(vm_offset_t *info);
kern_return_t bmapvideo(vm_offset_t *info) {

	extern struct vc_info vinfo;
#ifdef NOTIMPLEMENTED
	(void)copyout((char *)&vinfo, (char *)info, sizeof(struct vc_info));	/* Copy out the video info */
#endif
	return KERN_SUCCESS;
}

kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr);
kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr) {

#ifdef NOTIMPLEMENTED
	pmap_map_block(current_act()->task->map->pmap, va, pa, size, prot, attr);	/* Map it in */
#endif
	return KERN_SUCCESS;
}

kern_return_t bmapmapr(vm_offset_t va);
kern_return_t bmapmapr(vm_offset_t va) {

#ifdef NOTIMPLEMENTED
	mapping_remove(current_act()->task->map->pmap, va);	/* Remove map */
#endif
	return KERN_SUCCESS;
}
/* temporary workaround */
boolean_t
coredumpok(vm_map_t map, vm_offset_t va)
{
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE|INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE|INTEL_PTE_WIRED));
}