2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
29 * All Rights Reserved.
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
41 * Carnegie Mellon requests users of this software to return to
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * (These guys wrote the Vax version)
59 * Physical Map management code for Intel i386, i486, and i860.
61 * Manages physical address maps.
63 * In addition to hardware address maps, this
64 * module is called upon to provide software-use-only
65 * maps which may or may not be stored in the same
66 * form as hardware maps. These pseudo-maps are
67 * used to store intermediate results from copy
68 * operations to and from address spaces.
70 * Since the information managed by this module is
71 * also stored by the logical address mapping module,
72 * this module may throw away valid virtual-to-physical
73 * mappings at almost any time. However, invalidations
74 * of virtual-to-physical mappings must be done as
77 * In order to cope with hardware architectures which
78 * make virtual-to-physical map invalidates expensive,
79 * this module may delay invalidate or reduced protection
80 * operations until such time as they are actually
81 * necessary. This module is given full information as
82 * to which processors are currently using which maps,
83 * and to when physical maps must be made correct.
89 #include <mach_ldebug.h>
91 #include <mach/machine/vm_types.h>
93 #include <mach/boolean.h>
94 #include <kern/thread.h>
95 #include <kern/zalloc.h>
97 #include <kern/lock.h>
98 #include <kern/kalloc.h>
102 #include <vm/vm_map.h>
103 #include <vm/vm_kern.h>
104 #include <mach/vm_param.h>
105 #include <mach/vm_prot.h>
106 #include <vm/vm_object.h>
107 #include <vm/vm_page.h>
109 #include <mach/machine/vm_param.h>
110 #include <machine/thread.h>
112 #include <kern/misc_protos.h> /* prototyping */
113 #include <i386/misc_protos.h>
115 #include <i386/cpuid.h>
116 #include <i386/cpu_data.h>
117 #include <i386/cpu_number.h>
118 #include <i386/machine_cpu.h>
119 #include <i386/mp_slave_boot.h>
122 #include <ddb/db_command.h>
123 #include <ddb/db_output.h>
124 #include <ddb/db_sym.h>
125 #include <ddb/db_print.h>
126 #endif /* MACH_KDB */
128 #include <kern/xpr.h>
130 #include <vm/vm_protos.h>
135 * Forward declarations for internal functions.
141 extern void pmap_remove_range(
147 void phys_attribute_clear(
151 boolean_t
phys_attribute_test(
155 void phys_attribute_set(
159 void pmap_growkernel(
162 void pmap_set_reference(
170 pt_entry_t
* pmap_mapgetpte(
174 boolean_t
phys_page_exists(
178 void set_dirbase(vm_offset_t dirbase
);
179 #endif /* set_dirbase */
181 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
183 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
184 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
186 #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL)
187 #define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
190 * Private data structures.
194 * For each vm_page_t, there is a list of all currently
195 * valid virtual mappings of that page. An entry is
196 * a pv_entry_t; the list is the pv_table.
199 typedef struct pv_entry
{
200 struct pv_entry
*next
; /* next pv_entry */
201 pmap_t pmap
; /* pmap where mapping lies */
202 vm_offset_t va
; /* virtual address for mapping */
205 #define PV_ENTRY_NULL ((pv_entry_t) 0)
207 pv_entry_t pv_head_table
; /* array of entries, one per page */
210 * pv_list entries are kept on a list that can only be accessed
211 * with the pmap system locked (at SPLVM, not in the cpus_active set).
212 * The list is refilled from the pv_list_zone if it becomes empty.
214 pv_entry_t pv_free_list
; /* free list at SPLVM */
215 decl_simple_lock_data(,pv_free_list_lock
)
216 int pv_free_count
= 0;
217 #define PV_LOW_WATER_MARK 5000
218 #define PV_ALLOC_CHUNK 2000
219 thread_call_t mapping_adjust_call
;
220 static thread_call_data_t mapping_adjust_call_data
;
221 int mappingrecurse
= 0;
223 #define PV_ALLOC(pv_e) { \
224 simple_lock(&pv_free_list_lock); \
225 if ((pv_e = pv_free_list) != 0) { \
226 pv_free_list = pv_e->next; \
228 if (pv_free_count < PV_LOW_WATER_MARK) \
229 if (hw_compare_and_store(0,1,&mappingrecurse)) \
230 thread_call_enter(mapping_adjust_call); \
232 simple_unlock(&pv_free_list_lock); \
235 #define PV_FREE(pv_e) { \
236 simple_lock(&pv_free_list_lock); \
237 pv_e->next = pv_free_list; \
238 pv_free_list = pv_e; \
240 simple_unlock(&pv_free_list_lock); \
243 zone_t pv_list_zone
; /* zone of pv_entry structures */
246 static zone_t pdpt_zone
;
251 * Each entry in the pv_head_table is locked by a bit in the
252 * pv_lock_table. The lock bits are accessed by the physical
253 * address of the page they lock.
256 char *pv_lock_table
; /* pointer to array of bits */
257 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
260 * First and last physical addresses that we maintain any information
261 * for. Initialized to zero so that pmap operations done before
262 * pmap_init won't touch any non-existent structures.
264 pmap_paddr_t vm_first_phys
= (pmap_paddr_t
) 0;
265 pmap_paddr_t vm_last_phys
= (pmap_paddr_t
) 0;
266 boolean_t pmap_initialized
= FALSE
;/* Has pmap_init completed? */
268 pmap_paddr_t kernel_vm_end
= (pmap_paddr_t
)0;
270 #define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
271 #if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */
272 static struct vm_object kptobj_object_store
;
273 static vm_object_t kptobj
;
278 * Index into pv_head table, its lock bits, and the modify/reference
279 * bits starting at vm_first_phys.
282 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
284 #define pai_to_pvh(pai) (&pv_head_table[pai])
285 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
286 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
289 * Array of physical page attribites for managed pages.
290 * One byte per physical page.
292 char *pmap_phys_attributes
;
295 * Physical page attributes. Copy bits from PTE definition.
297 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
298 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
299 #define PHYS_NCACHE INTEL_PTE_NCACHE
302 * Amount of virtual memory mapped by one
303 * page-directory entry.
305 #define PDE_MAPPED_SIZE (pdetova(1))
308 * Locking and TLB invalidation
314 * There are two structures in the pmap module that need locking:
315 * the pmaps themselves, and the per-page pv_lists (which are locked
316 * by locking the pv_lock_table entry that corresponds to the pv_head
317 * for the list in question.) Most routines want to lock a pmap and
318 * then do operations in it that require pv_list locking -- however
319 * pmap_remove_all and pmap_copy_on_write operate on a physical page
320 * basis and want to do the locking in the reverse order, i.e. lock
321 * a pv_list and then go through all the pmaps referenced by that list.
322 * To protect against deadlock between these two cases, the pmap_lock
323 * is used. There are three different locking protocols as a result:
325 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
328 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
329 * lock on the pmap_lock (shared read), then lock the pmap
330 * and finally the pv_lists as needed [i.e. pmap lock before
333 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
334 * Get a write lock on the pmap_lock (exclusive write); this
335 * also guaranteees exclusive access to the pv_lists. Lock the
338 * At no time may any routine hold more than one pmap lock or more than
339 * one pv_list lock. Because interrupt level routines can allocate
340 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
341 * kernel_pmap can only be held at splhigh.
345 * We raise the interrupt level to splvm, to block interprocessor
346 * interrupts during pmap operations. We must take the CPU out of
347 * the cpus_active set while interrupts are blocked.
349 #define SPLVM(spl) { \
351 mp_disable_preemption(); \
352 i_bit_clear(cpu_number(), &cpus_active); \
353 mp_enable_preemption(); \
356 #define SPLX(spl) { \
357 mp_disable_preemption(); \
358 i_bit_set(cpu_number(), &cpus_active); \
359 mp_enable_preemption(); \
364 * Lock on pmap system
366 lock_t pmap_system_lock
;
368 #define PMAP_READ_LOCK(pmap, spl) { \
370 lock_read(&pmap_system_lock); \
371 simple_lock(&(pmap)->lock); \
374 #define PMAP_WRITE_LOCK(spl) { \
376 lock_write(&pmap_system_lock); \
379 #define PMAP_READ_UNLOCK(pmap, spl) { \
380 simple_unlock(&(pmap)->lock); \
381 lock_read_done(&pmap_system_lock); \
385 #define PMAP_WRITE_UNLOCK(spl) { \
386 lock_write_done(&pmap_system_lock); \
390 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
391 simple_lock(&(pmap)->lock); \
392 lock_write_to_read(&pmap_system_lock); \
395 #define LOCK_PVH(index) lock_pvh_pai(index)
397 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
400 extern int max_lock_loops
;
401 extern int disableSerialOuput
;
403 unsigned int loop_count; \
404 loop_count = disableSerialOuput ? max_lock_loops \
406 #define LOOP_CHECK(msg, pmap) \
407 if (--loop_count == 0) { \
408 mp_disable_preemption(); \
409 kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \
410 msg, cpu_number(), pmap, cpus_active); \
411 Debugger("deadlock detection"); \
412 mp_enable_preemption(); \
413 loop_count = max_lock_loops; \
415 #else /* USLOCK_DEBUG */
417 #define LOOP_CHECK(msg, pmap)
418 #endif /* USLOCK_DEBUG */
420 #define PMAP_UPDATE_TLBS(pmap, s, e) \
425 mp_disable_preemption(); \
426 cpu_mask = 1 << cpu_number(); \
428 /* Since the pmap is locked, other updates are locked */ \
429 /* out, and any pmap_activate has finished. */ \
431 /* find other cpus using the pmap */ \
432 users = (pmap)->cpus_using & ~cpu_mask; \
435 /* signal them, and wait for them to finish */ \
436 /* using the pmap */ \
437 signal_cpus(users, (pmap), (s), (e)); \
438 while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
439 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \
443 /* invalidate our own TLB if pmap is in use */ \
445 if ((pmap)->cpus_using & cpu_mask) { \
446 INVALIDATE_TLB((pmap), (s), (e)); \
449 mp_enable_preemption(); \
452 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
454 #define INVALIDATE_TLB(m, s, e) { \
459 * Structures to keep track of pending TLB invalidations
464 #define UPDATE_LIST_SIZE 4
466 struct pmap_update_item
{
467 pmap_t pmap
; /* pmap to invalidate */
468 vm_offset_t start
; /* start address to invalidate */
469 vm_offset_t end
; /* end address to invalidate */
472 typedef struct pmap_update_item
*pmap_update_item_t
;
475 * List of pmap updates. If the list overflows,
476 * the last entry is changed to invalidate all.
478 struct pmap_update_list
{
479 decl_simple_lock_data(,lock
)
481 struct pmap_update_item item
[UPDATE_LIST_SIZE
];
483 typedef struct pmap_update_list
*pmap_update_list_t
;
485 extern void signal_cpus(
491 pmap_memory_region_t pmap_memory_regions
[PMAP_MEMORY_REGIONS_SIZE
];
494 * Other useful macros.
496 #define current_pmap() (vm_map_pmap(current_thread()->map))
497 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
499 struct pmap kernel_pmap_store
;
503 decl_simple_lock_data(,free_pmap_lock
)
506 struct zone
*pmap_zone
; /* zone of pmap structures */
508 int pmap_debug
= 0; /* flag for debugging prints */
510 unsigned int inuse_ptepages_count
= 0; /* debugging */
513 * Pmap cache. Cache is threaded through ref_count field of pmap.
514 * Max will eventually be constant -- variable for experimentation.
516 int pmap_cache_max
= 32;
517 int pmap_alloc_chunk
= 8;
518 pmap_t pmap_cache_list
;
519 int pmap_cache_count
;
520 decl_simple_lock_data(,pmap_cache_lock
)
522 extern vm_offset_t hole_start
, hole_end
;
528 pt_entry_t
*DMAP1
, *DMAP2
;
533 #define PMAP_ALIAS_MAX 32
539 #define PMAP_ALIAS_COOKIE 0xdeadbeef
540 } pmap_aliasbuf
[PMAP_ALIAS_MAX
];
541 int pmap_alias_index
= 0;
542 extern vm_offset_t
get_rpc();
544 #endif /* DEBUG_ALIAS */
546 #define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
547 #define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT])
550 pmap_is_current(pmap_t pmap
)
552 return (pmap
== kernel_pmap
||
553 (pmap
->dirbase
[PTDPTDI
] & PG_FRAME
) == (PTDpde
[0] & PG_FRAME
));
558 * return address of mapped pte for vaddr va in pmap pmap.
561 pmap_pte(pmap_t pmap
, vm_offset_t va
)
566 pde
= pmap_pde(pmap
, va
);
568 if (pmap_is_current(pmap
))
570 newpf
= *pde
& PG_FRAME
;
571 if (((*CM4
) & PG_FRAME
) != newpf
) {
572 *CM4
= newpf
| INTEL_PTE_RW
| INTEL_PTE_VALID
;
575 return (pt_entry_t
*)CA4
+ (i386_btop(va
) & (NPTEPG
-1));
580 #define DEBUG_PTE_PAGE 0
587 register pt_entry_t
*pte
, *epte
;
590 /* check the use and wired counts */
591 if (ptep
== PTE_PAGE_NULL
)
593 pte
= pmap_pte(ptep
->pmap
, ptep
->va
);
594 epte
= pte
+ INTEL_PGBYTES
/sizeof(pt_entry_t
);
606 if (ctu
!= ptep
->use_count
|| ctw
!= ptep
->wired_count
) {
607 printf("use %d wired %d - actual use %d wired %d\n",
608 ptep
->use_count
, ptep
->wired_count
, ctu
, ctw
);
612 #endif /* DEBUG_PTE_PAGE */
615 * Map memory at initialization. The physical addresses being
616 * mapped are not managed and are never unmapped.
618 * For now, VM is already on, we only need to map the
623 register vm_offset_t virt
,
624 register vm_offset_t start_addr
,
625 register vm_offset_t end_addr
,
626 register vm_prot_t prot
)
631 while (start_addr
< end_addr
) {
632 pmap_enter(kernel_pmap
,
633 virt
, (ppnum_t
) i386_btop(start_addr
), prot
, 0, FALSE
);
641 * Back-door routine for mapping kernel VM at initialization.
642 * Useful for mapping memory outside the range
643 * Sets no-cache, A, D.
644 * [vm_first_phys, vm_last_phys) (i.e., devices).
645 * Otherwise like pmap_map.
649 register vm_offset_t virt
,
650 register vm_offset_t start_addr
,
651 register vm_offset_t end_addr
,
654 register pt_entry_t
template;
655 register pt_entry_t
*pte
;
657 template = pa_to_pte(start_addr
)
663 if (prot
& VM_PROT_WRITE
)
664 template |= INTEL_PTE_WRITE
;
666 /* XXX move pmap_pte out of loop, once one pte mapped, all are */
667 while (start_addr
< end_addr
) {
668 pte
= pmap_pte(kernel_pmap
, virt
);
669 if (pte
== PT_ENTRY_NULL
) {
670 panic("pmap_map_bd: Invalid kernel address\n");
672 WRITE_PTE_FAST(pte
, template)
673 pte_increment_pa(template);
675 start_addr
+= PAGE_SIZE
;
682 extern char *first_avail
;
683 extern vm_offset_t virtual_avail
, virtual_end
;
684 extern pmap_paddr_t avail_start
, avail_end
;
685 extern vm_offset_t etext
;
686 extern void *sectHIBB
;
687 extern int sectSizeHIB
;
690 * Bootstrap the system enough to run with virtual memory.
691 * Map the kernel's code and data, and allocate the system page table.
692 * Called with mapping OFF. Page_size must already be set.
695 * load_start: PA where kernel was loaded
696 * avail_start PA of first available physical page -
697 * after kernel page tables
698 * avail_end PA of last available physical page
699 * virtual_avail VA of first available page -
700 * after kernel page tables
701 * virtual_end VA of last available page -
702 * end of kernel address space
704 * &start_text start of kernel text
705 * &etext end of kernel text
710 __unused vm_offset_t load_start
)
715 int wpkernel
, boot_arg
;
717 vm_last_addr
= VM_MAX_KERNEL_ADDRESS
; /* Set the highest address
721 * The kernel's pmap is statically allocated so we don't
722 * have to use pmap_create, which is unlikely to work
723 * correctly at this part of the boot sequence.
726 kernel_pmap
= &kernel_pmap_store
;
728 kernel_pmap
->pmap_link
.next
= (queue_t
)kernel_pmap
; /* Set up anchor forward */
729 kernel_pmap
->pmap_link
.prev
= (queue_t
)kernel_pmap
; /* Set up anchor reverse */
731 kernel_pmap
->ref_count
= 1;
732 kernel_pmap
->pm_obj
= (vm_object_t
) NULL
;
733 kernel_pmap
->dirbase
= (pd_entry_t
*)((unsigned int)IdlePTD
| KERNBASE
);
734 kernel_pmap
->pdirbase
= (pd_entry_t
*)IdlePTD
;
736 kernel_pmap
->pm_pdpt
= (pd_entry_t
*)((unsigned int)IdlePDPT
| KERNBASE
);
737 kernel_pmap
->pm_ppdpt
= (vm_offset_t
)IdlePDPT
;
740 va
= (vm_offset_t
)kernel_pmap
->dirbase
;
741 /* setup self referential mapping(s) */
742 for (i
= 0; i
< NPGPTD
; i
++ ) {
744 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
745 * (pd_entry_t
*) (kernel_pmap
->dirbase
+ PTDPTDI
+ i
) =
746 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
747 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
749 kernel_pmap
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
755 virtual_avail
= (vm_offset_t
)VADDR(KPTDI
,0) + (vm_offset_t
)first_avail
;
756 virtual_end
= (vm_offset_t
)(VM_MAX_KERNEL_ADDRESS
);
759 * Reserve some special page table entries/VA space for temporary
762 #define SYSMAP(c, p, v, n) \
763 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
766 pte
= (pt_entry_t
*) pmap_pte(kernel_pmap
, va
);
769 * CMAP1/CMAP2 are used for zeroing and copying pages.
770 * CMAP3 is used for ml_phys_read/write.
772 SYSMAP(caddr_t
, CM1
, CA1
, 1)
773 * (pt_entry_t
*) CM1
= 0;
774 SYSMAP(caddr_t
, CM2
, CA2
, 1)
775 * (pt_entry_t
*) CM2
= 0;
776 SYSMAP(caddr_t
, CM3
, CA3
, 1)
777 * (pt_entry_t
*) CM3
= 0;
779 /* used by pmap_pte */
780 SYSMAP(caddr_t
, CM4
, CA4
, 1)
781 * (pt_entry_t
*) CM4
= 0;
783 /* DMAP user for debugger */
784 SYSMAP(caddr_t
, DMAP1
, DADDR1
, 1);
785 SYSMAP(caddr_t
, DMAP2
, DADDR2
, 1); /* XXX temporary - can remove */
788 lock_init(&pmap_system_lock
,
789 FALSE
, /* NOT a sleep lock */
795 if (PE_parse_boot_arg("debug", &boot_arg
)) {
796 if (boot_arg
& DB_PRT
) wpkernel
= 0;
797 if (boot_arg
& DB_NMI
) wpkernel
= 0;
800 /* remap kernel text readonly if not debugging or kprintfing */
806 for (myva
= i386_round_page(VM_MIN_KERNEL_ADDRESS
+ MP_BOOT
+ MP_BOOTSTACK
); myva
< etext
; myva
+= PAGE_SIZE
) {
807 if (myva
>= (vm_offset_t
)sectHIBB
&& myva
< ((vm_offset_t
)sectHIBB
+ sectSizeHIB
))
809 ptep
= pmap_pte(kernel_pmap
, myva
);
811 *ptep
&= ~INTEL_PTE_RW
;
816 simple_lock_init(&kernel_pmap
->lock
, 0);
817 simple_lock_init(&pv_free_list_lock
, 0);
819 /* invalidate user virtual addresses */
820 memset((char *)kernel_pmap
->dirbase
,
822 (KPTDI
) * sizeof(pd_entry_t
));
824 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
825 VADDR(KPTDI
,0), virtual_end
);
827 kprintf("Available physical space from 0x%llx to 0x%llx\n",
828 avail_start
, avail_end
);
829 printf("PAE enabled\n");
831 kprintf("Available physical space from 0x%x to 0x%x\n",
832 avail_start
, avail_end
);
841 *startp
= virtual_avail
;
846 * Initialize the pmap module.
847 * Called by vm_init, to initialize any structures that the pmap
848 * system needs to map virtual memory.
853 register long npages
;
855 register vm_size_t s
;
860 * Allocate memory for the pv_head_table and its lock bits,
861 * the modify bit array, and the pte_page table.
864 /* zero bias all these arrays now instead of off avail_start
865 so we cover all memory */
866 npages
= i386_btop(avail_end
);
867 s
= (vm_size_t
) (sizeof(struct pv_entry
) * npages
868 + pv_lock_table_size(npages
)
872 if (kmem_alloc_wired(kernel_map
, &addr
, s
) != KERN_SUCCESS
)
875 memset((char *)addr
, 0, s
);
878 * Allocate the structures first to preserve word-alignment.
880 pv_head_table
= (pv_entry_t
) addr
;
881 addr
= (vm_offset_t
) (pv_head_table
+ npages
);
883 pv_lock_table
= (char *) addr
;
884 addr
= (vm_offset_t
) (pv_lock_table
+ pv_lock_table_size(npages
));
886 pmap_phys_attributes
= (char *) addr
;
889 * Create the zone of physical maps,
890 * and of the physical-to-virtual entries.
892 s
= (vm_size_t
) sizeof(struct pmap
);
893 pmap_zone
= zinit(s
, 400*s
, 4096, "pmap"); /* XXX */
894 s
= (vm_size_t
) sizeof(struct pv_entry
);
895 pv_list_zone
= zinit(s
, 10000*s
, 4096, "pv_list"); /* XXX */
897 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
899 pdpt_zone
= zinit(s
, 400*s
, 4096, "pdpt"); /* XXX */
903 * Only now, when all of the data structures are allocated,
904 * can we set vm_first_phys and vm_last_phys. If we set them
905 * too soon, the kmem_alloc_wired above will try to use these
906 * data structures and blow up.
909 /* zero bias this now so we cover all memory */
911 vm_last_phys
= avail_end
;
913 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
914 kptobj
= &kptobj_object_store
;
915 _vm_object_allocate((vm_object_size_t
)NKPDE
, kptobj
);
916 kernel_pmap
->pm_obj
= kptobj
;
919 /* create pv entries for kernel pages mapped by low level
920 startup code. these have to exist so we can pmap_remove()
921 e.g. kext pages from the middle of our addr space */
923 vaddr
= (vm_offset_t
)VM_MIN_KERNEL_ADDRESS
;
924 for (ppn
= 0; ppn
< i386_btop(avail_start
) ; ppn
++ ) {
927 pv_e
= pai_to_pvh(ppn
);
930 pv_e
->pmap
= kernel_pmap
;
931 pv_e
->next
= PV_ENTRY_NULL
;
934 pmap_initialized
= TRUE
;
937 * Initializie pmap cache.
939 pmap_cache_list
= PMAP_NULL
;
940 pmap_cache_count
= 0;
941 simple_lock_init(&pmap_cache_lock
, 0);
943 simple_lock_init(&free_pmap_lock
, 0);
949 x86_lowmem_free(void)
951 /* free lowmem pages back to the vm system. we had to defer doing this
952 until the vm system was fully up.
953 the actual pages that are released are determined by which
954 pages the memory sizing code puts into the region table */
956 ml_static_mfree((vm_offset_t
) i386_ptob(pmap_memory_regions
[0].base
)|VM_MIN_KERNEL_ADDRESS
,
957 (vm_size_t
) i386_ptob(pmap_memory_regions
[0].end
- pmap_memory_regions
[0].base
));
961 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
973 assert(pn
!= vm_page_fictitious_addr
);
974 phys
= (pmap_paddr_t
)i386_ptob(pn
);
975 if (!pmap_initialized
)
978 if (!pmap_valid_page(pn
))
981 PMAP_WRITE_LOCK(spl
);
983 pai
= pa_index(phys
);
984 pv_h
= pai_to_pvh(pai
);
986 result
= (pv_h
->pmap
== PMAP_NULL
);
987 PMAP_WRITE_UNLOCK(spl
);
993 * Create and return a physical map.
995 * If the size specified for the map
996 * is zero, the map is an actual physical
997 * map, and may be referenced by the
1000 * If the size specified is non-zero,
1001 * the map will be used in software only, and
1002 * is bounded by that size.
1010 register pmap_t pro
;
1014 register vm_offset_t va
;
1017 * A software use-only map doesn't even need a map.
1024 p
= (pmap_t
) zalloc(pmap_zone
);
1026 panic("pmap_create zalloc");
1027 if (KERN_SUCCESS
!= kmem_alloc_wired(kernel_map
, (vm_offset_t
*)(&p
->dirbase
), NBPTD
))
1028 panic("pmap_create kmem_alloc_wired");
1030 p
->pm_hold
= (vm_offset_t
)zalloc(pdpt_zone
);
1031 if ((vm_offset_t
)NULL
== p
->pm_hold
) {
1032 panic("pdpt zalloc");
1034 p
->pm_pdpt
= (pdpt_entry_t
*) (( p
->pm_hold
+ 31) & ~31);
1035 p
->pm_ppdpt
= kvtophys((vm_offset_t
)p
->pm_pdpt
); /* XXX */
1037 if (NULL
== (p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPGPTD
*NPDEPG
))))
1038 panic("pmap_create vm_object_allocate");
1040 (void *)((unsigned int)IdlePTD
| KERNBASE
),
1042 va
= (vm_offset_t
)p
->dirbase
;
1043 p
->pdirbase
= (pd_entry_t
*)(kvtophys(va
));
1044 simple_lock_init(&p
->lock
, 0);
1046 /* setup self referential mapping(s) */
1047 for (i
= 0; i
< NPGPTD
; i
++ ) {
1049 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
1050 * (pd_entry_t
*) (p
->dirbase
+ PTDPTDI
+ i
) =
1051 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
1052 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
1054 p
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
1059 p
->stats
.resident_count
= 0;
1060 p
->stats
.wired_count
= 0;
1064 /* insert new pmap at head of queue hanging off kernel_pmap */
1066 simple_lock(&free_pmap_lock
);
1067 p
->pmap_link
.next
= (queue_t
)kernel_pmap
->pmap_link
.next
;
1068 kernel_pmap
->pmap_link
.next
= (queue_t
)p
;
1070 pro
= (pmap_t
) p
->pmap_link
.next
;
1071 p
->pmap_link
.prev
= (queue_t
)pro
->pmap_link
.prev
;
1072 pro
->pmap_link
.prev
= (queue_t
)p
;
1075 simple_unlock(&free_pmap_lock
);
1083 * Retire the given physical map from service.
1084 * Should only be called if the map contains
1085 * no valid mappings.
1092 register pt_entry_t
*pdep
;
1095 register vm_page_t m
;
1097 register pmap_t pre
,pro
;
1104 simple_lock(&p
->lock
);
1107 register int my_cpu
;
1109 mp_disable_preemption();
1110 my_cpu
= cpu_number();
1113 * If some cpu is not using the physical pmap pointer that it
1114 * is supposed to be (see set_dirbase), we might be using the
1115 * pmap that is being destroyed! Make sure we are
1116 * physically on the right pmap:
1118 /* force pmap/cr3 update */
1121 VM_MAX_KERNEL_ADDRESS
);
1123 if (PMAP_REAL(my_cpu
) == p
) {
1124 PMAP_CPU_CLR(p
, my_cpu
);
1125 PMAP_REAL(my_cpu
) = kernel_pmap
;
1127 set_cr3((unsigned int)kernel_pmap
->pm_ppdpt
);
1129 set_cr3((unsigned int)kernel_pmap
->pdirbase
);
1132 mp_enable_preemption();
1134 simple_unlock(&p
->lock
);
1138 return; /* still in use */
1142 /* remove from pmap queue */
1144 simple_lock(&free_pmap_lock
);
1146 pre
= (pmap_t
)p
->pmap_link
.prev
;
1147 pre
->pmap_link
.next
= (queue_t
)p
->pmap_link
.next
;
1148 pro
= (pmap_t
)p
->pmap_link
.next
;
1149 pro
->pmap_link
.prev
= (queue_t
)p
->pmap_link
.prev
;
1151 simple_unlock(&free_pmap_lock
);
1156 * Free the memory maps, then the
1160 pdep
= (pt_entry_t
*)p
->dirbase
;
1162 while (pdep
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)]) {
1164 if (*pdep
& INTEL_PTE_VALID
) {
1165 ind
= pdep
- (pt_entry_t
*)&p
->dirbase
[0];
1166 vm_object_lock(p
->pm_obj
);
1167 m
= vm_page_lookup(p
->pm_obj
, (vm_object_offset_t
)ind
);
1168 if (m
== VM_PAGE_NULL
) {
1169 panic("pmap_destroy: pte page not in object");
1171 vm_page_lock_queues();
1173 inuse_ptepages_count
--;
1174 vm_object_unlock(p
->pm_obj
);
1175 vm_page_unlock_queues();
1178 * Clear pdes, this might be headed for the cache.
1188 vm_object_deallocate(p
->pm_obj
);
1189 kmem_free(kernel_map
, (vm_offset_t
)p
->dirbase
, NBPTD
);
1191 zfree(pdpt_zone
, (void *)p
->pm_hold
);
1193 zfree(pmap_zone
, p
);
1197 * Add a reference to the specified pmap.
1206 if (p
!= PMAP_NULL
) {
1208 simple_lock(&p
->lock
);
1210 simple_unlock(&p
->lock
);
1216 * Remove a range of hardware page-table entries.
1217 * The entries given are the first (inclusive)
1218 * and last (exclusive) entries for the VM pages.
1219 * The virtual address is the va for the first pte.
1221 * The pmap must be locked.
1222 * If the pmap is not the kernel pmap, the range must lie
1223 * entirely within one pte-page. This is NOT checked.
1224 * Assumes that the pte-page exists.
1235 register pt_entry_t
*cpte
;
1236 int num_removed
, num_unwired
;
1241 if (pmap
!= kernel_pmap
)
1242 ptep_check(get_pte_page(spte
));
1243 #endif /* DEBUG_PTE_PAGE */
1247 for (cpte
= spte
; cpte
< epte
;
1248 cpte
++, va
+= PAGE_SIZE
) {
1250 pa
= pte_to_pa(*cpte
);
1258 if (!valid_page(i386_btop(pa
))) {
1261 * Outside range of managed physical memory.
1262 * Just remove the mappings.
1264 register pt_entry_t
*lpte
= cpte
;
1274 * Get the modify and reference bits.
1277 register pt_entry_t
*lpte
;
1280 pmap_phys_attributes
[pai
] |=
1281 *lpte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1287 * Remove the mapping from the pvlist for
1288 * this physical page.
1291 register pv_entry_t pv_h
, prev
, cur
;
1293 pv_h
= pai_to_pvh(pai
);
1294 if (pv_h
->pmap
== PMAP_NULL
) {
1295 panic("pmap_remove: null pv_list!");
1297 if (pv_h
->va
== va
&& pv_h
->pmap
== pmap
) {
1299 * Header is the pv_entry. Copy the next one
1300 * to header and free the next one (we cannot
1304 if (cur
!= PV_ENTRY_NULL
) {
1309 pv_h
->pmap
= PMAP_NULL
;
1316 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1317 panic("pmap-remove: mapping not in pv_list!");
1319 } while (cur
->va
!= va
|| cur
->pmap
!= pmap
);
1320 prev
->next
= cur
->next
;
1330 assert(pmap
->stats
.resident_count
>= num_removed
);
1331 pmap
->stats
.resident_count
-= num_removed
;
1332 assert(pmap
->stats
.wired_count
>= num_unwired
);
1333 pmap
->stats
.wired_count
-= num_unwired
;
1337 * Remove phys addr if mapped in specified map
1341 pmap_remove_some_phys(
1342 __unused pmap_t map
,
1343 __unused ppnum_t pn
)
1346 /* Implement to support working set code */
1351 * Remove the given range of addresses
1352 * from the specified map.
1354 * It is assumed that the start and end are properly
1355 * rounded to the hardware page size.
1366 register pt_entry_t
*pde
;
1367 register pt_entry_t
*spte
, *epte
;
1372 if (map
== PMAP_NULL
)
1375 PMAP_READ_LOCK(map
, spl
);
1377 if (value_64bit(s64
) || value_64bit(e64
)) {
1378 panic("pmap_remove addr overflow");
1381 orig_s
= s
= (vm_offset_t
)low32(s64
);
1382 e
= (vm_offset_t
)low32(e64
);
1384 pde
= pmap_pde(map
, s
);
1387 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1390 if (*pde
& INTEL_PTE_VALID
) {
1391 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1392 spte
= &spte
[ptenum(s
)];
1393 epte
= &spte
[intel_btop(l
-s
)];
1394 pmap_remove_range(map
, s
, spte
, epte
);
1400 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1402 PMAP_READ_UNLOCK(map
, spl
);
1406 * Routine: pmap_page_protect
1409 * Lower the permission for all mappings to a given
1417 pv_entry_t pv_h
, prev
;
1418 register pv_entry_t pv_e
;
1419 register pt_entry_t
*pte
;
1421 register pmap_t pmap
;
1426 assert(pn
!= vm_page_fictitious_addr
);
1427 phys
= (pmap_paddr_t
)i386_ptob(pn
);
1428 if (!valid_page(pn
)) {
1430 * Not a managed page.
1436 * Determine the new protection.
1440 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1444 return; /* nothing to do */
1451 * Lock the pmap system first, since we will be changing
1455 PMAP_WRITE_LOCK(spl
);
1457 pai
= pa_index(phys
);
1458 pv_h
= pai_to_pvh(pai
);
1461 * Walk down PV list, changing or removing all mappings.
1462 * We do not have to lock the pv_list because we have
1463 * the entire pmap system locked.
1465 if (pv_h
->pmap
!= PMAP_NULL
) {
1469 register vm_offset_t va
;
1472 * Lock the pmap to block pmap_extract and similar routines.
1474 simple_lock(&pmap
->lock
);
1479 pte
= pmap_pte(pmap
, va
);
1482 * Consistency checks.
1484 /* assert(*pte & INTEL_PTE_VALID); XXX */
1485 /* assert(pte_to_phys(*pte) == phys); */
1490 * Remove the mapping if new protection is NONE
1491 * or if write-protecting a kernel mapping.
1493 if (remove
|| pmap
== kernel_pmap
) {
1495 * Remove the mapping, collecting any modify bits.
1498 pmap_phys_attributes
[pai
] |=
1499 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1501 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1504 assert(pmap
->stats
.resident_count
>= 1);
1505 pmap
->stats
.resident_count
--;
1508 * Remove the pv_entry.
1512 * Fix up head later.
1514 pv_h
->pmap
= PMAP_NULL
;
1518 * Delete this entry.
1520 prev
->next
= pv_e
->next
;
1529 *pte
&= ~INTEL_PTE_WRITE
;
1531 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1538 simple_unlock(&pmap
->lock
);
1540 } while ((pv_e
= prev
->next
) != PV_ENTRY_NULL
);
1543 * If pv_head mapping was removed, fix it up.
1545 if (pv_h
->pmap
== PMAP_NULL
) {
1547 if (pv_e
!= PV_ENTRY_NULL
) {
1554 PMAP_WRITE_UNLOCK(spl
);
1562 * Disconnect all mappings for this page and return reference and change status
1563 * in generic format.
1566 unsigned int pmap_disconnect(
1569 pmap_page_protect(pa
, 0); /* disconnect the page */
1570 return (pmap_get_refmod(pa
)); /* return ref/chg status */
1574 * Set the physical protection on the
1575 * specified range of this map as requested.
1576 * Will not increase permissions.
1585 register pt_entry_t
*pde
;
1586 register pt_entry_t
*spte
, *epte
;
1589 vm_offset_t orig_s
= s
;
1592 if (map
== PMAP_NULL
)
1596 * Determine the new protection.
1600 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1602 case VM_PROT_READ
|VM_PROT_WRITE
:
1604 return; /* nothing to do */
1606 pmap_remove(map
, (addr64_t
)s
, (addr64_t
)e
);
1611 simple_lock(&map
->lock
);
1613 pde
= pmap_pde(map
, s
);
1615 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1618 if (*pde
& INTEL_PTE_VALID
) {
1619 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1620 spte
= &spte
[ptenum(s
)];
1621 epte
= &spte
[intel_btop(l
-s
)];
1623 while (spte
< epte
) {
1624 if (*spte
& INTEL_PTE_VALID
)
1625 *spte
&= ~INTEL_PTE_WRITE
;
1633 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1635 simple_unlock(&map
->lock
);
1642 * Insert the given physical page (p) at
1643 * the specified virtual address (v) in the
1644 * target physical map with the protection requested.
1646 * If specified, the page will be wired down, meaning
1647 * that the related pte cannot be reclaimed.
1649 * NB: This is the only routine which MAY NOT lazy-evaluate
1650 * or lose information. That is, this routine must actually
1651 * insert this page into the given map NOW.
1655 register pmap_t pmap
,
1662 register pt_entry_t
*pte
;
1663 register pv_entry_t pv_h
;
1666 pt_entry_t
template;
1668 pmap_paddr_t old_pa
;
1669 pmap_paddr_t pa
= (pmap_paddr_t
)i386_ptob(pn
);
1671 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1676 assert(pn
!= vm_page_fictitious_addr
);
1678 printf("pmap(%x, %x)\n", v
, pn
);
1679 if (pmap
== PMAP_NULL
)
1683 * Must allocate a new pvlist entry while we're unlocked;
1684 * zalloc may cause pageout (which will lock the pmap system).
1685 * If we determine we need a pvlist entry, we will unlock
1686 * and allocate one. Then we will retry, throughing away
1687 * the allocated entry later (if we no longer need it).
1689 pv_e
= PV_ENTRY_NULL
;
1691 PMAP_READ_LOCK(pmap
, spl
);
1694 * Expand pmap to include this pte. Assume that
1695 * pmap is always expanded to include enough hardware
1696 * pages to map one VM page.
1699 while ((pte
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
1701 * Must unlock to expand the pmap.
1703 PMAP_READ_UNLOCK(pmap
, spl
);
1705 pmap_expand(pmap
, v
);
1707 PMAP_READ_LOCK(pmap
, spl
);
1710 * Special case if the physical page is already mapped
1713 old_pa
= pte_to_pa(*pte
);
1716 * May be changing its wired attribute or protection
1719 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
1721 if(flags
& VM_MEM_NOT_CACHEABLE
) {
1722 if(!(flags
& VM_MEM_GUARDED
))
1723 template |= INTEL_PTE_PTA
;
1724 template |= INTEL_PTE_NCACHE
;
1727 if (pmap
!= kernel_pmap
)
1728 template |= INTEL_PTE_USER
;
1729 if (prot
& VM_PROT_WRITE
)
1730 template |= INTEL_PTE_WRITE
;
1732 template |= INTEL_PTE_WIRED
;
1734 pmap
->stats
.wired_count
++;
1737 if (iswired(*pte
)) {
1738 assert(pmap
->stats
.wired_count
>= 1);
1739 pmap
->stats
.wired_count
--;
1743 if (*pte
& INTEL_PTE_MOD
)
1744 template |= INTEL_PTE_MOD
;
1745 WRITE_PTE(pte
, template)
1752 * Outline of code from here:
1753 * 1) If va was mapped, update TLBs, remove the mapping
1754 * and remove old pvlist entry.
1755 * 2) Add pvlist entry for new mapping
1756 * 3) Enter new mapping.
1758 * SHARING_FAULTS complicates this slightly in that it cannot
1759 * replace the mapping, but must remove it (because adding the
1760 * pvlist entry for the new mapping may remove others), and
1761 * hence always enters the new mapping at step 3)
1763 * If the old physical page is not managed step 1) is skipped
1764 * (except for updating the TLBs), and the mapping is
1765 * overwritten at step 3). If the new physical page is not
1766 * managed, step 2) is skipped.
1769 if (old_pa
!= (pmap_paddr_t
) 0) {
1773 if (pmap
!= kernel_pmap
)
1774 ptep_check(get_pte_page(pte
));
1775 #endif /* DEBUG_PTE_PAGE */
1778 * Don't do anything to pages outside valid memory here.
1779 * Instead convince the code that enters a new mapping
1780 * to overwrite the old one.
1783 if (valid_page(i386_btop(old_pa
))) {
1785 pai
= pa_index(old_pa
);
1788 assert(pmap
->stats
.resident_count
>= 1);
1789 pmap
->stats
.resident_count
--;
1790 if (iswired(*pte
)) {
1791 assert(pmap
->stats
.wired_count
>= 1);
1792 pmap
->stats
.wired_count
--;
1795 pmap_phys_attributes
[pai
] |=
1796 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1800 * Remove the mapping from the pvlist for
1801 * this physical page.
1804 register pv_entry_t prev
, cur
;
1806 pv_h
= pai_to_pvh(pai
);
1807 if (pv_h
->pmap
== PMAP_NULL
) {
1808 panic("pmap_enter: null pv_list!");
1810 if (pv_h
->va
== v
&& pv_h
->pmap
== pmap
) {
1812 * Header is the pv_entry. Copy the next one
1813 * to header and free the next one (we cannot
1817 if (cur
!= PV_ENTRY_NULL
) {
1822 pv_h
->pmap
= PMAP_NULL
;
1829 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1830 panic("pmap_enter: mapping not in pv_list!");
1832 } while (cur
->va
!= v
|| cur
->pmap
!= pmap
);
1833 prev
->next
= cur
->next
;
1842 * old_pa is not managed. Pretend it's zero so code
1843 * at Step 3) will enter new mapping (overwriting old
1844 * one). Do removal part of accounting.
1846 old_pa
= (pmap_paddr_t
) 0;
1847 assert(pmap
->stats
.resident_count
>= 1);
1848 pmap
->stats
.resident_count
--;
1849 if (iswired(*pte
)) {
1850 assert(pmap
->stats
.wired_count
>= 1);
1851 pmap
->stats
.wired_count
--;
1857 if (valid_page(i386_btop(pa
))) {
1860 * Step 2) Enter the mapping in the PV list for this
1870 * We can return here from the sharing fault code below
1871 * in case we removed the only entry on the pv list and thus
1872 * must enter the new one in the list header.
1874 #endif /* SHARING_FAULTS */
1876 pv_h
= pai_to_pvh(pai
);
1878 if (pv_h
->pmap
== PMAP_NULL
) {
1884 pv_h
->next
= PV_ENTRY_NULL
;
1890 * check that this mapping is not already there
1891 * or there is no alias for this mapping in the same map
1893 pv_entry_t e
= pv_h
;
1894 while (e
!= PV_ENTRY_NULL
) {
1895 if (e
->pmap
== pmap
&& e
->va
== v
)
1896 panic("pmap_enter: already in pv_list");
1904 * do sharing faults.
1905 * if we find an entry on this pv list in the same address
1906 * space, remove it. we know there will not be more
1909 pv_entry_t e
= pv_h
;
1912 while (e
!= PV_ENTRY_NULL
) {
1913 if (e
->pmap
== pmap
) {
1915 * Remove it, drop pv list lock first.
1919 opte
= pmap_pte(pmap
, e
->va
);
1920 assert(opte
!= PT_ENTRY_NULL
);
1922 * Invalidate the translation buffer,
1923 * then remove the mapping.
1925 pmap_remove_range(pmap
, e
->va
, opte
,
1927 PMAP_UPDATE_TLBS(pmap
, e
->va
, e
->va
+ PAGE_SIZE
);
1930 * We could have remove the head entry,
1931 * so there could be no more entries
1932 * and so we have to use the pv head entry.
1933 * so, go back to the top and try the entry
1942 * check that this mapping is not already there
1945 while (e
!= PV_ENTRY_NULL
) {
1946 if (e
->pmap
== pmap
)
1947 panic("pmap_enter: alias in pv_list");
1951 #endif /* SHARING_FAULTS */
1955 * check for aliases within the same address space.
1957 pv_entry_t e
= pv_h
;
1958 vm_offset_t rpc
= get_rpc();
1960 while (e
!= PV_ENTRY_NULL
) {
1961 if (e
->pmap
== pmap
) {
1963 * log this entry in the alias ring buffer
1964 * if it's not there already.
1966 struct pmap_alias
*pma
;
1970 for (ii
= 0; ii
< pmap_alias_index
; ii
++) {
1971 if (pmap_aliasbuf
[ii
].rpc
== rpc
) {
1972 /* found it in the log already */
1978 pma
= &pmap_aliasbuf
[pmap_alias_index
];
1982 pma
->cookie
= PMAP_ALIAS_COOKIE
;
1983 if (++pmap_alias_index
>= PMAP_ALIAS_MAX
)
1984 panic("pmap_enter: exhausted alias log");
1990 #endif /* DEBUG_ALIAS */
1992 * Add new pv_entry after header.
1994 if (pv_e
== PV_ENTRY_NULL
) {
1996 if (pv_e
== PV_ENTRY_NULL
) {
1997 panic("pmap no pv_e's");
2002 pv_e
->next
= pv_h
->next
;
2005 * Remember that we used the pvlist entry.
2007 pv_e
= PV_ENTRY_NULL
;
2013 * Step 3) Enter and count the mapping.
2016 pmap
->stats
.resident_count
++;
2019 * Build a template to speed up entering -
2020 * only the pfn changes.
2022 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2024 if(flags
& VM_MEM_NOT_CACHEABLE
) {
2025 if(!(flags
& VM_MEM_GUARDED
))
2026 template |= INTEL_PTE_PTA
;
2027 template |= INTEL_PTE_NCACHE
;
2030 if (pmap
!= kernel_pmap
)
2031 template |= INTEL_PTE_USER
;
2032 if (prot
& VM_PROT_WRITE
)
2033 template |= INTEL_PTE_WRITE
;
2035 template |= INTEL_PTE_WIRED
;
2036 pmap
->stats
.wired_count
++;
2039 WRITE_PTE(pte
, template)
2042 PMAP_UPDATE_TLBS(pmap
, v
, v
+ PAGE_SIZE
);
2044 if (pv_e
!= PV_ENTRY_NULL
) {
2048 PMAP_READ_UNLOCK(pmap
, spl
);
2052 * Routine: pmap_change_wiring
2053 * Function: Change the wiring attribute for a map/virtual-address
2055 * In/out conditions:
2056 * The mapping must already exist in the pmap.
2060 register pmap_t map
,
2064 register pt_entry_t
*pte
;
2069 * We must grab the pmap system lock because we may
2070 * change a pte_page queue.
2072 PMAP_READ_LOCK(map
, spl
);
2074 if ((pte
= pmap_pte(map
, v
)) == PT_ENTRY_NULL
)
2075 panic("pmap_change_wiring: pte missing");
2077 if (wired
&& !iswired(*pte
)) {
2079 * wiring down mapping
2081 map
->stats
.wired_count
++;
2082 *pte
++ |= INTEL_PTE_WIRED
;
2084 else if (!wired
&& iswired(*pte
)) {
2088 assert(map
->stats
.wired_count
>= 1);
2089 map
->stats
.wired_count
--;
2090 *pte
++ &= ~INTEL_PTE_WIRED
;
2093 PMAP_READ_UNLOCK(map
, spl
);
2102 pmap_find_phys(pmap_t pmap
, addr64_t va
)
2108 if (value_64bit(va
))
2109 panic("pmap_find_phys 64 bit value");
2110 a32
= (vm_offset_t
) low32(va
);
2111 ptp
= pmap_pte(pmap
, a32
);
2112 if (PT_ENTRY_NULL
== ptp
) {
2115 ppn
= (ppnum_t
) i386_btop(pte_to_pa(*ptp
));
2121 * Routine: pmap_extract
2123 * Extract the physical page address associated
2124 * with the given map/virtual_address pair.
2125 * Change to shim for backwards compatibility but will not
2126 * work for 64 bit systems. Some old drivers that we cannot
2132 register pmap_t pmap
,
2138 vaddr
= (vm_offset_t
)0;
2139 ppn
= pmap_find_phys(pmap
, (addr64_t
)va
);
2141 vaddr
= ((vm_offset_t
)i386_ptob(ppn
)) | (va
& INTEL_OFFMASK
);
2148 * Routine: pmap_expand
2150 * Expands a pmap to be able to map the specified virtual address.
2152 * Allocates new virtual memory for the P0 or P1 portion of the
2153 * pmap, then re-maps the physical pages that were in the old
2154 * pmap to be in the new pmap.
2156 * Must be called with the pmap system and the pmap unlocked,
2157 * since these must be unlocked to use vm_allocate or vm_deallocate.
2158 * Thus it must be called in a loop that checks whether the map
2159 * has been expanded enough.
2160 * (We won't loop forever, since page tables aren't shrunk.)
2164 register pmap_t map
,
2165 register vm_offset_t v
)
2168 register vm_page_t m
;
2169 register pmap_paddr_t pa
;
2174 if (map
== kernel_pmap
) {
2180 * Allocate a VM page for the level 2 page table entries.
2182 while ((m
= vm_page_grab()) == VM_PAGE_NULL
)
2186 * put the page into the pmap's obj list so it
2187 * can be found later.
2192 vm_object_lock(map
->pm_obj
);
2193 vm_page_insert(m
, map
->pm_obj
, (vm_object_offset_t
)i
);
2194 vm_page_lock_queues();
2196 inuse_ptepages_count
++;
2197 vm_object_unlock(map
->pm_obj
);
2198 vm_page_unlock_queues();
2205 PMAP_READ_LOCK(map
, spl
);
2207 * See if someone else expanded us first
2209 if (pmap_pte(map
, v
) != PT_ENTRY_NULL
) {
2210 PMAP_READ_UNLOCK(map
, spl
);
2211 vm_object_lock(map
->pm_obj
);
2212 vm_page_lock_queues();
2214 inuse_ptepages_count
--;
2215 vm_page_unlock_queues();
2216 vm_object_unlock(map
->pm_obj
);
2221 * Set the page directory entry for this page table.
2222 * If we have allocated more than one hardware page,
2223 * set several page directory entries.
2226 pdp
= &map
->dirbase
[pdenum(map
, v
)];
2227 *pdp
= pa_to_pte(pa
)
2232 PMAP_READ_UNLOCK(map
, spl
);
2237 * Copy the range specified by src_addr/len
2238 * from the source map to the range dst_addr/len
2239 * in the destination map.
2241 * This routine is only advisory and need not do anything.
2248 vm_offset_t dst_addr
,
2250 vm_offset_t src_addr
)
2253 dst_pmap
++; src_pmap
++; dst_addr
++; len
++; src_addr
++;
2259 * pmap_sync_page_data_phys(ppnum_t pa)
2261 * Invalidates all of the instruction cache on a physical page and
2262 * pushes any dirty data from the data cache for the same physical page
2263 * Not required in i386.
2266 pmap_sync_page_data_phys(__unused ppnum_t pa
)
2272 * pmap_sync_page_attributes_phys(ppnum_t pa)
2274 * Write back and invalidate all cachelines on a physical page.
2277 pmap_sync_page_attributes_phys(ppnum_t pa
)
2279 cache_flush_page_phys(pa
);
2286 * Routine: pmap_collect
2288 * Garbage collects the physical map system for
2289 * pages which are no longer used.
2290 * Success need not be guaranteed -- that is, there
2291 * may well be pages which are not referenced, but
2292 * others may be collected.
2294 * Called by the pageout daemon when pages are scarce.
2300 register pt_entry_t
*pdp
, *ptp
;
2308 if (p
== kernel_pmap
)
2312 * Garbage collect map.
2314 PMAP_READ_LOCK(p
, spl
);
2316 for (pdp
= (pt_entry_t
*)p
->dirbase
;
2317 pdp
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)];
2320 if (*pdp
& INTEL_PTE_VALID
) {
2321 if(*pdp
& INTEL_PTE_REF
) {
2322 *pdp
&= ~INTEL_PTE_REF
;
2326 ptp
= pmap_pte(p
, pdetova(pdp
- (pt_entry_t
*)p
->dirbase
));
2327 eptp
= ptp
+ NPTEPG
;
2330 * If the pte page has any wired mappings, we cannot
2335 register pt_entry_t
*ptep
;
2336 for (ptep
= ptp
; ptep
< eptp
; ptep
++) {
2337 if (iswired(*ptep
)) {
2345 * Remove the virtual addresses mapped by this pte page.
2347 pmap_remove_range(p
,
2348 pdetova(pdp
- (pt_entry_t
*)p
->dirbase
),
2353 * Invalidate the page directory pointer.
2357 PMAP_READ_UNLOCK(p
, spl
);
2360 * And free the pte page itself.
2363 register vm_page_t m
;
2365 vm_object_lock(p
->pm_obj
);
2366 m
= vm_page_lookup(p
->pm_obj
,(vm_object_offset_t
)(pdp
- (pt_entry_t
*)&p
->dirbase
[0]));
2367 if (m
== VM_PAGE_NULL
)
2368 panic("pmap_collect: pte page not in object");
2369 vm_page_lock_queues();
2371 inuse_ptepages_count
--;
2372 vm_page_unlock_queues();
2373 vm_object_unlock(p
->pm_obj
);
2376 PMAP_READ_LOCK(p
, spl
);
2381 PMAP_UPDATE_TLBS(p
, VM_MIN_ADDRESS
, VM_MAX_ADDRESS
);
2382 PMAP_READ_UNLOCK(p
, spl
);
2388 * Routine: pmap_kernel
2390 * Returns the physical map handle for the kernel.
2396 return (kernel_pmap
);
2401 pmap_copy_page(src
, dst
)
2405 bcopy_phys((addr64_t
)i386_ptob(src
),
2406 (addr64_t
)i386_ptob(dst
),
2412 * Routine: pmap_pageable
2414 * Make the specified pages (by pmap, offset)
2415 * pageable (or not) as requested.
2417 * A page which is not pageable may not take
2418 * a fault; therefore, its page table entry
2419 * must remain valid for the duration.
2421 * This routine is merely advisory; pmap_enter
2422 * will specify that these pages are to be wired
2423 * down (or not) as appropriate.
2427 __unused pmap_t pmap
,
2428 __unused vm_offset_t start_addr
,
2429 __unused vm_offset_t end_addr
,
2430 __unused boolean_t pageable
)
2433 pmap
++; start_addr
++; end_addr
++; pageable
++;
2438 * Clear specified attribute bits.
2441 phys_attribute_clear(
2446 register pv_entry_t pv_e
;
2447 register pt_entry_t
*pte
;
2449 register pmap_t pmap
;
2453 assert(pn
!= vm_page_fictitious_addr
);
2454 if (!valid_page(pn
)) {
2456 * Not a managed page.
2462 * Lock the pmap system first, since we will be changing
2466 PMAP_WRITE_LOCK(spl
);
2467 phys
= i386_ptob(pn
);
2468 pai
= pa_index(phys
);
2469 pv_h
= pai_to_pvh(pai
);
2472 * Walk down PV list, clearing all modify or reference bits.
2473 * We do not have to lock the pv_list because we have
2474 * the entire pmap system locked.
2476 if (pv_h
->pmap
!= PMAP_NULL
) {
2478 * There are some mappings.
2480 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2484 * Lock the pmap to block pmap_extract and similar routines.
2486 simple_lock(&pmap
->lock
);
2489 register vm_offset_t va
;
2492 pte
= pmap_pte(pmap
, va
);
2496 * Consistency checks.
2498 assert(*pte
& INTEL_PTE_VALID
);
2499 /* assert(pte_to_phys(*pte) == phys); */
2503 * Clear modify or reference bits.
2507 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
2509 simple_unlock(&pmap
->lock
);
2514 pmap_phys_attributes
[pai
] &= ~bits
;
2516 PMAP_WRITE_UNLOCK(spl
);
2520 * Check specified attribute bits.
2523 phys_attribute_test(
2528 register pv_entry_t pv_e
;
2529 register pt_entry_t
*pte
;
2531 register pmap_t pmap
;
2535 assert(pn
!= vm_page_fictitious_addr
);
2536 if (!valid_page(pn
)) {
2538 * Not a managed page.
2544 * Lock the pmap system first, since we will be checking
2548 PMAP_WRITE_LOCK(spl
);
2549 phys
= i386_ptob(pn
);
2550 pai
= pa_index(phys
);
2551 pv_h
= pai_to_pvh(pai
);
2553 if (pmap_phys_attributes
[pai
] & bits
) {
2554 PMAP_WRITE_UNLOCK(spl
);
2559 * Walk down PV list, checking all mappings.
2560 * We do not have to lock the pv_list because we have
2561 * the entire pmap system locked.
2563 if (pv_h
->pmap
!= PMAP_NULL
) {
2565 * There are some mappings.
2567 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2571 * Lock the pmap to block pmap_extract and similar routines.
2573 simple_lock(&pmap
->lock
);
2576 register vm_offset_t va
;
2579 pte
= pmap_pte(pmap
, va
);
2583 * Consistency checks.
2585 assert(*pte
& INTEL_PTE_VALID
);
2586 /* assert(pte_to_phys(*pte) == phys); */
2591 * Check modify or reference bits.
2594 if (*pte
++ & bits
) {
2595 simple_unlock(&pmap
->lock
);
2596 PMAP_WRITE_UNLOCK(spl
);
2600 simple_unlock(&pmap
->lock
);
2603 PMAP_WRITE_UNLOCK(spl
);
2608 * Set specified attribute bits.
2618 assert(pn
!= vm_page_fictitious_addr
);
2619 if (!valid_page(pn
)) {
2621 * Not a managed page.
2627 * Lock the pmap system and set the requested bits in
2628 * the phys attributes array. Don't need to bother with
2629 * ptes because the test routine looks here first.
2631 phys
= i386_ptob(pn
);
2632 PMAP_WRITE_LOCK(spl
);
2633 pmap_phys_attributes
[pa_index(phys
)] |= bits
;
2634 PMAP_WRITE_UNLOCK(spl
);
2638 * Set the modify bit on the specified physical page.
2641 void pmap_set_modify(
2644 phys_attribute_set(pn
, PHYS_MODIFIED
);
2648 * Clear the modify bits on the specified physical page.
2655 phys_attribute_clear(pn
, PHYS_MODIFIED
);
2661 * Return whether or not the specified physical page is modified
2662 * by any physical maps.
2669 return (phys_attribute_test(pn
, PHYS_MODIFIED
));
2673 * pmap_clear_reference:
2675 * Clear the reference bit on the specified physical page.
2679 pmap_clear_reference(
2682 phys_attribute_clear(pn
, PHYS_REFERENCED
);
2686 pmap_set_reference(ppnum_t pn
)
2688 phys_attribute_set(pn
, PHYS_REFERENCED
);
2692 * pmap_is_referenced:
2694 * Return whether or not the specified physical page is referenced
2695 * by any physical maps.
2702 return (phys_attribute_test(pn
, PHYS_REFERENCED
));
2706 * pmap_get_refmod(phys)
2707 * returns the referenced and modified bits of the specified
2711 pmap_get_refmod(ppnum_t pa
)
2713 return ( ((phys_attribute_test(pa
, PHYS_MODIFIED
))? VM_MEM_MODIFIED
: 0)
2714 | ((phys_attribute_test(pa
, PHYS_REFERENCED
))? VM_MEM_REFERENCED
: 0));
2718 * pmap_clear_refmod(phys, mask)
2719 * clears the referenced and modified bits as specified by the mask
2720 * of the specified physical page.
2723 pmap_clear_refmod(ppnum_t pa
, unsigned int mask
)
2725 unsigned int x86Mask
;
2727 x86Mask
= ( ((mask
& VM_MEM_MODIFIED
)? PHYS_MODIFIED
: 0)
2728 | ((mask
& VM_MEM_REFERENCED
)? PHYS_REFERENCED
: 0));
2729 phys_attribute_clear(pa
, x86Mask
);
2733 * Set the modify bit on the specified range
2734 * of this map as requested.
2736 * This optimization stands only if each time the dirty bit
2737 * in vm_page_t is tested, it is also tested in the pmap.
2746 register pt_entry_t
*pde
;
2747 register pt_entry_t
*spte
, *epte
;
2749 vm_offset_t orig_s
= s
;
2751 if (map
== PMAP_NULL
)
2754 PMAP_READ_LOCK(map
, spl
);
2756 pde
= pmap_pde(map
, s
);
2757 while (s
&& s
< e
) {
2758 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
2761 if (*pde
& INTEL_PTE_VALID
) {
2762 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
2764 spte
= &spte
[ptenum(s
)];
2765 epte
= &spte
[intel_btop(l
-s
)];
2767 epte
= &spte
[intel_btop(PDE_MAPPED_SIZE
)];
2768 spte
= &spte
[ptenum(s
)];
2770 while (spte
< epte
) {
2771 if (*spte
& INTEL_PTE_VALID
) {
2772 *spte
|= (INTEL_PTE_MOD
| INTEL_PTE_WRITE
);
2780 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
2781 PMAP_READ_UNLOCK(map
, spl
);
2786 invalidate_icache(__unused vm_offset_t addr
,
2787 __unused
unsigned cnt
,
2793 flush_dcache(__unused vm_offset_t addr
,
2794 __unused
unsigned count
,
/*
 *	TLB Coherence Code (TLB "shootdown" code)
 *
 *	Threads that belong to the same task share the same address space and
 *	hence share a pmap.  However, they may run on distinct cpus and thus
 *	have distinct TLBs that cache page table entries.  In order to guarantee
 *	the TLBs are consistent, whenever a pmap is changed, all threads that
 *	are active in that pmap must have their TLB updated.  To keep track of
 *	this information, the set of cpus that are currently using a pmap is
 *	maintained within each pmap structure (cpus_using).  Pmap_activate() and
 *	pmap_deactivate() add and remove, respectively, a cpu from this set.
 *	Since the TLBs are not addressable over the bus, each processor must
 *	flush its own TLB; a processor that needs to invalidate another TLB
 *	needs to interrupt the processor that owns that TLB to signal the
 *	update.
 *
 *	Whenever a pmap is updated, the lock on that pmap is locked, and all
 *	cpus using the pmap are signaled to invalidate.  All threads that need
 *	to activate a pmap must wait for the lock to clear to await any updates
 *	in progress before using the pmap.  They must ACQUIRE the lock to add
 *	their cpu to the cpus_using set.  An implicit assumption made
 *	throughout the TLB code is that all kernel code that runs at or higher
 *	than splvm blocks out update interrupts, and that such code does not
 *	touch pageable pages.
 *
 *	A shootdown interrupt serves another function besides signaling a
 *	processor to invalidate.  The interrupt routine (pmap_update_interrupt)
 *	waits for both the pmap lock and the kernel pmap lock to clear,
 *	preventing user code from making implicit pmap updates while the
 *	sending processor is performing its update.  (This could happen via a
 *	user data write reference that turns on the modify bit in the page
 *	table.)  It must wait for any kernel updates that may have started
 *	concurrently with a user pmap update because the IPC code changes
 *	mappings.
 *	Spinning on the VALUES of the locks is sufficient (rather than
 *	having to acquire the locks) because any updates that occur subsequent
 *	to finding the lock unlocked will be signaled via another interrupt.
 *	(This assumes the interrupt is cleared before the low level interrupt
 *	code calls pmap_update_interrupt()).
 *
 *	The signaling processor must wait for any implicit updates in progress
 *	to terminate before continuing with its update.  Thus it must wait for
 *	an acknowledgement of the interrupt from each processor for which such
 *	references could be made.  For maintaining this information, a set
 *	cpus_active is used.  A cpu is in this set if and only if it can
 *	use a pmap.  When pmap_update_interrupt() is entered, a cpu is removed
 *	from this set; when all such cpus are removed, it is safe to update.
 *
 *	Before attempting to acquire the update lock on a pmap, a cpu (A) must
 *	be at least at the priority of the interprocessor interrupt
 *	(splip <= splvm).  Otherwise, A could grab a lock and be interrupted by
 *	a kernel update; it would spin forever in pmap_update_interrupt()
 *	trying to acquire the user pmap lock it had already acquired.
 *	Furthermore A must remove itself from cpus_active.  Otherwise, another
 *	cpu holding the lock (B) could be in the process of sending an update
 *	signal to A, and thus be waiting for A to remove itself from
 *	cpus_active.  If A is spinning on the lock at raised priority this will
 *	never happen and a deadlock will result.
 */
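
/*
 * Illustrative sketch (editorial addition, not part of the original code):
 * a minimal model of the shootdown handshake described above, using
 * hypothetical flat variables (sketch_*) in place of the real pmap, lock,
 * and per-cpu structures.  The requester signals the cpus using the pmap
 * and waits for them to leave the hypothetical active set; the responder
 * leaves the set, spins on the lock VALUES only, flushes, and rejoins.
 * Compiled out; kept purely as documentation.
 */
#if 0	/* documentation sketch only */
static volatile unsigned int	sketch_cpus_active;		/* hypothetical cpu bitmask */
static volatile int		sketch_pmap_lock;		/* hypothetical lock word   */
static volatile int		sketch_kernel_pmap_lock;	/* hypothetical lock word   */

static void
sketch_shootdown_responder(int my_cpu)
{
	/* 1. Announce that this cpu can no longer make implicit updates. */
	sketch_cpus_active &= ~(1u << my_cpu);

	/* 2. Spin on the lock values only; acquiring is unnecessary because
	 *    any update that starts later will raise another interrupt. */
	while (sketch_pmap_lock || sketch_kernel_pmap_lock)
		continue;

	/* 3. Flush the local TLB (the real code walks the per-cpu update
	 *    list in process_pmap_updates() below). */

	/* 4. Rejoin the active set. */
	sketch_cpus_active |= (1u << my_cpu);
}

static void
sketch_shootdown_requester(unsigned int cpus_using, int my_cpu)
{
	/* With the (hypothetical) pmap lock held, interrupt every other cpu
	 * using the pmap, then wait until none of them is still active;
	 * the interrupt send stands in for i386_signal_cpu(). */
	unsigned int others = cpus_using & ~(1u << my_cpu);

	while (sketch_cpus_active & others)
		continue;

	/* Now no responder can reference the mappings being changed. */
}
#endif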
/*
 *	Signal another CPU that it must flush its TLB
 */
void
signal_cpus(
	cpu_set		use_list,
	pmap_t		pmap,
	vm_offset_t	start_addr,
	vm_offset_t	end_addr)
{
	register int			which_cpu, j;
	register pmap_update_list_t	update_list_p;

	while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
		which_cpu -= 1;		/* convert to 0 origin */

		update_list_p = cpu_update_list(which_cpu);
		simple_lock(&update_list_p->lock);

		j = update_list_p->count;
		if (j >= UPDATE_LIST_SIZE) {
			/*
			 *	list overflowed.  Change last item to
			 *	indicate overflow.
			 */
			update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
			update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
			update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
		} else {
			update_list_p->item[j].pmap  = pmap;
			update_list_p->item[j].start = start_addr;
			update_list_p->item[j].end   = end_addr;
			update_list_p->count = j + 1;
		}
		cpu_update_needed(which_cpu) = TRUE;
		simple_unlock(&update_list_p->lock);

		/* if it's the kernel pmap, ignore cpus_idle */
		if (((cpus_idle & (1 << which_cpu)) == 0) ||
		    (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
			i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);

		use_list &= ~(1 << which_cpu);
	}
}
void
process_pmap_updates(
	register pmap_t		my_pmap)
{
	register int			my_cpu;
	register pmap_update_list_t	update_list_p;
	register int			j;
	register pmap_t			pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();
	update_list_p = cpu_update_list(my_cpu);
	simple_lock(&update_list_p->lock);

	for (j = 0; j < update_list_p->count; j++) {
		pmap = update_list_p->item[j].pmap;
		if (pmap == my_pmap ||
		    pmap == kernel_pmap) {

			if (pmap->ref_count <= 0) {
				PMAP_CPU_CLR(pmap, my_cpu);
				PMAP_REAL(my_cpu) = kernel_pmap;
#ifdef PAE
				set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
#else
				set_cr3((unsigned int)kernel_pmap->pdirbase);
#endif
			} else
				INVALIDATE_TLB(pmap,
					       update_list_p->item[j].start,
					       update_list_p->item[j].end);
		}
	}
	update_list_p->count = 0;
	cpu_update_needed(my_cpu) = FALSE;
	simple_unlock(&update_list_p->lock);
	mp_enable_preemption();
}
/*
 *	Interrupt routine for TBIA requested from other processor.
 *	This routine can also be called at any interrupt time if
 *	the cpu was idle.  Some driver interrupt routines might access
 *	newly allocated vm.  (This is the case for hd.)
 */
void
pmap_update_interrupt(void)
{
	register int		my_cpu;
	spl_t			s;
	register pmap_t		my_pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();

	/*
	 *	Raise spl to splvm (above splip) to block out pmap_extract
	 *	from IO code (which would put this cpu back in the active
	 *	set).
	 */
	s = splhigh();

	my_pmap = PMAP_REAL(my_cpu);

	if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
		my_pmap = kernel_pmap;

	do {
		/*
		 *	Indicate that we're not using either user or kernel
		 *	pmap.
		 */
		i_bit_clear(my_cpu, &cpus_active);

		/*
		 *	Wait for any pmap updates in progress, on either user
		 *	or kernel pmap.
		 */
		while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
		       *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
			LOOP_CHECK("pmap_update_interrupt", my_pmap);
		}

		process_pmap_updates(my_pmap);

		i_bit_set(my_cpu, &cpus_active);

	} while (cpu_update_needed(my_cpu));

	splx(s);
	mp_enable_preemption();
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
		printf("modified, ");
	if (attr & PHYS_REFERENCED)
		printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
		printf(" mapped at\n");
	else
		printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
		if (pv_h->pmap)
			printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
	vm_offset_t	vaddr)
{
	db_printf("0x%x", kvtophys(vaddr));
}

/*
 *	Walk the page tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	int		x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
		dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
		db_printf("need a dirbase...\n");
		return;
	}
	dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDEPG; y++, pdep++) {
		if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
			continue;
		}
		pdecnt++;
		ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
		db_printf("dir[%4d]: 0x%x\n", y, *pdep);
		for (x = 0; x < NPTEPG; x++, ptep++) {
			if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
				continue;
			}
			ptecnt++;
			db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
				  x,
				  *ptep,
				  (y << 22) | (x << 12),
				  *ptep & ~INTEL_OFFMASK);
		}
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}

#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */
#ifdef MACH_BSD
/*
 *	BSD support routine to reassign virtual addresses.
 */
void
pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
{
	spl_t		spl;
	pt_entry_t	*pte, saved_pte;

	/* Lock the kernel map */
	PMAP_READ_LOCK(kernel_pmap, spl);

	while (size > 0) {
		pte = pmap_pte(kernel_pmap, from);
		if (pte == NULL)
			panic("pmap_pagemove from pte NULL");
		saved_pte = *pte;
		PMAP_READ_UNLOCK(kernel_pmap, spl);

		pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
			   VM_PROT_READ | VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);

		pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from + PAGE_SIZE));

		PMAP_READ_LOCK(kernel_pmap, spl);
		pte = pmap_pte(kernel_pmap, to);
		if (pte == NULL)
			panic("pmap_pagemove 'to' pte NULL");

		*pte = saved_pte;

		from += PAGE_SIZE;
		to   += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/* Get the processors to update the TLBs */
	PMAP_UPDATE_TLBS(kernel_pmap, from, from + size);
	PMAP_UPDATE_TLBS(kernel_pmap, to, to + size);

	PMAP_READ_UNLOCK(kernel_pmap, spl);
}
#endif /* MACH_BSD */
/* temporary workaround */
boolean_t
coredumpok(vm_map_t map, vm_offset_t va)
{
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) !=
		(INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
}
/*
 *	grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
#if GROW_KERNEL_FUNCTION_IMPLEMENTED
	struct pmap	*pmap;
	vm_page_t	nkpg;
	ppnum_t		ppn;
	vm_offset_t	ptppaddr;
	pd_entry_t	newpdir = 0;

	/*
	 * Serialize.
	 * Losers return to try again until the winner completes the work.
	 */
	if (kptobj == 0)
		panic("growkernel 0");
	if (!vm_object_lock_try(kptobj)) {
		return;
	}

	vm_page_lock_queues();

	/*
	 * If this is the first time thru, locate the end of the
	 * kernel page table entries and set nkpt to the current
	 * number of kernel page table pages.
	 */
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;

		while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG)
				& ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
		}
	}

	/*
	 * Now allocate and map the required number of page tables.
	 */
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG)
				& ~(PAGE_SIZE * NPTEPG - 1);
			continue;	/* someone already filled this one */
		}

		nkpg = vm_page_alloc(kptobj, nkpt);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");
		nkpt++;

		ppn = nkpg->phys_page;
		pmap_zero_page(ppn);
		ptppaddr = i386_ptob(ppn);
		newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
					INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
		pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;

		/* propagate the new kernel pde to every other pmap */
		simple_lock(&free_pmap_lock);
		for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
		     pmap != kernel_pmap;
		     pmap = (struct pmap *)pmap->pmap_link.next) {
			*pmap_pde(pmap, kernel_vm_end) = newpdir;
		}
		simple_unlock(&free_pmap_lock);
	}

	vm_page_unlock_queues();
	vm_object_unlock(kptobj);
#endif /* GROW_KERNEL_FUNCTION_IMPLEMENTED */
}
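
/*
 * Illustrative sketch (editorial addition, not part of the original code):
 * the "single winner, losers retry later" serialization pattern used above,
 * shown with a hypothetical try-lock and work counter instead of the real
 * kptobj object lock.  A caller that fails the try-lock simply returns; it
 * will be called again later and will find the work already done.  The
 * names and the __sync builtins are illustrative only.  Compiled out.
 */
#if 0	/* documentation sketch only */
static int		sketch_grow_lock;	/* hypothetical lock word: 0 = free */
static volatile int	sketch_grown_to;	/* hypothetical high-water mark     */

static void
sketch_grow(int target)
{
	/* hypothetical atomic test-and-set standing in for vm_object_lock_try() */
	if (__sync_lock_test_and_set(&sketch_grow_lock, 1) != 0)
		return;			/* loser: try again on a later call */

	while (sketch_grown_to < target)
		sketch_grown_to++;	/* winner does all outstanding work */

	__sync_lock_release(&sketch_grow_lock);
}
#endif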
pt_entry_t *
pmap_mapgetpte(vm_map_t map, vm_offset_t v)
{
	return (pmap_pte(map->pmap, v));
}

boolean_t
phys_page_exists(ppnum_t pn)
{
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return (TRUE);
	phys = (pmap_paddr_t) i386_ptob(pn);
	if (!pmap_valid_page(pn))
		return (FALSE);

	return (TRUE);
}
void
mapping_free_prime()
{
	int		i;
	pv_entry_t	pv_e;

	/* prime the pv entry free list */
	for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		PV_FREE(pv_e);
	}
}

void
mapping_adjust()
{
	pv_entry_t	pv_e;
	int		i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}
	/* XXX  rethink best way to do locking here */
	if (pv_free_count < PV_LOW_WATER_MARK) {
		for (i = 0; i < PV_ALLOC_CHUNK; i++) {
			pv_e = (pv_entry_t) zalloc(pv_list_zone);
			PV_FREE(pv_e);
		}
	}
}
void
pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;

	for (i = 0; i < cnt; i++) {
		opte = pmap_pte(kernel_pmap, kernel_commpage);
		if (0 == opte)
			panic("kernel_commpage");
		npte = pmap_pte(kernel_pmap, user_commpage);
		if (0 == npte)
			panic("user_commpage");
		pte = *opte | INTEL_PTE_USER | INTEL_PTE_GLOBAL;
		pte &= ~INTEL_PTE_WRITE;	/* ensure read only */
		WRITE_PTE_FAST(npte, pte);
		kernel_commpage += INTEL_PGBYTES;
		user_commpage += INTEL_PGBYTES;
	}
}
static cpu_pmap_t		cpu_pmap_master;
static struct pmap_update_list	cpu_update_list_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	pmap_update_list_t	up;
	vm_offset_t		address;
	vm_map_entry_t		entry;

	if (is_boot_cpu) {
		cp = &cpu_pmap_master;
		up = &cpu_update_list_master;
	} else {
		/*
		 * The per-cpu pmap data structure itself.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			return NULL;
		}
		bzero((void *)cp, sizeof(cpu_pmap_t));

		/*
		 * The tlb flush update list.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &up, sizeof(*up));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			pmap_cpu_free(cp);
			return NULL;
		}

		/*
		 * The temporary windows used for copy/zero - see loose_ends.c
		 */
		for (i = 0; i < PMAP_NWINDOWS; i++) {
			ret = vm_map_find_space(kernel_map,
						&address, PAGE_SIZE, 0, &entry);
			if (ret != KERN_SUCCESS) {
				printf("pmap_cpu_alloc() "
				       "vm_map_find_space ret=%d\n", ret);
				pmap_cpu_free(cp);
				return NULL;
			}
			vm_map_unlock(kernel_map);

			cp->mapwindow[i].prv_CADDR = (caddr_t) address;
			cp->mapwindow[i].prv_CMAP = vtopte(address);
			* (int *) cp->mapwindow[i].prv_CMAP = 0;

			kprintf("pmap_cpu_alloc() "
				"window=%d CADDR=0x%x CMAP=0x%x\n",
				i, address, vtopte(address));
		}
	}

	/*
	 * Set up the pmap request list.
	 */
	cp->update_list = up;
	simple_lock_init(&up->lock, 0);
	up->count = 0;

	return cp;
}
void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
		if (cp->update_list != NULL)
			kfree((void *) cp->update_list,
			      sizeof(*cp->update_list));
		kfree((void *) cp, sizeof(cpu_pmap_t));
	}
}