/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License.  The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include <mach_ldebug.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/mp_slave_boot.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <kern/xpr.h>

#include <vm/vm_protos.h>
/*
 * Forward declarations for internal functions.
 */
extern void	pmap_remove_range(
			pmap_t		pmap,
			vm_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		pn,
			int		bits);

boolean_t	phys_attribute_test(
			ppnum_t		pn,
			int		bits);

void		phys_attribute_set(
			ppnum_t		pn,
			int		bits);

void		pmap_growkernel(
			vm_offset_t	addr);

void		pmap_set_reference(
			ppnum_t		pn);

pt_entry_t	*pmap_mapgetpte(
			vm_map_t	map,
			vm_offset_t	v);

boolean_t	phys_page_exists(
			ppnum_t		pn);

#ifndef	set_dirbase
void		set_dirbase(vm_offset_t	dirbase);
#endif	/* set_dirbase */
#define iswired(pte)	((pte) & INTEL_PTE_WIRED)

#define	WRITE_PTE(pte_p, pte_entry)		*(pte_p) = (pte_entry);
#define	WRITE_PTE_FAST(pte_p, pte_entry)	*(pte_p) = (pte_entry);

#define value_64bit(value)	((value) & 0xFFFFFFFF00000000LL)
#define low32(x)		((unsigned int)((x) & 0x00000000ffffffffLL))
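/*
 * Example (illustrative sketch, guarded out of the build): how the 64-bit
 * helpers above are used by the addr64_t entry points later in this file
 * (e.g. pmap_remove, pmap_find_phys).  This pmap handles only 32-bit
 * virtual addresses, so the high word must be zero before truncating.
 */
#if 0	/* illustrative only, not original code */
static void
example_addr64_check(addr64_t va64)
{
	vm_offset_t va32;

	if (value_64bit(va64))			/* any bits set above 4GB? */
		panic("example_addr64_check: 64 bit value");
	va32 = (vm_offset_t) low32(va64);	/* now safe to truncate */
	(void) va32;
}
#endif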
/*
 *	Private data structures.
 */

/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_entry_t; the list is the pv_table.
 */

typedef struct pv_entry {
	struct pv_entry	*next;		/* next pv_entry */
	pmap_t		pmap;		/* pmap where mapping lies */
	vm_offset_t	va;		/* virtual address for mapping */
} *pv_entry_t;

#define PV_ENTRY_NULL	((pv_entry_t) 0)

pv_entry_t	pv_head_table;		/* array of entries, one per page */
/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_list_zone if it becomes empty.
 */
pv_entry_t	pv_free_list;		/* free list at SPLVM */
decl_simple_lock_data(,pv_free_list_lock)
int pv_free_count = 0;
#define PV_LOW_WATER_MARK 5000
#define PV_ALLOC_CHUNK 2000
thread_call_t  mapping_adjust_call;
static thread_call_data_t  mapping_adjust_call_data;
int mappingrecurse = 0;
#define	PV_ALLOC(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	if ((pv_e = pv_free_list) != 0) { \
		pv_free_list = pv_e->next; \
		pv_free_count--; \
		if (pv_free_count < PV_LOW_WATER_MARK) \
			if (hw_compare_and_store(0, 1, &mappingrecurse)) \
				thread_call_enter(mapping_adjust_call); \
	} \
	simple_unlock(&pv_free_list_lock); \
}

#define	PV_FREE(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	pv_e->next = pv_free_list; \
	pv_free_list = pv_e; \
	pv_free_count++; \
	simple_unlock(&pv_free_list_lock); \
}
zone_t		pv_list_zone;		/* zone of pv_entry structures */

static zone_t	pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
pmap_paddr_t	vm_first_phys = (pmap_paddr_t) 0;
pmap_paddr_t	vm_last_phys  = (pmap_paddr_t) 0;
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

pmap_paddr_t	kernel_vm_end = (pmap_paddr_t)0;

#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
#if GROW_KERNEL_FUNCTION_IMPLEMENTED	/* not needed until growing kernel pmap */
static struct vm_object kptobj_object_store;
static vm_object_t kptobj;
#endif
/*
 *	Index into pv_head table, its lock bits, and the modify/reference
 *	bits starting at vm_first_phys.
 */

#define pa_index(pa)	(i386_btop(pa - vm_first_phys))

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char	*pmap_phys_attributes;
/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD		/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF		/* page referenced */
#define PHYS_NCACHE	INTEL_PTE_NCACHE

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
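/*
 * Example (illustrative sketch, guarded out of the build): the lookup
 * pattern used throughout the routines below to go from a managed
 * physical address to its pv_head_table entry and attribute byte.
 */
#if 0	/* illustrative only, not original code */
static void
example_phys_lookup(pmap_paddr_t phys)
{
	int		pai;
	pv_entry_t	pv_h;

	pai  = pa_index(phys);		/* page index relative to vm_first_phys */
	pv_h = pai_to_pvh(pai);		/* head of the mapping list for that page */
	if (pmap_phys_attributes[pai] & PHYS_MODIFIED) {
		/* the page was written through at least one mapping */
	}
	(void) pv_h;
}
#endif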
/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols:
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *	To protect against deadlock between these two cases, the pmap_lock
 *	is used.  There are three different locking protocols as a result:
 *
 *	1)  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
 *	    the pmap.
 *
 *	2)  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
 *	    lock on the pmap_lock (shared read), then lock the pmap
 *	    and finally the pv_lists as needed [i.e. pmap lock before
 *	    pv_list lock.]
 *
 *	3)  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
 *	    Get a write lock on the pmap_lock (exclusive write); this
 *	    also guarantees exclusive access to the pv_lists.  Lock the
 *	    pmaps as needed.
 *
 *	At no time may any routine hold more than one pmap lock or more than
 *	one pv_list lock.  Because interrupt level routines can allocate
 *	mbufs and cause pmap_enter's, the pmap_lock and the lock on the
 *	kernel_pmap can only be held at splhigh.
 */

/*
 *	We raise the interrupt level to splvm, to block interprocessor
 *	interrupts during pmap operations.  We must take the CPU out of
 *	the cpus_active set while interrupts are blocked.
 */
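/*
 * Example (illustrative sketch, guarded out of the build): the "protocol 2"
 * sequence described above, as used by pmap-based operations such as
 * pmap_enter and pmap_remove.  The PMAP_READ_LOCK/PMAP_READ_UNLOCK macros
 * defined below bundle the spl raise, the shared pmap_system_lock and the
 * per-pmap simple lock.
 */
#if 0	/* illustrative only, not original code */
static void
example_pmap_based_op(pmap_t pmap)
{
	spl_t	spl;

	PMAP_READ_LOCK(pmap, spl);	/* splvm + shared system lock + pmap lock */
	/* ... operate on the pmap, taking pv_list locks as needed ... */
	PMAP_READ_UNLOCK(pmap, spl);	/* release in reverse order, restore spl */
}
#endif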
#define SPLVM(spl)	{ \
	spl = splhigh(); \
	mp_disable_preemption(); \
	i_bit_clear(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
}

#define SPLX(spl)	{ \
	mp_disable_preemption(); \
	i_bit_set(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
	splx(spl); \
}
/*
 *	Lock on pmap system
 */
lock_t	pmap_system_lock;

#define PMAP_READ_LOCK(pmap, spl) {	\
	SPLVM(spl);			\
	lock_read(&pmap_system_lock);	\
	simple_lock(&(pmap)->lock);	\
}

#define PMAP_WRITE_LOCK(spl) {		\
	SPLVM(spl);			\
	lock_write(&pmap_system_lock);	\
}

#define PMAP_READ_UNLOCK(pmap, spl) {		\
	simple_unlock(&(pmap)->lock);		\
	lock_read_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_UNLOCK(spl) {		\
	lock_write_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_TO_READ_LOCK(pmap) {		\
	simple_lock(&(pmap)->lock);		\
	lock_write_to_read(&pmap_system_lock);	\
}

#define LOCK_PVH(index)		lock_pvh_pai(index)

#define UNLOCK_PVH(index)	unlock_pvh_pai(index)
#if	USLOCK_DEBUG
extern int	max_lock_loops;
extern int	disableSerialOuput;
#define LOOP_VAR							\
	unsigned int	loop_count;					\
	loop_count = disableSerialOuput ? max_lock_loops		\
					: max_lock_loops*100
#define LOOP_CHECK(msg, pmap)						\
	if (--loop_count == 0) {					\
		mp_disable_preemption();				\
		kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n",	\
			  msg, cpu_number(), pmap, cpus_active);	\
		Debugger("deadlock detection");				\
		mp_enable_preemption();					\
		loop_count = max_lock_loops;				\
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */
#define PMAP_UPDATE_TLBS(pmap, s, e)					\
{									\
	cpu_set	cpu_mask;						\
	cpu_set	users;							\
									\
	mp_disable_preemption();					\
	cpu_mask = 1 << cpu_number();					\
	/* Since the pmap is locked, other updates are locked */	\
	/* out, and any pmap_activate has finished. */			\
	/* find other cpus using the pmap */				\
	users = (pmap)->cpus_using & ~cpu_mask;				\
	if (users) {							\
	    LOOP_VAR;							\
	    /* signal them, and wait for them to finish */		\
	    /* using the pmap */					\
	    signal_cpus(users, (pmap), (s), (e));			\
	    while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) {	\
		LOOP_CHECK("PMAP_UPDATE_TLBS", pmap);			\
		cpu_pause();						\
	    }								\
	}								\
	/* invalidate our own TLB if pmap is in use */			\
	if ((pmap)->cpus_using & cpu_mask) {				\
	    INVALIDATE_TLB((pmap), (s), (e));				\
	}								\
	mp_enable_preemption();						\
}

#define MAX_TBIS_SIZE	32		/* > this -> TBIA */ /* XXX */

#define INVALIDATE_TLB(m, s, e) {	\
	flush_tlb();			\
}
/*
 *	Structures to keep track of pending TLB invalidations
 */

#define UPDATE_LIST_SIZE	4

struct pmap_update_item {
	pmap_t		pmap;		/* pmap to invalidate */
	vm_offset_t	start;		/* start address to invalidate */
	vm_offset_t	end;		/* end address to invalidate */
};

typedef	struct pmap_update_item	*pmap_update_item_t;

/*
 *	List of pmap updates.  If the list overflows,
 *	the last entry is changed to invalidate all.
 */
struct pmap_update_list {
	decl_simple_lock_data(,lock)
	int			count;
	struct pmap_update_item	item[UPDATE_LIST_SIZE];
};

typedef	struct pmap_update_list	*pmap_update_list_t;

extern void signal_cpus(
			cpu_set		use_list,
			pmap_t		pmap,
			vm_offset_t	start,
			vm_offset_t	end);

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))
#define pmap_in_use(pmap, cpu)	(((pmap)->cpus_using & (1 << (cpu))) != 0)

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

decl_simple_lock_data(,free_pmap_lock)

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;	/* debugging */

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern	vm_offset_t	hole_start, hole_end;
pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

#if	DEBUG_ALIAS
#define PMAP_ALIAS_MAX 32
struct pmap_alias {
	vm_offset_t	rpc;
	pmap_t		pmap;
	vm_offset_t	va;
	int		cookie;
#define PMAP_ALIAS_COOKIE 0xdeadbeef
} pmap_aliasbuf[PMAP_ALIAS_MAX];
int pmap_alias_index = 0;
extern vm_offset_t get_rpc();

#endif	/* DEBUG_ALIAS */
#define	pmap_pde(m, v)	(&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
#define pdir_pde(d, v)	(d[(vm_offset_t)(v) >> PDESHIFT])

static inline int
pmap_is_current(pmap_t pmap)
{
	return (pmap == kernel_pmap ||
		(pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
}
/*
 * return address of mapped pte for vaddr va in pmap pmap.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;

	pde = pmap_pde(pmap, va);
	if (*pde != 0) {
		if (pmap_is_current(pmap))
			return (vtopte(va));
		newpf = *pde & PG_FRAME;
		if (((*CM4) & PG_FRAME) != newpf) {
			*CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
			invlpg((u_int)CA4);
		}
		return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
	}
	return (0);
}
587 #define DEBUG_PTE_PAGE 0
594 register pt_entry_t
*pte
, *epte
;
597 /* check the use and wired counts */
598 if (ptep
== PTE_PAGE_NULL
)
600 pte
= pmap_pte(ptep
->pmap
, ptep
->va
);
601 epte
= pte
+ INTEL_PGBYTES
/sizeof(pt_entry_t
);
613 if (ctu
!= ptep
->use_count
|| ctw
!= ptep
->wired_count
) {
614 printf("use %d wired %d - actual use %d wired %d\n",
615 ptep
->use_count
, ptep
->wired_count
, ctu
, ctw
);
619 #endif /* DEBUG_PTE_PAGE */
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	register vm_prot_t	prot)
{
	register int	ps;

	ps = PAGE_SIZE;
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap,
			virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
		virt += ps;
		start_addr += ps;
	}
	return(virt);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys) (i.e., devices).
 *	Otherwise like pmap_map.  Sets no-cache, A, D.
 */
vm_offset_t
pmap_map_bd(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	vm_prot_t		prot)
{
	register pt_entry_t	template;
	register pt_entry_t	*pte;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_NCACHE
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	/* XXX move pmap_pte out of loop, once one pte mapped, all are */
	while (start_addr < end_addr) {
		pte = pmap_pte(kernel_pmap, virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		WRITE_PTE_FAST(pte, template)
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}

	flush_tlb();
	return(virt);
}
extern	char		*first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;
extern	vm_offset_t	etext;
extern	void		*sectHIBB;
extern	int		sectSizeHIB;
/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 *
 *	Parameters:
 *	load_start:	PA where kernel was loaded
 *	avail_start	PA of first available physical page -
 *			   after kernel page tables
 *	avail_end	PA of last available physical page
 *	virtual_avail	VA of first available page -
 *			   after kernel page tables
 *	virtual_end	VA of last available page -
 *			   end of kernel address space
 *
 *	&start_text	start of kernel text
 *	&etext		end of kernel text
 */
717 __unused vm_offset_t load_start
)
722 int wpkernel
, boot_arg
;
724 vm_last_addr
= VM_MAX_KERNEL_ADDRESS
; /* Set the highest address
728 * The kernel's pmap is statically allocated so we don't
729 * have to use pmap_create, which is unlikely to work
730 * correctly at this part of the boot sequence.
733 kernel_pmap
= &kernel_pmap_store
;
735 kernel_pmap
->pmap_link
.next
= (queue_t
)kernel_pmap
; /* Set up anchor forward */
736 kernel_pmap
->pmap_link
.prev
= (queue_t
)kernel_pmap
; /* Set up anchor reverse */
738 kernel_pmap
->ref_count
= 1;
739 kernel_pmap
->pm_obj
= (vm_object_t
) NULL
;
740 kernel_pmap
->dirbase
= (pd_entry_t
*)((unsigned int)IdlePTD
| KERNBASE
);
741 kernel_pmap
->pdirbase
= (pd_entry_t
*)IdlePTD
;
743 kernel_pmap
->pm_pdpt
= (pd_entry_t
*)((unsigned int)IdlePDPT
| KERNBASE
);
744 kernel_pmap
->pm_ppdpt
= (vm_offset_t
)IdlePDPT
;
747 va
= (vm_offset_t
)kernel_pmap
->dirbase
;
748 /* setup self referential mapping(s) */
749 for (i
= 0; i
< NPGPTD
; i
++ ) {
751 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
752 * (pd_entry_t
*) (kernel_pmap
->dirbase
+ PTDPTDI
+ i
) =
753 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
754 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
756 kernel_pmap
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
762 virtual_avail
= (vm_offset_t
)VADDR(KPTDI
,0) + (vm_offset_t
)first_avail
;
763 virtual_end
= (vm_offset_t
)(VM_MAX_KERNEL_ADDRESS
);
	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
773 pte
= (pt_entry_t
*) pmap_pte(kernel_pmap
, va
);
776 * CMAP1/CMAP2 are used for zeroing and copying pages.
777 * CMAP3 is used for ml_phys_read/write.
779 SYSMAP(caddr_t
, CM1
, CA1
, 1)
780 * (pt_entry_t
*) CM1
= 0;
781 SYSMAP(caddr_t
, CM2
, CA2
, 1)
782 * (pt_entry_t
*) CM2
= 0;
783 SYSMAP(caddr_t
, CM3
, CA3
, 1)
784 * (pt_entry_t
*) CM3
= 0;
786 /* used by pmap_pte */
787 SYSMAP(caddr_t
, CM4
, CA4
, 1)
788 * (pt_entry_t
*) CM4
= 0;
	/* DMAPs used by the debugger */
791 SYSMAP(caddr_t
, DMAP1
, DADDR1
, 1);
792 SYSMAP(caddr_t
, DMAP2
, DADDR2
, 1); /* XXX temporary - can remove */
795 lock_init(&pmap_system_lock
,
796 FALSE
, /* NOT a sleep lock */
802 if (PE_parse_boot_arg("debug", &boot_arg
)) {
803 if (boot_arg
& DB_PRT
) wpkernel
= 0;
804 if (boot_arg
& DB_NMI
) wpkernel
= 0;
807 /* remap kernel text readonly if not debugging or kprintfing */
813 for (myva
= i386_round_page(VM_MIN_KERNEL_ADDRESS
+ MP_BOOT
+ MP_BOOTSTACK
); myva
< etext
; myva
+= PAGE_SIZE
) {
814 if (myva
>= (vm_offset_t
)sectHIBB
&& myva
< ((vm_offset_t
)sectHIBB
+ sectSizeHIB
))
816 ptep
= pmap_pte(kernel_pmap
, myva
);
818 *ptep
&= ~INTEL_PTE_RW
;
823 simple_lock_init(&kernel_pmap
->lock
, 0);
824 simple_lock_init(&pv_free_list_lock
, 0);
826 /* invalidate user virtual addresses */
827 memset((char *)kernel_pmap
->dirbase
,
829 (KPTDI
) * sizeof(pd_entry_t
));
831 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
832 VADDR(KPTDI
,0), virtual_end
);
834 kprintf("Available physical space from 0x%llx to 0x%llx\n",
835 avail_start
, avail_end
);
836 printf("PAE enabled\n");
838 kprintf("Available physical space from 0x%x to 0x%x\n",
839 avail_start
, avail_end
);
848 *startp
= virtual_avail
;
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
860 register long npages
;
862 register vm_size_t s
;
867 * Allocate memory for the pv_head_table and its lock bits,
868 * the modify bit array, and the pte_page table.
871 /* zero bias all these arrays now instead of off avail_start
872 so we cover all memory */
873 npages
= i386_btop(avail_end
);
874 s
= (vm_size_t
) (sizeof(struct pv_entry
) * npages
875 + pv_lock_table_size(npages
)
879 if (kmem_alloc_wired(kernel_map
, &addr
, s
) != KERN_SUCCESS
)
882 memset((char *)addr
, 0, s
);
885 * Allocate the structures first to preserve word-alignment.
887 pv_head_table
= (pv_entry_t
) addr
;
888 addr
= (vm_offset_t
) (pv_head_table
+ npages
);
890 pv_lock_table
= (char *) addr
;
891 addr
= (vm_offset_t
) (pv_lock_table
+ pv_lock_table_size(npages
));
893 pmap_phys_attributes
= (char *) addr
;
896 * Create the zone of physical maps,
897 * and of the physical-to-virtual entries.
899 s
= (vm_size_t
) sizeof(struct pmap
);
900 pmap_zone
= zinit(s
, 400*s
, 4096, "pmap"); /* XXX */
901 s
= (vm_size_t
) sizeof(struct pv_entry
);
902 pv_list_zone
= zinit(s
, 10000*s
, 4096, "pv_list"); /* XXX */
904 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
906 pdpt_zone
= zinit(s
, 400*s
, 4096, "pdpt"); /* XXX */
910 * Only now, when all of the data structures are allocated,
911 * can we set vm_first_phys and vm_last_phys. If we set them
912 * too soon, the kmem_alloc_wired above will try to use these
913 * data structures and blow up.
916 /* zero bias this now so we cover all memory */
918 vm_last_phys
= avail_end
;
920 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
921 kptobj
= &kptobj_object_store
;
922 _vm_object_allocate((vm_object_size_t
)NKPDE
, kptobj
);
923 kernel_pmap
->pm_obj
= kptobj
;
926 /* create pv entries for kernel pages mapped by low level
927 startup code. these have to exist so we can pmap_remove()
928 e.g. kext pages from the middle of our addr space */
930 vaddr
= (vm_offset_t
)VM_MIN_KERNEL_ADDRESS
;
931 for (ppn
= 0; ppn
< i386_btop(avail_start
) ; ppn
++ ) {
934 pv_e
= pai_to_pvh(ppn
);
937 pv_e
->pmap
= kernel_pmap
;
938 pv_e
->next
= PV_ENTRY_NULL
;
941 pmap_initialized
= TRUE
;
 *	Initialize pmap cache.
946 pmap_cache_list
= PMAP_NULL
;
947 pmap_cache_count
= 0;
948 simple_lock_init(&pmap_cache_lock
, 0);
950 simple_lock_init(&free_pmap_lock
, 0);
956 x86_lowmem_free(void)
958 /* free lowmem pages back to the vm system. we had to defer doing this
959 until the vm system was fully up.
960 the actual pages that are released are determined by which
961 pages the memory sizing code puts into the region table */
963 ml_static_mfree((vm_offset_t
) i386_ptob(pmap_memory_regions
[0].base
)|VM_MIN_KERNEL_ADDRESS
,
964 (vm_size_t
) i386_ptob(pmap_memory_regions
[0].end
- pmap_memory_regions
[0].base
));
968 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
980 assert(pn
!= vm_page_fictitious_addr
);
981 phys
= (pmap_paddr_t
)i386_ptob(pn
);
982 if (!pmap_initialized
)
985 if (!pmap_valid_page(pn
))
988 PMAP_WRITE_LOCK(spl
);
990 pai
= pa_index(phys
);
991 pv_h
= pai_to_pvh(pai
);
993 result
= (pv_h
->pmap
== PMAP_NULL
);
994 PMAP_WRITE_UNLOCK(spl
);
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
1017 register pmap_t pro
;
1021 register vm_offset_t va
;
1024 * A software use-only map doesn't even need a map.
1031 p
= (pmap_t
) zalloc(pmap_zone
);
1033 panic("pmap_create zalloc");
1034 if (KERN_SUCCESS
!= kmem_alloc_wired(kernel_map
, (vm_offset_t
*)(&p
->dirbase
), NBPTD
))
1035 panic("pmap_create kmem_alloc_wired");
1037 p
->pm_hold
= (vm_offset_t
)zalloc(pdpt_zone
);
1038 if ((vm_offset_t
)NULL
== p
->pm_hold
) {
1039 panic("pdpt zalloc");
1041 p
->pm_pdpt
= (pdpt_entry_t
*) (( p
->pm_hold
+ 31) & ~31);
1042 p
->pm_ppdpt
= kvtophys((vm_offset_t
)p
->pm_pdpt
); /* XXX */
1044 if (NULL
== (p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPGPTD
*NPDEPG
))))
1045 panic("pmap_create vm_object_allocate");
1047 (void *)((unsigned int)IdlePTD
| KERNBASE
),
1049 va
= (vm_offset_t
)p
->dirbase
;
1050 p
->pdirbase
= (pd_entry_t
*)(kvtophys(va
));
1051 simple_lock_init(&p
->lock
, 0);
1053 /* setup self referential mapping(s) */
1054 for (i
= 0; i
< NPGPTD
; i
++ ) {
1056 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
1057 * (pd_entry_t
*) (p
->dirbase
+ PTDPTDI
+ i
) =
1058 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
1059 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
1061 p
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
1066 p
->stats
.resident_count
= 0;
1067 p
->stats
.wired_count
= 0;
1071 /* insert new pmap at head of queue hanging off kernel_pmap */
1073 simple_lock(&free_pmap_lock
);
1074 p
->pmap_link
.next
= (queue_t
)kernel_pmap
->pmap_link
.next
;
1075 kernel_pmap
->pmap_link
.next
= (queue_t
)p
;
1077 pro
= (pmap_t
) p
->pmap_link
.next
;
1078 p
->pmap_link
.prev
= (queue_t
)pro
->pmap_link
.prev
;
1079 pro
->pmap_link
.prev
= (queue_t
)p
;
1082 simple_unlock(&free_pmap_lock
);
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
1099 register pt_entry_t
*pdep
;
1102 register vm_page_t m
;
1104 register pmap_t pre
,pro
;
1111 simple_lock(&p
->lock
);
1114 register int my_cpu
;
1116 mp_disable_preemption();
1117 my_cpu
= cpu_number();
1120 * If some cpu is not using the physical pmap pointer that it
1121 * is supposed to be (see set_dirbase), we might be using the
1122 * pmap that is being destroyed! Make sure we are
1123 * physically on the right pmap:
1125 /* force pmap/cr3 update */
1128 VM_MAX_KERNEL_ADDRESS
);
1130 if (PMAP_REAL(my_cpu
) == p
) {
1131 PMAP_CPU_CLR(p
, my_cpu
);
1132 PMAP_REAL(my_cpu
) = kernel_pmap
;
1134 set_cr3((unsigned int)kernel_pmap
->pm_ppdpt
);
1136 set_cr3((unsigned int)kernel_pmap
->pdirbase
);
1139 mp_enable_preemption();
1141 simple_unlock(&p
->lock
);
1145 return; /* still in use */
1149 /* remove from pmap queue */
1151 simple_lock(&free_pmap_lock
);
1153 pre
= (pmap_t
)p
->pmap_link
.prev
;
1154 pre
->pmap_link
.next
= (queue_t
)p
->pmap_link
.next
;
1155 pro
= (pmap_t
)p
->pmap_link
.next
;
1156 pro
->pmap_link
.prev
= (queue_t
)p
->pmap_link
.prev
;
1158 simple_unlock(&free_pmap_lock
);
 *	Free the memory maps, then the
 *	pmap structure.
1167 pdep
= (pt_entry_t
*)p
->dirbase
;
1169 while (pdep
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)]) {
1171 if (*pdep
& INTEL_PTE_VALID
) {
1172 ind
= pdep
- (pt_entry_t
*)&p
->dirbase
[0];
1173 vm_object_lock(p
->pm_obj
);
1174 m
= vm_page_lookup(p
->pm_obj
, (vm_object_offset_t
)ind
);
1175 if (m
== VM_PAGE_NULL
) {
1176 panic("pmap_destroy: pte page not in object");
1178 vm_page_lock_queues();
1180 inuse_ptepages_count
--;
1181 vm_object_unlock(p
->pm_obj
);
1182 vm_page_unlock_queues();
1185 * Clear pdes, this might be headed for the cache.
1195 vm_object_deallocate(p
->pm_obj
);
1196 kmem_free(kernel_map
, (vm_offset_t
)p
->dirbase
, NBPTD
);
1198 zfree(pdpt_zone
, (void *)p
->pm_hold
);
1200 zfree(pmap_zone
, p
);
1204 * Add a reference to the specified pmap.
1213 if (p
!= PMAP_NULL
) {
1215 simple_lock(&p
->lock
);
1217 simple_unlock(&p
->lock
);
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
1242 register pt_entry_t
*cpte
;
1243 int num_removed
, num_unwired
;
1248 if (pmap
!= kernel_pmap
)
1249 ptep_check(get_pte_page(spte
));
1250 #endif /* DEBUG_PTE_PAGE */
1254 for (cpte
= spte
; cpte
< epte
;
1255 cpte
++, va
+= PAGE_SIZE
) {
1257 pa
= pte_to_pa(*cpte
);
1265 if (!valid_page(i386_btop(pa
))) {
1268 * Outside range of managed physical memory.
1269 * Just remove the mappings.
1271 register pt_entry_t
*lpte
= cpte
;
1281 * Get the modify and reference bits.
1284 register pt_entry_t
*lpte
;
1287 pmap_phys_attributes
[pai
] |=
1288 *lpte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1294 * Remove the mapping from the pvlist for
1295 * this physical page.
1298 register pv_entry_t pv_h
, prev
, cur
;
1300 pv_h
= pai_to_pvh(pai
);
1301 if (pv_h
->pmap
== PMAP_NULL
) {
1302 panic("pmap_remove: null pv_list!");
1304 if (pv_h
->va
== va
&& pv_h
->pmap
== pmap
) {
	     * Header is the pv_entry.  Copy the next one
	     * to header and free the next one (we cannot
	     * free the header).
1311 if (cur
!= PV_ENTRY_NULL
) {
1316 pv_h
->pmap
= PMAP_NULL
;
1323 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1324 panic("pmap-remove: mapping not in pv_list!");
1326 } while (cur
->va
!= va
|| cur
->pmap
!= pmap
);
1327 prev
->next
= cur
->next
;
1337 assert(pmap
->stats
.resident_count
>= num_removed
);
1338 pmap
->stats
.resident_count
-= num_removed
;
1339 assert(pmap
->stats
.wired_count
>= num_unwired
);
1340 pmap
->stats
.wired_count
-= num_unwired
;
1344 * Remove phys addr if mapped in specified map
1348 pmap_remove_some_phys(
1349 __unused pmap_t map
,
1350 __unused ppnum_t pn
)
1353 /* Implement to support working set code */
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
1373 register pt_entry_t
*pde
;
1374 register pt_entry_t
*spte
, *epte
;
1379 if (map
== PMAP_NULL
)
1382 PMAP_READ_LOCK(map
, spl
);
1384 if (value_64bit(s64
) || value_64bit(e64
)) {
1385 panic("pmap_remove addr overflow");
1388 orig_s
= s
= (vm_offset_t
)low32(s64
);
1389 e
= (vm_offset_t
)low32(e64
);
1391 pde
= pmap_pde(map
, s
);
1394 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1397 if (*pde
& INTEL_PTE_VALID
) {
1398 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1399 spte
= &spte
[ptenum(s
)];
1400 epte
= &spte
[intel_btop(l
-s
)];
1401 pmap_remove_range(map
, s
, spte
, epte
);
1407 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1409 PMAP_READ_UNLOCK(map
, spl
);
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
1424 pv_entry_t pv_h
, prev
;
1425 register pv_entry_t pv_e
;
1426 register pt_entry_t
*pte
;
1428 register pmap_t pmap
;
1433 assert(pn
!= vm_page_fictitious_addr
);
1434 phys
= (pmap_paddr_t
)i386_ptob(pn
);
1435 if (!valid_page(pn
)) {
1437 * Not a managed page.
1443 * Determine the new protection.
1447 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1451 return; /* nothing to do */
1458 * Lock the pmap system first, since we will be changing
1462 PMAP_WRITE_LOCK(spl
);
1464 pai
= pa_index(phys
);
1465 pv_h
= pai_to_pvh(pai
);
1468 * Walk down PV list, changing or removing all mappings.
1469 * We do not have to lock the pv_list because we have
1470 * the entire pmap system locked.
1472 if (pv_h
->pmap
!= PMAP_NULL
) {
1476 register vm_offset_t va
;
1479 * Lock the pmap to block pmap_extract and similar routines.
1481 simple_lock(&pmap
->lock
);
1486 pte
= pmap_pte(pmap
, va
);
1489 * Consistency checks.
1491 /* assert(*pte & INTEL_PTE_VALID); XXX */
1492 /* assert(pte_to_phys(*pte) == phys); */
1497 * Remove the mapping if new protection is NONE
1498 * or if write-protecting a kernel mapping.
1500 if (remove
|| pmap
== kernel_pmap
) {
1502 * Remove the mapping, collecting any modify bits.
1505 pmap_phys_attributes
[pai
] |=
1506 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1508 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1511 assert(pmap
->stats
.resident_count
>= 1);
1512 pmap
->stats
.resident_count
--;
1515 * Remove the pv_entry.
1519 * Fix up head later.
1521 pv_h
->pmap
= PMAP_NULL
;
1525 * Delete this entry.
1527 prev
->next
= pv_e
->next
;
1536 *pte
&= ~INTEL_PTE_WRITE
;
1538 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1545 simple_unlock(&pmap
->lock
);
1547 } while ((pv_e
= prev
->next
) != PV_ENTRY_NULL
);
1550 * If pv_head mapping was removed, fix it up.
1552 if (pv_h
->pmap
== PMAP_NULL
) {
1554 if (pv_e
!= PV_ENTRY_NULL
) {
1561 PMAP_WRITE_UNLOCK(spl
);
/*
 *	Disconnect all mappings for this page and return reference and change status
 *	in generic format.
 */
unsigned int pmap_disconnect(
	ppnum_t pa)
{
	pmap_page_protect(pa, 0);		/* disconnect the page */
	return (pmap_get_refmod(pa));		/* return ref/chg status */
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
1592 register pt_entry_t
*pde
;
1593 register pt_entry_t
*spte
, *epte
;
1596 vm_offset_t orig_s
= s
;
1599 if (map
== PMAP_NULL
)
1603 * Determine the new protection.
1607 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1609 case VM_PROT_READ
|VM_PROT_WRITE
:
1611 return; /* nothing to do */
1613 pmap_remove(map
, (addr64_t
)s
, (addr64_t
)e
);
1618 simple_lock(&map
->lock
);
1620 pde
= pmap_pde(map
, s
);
1622 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1625 if (*pde
& INTEL_PTE_VALID
) {
1626 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1627 spte
= &spte
[ptenum(s
)];
1628 epte
= &spte
[intel_btop(l
-s
)];
1630 while (spte
< epte
) {
1631 if (*spte
& INTEL_PTE_VALID
)
1632 *spte
&= ~INTEL_PTE_WRITE
;
1640 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1642 simple_unlock(&map
->lock
);
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
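/*
 * Example (illustrative sketch, guarded out of the build): a typical call,
 * entering a single unwired kernel mapping the way pmap_map() does earlier
 * in this file (0 = no special cache attributes, FALSE = not wired).
 */
#if 0	/* illustrative only, not original code */
static void
example_enter_one_page(vm_offset_t va, ppnum_t pn, vm_prot_t prot)
{
	pmap_enter(kernel_pmap, va, pn, prot, 0, FALSE);
}
#endif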
1662 register pmap_t pmap
,
1669 register pt_entry_t
*pte
;
1670 register pv_entry_t pv_h
;
1673 pt_entry_t
template;
1675 pmap_paddr_t old_pa
;
1676 pmap_paddr_t pa
= (pmap_paddr_t
)i386_ptob(pn
);
1678 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1683 assert(pn
!= vm_page_fictitious_addr
);
1685 printf("pmap(%x, %x)\n", v
, pn
);
1686 if (pmap
== PMAP_NULL
)
1690 * Must allocate a new pvlist entry while we're unlocked;
1691 * zalloc may cause pageout (which will lock the pmap system).
1692 * If we determine we need a pvlist entry, we will unlock
 * and allocate one.  Then we will retry, throwing away
1694 * the allocated entry later (if we no longer need it).
1696 pv_e
= PV_ENTRY_NULL
;
1698 PMAP_READ_LOCK(pmap
, spl
);
1701 * Expand pmap to include this pte. Assume that
1702 * pmap is always expanded to include enough hardware
1703 * pages to map one VM page.
1706 while ((pte
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
1708 * Must unlock to expand the pmap.
1710 PMAP_READ_UNLOCK(pmap
, spl
);
1712 pmap_expand(pmap
, v
);
1714 PMAP_READ_LOCK(pmap
, spl
);
1717 * Special case if the physical page is already mapped
1720 old_pa
= pte_to_pa(*pte
);
1723 * May be changing its wired attribute or protection
1726 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
1728 if(flags
& VM_MEM_NOT_CACHEABLE
) {
1729 if(!(flags
& VM_MEM_GUARDED
))
1730 template |= INTEL_PTE_PTA
;
1731 template |= INTEL_PTE_NCACHE
;
1734 if (pmap
!= kernel_pmap
)
1735 template |= INTEL_PTE_USER
;
1736 if (prot
& VM_PROT_WRITE
)
1737 template |= INTEL_PTE_WRITE
;
1739 template |= INTEL_PTE_WIRED
;
1741 pmap
->stats
.wired_count
++;
1744 if (iswired(*pte
)) {
1745 assert(pmap
->stats
.wired_count
>= 1);
1746 pmap
->stats
.wired_count
--;
1750 if (*pte
& INTEL_PTE_MOD
)
1751 template |= INTEL_PTE_MOD
;
1752 WRITE_PTE(pte
, template)
	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	SHARING_FAULTS complicates this slightly in that it cannot
	 *	replace the mapping, but must remove it (because adding the
	 *	pvlist entry for the new mapping may remove others), and
	 *	hence always enters the new mapping at step 3)
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */
1776 if (old_pa
!= (pmap_paddr_t
) 0) {
1780 if (pmap
!= kernel_pmap
)
1781 ptep_check(get_pte_page(pte
));
1782 #endif /* DEBUG_PTE_PAGE */
1785 * Don't do anything to pages outside valid memory here.
1786 * Instead convince the code that enters a new mapping
1787 * to overwrite the old one.
1790 if (valid_page(i386_btop(old_pa
))) {
1792 pai
= pa_index(old_pa
);
1795 assert(pmap
->stats
.resident_count
>= 1);
1796 pmap
->stats
.resident_count
--;
1797 if (iswired(*pte
)) {
1798 assert(pmap
->stats
.wired_count
>= 1);
1799 pmap
->stats
.wired_count
--;
1802 pmap_phys_attributes
[pai
] |=
1803 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1807 * Remove the mapping from the pvlist for
1808 * this physical page.
1811 register pv_entry_t prev
, cur
;
1813 pv_h
= pai_to_pvh(pai
);
1814 if (pv_h
->pmap
== PMAP_NULL
) {
1815 panic("pmap_enter: null pv_list!");
1817 if (pv_h
->va
== v
&& pv_h
->pmap
== pmap
) {
	     * Header is the pv_entry.  Copy the next one
	     * to header and free the next one (we cannot
	     * free the header).
1824 if (cur
!= PV_ENTRY_NULL
) {
1829 pv_h
->pmap
= PMAP_NULL
;
1836 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1837 panic("pmap_enter: mapping not in pv_list!");
1839 } while (cur
->va
!= v
|| cur
->pmap
!= pmap
);
1840 prev
->next
= cur
->next
;
1849 * old_pa is not managed. Pretend it's zero so code
1850 * at Step 3) will enter new mapping (overwriting old
1851 * one). Do removal part of accounting.
1853 old_pa
= (pmap_paddr_t
) 0;
1854 assert(pmap
->stats
.resident_count
>= 1);
1855 pmap
->stats
.resident_count
--;
1856 if (iswired(*pte
)) {
1857 assert(pmap
->stats
.wired_count
>= 1);
1858 pmap
->stats
.wired_count
--;
1864 if (valid_page(i386_btop(pa
))) {
1867 * Step 2) Enter the mapping in the PV list for this
1877 * We can return here from the sharing fault code below
1878 * in case we removed the only entry on the pv list and thus
1879 * must enter the new one in the list header.
1881 #endif /* SHARING_FAULTS */
1883 pv_h
= pai_to_pvh(pai
);
1885 if (pv_h
->pmap
== PMAP_NULL
) {
1891 pv_h
->next
= PV_ENTRY_NULL
;
1897 * check that this mapping is not already there
1898 * or there is no alias for this mapping in the same map
1900 pv_entry_t e
= pv_h
;
1901 while (e
!= PV_ENTRY_NULL
) {
1902 if (e
->pmap
== pmap
&& e
->va
== v
)
1903 panic("pmap_enter: already in pv_list");
1911 * do sharing faults.
1912 * if we find an entry on this pv list in the same address
1913 * space, remove it. we know there will not be more
1916 pv_entry_t e
= pv_h
;
1919 while (e
!= PV_ENTRY_NULL
) {
1920 if (e
->pmap
== pmap
) {
1922 * Remove it, drop pv list lock first.
1926 opte
= pmap_pte(pmap
, e
->va
);
1927 assert(opte
!= PT_ENTRY_NULL
);
1929 * Invalidate the translation buffer,
1930 * then remove the mapping.
1932 pmap_remove_range(pmap
, e
->va
, opte
,
1934 PMAP_UPDATE_TLBS(pmap
, e
->va
, e
->va
+ PAGE_SIZE
);
	 * We could have removed the head entry,
	 * so there could be no more entries
	 * and so we have to use the pv head entry.
	 * so, go back to the top and try the entry
	 * again.
1949 * check that this mapping is not already there
1952 while (e
!= PV_ENTRY_NULL
) {
1953 if (e
->pmap
== pmap
)
1954 panic("pmap_enter: alias in pv_list");
1958 #endif /* SHARING_FAULTS */
1962 * check for aliases within the same address space.
1964 pv_entry_t e
= pv_h
;
1965 vm_offset_t rpc
= get_rpc();
1967 while (e
!= PV_ENTRY_NULL
) {
1968 if (e
->pmap
== pmap
) {
1970 * log this entry in the alias ring buffer
1971 * if it's not there already.
1973 struct pmap_alias
*pma
;
1977 for (ii
= 0; ii
< pmap_alias_index
; ii
++) {
1978 if (pmap_aliasbuf
[ii
].rpc
== rpc
) {
1979 /* found it in the log already */
1985 pma
= &pmap_aliasbuf
[pmap_alias_index
];
1989 pma
->cookie
= PMAP_ALIAS_COOKIE
;
1990 if (++pmap_alias_index
>= PMAP_ALIAS_MAX
)
1991 panic("pmap_enter: exhausted alias log");
1997 #endif /* DEBUG_ALIAS */
1999 * Add new pv_entry after header.
2001 if (pv_e
== PV_ENTRY_NULL
) {
2003 if (pv_e
== PV_ENTRY_NULL
) {
2004 panic("pmap no pv_e's");
2009 pv_e
->next
= pv_h
->next
;
2012 * Remember that we used the pvlist entry.
2014 pv_e
= PV_ENTRY_NULL
;
2020 * Step 3) Enter and count the mapping.
2023 pmap
->stats
.resident_count
++;
2026 * Build a template to speed up entering -
2027 * only the pfn changes.
2029 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2031 if(flags
& VM_MEM_NOT_CACHEABLE
) {
2032 if(!(flags
& VM_MEM_GUARDED
))
2033 template |= INTEL_PTE_PTA
;
2034 template |= INTEL_PTE_NCACHE
;
2037 if (pmap
!= kernel_pmap
)
2038 template |= INTEL_PTE_USER
;
2039 if (prot
& VM_PROT_WRITE
)
2040 template |= INTEL_PTE_WRITE
;
2042 template |= INTEL_PTE_WIRED
;
2043 pmap
->stats
.wired_count
++;
2046 WRITE_PTE(pte
, template)
2049 PMAP_UPDATE_TLBS(pmap
, v
, v
+ PAGE_SIZE
);
2051 if (pv_e
!= PV_ENTRY_NULL
) {
2055 PMAP_READ_UNLOCK(pmap
, spl
);
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
2067 register pmap_t map
,
2071 register pt_entry_t
*pte
;
2076 * We must grab the pmap system lock because we may
2077 * change a pte_page queue.
2079 PMAP_READ_LOCK(map
, spl
);
2081 if ((pte
= pmap_pte(map
, v
)) == PT_ENTRY_NULL
)
2082 panic("pmap_change_wiring: pte missing");
2084 if (wired
&& !iswired(*pte
)) {
2086 * wiring down mapping
2088 map
->stats
.wired_count
++;
2089 *pte
++ |= INTEL_PTE_WIRED
;
2091 else if (!wired
&& iswired(*pte
)) {
2095 assert(map
->stats
.wired_count
>= 1);
2096 map
->stats
.wired_count
--;
2097 *pte
++ &= ~INTEL_PTE_WIRED
;
2100 PMAP_READ_UNLOCK(map
, spl
);
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	vm_offset_t	a32;
	ppnum_t		ppn;

	if (value_64bit(va))
		panic("pmap_find_phys 64 bit value");
	a32 = (vm_offset_t) low32(va);
	ptp = pmap_pte(pmap, a32);
	if (PT_ENTRY_NULL == ptp) {
		ppn = 0;
	} else {
		ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
	}
	return ppn;
}
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *		Change to shim for backwards compatibility but will not
 *		work for 64 bit systems.  Some old drivers that we cannot
 *		change need this.
 */
,
2145 vaddr
= (vm_offset_t
)0;
2146 ppn
= pmap_find_phys(pmap
, (addr64_t
)va
);
2148 vaddr
= ((vm_offset_t
)i386_ptob(ppn
)) | (va
& INTEL_OFFMASK
);
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
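/*
 * Example (illustrative sketch, guarded out of the build): the retry loop
 * callers are expected to use, as pmap_enter does above -- expand with the
 * pmap unlocked, then re-check under the lock.
 */
#if 0	/* illustrative only, not original code */
	while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(pmap, spl);	/* must not hold locks across expand */
		pmap_expand(pmap, v);
		PMAP_READ_LOCK(pmap, spl);
	}
#endif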
2171 register pmap_t map
,
2172 register vm_offset_t v
)
2175 register vm_page_t m
;
2176 register pmap_paddr_t pa
;
2181 if (map
== kernel_pmap
) {
2187 * Allocate a VM page for the level 2 page table entries.
2189 while ((m
= vm_page_grab()) == VM_PAGE_NULL
)
2193 * put the page into the pmap's obj list so it
2194 * can be found later.
2199 vm_object_lock(map
->pm_obj
);
2200 vm_page_insert(m
, map
->pm_obj
, (vm_object_offset_t
)i
);
2201 vm_page_lock_queues();
2203 inuse_ptepages_count
++;
2204 vm_object_unlock(map
->pm_obj
);
2205 vm_page_unlock_queues();
2212 PMAP_READ_LOCK(map
, spl
);
2214 * See if someone else expanded us first
2216 if (pmap_pte(map
, v
) != PT_ENTRY_NULL
) {
2217 PMAP_READ_UNLOCK(map
, spl
);
2218 vm_object_lock(map
->pm_obj
);
2219 vm_page_lock_queues();
2221 inuse_ptepages_count
--;
2222 vm_page_unlock_queues();
2223 vm_object_unlock(map
->pm_obj
);
2228 * Set the page directory entry for this page table.
2229 * If we have allocated more than one hardware page,
2230 * set several page directory entries.
2233 pdp
= &map
->dirbase
[pdenum(map
, v
)];
2234 *pdp
= pa_to_pte(pa
)
2239 PMAP_READ_UNLOCK(map
, spl
);
/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
2255 vm_offset_t dst_addr
,
2257 vm_offset_t src_addr
)
2260 dst_pmap
++; src_pmap
++; dst_addr
++; len
++; src_addr
++;
2266 * pmap_sync_page_data_phys(ppnum_t pa)
2268 * Invalidates all of the instruction cache on a physical page and
2269 * pushes any dirty data from the data cache for the same physical page
2270 * Not required in i386.
2273 pmap_sync_page_data_phys(__unused ppnum_t pa
)
2279 * pmap_sync_page_attributes_phys(ppnum_t pa)
2281 * Write back and invalidate all cachelines on a physical page.
2284 pmap_sync_page_attributes_phys(ppnum_t pa
)
2286 cache_flush_page_phys(pa
);
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
2307 register pt_entry_t
*pdp
, *ptp
;
2315 if (p
== kernel_pmap
)
2319 * Garbage collect map.
2321 PMAP_READ_LOCK(p
, spl
);
2323 for (pdp
= (pt_entry_t
*)p
->dirbase
;
2324 pdp
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)];
2327 if (*pdp
& INTEL_PTE_VALID
) {
2328 if(*pdp
& INTEL_PTE_REF
) {
2329 *pdp
&= ~INTEL_PTE_REF
;
2333 ptp
= pmap_pte(p
, pdetova(pdp
- (pt_entry_t
*)p
->dirbase
));
2334 eptp
= ptp
+ NPTEPG
;
2337 * If the pte page has any wired mappings, we cannot
2342 register pt_entry_t
*ptep
;
2343 for (ptep
= ptp
; ptep
< eptp
; ptep
++) {
2344 if (iswired(*ptep
)) {
2352 * Remove the virtual addresses mapped by this pte page.
2354 pmap_remove_range(p
,
2355 pdetova(pdp
- (pt_entry_t
*)p
->dirbase
),
2360 * Invalidate the page directory pointer.
2364 PMAP_READ_UNLOCK(p
, spl
);
2367 * And free the pte page itself.
2370 register vm_page_t m
;
2372 vm_object_lock(p
->pm_obj
);
2373 m
= vm_page_lookup(p
->pm_obj
,(vm_object_offset_t
)(pdp
- (pt_entry_t
*)&p
->dirbase
[0]));
2374 if (m
== VM_PAGE_NULL
)
2375 panic("pmap_collect: pte page not in object");
2376 vm_page_lock_queues();
2378 inuse_ptepages_count
--;
2379 vm_page_unlock_queues();
2380 vm_object_unlock(p
->pm_obj
);
2383 PMAP_READ_LOCK(p
, spl
);
2388 PMAP_UPDATE_TLBS(p
, VM_MIN_ADDRESS
, VM_MAX_ADDRESS
);
2389 PMAP_READ_UNLOCK(p
, spl
);
/*
 *	Routine:	pmap_kernel
 *	Function:
 *		Returns the physical map handle for the kernel.
 */
2403 return (kernel_pmap
);
2408 pmap_copy_page(src
, dst
)
2412 bcopy_phys((addr64_t
)i386_ptob(src
),
2413 (addr64_t
)i386_ptob(dst
),
/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
2434 __unused pmap_t pmap
,
2435 __unused vm_offset_t start_addr
,
2436 __unused vm_offset_t end_addr
,
2437 __unused boolean_t pageable
)
2440 pmap
++; start_addr
++; end_addr
++; pageable
++;
2445 * Clear specified attribute bits.
2448 phys_attribute_clear(
2453 register pv_entry_t pv_e
;
2454 register pt_entry_t
*pte
;
2456 register pmap_t pmap
;
2460 assert(pn
!= vm_page_fictitious_addr
);
2461 if (!valid_page(pn
)) {
2463 * Not a managed page.
2469 * Lock the pmap system first, since we will be changing
2473 PMAP_WRITE_LOCK(spl
);
2474 phys
= i386_ptob(pn
);
2475 pai
= pa_index(phys
);
2476 pv_h
= pai_to_pvh(pai
);
2479 * Walk down PV list, clearing all modify or reference bits.
2480 * We do not have to lock the pv_list because we have
2481 * the entire pmap system locked.
2483 if (pv_h
->pmap
!= PMAP_NULL
) {
2485 * There are some mappings.
2487 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2491 * Lock the pmap to block pmap_extract and similar routines.
2493 simple_lock(&pmap
->lock
);
2496 register vm_offset_t va
;
2499 pte
= pmap_pte(pmap
, va
);
2503 * Consistency checks.
2505 assert(*pte
& INTEL_PTE_VALID
);
2506 /* assert(pte_to_phys(*pte) == phys); */
2510 * Clear modify or reference bits.
2514 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
2516 simple_unlock(&pmap
->lock
);
2521 pmap_phys_attributes
[pai
] &= ~bits
;
2523 PMAP_WRITE_UNLOCK(spl
);
2527 * Check specified attribute bits.
2530 phys_attribute_test(
2535 register pv_entry_t pv_e
;
2536 register pt_entry_t
*pte
;
2538 register pmap_t pmap
;
2542 assert(pn
!= vm_page_fictitious_addr
);
2543 if (!valid_page(pn
)) {
2545 * Not a managed page.
2551 * Lock the pmap system first, since we will be checking
2555 PMAP_WRITE_LOCK(spl
);
2556 phys
= i386_ptob(pn
);
2557 pai
= pa_index(phys
);
2558 pv_h
= pai_to_pvh(pai
);
2560 if (pmap_phys_attributes
[pai
] & bits
) {
2561 PMAP_WRITE_UNLOCK(spl
);
2566 * Walk down PV list, checking all mappings.
2567 * We do not have to lock the pv_list because we have
2568 * the entire pmap system locked.
2570 if (pv_h
->pmap
!= PMAP_NULL
) {
2572 * There are some mappings.
2574 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2578 * Lock the pmap to block pmap_extract and similar routines.
2580 simple_lock(&pmap
->lock
);
2583 register vm_offset_t va
;
2586 pte
= pmap_pte(pmap
, va
);
2590 * Consistency checks.
2592 assert(*pte
& INTEL_PTE_VALID
);
2593 /* assert(pte_to_phys(*pte) == phys); */
2598 * Check modify or reference bits.
2601 if (*pte
++ & bits
) {
2602 simple_unlock(&pmap
->lock
);
2603 PMAP_WRITE_UNLOCK(spl
);
2607 simple_unlock(&pmap
->lock
);
2610 PMAP_WRITE_UNLOCK(spl
);
2615 * Set specified attribute bits.
2625 assert(pn
!= vm_page_fictitious_addr
);
2626 if (!valid_page(pn
)) {
2628 * Not a managed page.
2634 * Lock the pmap system and set the requested bits in
2635 * the phys attributes array. Don't need to bother with
2636 * ptes because the test routine looks here first.
2638 phys
= i386_ptob(pn
);
2639 PMAP_WRITE_LOCK(spl
);
2640 pmap_phys_attributes
[pa_index(phys
)] |= bits
;
2641 PMAP_WRITE_UNLOCK(spl
);
2645 * Set the modify bit on the specified physical page.
2648 void pmap_set_modify(
2651 phys_attribute_set(pn
, PHYS_MODIFIED
);
2655 * Clear the modify bits on the specified physical page.
2662 phys_attribute_clear(pn
, PHYS_MODIFIED
);
2668 * Return whether or not the specified physical page is modified
2669 * by any physical maps.
2676 return (phys_attribute_test(pn
, PHYS_MODIFIED
));
2680 * pmap_clear_reference:
2682 * Clear the reference bit on the specified physical page.
2686 pmap_clear_reference(
2689 phys_attribute_clear(pn
, PHYS_REFERENCED
);
2693 pmap_set_reference(ppnum_t pn
)
2695 phys_attribute_set(pn
, PHYS_REFERENCED
);
2699 * pmap_is_referenced:
2701 * Return whether or not the specified physical page is referenced
2702 * by any physical maps.
2709 return (phys_attribute_test(pn
, PHYS_REFERENCED
));
/*
 *	pmap_get_refmod(phys)
 *	  returns the referenced and modified bits of the specified
 *	  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pa)
{
	return (  ((phys_attribute_test(pa, PHYS_MODIFIED))?   VM_MEM_MODIFIED   : 0)
		| ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
}
/*
 *	pmap_clear_refmod(phys, mask)
 *	  clears the referenced and modified bits as specified by the mask
 *	  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pa, unsigned int mask)
{
	unsigned int	x86Mask;

	x86Mask = (  ((mask & VM_MEM_MODIFIED)?   PHYS_MODIFIED   : 0)
		   | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
	phys_attribute_clear(pa, x86Mask);
}
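/*
 * Example (illustrative sketch, guarded out of the build): the generic
 * ref/mod interface above translates between the VM layer's VM_MEM_* bits
 * and this pmap's PHYS_* attribute bits.
 */
#if 0	/* illustrative only, not original code */
static void
example_refmod(ppnum_t pn)
{
	unsigned int refmod;

	refmod = pmap_get_refmod(pn);	/* VM_MEM_REFERENCED | VM_MEM_MODIFIED */
	if (refmod & VM_MEM_MODIFIED) {
		/* clean the page, then clear both bits */
		pmap_clear_refmod(pn, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
	}
}
#endif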
/*
 *	Set the modify bit on the specified range
 *	of this map as requested.
 *
 *	This optimization stands only if each time the dirty bit
 *	in vm_page_t is tested, it is also tested in the pmap.
 */
2753 register pt_entry_t
*pde
;
2754 register pt_entry_t
*spte
, *epte
;
2756 vm_offset_t orig_s
= s
;
2758 if (map
== PMAP_NULL
)
2761 PMAP_READ_LOCK(map
, spl
);
2763 pde
= pmap_pde(map
, s
);
2764 while (s
&& s
< e
) {
2765 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
2768 if (*pde
& INTEL_PTE_VALID
) {
2769 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
2771 spte
= &spte
[ptenum(s
)];
2772 epte
= &spte
[intel_btop(l
-s
)];
2774 epte
= &spte
[intel_btop(PDE_MAPPED_SIZE
)];
2775 spte
= &spte
[ptenum(s
)];
2777 while (spte
< epte
) {
2778 if (*spte
& INTEL_PTE_VALID
) {
2779 *spte
|= (INTEL_PTE_MOD
| INTEL_PTE_WRITE
);
2787 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
2788 PMAP_READ_UNLOCK(map
, spl
);
2793 invalidate_icache(__unused vm_offset_t addr
,
2794 __unused
unsigned cnt
,
2800 flush_dcache(__unused vm_offset_t addr
,
2801 __unused
unsigned count
,
/*
 *	TLB Coherence Code (TLB "shootdown" code)
 *
 * Threads that belong to the same task share the same address space and
 * hence share a pmap.  However, they may run on distinct cpus and thus
 * have distinct TLBs that cache page table entries.  In order to guarantee
 * the TLBs are consistent, whenever a pmap is changed, all threads that
 * are active in that pmap must have their TLB updated.  To keep track of
 * this information, the set of cpus that are currently using a pmap is
 * maintained within each pmap structure (cpus_using).  pmap_activate() and
 * pmap_deactivate() add and remove, respectively, a cpu from this set.
 * Since the TLBs are not addressable over the bus, each processor must
 * flush its own TLB; a processor that needs to invalidate another TLB
 * needs to interrupt the processor that owns that TLB to signal the
 * update.
 *
 * Whenever a pmap is updated, the lock on that pmap is locked, and all
 * cpus using the pmap are signaled to invalidate.  All threads that need
 * to activate a pmap must wait for the lock to clear to await any updates
 * in progress before using the pmap.  They must ACQUIRE the lock to add
 * their cpu to the cpus_using set.  An implicit assumption made
 * throughout the TLB code is that all kernel code that runs at or higher
 * than splvm blocks out update interrupts, and that such code does not
 * touch pageable pages.
 *
 * A shootdown interrupt serves another function besides signaling a
 * processor to invalidate.  The interrupt routine (pmap_update_interrupt)
 * waits for both the pmap lock and the kernel pmap lock to clear,
 * preventing user code from making implicit pmap updates while the
 * sending processor is performing its update.  (This could happen via a
 * user data write reference that turns on the modify bit in the page
 * table.)  It must wait for any kernel updates that may have started
 * concurrently with a user pmap update because the IPC code changes
 * mappings.
 *
 * Spinning on the VALUES of the locks is sufficient (rather than
 * having to acquire the locks) because any updates that occur subsequent
 * to finding the lock unlocked will be signaled via another interrupt.
 * (This assumes the interrupt is cleared before the low level interrupt code
 * calls pmap_update_interrupt()).
 *
 * The signaling processor must wait for any implicit updates in progress
 * to terminate before continuing with its update.  Thus it must wait for an
 * acknowledgement of the interrupt from each processor for which such
 * references could be made.  For maintaining this information, a set
 * cpus_active is used.  A cpu is in this set if and only if it can
 * use a pmap.  When pmap_update_interrupt() is entered, a cpu is removed from
 * this set; when all such cpus are removed, it is safe to update.
 *
 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
 * be at least at the priority of the interprocessor interrupt
 * (splip <= splvm).  Otherwise, A could grab a lock and be interrupted by a
 * kernel update; it would spin forever in pmap_update_interrupt() trying
 * to acquire the user pmap lock it had already acquired.  Furthermore, A
 * must remove itself from cpus_active.  Otherwise, another cpu holding
 * the lock (B) could be in the process of sending an update signal to A,
 * and thus be waiting for A to remove itself from cpus_active.  If A is
 * spinning on the lock at interrupt priority this will never happen and a
 * deadlock will result.
 */
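/*
 * A minimal sketch of the handshake described above, using hypothetical
 * helper names (post_flush_range, send_ipi, wait_for_pmap_locks_to_clear,
 * flush_posted_ranges); the actual routines are signal_cpus,
 * process_pmap_updates and pmap_update_interrupt below.  It only shows the
 * ordering: the initiator posts the range, signals each cpu using the pmap,
 * and waits for those cpus to leave cpus_active; each target leaves
 * cpus_active, waits for the pmap locks to clear, flushes, then rejoins.
 *
 *	void tlb_shootdown_request(pmap_t pmap, vm_offset_t s, vm_offset_t e)
 *	{
 *		cpu_set	targets = pmap->cpus_using & ~(1 << cpu_number());
 *		int	cpu;
 *
 *		while ((cpu = ffs(targets)) != 0) {
 *			cpu -= 1;
 *			post_flush_range(cpu, pmap, s, e);	// queue work
 *			send_ipi(cpu);				// interrupt it
 *			targets &= ~(1 << cpu);
 *		}
 *		// Safe to proceed only once no target can still be using the
 *		// old translations, i.e. all of them have left cpus_active.
 *		while (pmap->cpus_using & cpus_active & ~(1 << cpu_number()))
 *			continue;
 *	}
 *
 *	void tlb_shootdown_interrupt(void)
 *	{
 *		int	cpu = cpu_number();
 *
 *		i_bit_clear(cpu, &cpus_active);	// ack: not using any pmap
 *		wait_for_pmap_locks_to_clear();	// spin on the lock VALUES
 *		flush_posted_ranges(cpu);	// INVALIDATE_TLB / cr3 reload
 *		i_bit_set(cpu, &cpus_active);	// rejoin the active set
 *	}
 */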
/*
 *	Signal another CPU that it must flush its TLB
 */
void
signal_cpus(
	cpu_set		use_list,
	pmap_t		pmap,
	vm_offset_t	start_addr,
	vm_offset_t	end_addr)
{
	register int			which_cpu, j;
	register pmap_update_list_t	update_list_p;

	while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
	    which_cpu -= 1;	/* convert to 0 origin */

	    update_list_p = cpu_update_list(which_cpu);
	    simple_lock(&update_list_p->lock);

	    j = update_list_p->count;
	    if (j >= UPDATE_LIST_SIZE) {
		/*
		 *	list overflowed.  Change last item to
		 *	indicate overflow.
		 */
		update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
		update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
		update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
	    }
	    else {
		update_list_p->item[j].pmap  = pmap;
		update_list_p->item[j].start = start_addr;
		update_list_p->item[j].end   = end_addr;
		update_list_p->count = j+1;
	    }
	    cpu_update_needed(which_cpu) = TRUE;
	    simple_unlock(&update_list_p->lock);

	    /* if it's the kernel pmap, ignore cpus_idle */
	    if (((cpus_idle & (1 << which_cpu)) == 0) ||
		(pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
		i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);

	    use_list &= ~(1 << which_cpu);
	}
}
void
process_pmap_updates(
	register pmap_t		my_pmap)
{
	register int			my_cpu;
	register pmap_update_list_t	update_list_p;
	register int			j;
	register pmap_t			pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();
	update_list_p = cpu_update_list(my_cpu);
	simple_lock(&update_list_p->lock);

	for (j = 0; j < update_list_p->count; j++) {
	    pmap = update_list_p->item[j].pmap;
	    if (pmap == my_pmap ||
		pmap == kernel_pmap) {

		if (pmap->ref_count <= 0) {
		    PMAP_CPU_CLR(pmap, my_cpu);
		    PMAP_REAL(my_cpu) = kernel_pmap;
#ifdef PAE
		    set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
#else
		    set_cr3((unsigned int)kernel_pmap->pdirbase);
#endif
		} else
		    INVALIDATE_TLB(pmap,
				   update_list_p->item[j].start,
				   update_list_p->item[j].end);
	    }
	}

	update_list_p->count = 0;
	cpu_update_needed(my_cpu) = FALSE;
	simple_unlock(&update_list_p->lock);
	mp_enable_preemption();
}
/*
 *	Interrupt routine for TBIA requested from other processor.
 *	This routine can also be called at interrupt time if the cpu
 *	was idle.  Some driver interrupt routines might access
 *	newly allocated vm.  (This is the case for hd.)
 */
void
pmap_update_interrupt(void)
{
	register int		my_cpu;
	spl_t			s;
	register pmap_t		my_pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();

	/*
	 *	Raise spl to splvm (above splip) to block out pmap_extract
	 *	from IO code (which would put this cpu back in the active
	 *	set).
	 */
	s = splhigh();

	my_pmap = PMAP_REAL(my_cpu);

	if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
	    my_pmap = kernel_pmap;

	do {
	    /*
	     *	Indicate that we're not using either the user or the
	     *	kernel pmap.
	     */
	    i_bit_clear(my_cpu, &cpus_active);

	    /*
	     *	Wait for any pmap updates in progress, on either the
	     *	user or the kernel pmap.
	     */
	    while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
		   *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
		LOOP_CHECK("pmap_update_interrupt", my_pmap);
	    }

	    process_pmap_updates(my_pmap);

	    i_bit_set(my_cpu, &cpus_active);

	} while (cpu_update_needed(my_cpu));

	splx(s);
	mp_enable_preemption();
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
	    printf("modified, ");
	if (attr & PHYS_REFERENCED)
	    printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
	    printf(" mapped at\n");
	else
	    printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
	    if (pv_h->pmap)
		printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
	vm_offset_t	vaddr)
{
	db_printf("0x%x", kvtophys(vaddr));
}

/*
 *	Walk the page tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	int		x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
	    dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
	    db_printf("need a dirbase...\n");
	    return;
	}
	dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDEPG; y++, pdep++) {
	    if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
		continue;
	    }
	    pdecnt++;
	    ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
	    db_printf("dir[%4d]: 0x%x\n", y, *pdep);
	    for (x = 0; x < NPTEPG; x++, ptep++) {
		if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
		    continue;
		}
		ptecnt++;
		db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
			  x,
			  *ptep,
			  (y << 22) | (x << 12),
			  *ptep & ~INTEL_OFFMASK);
	    }
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}

#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */
#ifdef MACH_BSD
/*
 *	BSD support routine to reassign virtual addresses.
 */
void
pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
{
	spl_t		spl;
	pt_entry_t	*pte, saved_pte;

	/* Lock the kernel map */
	PMAP_READ_LOCK(kernel_pmap, spl);

	while (size > 0) {
	    pte = pmap_pte(kernel_pmap, from);
	    if (pte == NULL)
		panic("pmap_pagemove from pte NULL");
	    saved_pte = *pte;
	    PMAP_READ_UNLOCK(kernel_pmap, spl);

	    pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
		       VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);

	    pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));

	    PMAP_READ_LOCK(kernel_pmap, spl);
	    pte = pmap_pte(kernel_pmap, to);
	    if (pte == NULL)
		panic("pmap_pagemove 'to' pte NULL");

	    *pte = saved_pte;

	    from += PAGE_SIZE;
	    to += PAGE_SIZE;
	    size -= PAGE_SIZE;
	}

	/* Get the processors to update the TLBs */
	PMAP_UPDATE_TLBS(kernel_pmap, from, from+size);
	PMAP_UPDATE_TLBS(kernel_pmap, to, to+size);

	PMAP_READ_UNLOCK(kernel_pmap, spl);
}
#endif /* MACH_BSD */
/* temporary workaround */
boolean_t
coredumpok(vm_map_t map, vm_offset_t va)
{
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
	    return FALSE;
	/* a page is dumpable unless its pte is both non-cacheable and wired */
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
}
/*
 *	grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
#if GROW_KERNEL_FUNCTION_IMPLEMENTED
	struct pmap	*pmap;
	vm_offset_t	ptppaddr;
	ppnum_t		ppn;
	vm_page_t	nkpg;
	pd_entry_t	newpdir = 0;

	/*
	 * Serialize.
	 * Losers return to try again until the winner completes the work.
	 */
	if (kptobj == 0) panic("growkernel 0");
	if (!vm_object_lock_try(kptobj)) {
	    return;
	}

	vm_page_lock_queues();

	/*
	 * If this is the first time thru, locate the end of the
	 * kernel page table entries and set nkpt to the current
	 * number of kernel page table pages
	 */
	if (kernel_vm_end == 0) {
	    kernel_vm_end = KERNBASE;
	    nkpt = 0;

	    while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
		nkpt++;
	    }
	}

	/*
	 * Now allocate and map the required number of page tables
	 */
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
	    if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
		continue;	/* someone already filled this one */
	    }

	    nkpg = vm_page_alloc(kptobj, nkpt);
	    if (!nkpg)
		panic("pmap_growkernel: no memory to grow kernel");
	    nkpt++;

	    ppn = nkpg->phys_page;
	    pmap_zero_page(ppn);
	    ptppaddr = i386_ptob(ppn);
	    newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
				    INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
	    pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;

	    /* propagate the new kernel pde to the other pmaps on the list */
	    simple_lock(&free_pmap_lock);
	    for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
		 pmap != kernel_pmap;
		 pmap = (struct pmap *)pmap->pmap_link.next) {
		*pmap_pde(pmap, kernel_vm_end) = newpdir;
	    }
	    simple_unlock(&free_pmap_lock);

	    kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	}

	vm_page_unlock_queues();
	vm_object_unlock(kptobj);
#endif /* GROW_KERNEL_FUNCTION_IMPLEMENTED */
}
pt_entry_t *
pmap_mapgetpte(vm_map_t map, vm_offset_t v)
{
	return pmap_pte(map->pmap, v);
}
boolean_t
phys_page_exists(ppnum_t pn)
{
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
	    return (TRUE);
	phys = (pmap_paddr_t) i386_ptob(pn);
	if (!pmap_valid_page(pn))
	    return (FALSE);

	return TRUE;
}
void
mapping_free_prime(void)
{
	int		i;
	pv_entry_t	pv_e;

	for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
	    pv_e = (pv_entry_t) zalloc(pv_list_zone);
	    PV_FREE(pv_e);
	}
}

void
mapping_adjust(void)
{
	pv_entry_t	pv_e;
	int		i;

	if (mapping_adjust_call == NULL) {
	    thread_call_setup(&mapping_adjust_call_data,
			      (thread_call_func_t) mapping_adjust,
			      (thread_call_param_t) NULL);
	    mapping_adjust_call = &mapping_adjust_call_data;
	}
	/* XXX  rethink best way to do locking here */
	if (pv_free_count < PV_LOW_WATER_MARK) {
	    for (i = 0; i < PV_ALLOC_CHUNK; i++) {
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		PV_FREE(pv_e);
	    }
	}
}
void
pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;

	for (i = 0; i < cnt; i++) {
	    opte = pmap_pte(kernel_pmap, kernel_commpage);
	    if (0 == opte) panic("kernel_commpage");
	    npte = pmap_pte(kernel_pmap, user_commpage);
	    if (0 == npte) panic("user_commpage");
	    pte = *opte | INTEL_PTE_USER | INTEL_PTE_GLOBAL;
	    pte &= ~INTEL_PTE_WRITE;	// ensure read only
	    WRITE_PTE_FAST(npte, pte);
	    kernel_commpage += INTEL_PGBYTES;
	    user_commpage += INTEL_PGBYTES;
	}
}
static cpu_pmap_t		cpu_pmap_master;
static struct pmap_update_list	cpu_update_list_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	pmap_update_list_t	up;
	vm_offset_t		address;
	vm_map_entry_t		entry;

	if (is_boot_cpu) {
	    cp = &cpu_pmap_master;
	    up = &cpu_update_list_master;
	} else {
	    /*
	     * The per-cpu pmap data structure itself.
	     */
	    ret = kmem_alloc(kernel_map,
			     (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
	    if (ret != KERN_SUCCESS) {
		printf("pmap_cpu_alloc() failed ret=%d\n", ret);
		return NULL;
	    }
	    bzero((void *)cp, sizeof(cpu_pmap_t));

	    /*
	     * The tlb flush update list.
	     */
	    ret = kmem_alloc(kernel_map,
			     (vm_offset_t *) &up, sizeof(*up));
	    if (ret != KERN_SUCCESS) {
		printf("pmap_cpu_alloc() failed ret=%d\n", ret);
		pmap_cpu_free(cp);
		return NULL;
	    }

	    /*
	     * The temporary windows used for copy/zero - see loose_ends.c
	     */
	    for (i = 0; i < PMAP_NWINDOWS; i++) {
		ret = vm_map_find_space(kernel_map,
					&address, PAGE_SIZE, 0, &entry);
		if (ret != KERN_SUCCESS) {
		    printf("pmap_cpu_alloc() "
			   "vm_map_find_space ret=%d\n", ret);
		    pmap_cpu_free(cp);
		    return NULL;
		}
		vm_map_unlock(kernel_map);

		cp->mapwindow[i].prv_CADDR = (caddr_t) address;
		cp->mapwindow[i].prv_CMAP = vtopte(address);
		* (int *) cp->mapwindow[i].prv_CMAP = 0;

		kprintf("pmap_cpu_alloc() "
			"window=%d CADDR=0x%x CMAP=0x%x\n",
			i, address, vtopte(address));
	    }
	}

	/*
	 *	Set up the pmap request list
	 */
	cp->update_list = up;
	simple_lock_init(&up->lock, 0);
	up->count = 0;

	return cp;
}

void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
	    if (cp->update_list != NULL)
		kfree((void *) cp->update_list,
		      sizeof(*cp->update_list));
	    kfree((void *) cp, sizeof(cpu_pmap_t));
	}
}