/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include <mach_ldebug.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>

#include <kern/lock.h>
#include <kern/kalloc.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/mp_slave_boot.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <kern/xpr.h>

#include <vm/vm_protos.h>
/*
 *	Forward declarations for internal functions.
 */
extern void	pmap_remove_range(
			pmap_t		pmap,
			vm_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		pn,
			int		bits);

boolean_t	phys_attribute_test(
			ppnum_t		pn,
			int		bits);

void		phys_attribute_set(
			ppnum_t		pn,
			int		bits);

void		pmap_growkernel(
			vm_offset_t	addr);

void		pmap_set_reference(
			ppnum_t		pn);

pt_entry_t *	pmap_mapgetpte(
			vm_map_t	map,
			vm_offset_t	v);

boolean_t	phys_page_exists(
			ppnum_t		pn);

#ifndef	set_dirbase
void		set_dirbase(vm_offset_t dirbase);
#endif	/* set_dirbase */
#define	iswired(pte)	((pte) & INTEL_PTE_WIRED)

#define	WRITE_PTE(pte_p, pte_entry)		*(pte_p) = (pte_entry);
#define	WRITE_PTE_FAST(pte_p, pte_entry)	*(pte_p) = (pte_entry);

#define value_64bit(value)	((value) & 0xFFFFFFFF00000000LL)
#define low32(x)		((unsigned int)((x) & 0x00000000ffffffffLL))
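/*
 * Illustrative sketch, not part of the original source: value_64bit() and
 * low32() are how this 32-bit pmap guards against 64-bit addresses before
 * truncating them, exactly the pattern pmap_remove() and pmap_find_phys()
 * use below.  The function name example_check_addr() is hypothetical.
 */
#if 0
static void
example_check_addr(addr64_t a64)
{
	vm_offset_t a32;

	if (value_64bit(a64))			/* any bits set above 4GB? */
		panic("address beyond 32-bit pmap range");
	a32 = (vm_offset_t) low32(a64);		/* now safe to truncate */
	(void) a32;
}
#endif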
/*
 *	Private data structures.
 */

/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_entry_t; the list is the pv_table.
 */

typedef struct pv_entry {
	struct pv_entry	*next;		/* next pv_entry */
	pmap_t		pmap;		/* pmap where mapping lies */
	vm_offset_t	va;		/* virtual address for mapping */
} *pv_entry_t;

#define PV_ENTRY_NULL	((pv_entry_t) 0)

pv_entry_t	pv_head_table;		/* array of entries, one per page */

/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_list_zone if it becomes empty.
 */
pv_entry_t	pv_free_list;		/* free list at SPLVM */
decl_simple_lock_data(,pv_free_list_lock)
int pv_free_count = 0;
#define PV_LOW_WATER_MARK 5000
#define PV_ALLOC_CHUNK 2000
thread_call_t  mapping_adjust_call;
static thread_call_data_t  mapping_adjust_call_data;
int mappingrecurse = 0;
#define	PV_ALLOC(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	if ((pv_e = pv_free_list) != 0) { \
	    pv_free_list = pv_e->next; \
	    if (pv_free_count < PV_LOW_WATER_MARK) \
	      if (hw_compare_and_store(0,1,&mappingrecurse)) \
	        thread_call_enter(mapping_adjust_call); \
	} \
	simple_unlock(&pv_free_list_lock); \
}

#define	PV_FREE(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	pv_e->next = pv_free_list; \
	pv_free_list = pv_e; \
	simple_unlock(&pv_free_list_lock); \
}
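/*
 * Illustrative sketch, not part of the original source: the expected
 * life-cycle of a pv_entry.  PV_ALLOC() tries the free list first; the
 * fallback to zalloc(pv_list_zone) mirrors what pmap_enter() does when the
 * free list is empty.  The function name example_pv_cycle() is hypothetical.
 */
#if 0
static void
example_pv_cycle(void)
{
	pv_entry_t pv_e;

	PV_ALLOC(pv_e);					/* fast path: free list */
	if (pv_e == PV_ENTRY_NULL)
		pv_e = (pv_entry_t) zalloc(pv_list_zone);	/* slow path: zone */

	/* ... link pv_e into a pv_head_table chain for some mapping ... */

	PV_FREE(pv_e);					/* return it when the mapping is removed */
}
#endif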
zone_t		pv_list_zone;		/* zone of pv_entry structures */

static zone_t	pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
pmap_paddr_t	vm_first_phys = (pmap_paddr_t) 0;
pmap_paddr_t	vm_last_phys  = (pmap_paddr_t) 0;
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

pmap_paddr_t	kernel_vm_end = (pmap_paddr_t)0;
#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
#if GROW_KERNEL_FUNCTION_IMPLEMENTED	/* not needed until growing kernel pmap */
static struct vm_object kptobj_object_store;
static vm_object_t kptobj;
#endif

/*
 *	Index into pv_head table, its lock bits, and the modify/reference
 *	bits starting at vm_first_phys.
 */

#define pa_index(pa)	(i386_btop(pa - vm_first_phys))

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char	*pmap_phys_attributes;
/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define	PHYS_NCACHE	INTEL_PTE_NCACHE

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
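/*
 * Illustrative sketch, not part of the original source: how the per-page
 * bookkeeping above is reached for a managed physical page.  The function
 * name example_lookup() is hypothetical; the macros and arrays it uses are
 * the ones defined above.
 */
#if 0
static void
example_lookup(ppnum_t pn)
{
	pmap_paddr_t	phys = (pmap_paddr_t) i386_ptob(pn);
	int		pai  = pa_index(phys);		/* index relative to vm_first_phys */
	pv_entry_t	pv_h = pai_to_pvh(pai);		/* head of the page's mapping list */
	char		attr = pmap_phys_attributes[pai];	/* PHYS_MODIFIED/PHYS_REFERENCED */

	(void) pv_h; (void) attr;
}
#endif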
/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols:
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *	To protect against deadlock between these two cases, the pmap_lock
 *	is used.  There are three different locking protocols as a result:
 *
 *  1.  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
 *		the pmap.
 *
 *  2.  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
 *		lock on the pmap_lock (shared read), then lock the pmap
 *		and finally the pv_lists as needed [i.e. pmap lock before
 *		pv_list lock.]
 *
 *  3.  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
 *		Get a write lock on the pmap_lock (exclusive write); this
 *		also guarantees exclusive access to the pv_lists.  Lock the
 *		pmaps as needed.
 *
 *	At no time may any routine hold more than one pmap lock or more than
 *	one pv_list lock.  Because interrupt level routines can allocate
 *	mbufs and cause pmap_enter's, the pmap_lock and the lock on the
 *	kernel_pmap can only be held at splhigh.
 */

/*
 *	We raise the interrupt level to splvm, to block interprocessor
 *	interrupts during pmap operations.  We must take the CPU out of
 *	the cpus_active set while interrupts are blocked.
 */
#define SPLVM(spl)	{ \
	spl = splhigh(); \
	mp_disable_preemption(); \
	i_bit_clear(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
}

#define SPLX(spl)	{ \
	mp_disable_preemption(); \
	i_bit_set(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
	splx(spl); \
}
/*
 *	Lock on pmap system
 */
lock_t	pmap_system_lock;

#define PMAP_READ_LOCK(pmap, spl) {	\
	SPLVM(spl);			\
	lock_read(&pmap_system_lock);	\
	simple_lock(&(pmap)->lock);	\
}

#define PMAP_WRITE_LOCK(spl) {		\
	SPLVM(spl);			\
	lock_write(&pmap_system_lock);	\
}

#define PMAP_READ_UNLOCK(pmap, spl) {		\
	simple_unlock(&(pmap)->lock);		\
	lock_read_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_UNLOCK(spl) {		\
	lock_write_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_TO_READ_LOCK(pmap) {		\
	simple_lock(&(pmap)->lock);		\
	lock_write_to_read(&pmap_system_lock);	\
}

#define LOCK_PVH(index)		lock_pvh_pai(index)

#define UNLOCK_PVH(index)	unlock_pvh_pai(index)
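/*
 * Illustrative sketch, not part of the original source: how a pmap-based
 * operation (locking protocol 2 above) composes these macros.  The function
 * name example_pmap_is_mapped() is hypothetical; pmap_pte() is the lookup
 * routine defined further down in this file.
 */
#if 0
static boolean_t
example_pmap_is_mapped(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t	*pte;
	spl_t		spl;
	boolean_t	mapped;

	PMAP_READ_LOCK(pmap, spl);		/* system read lock + pmap lock, at SPLVM */
	pte = pmap_pte(pmap, va);
	mapped = (pte != PT_ENTRY_NULL) && ((*pte & INTEL_PTE_VALID) != 0);
	PMAP_READ_UNLOCK(pmap, spl);
	return (mapped);
}
#endif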
#if	USLOCK_DEBUG
extern int	max_lock_loops;
extern int	disableSerialOuput;
#define LOOP_VAR							\
	unsigned int	loop_count;					\
	loop_count = disableSerialOuput ? max_lock_loops		\
				: max_lock_loops*100
#define LOOP_CHECK(msg, pmap)						\
	if (--loop_count == 0) {					\
		mp_disable_preemption();				\
		kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n",	\
			  msg, cpu_number(), pmap, cpus_active);	\
		Debugger("deadlock detection");				\
		mp_enable_preemption();					\
		loop_count = max_lock_loops;				\
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */
#define PMAP_UPDATE_TLBS(pmap, s, e)					\
{									\
	cpu_set	cpu_mask;						\
	cpu_set	users;							\
									\
	mp_disable_preemption();					\
	cpu_mask = 1 << cpu_number();					\
									\
	/* Since the pmap is locked, other updates are locked */	\
	/* out, and any pmap_activate has finished. */			\
									\
	/* find other cpus using the pmap */				\
	users = (pmap)->cpus_using & ~cpu_mask;				\
	if (users) {							\
	    LOOP_VAR;							\
	    /* signal them, and wait for them to finish */		\
	    /* using the pmap */					\
	    signal_cpus(users, (pmap), (s), (e));			\
	    while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) {	\
		LOOP_CHECK("PMAP_UPDATE_TLBS", pmap);			\
	    }								\
	}								\
	/* invalidate our own TLB if pmap is in use */			\
									\
	if ((pmap)->cpus_using & cpu_mask) {				\
	    INVALIDATE_TLB((pmap), (s), (e));				\
	}								\
									\
	mp_enable_preemption();						\
}

#define MAX_TBIS_SIZE	32		/* > this -> TBIA */ /* XXX */

#define INVALIDATE_TLB(m, s, e) {	\
	flush_tlb();			\
}
/*
 *	Structures to keep track of pending TLB invalidations
 */
cpu_set			cpus_active;
cpu_set			cpus_idle;

#define UPDATE_LIST_SIZE	4

struct pmap_update_item {
	pmap_t		pmap;		/* pmap to invalidate */
	vm_offset_t	start;		/* start address to invalidate */
	vm_offset_t	end;		/* end address to invalidate */
};

typedef	struct pmap_update_item	*pmap_update_item_t;

/*
 *	List of pmap updates.  If the list overflows,
 *	the last entry is changed to invalidate all.
 */
struct pmap_update_list {
	decl_simple_lock_data(,lock)
	int			count;
	struct pmap_update_item	item[UPDATE_LIST_SIZE];
};
typedef	struct pmap_update_list	*pmap_update_list_t;

extern void signal_cpus(
		cpu_set		use_list,
		pmap_t		pmap,
		vm_offset_t	start,
		vm_offset_t	end);

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))
#define pmap_in_use(pmap, cpu)	(((pmap)->cpus_using & (1 << (cpu))) != 0)

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

decl_simple_lock_data(,free_pmap_lock)

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;	/* debugging */

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern	vm_offset_t	hole_start, hole_end;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

#if	DEBUG_ALIAS
#define PMAP_ALIAS_MAX 32
struct pmap_alias {
	vm_offset_t	rpc;
	pmap_t		pmap;
	vm_offset_t	va;
	int		cookie;
#define PMAP_ALIAS_COOKIE 0xdeadbeef
} pmap_aliasbuf[PMAP_ALIAS_MAX];
int pmap_alias_index = 0;
extern vm_offset_t get_rpc();

#endif	/* DEBUG_ALIAS */
#define	pmap_pde(m, v)	(&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
#define	pdir_pde(d, v)	(d[(vm_offset_t)(v) >> PDESHIFT])

static inline int
pmap_is_current(pmap_t pmap)
{
	return (pmap == kernel_pmap ||
		(pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
}
/*
 * return address of mapped pte for vaddr va in pmap pmap.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;

	pde = pmap_pde(pmap, va);
	if (*pde != 0) {
		if (pmap_is_current(pmap))
			return (vtopte(va));
		newpf = *pde & PG_FRAME;
		if (((*CM4) & PG_FRAME) != newpf) {
			*CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
			invlpg((u_int)CA4);
		}
		return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
	}
	return (0);
}
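/*
 * Illustrative sketch, not part of the original source: the caller-side
 * pattern for pmap_pte(), checking for PT_ENTRY_NULL and then for the valid
 * and write bits (compare pmap_change_wiring() and pmap_enter() below).  The
 * function name example_is_writable() is hypothetical.
 */
#if 0
static boolean_t
example_is_writable(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte = pmap_pte(pmap, va);

	if (pte == PT_ENTRY_NULL)
		return FALSE;		/* no page table covers this VA */
	return ((*pte & INTEL_PTE_VALID) && (*pte & INTEL_PTE_WRITE));
}
#endif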
#define	DEBUG_PTE_PAGE	0

#if	DEBUG_PTE_PAGE
void
ptep_check(
	ptep_t	ptep)
{
	register pt_entry_t	*pte, *epte;
	int			ctu, ctw;

	/* check the use and wired counts */
	if (ptep == PTE_PAGE_NULL)
		return;
	pte = pmap_pte(ptep->pmap, ptep->va);
	epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
	ctu = 0;
	ctw = 0;
	while (pte < epte) {
		if (*pte) {
			ctu++;
			if (iswired(*pte))
				ctw++;
		}
		pte++;
	}

	if (ctu != ptep->use_count || ctw != ptep->wired_count) {
		printf("use %d wired %d - actual use %d wired %d\n",
			ptep->use_count, ptep->wired_count, ctu, ctw);
	}
}
#endif	/* DEBUG_PTE_PAGE */
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	register vm_prot_t	prot)
{
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap,
			virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return(virt);
}
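/*
 * Illustrative sketch, not part of the original source: how early boot code
 * is expected to use pmap_map() to wire an already-known physical range into
 * kernel VM.  The function name and parameters of example_map_region() are
 * hypothetical.
 */
#if 0
static void
example_map_region(vm_offset_t va, vm_offset_t pa_start, vm_offset_t pa_end)
{
	/* Mappings created this way are never unmapped or managed. */
	(void) pmap_map(va, pa_start, pa_end, VM_PROT_READ | VM_PROT_WRITE);
}
#endif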
/*
 * Back-door routine for mapping kernel VM at initialization.
 * Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys) (i.e., devices).
 * Sets no-cache, A, D.
 * Otherwise like pmap_map.
 */
vm_offset_t
pmap_map_bd(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	register vm_prot_t	prot)
{
	register pt_entry_t	template;
	register pt_entry_t	*pte;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_NCACHE
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;
	if (prot & VM_PROT_WRITE)
	    template |= INTEL_PTE_WRITE;

	/* XXX move pmap_pte out of loop, once one pte mapped, all are */
	while (start_addr < end_addr) {
		pte = pmap_pte(kernel_pmap, virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		WRITE_PTE_FAST(pte, template)
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return(virt);
}
extern	char		*first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;
extern	vm_offset_t	etext;
extern	void		*sectHIBB;
extern	int		sectSizeHIB;
/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 *
 *	Parameters:
 *	load_start:	PA where kernel was loaded
 *	avail_start	PA of first available physical page -
 *			   after kernel page tables
 *	avail_end	PA of last available physical page
 *	virtual_avail	VA of first available page -
 *			   after kernel page tables
 *	virtual_end	VA of last available page -
 *			   end of kernel address space
 *
 *	&start_text	start of kernel text
 *	&etext		end of kernel text
 */
void
pmap_bootstrap(
	__unused vm_offset_t	load_start)
{
	vm_offset_t	va;
	pt_entry_t	*pte;
	int		i;
	int		wpkernel, boot_arg;

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */

	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */

	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pmap_link.next = (queue_t)kernel_pmap;	/* Set up anchor forward */
	kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap;	/* Set up anchor reverse */

	kernel_pmap->ref_count = 1;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
	kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD;
	kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE);
	kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT;
	va = (vm_offset_t)kernel_pmap->dirbase;
	/* setup self referential mapping(s) */
	for (i = 0; i < NPGPTD; i++ ) {
	  pmap_paddr_t pa;

	  pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
	  * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) =
	    (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
	      INTEL_PTE_MOD | INTEL_PTE_WIRED ;
	  kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID;
	}

	virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
	va = virtual_avail;
	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for ml_phys_read/write.
	 */
	SYSMAP(caddr_t, CM1, CA1, 1)
	* (pt_entry_t *) CM1 = 0;
	SYSMAP(caddr_t, CM2, CA2, 1)
	* (pt_entry_t *) CM2 = 0;
	SYSMAP(caddr_t, CM3, CA3, 1)
	* (pt_entry_t *) CM3 = 0;

	/* used by pmap_pte */
	SYSMAP(caddr_t, CM4, CA4, 1)
	* (pt_entry_t *) CM4 = 0;

	/* DMAP used for debugger */
	SYSMAP(caddr_t, DMAP1, DADDR1, 1);
	SYSMAP(caddr_t, DMAP2, DADDR2, 1);	/* XXX temporary - can remove */

	virtual_avail = va;

	lock_init(&pmap_system_lock,
		  FALSE,		/* NOT a sleep lock */
		  0, 0);
	wpkernel = 1;
	if (PE_parse_boot_arg("debug", &boot_arg)) {
		if (boot_arg & DB_PRT) wpkernel = 0;
		if (boot_arg & DB_NMI) wpkernel = 0;
	}

	/* remap kernel text readonly if not debugging or kprintfing */
	if (wpkernel) {
		vm_offset_t	myva;
		pt_entry_t	*ptep;

		for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK);
		     myva < etext;
		     myva += PAGE_SIZE) {
			if (myva >= (vm_offset_t)sectHIBB &&
			    myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
				continue;
			ptep = pmap_pte(kernel_pmap, myva);
			if (ptep)
				*ptep &= ~INTEL_PTE_RW;
		}
	}

	simple_lock_init(&kernel_pmap->lock, 0);
	simple_lock_init(&pv_free_list_lock, 0);

	/* invalidate user virtual addresses */
	memset((char *)kernel_pmap->dirbase,
	       0,
	       (KPTDI) * sizeof(pd_entry_t));

	kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
			VADDR(KPTDI,0), virtual_end);
#ifdef PAE
	kprintf("Available physical space from 0x%llx to 0x%llx\n",
			avail_start, avail_end);
	printf("PAE enabled\n");
#else
	kprintf("Available physical space from 0x%x to 0x%x\n",
			avail_start, avail_end);
#endif
}

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	/* ... */
}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	register long		npages;
	vm_offset_t		addr;
	register vm_size_t	s;
	vm_offset_t		vaddr;
	ppnum_t			ppn;
	pv_entry_t		pv_e;

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/* zero bias all these arrays now instead of off avail_start
	   so we cover all memory */
	npages = i386_btop(avail_end);
	s = (vm_size_t) (sizeof(struct pv_entry) * npages
				+ pv_lock_table_size(npages)
				+ npages);

	if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);
	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pmap_phys_attributes = (char *) addr;

	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	s = (vm_size_t) sizeof(struct pv_entry);
	pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
	//	s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
	pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
	/*
	 *	Only now, when all of the data structures are allocated,
	 *	can we set vm_first_phys and vm_last_phys.  If we set them
	 *	too soon, the kmem_alloc_wired above will try to use these
	 *	data structures and blow up.
	 */

	/* zero bias this now so we cover all memory */
	vm_first_phys = 0;
	vm_last_phys = avail_end;

#if GROW_KERNEL_FUNCTION_IMPLEMENTED
	kptobj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
	kernel_pmap->pm_obj = kptobj;
#endif

	/* create pv entries for kernel pages mapped by low level
	   startup code.  these have to exist so we can pmap_remove()
	   e.g. kext pages from the middle of our addr space */

	vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS;
	for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
	  pv_e = pai_to_pvh(ppn);
	  pv_e->va = vaddr;
	  vaddr += PAGE_SIZE;
	  pv_e->pmap = kernel_pmap;
	  pv_e->next = PV_ENTRY_NULL;
	}

	pmap_initialized = TRUE;

	/*
	 *	Initialize pmap cache.
	 */
	pmap_cache_list = PMAP_NULL;
	pmap_cache_count = 0;
	simple_lock_init(&pmap_cache_lock, 0);

	simple_lock_init(&free_pmap_lock, 0);
}
void
x86_lowmem_free(void)
{
	/* free lowmem pages back to the vm system. we had to defer doing this
	   until the vm system was fully up.
	   the actual pages that are released are determined by which
	   pages the memory sizing code puts into the region table */

	ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS,
			(vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
}


#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
boolean_t
pmap_verify_free(
		 ppnum_t pn)
{
	pmap_paddr_t	phys;
	pv_entry_t	pv_h;
	int		pai;
	spl_t		spl;
	boolean_t	result;

	assert(pn != vm_page_fictitious_addr);
	phys = (pmap_paddr_t)i386_ptob(pn);
	if (!pmap_initialized)
		return(TRUE);

	if (!pmap_valid_page(pn))
		return(FALSE);

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	result = (pv_h->pmap == PMAP_NULL);
	PMAP_WRITE_UNLOCK(spl);

	return(result);
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	vm_size_t	size)
{
	register pmap_t		p;
	register int		i;
	register pmap_t		pro;
	register vm_offset_t	va;

	/*
	 *	A software use-only map doesn't even need a map.
	 */
	if (size != 0) {
		return(PMAP_NULL);
	}

	p = (pmap_t) zalloc(pmap_zone);
	if (PMAP_NULL == p)
		panic("pmap_create zalloc");
	if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
		panic("pmap_create kmem_alloc_wired");

	p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
	if ((vm_offset_t)NULL == p->pm_hold) {
		panic("pdpt zalloc");
	}
	p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
	p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt);	/* XXX */

	if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG))))
		panic("pmap_create vm_object_allocate");
	memcpy(p->dirbase,
	       (void *)((unsigned int)IdlePTD | KERNBASE),
	       NBPTD);

	va = (vm_offset_t)p->dirbase;
	p->pdirbase = (pd_entry_t *)(kvtophys(va));
	simple_lock_init(&p->lock, 0);

	/* setup self referential mapping(s) */
	for (i = 0; i < NPGPTD; i++ ) {
	  pmap_paddr_t pa;

	  pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
	  * (pd_entry_t *) (p->dirbase + PTDPTDI + i) =
	    (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
	      INTEL_PTE_MOD | INTEL_PTE_WIRED ;
	  p->pm_pdpt[i] = pa | INTEL_PTE_VALID;
	}

	p->stats.resident_count = 0;
	p->stats.wired_count = 0;

	/* insert new pmap at head of queue hanging off kernel_pmap */
	simple_lock(&free_pmap_lock);

	p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next;
	kernel_pmap->pmap_link.next = (queue_t)p;

	pro = (pmap_t) p->pmap_link.next;
	p->pmap_link.prev = (queue_t)pro->pmap_link.prev;
	pro->pmap_link.prev = (queue_t)p;

	simple_unlock(&free_pmap_lock);

	return(p);
}
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
void
pmap_destroy(
	register pmap_t	p)
{
	register pt_entry_t	*pdep;
	register int		c;
	register vm_page_t	m;
	register pmap_t		pre, pro;

	if (p == PMAP_NULL)
		return;

	simple_lock(&p->lock);
	c = --p->ref_count;
	if (c == 0) {
		register int	my_cpu;

		mp_disable_preemption();
		my_cpu = cpu_number();

		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		/* force pmap/cr3 update */
		PMAP_UPDATE_TLBS(p,
				 VM_MIN_ADDRESS,
				 VM_MAX_KERNEL_ADDRESS);

		if (PMAP_REAL(my_cpu) == p) {
			PMAP_CPU_CLR(p, my_cpu);
			PMAP_REAL(my_cpu) = kernel_pmap;
#ifdef PAE
			set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
#else
			set_cr3((unsigned int)kernel_pmap->pdirbase);
#endif
		}
		mp_enable_preemption();
	}
	simple_unlock(&p->lock);

	if (c != 0) {
		return;		/* still in use */
	}

	/* remove from pmap queue */
	simple_lock(&free_pmap_lock);

	pre = (pmap_t)p->pmap_link.prev;
	pre->pmap_link.next = (queue_t)p->pmap_link.next;
	pro = (pmap_t)p->pmap_link.next;
	pro->pmap_link.prev = (queue_t)p->pmap_link.prev;

	simple_unlock(&free_pmap_lock);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	pdep = (pt_entry_t *)p->dirbase;

	while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
	    int ind;

	    if (*pdep & INTEL_PTE_VALID) {
		ind = pdep - (pt_entry_t *)&p->dirbase[0];
		vm_object_lock(p->pm_obj);
		m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
		if (m == VM_PAGE_NULL) {
		    panic("pmap_destroy: pte page not in object");
		}
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		vm_object_unlock(p->pm_obj);
		vm_page_unlock_queues();

		/*
		 *	Clear pdes, this might be headed for the cache.
		 */
		/* ... */
	    }
	    pdep++;
	}

	vm_object_deallocate(p->pm_obj);
	kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
	zfree(pdpt_zone, (void *)p->pm_hold);
	zfree(pmap_zone, p);
}
/*
 *	Add a reference to the specified pmap.
 */
void
pmap_reference(
	register pmap_t	p)
{
	if (p != PMAP_NULL) {
		simple_lock(&p->lock);
		p->ref_count++;
		simple_unlock(&p->lock);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
void
pmap_remove_range(
	pmap_t			pmap,
	vm_offset_t		va,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	register pt_entry_t	*cpte;
	int			num_removed, num_unwired;
	int			pai;
	pmap_paddr_t		pa;

#if	DEBUG_PTE_PAGE
	if (pmap != kernel_pmap)
		ptep_check(get_pte_page(spte));
#endif	/* DEBUG_PTE_PAGE */
	num_removed = 0;
	num_unwired = 0;

	for (cpte = spte; cpte < epte;
	     cpte++, va += PAGE_SIZE) {

	    pa = pte_to_pa(*cpte);
	    if (pa == 0)
		continue;

	    num_removed++;
	    if (iswired(*cpte))
		num_unwired++;

	    if (!valid_page(i386_btop(pa))) {

		/*
		 *	Outside range of managed physical memory.
		 *	Just remove the mappings.
		 */
		register pt_entry_t	*lpte = cpte;

		*lpte = 0;
		continue;
	    }

	    pai = pa_index(pa);
	    LOCK_PVH(pai);

	    /*
	     *	Get the modify and reference bits.
	     */
	    {
		register pt_entry_t	*lpte;

		lpte = cpte;
		pmap_phys_attributes[pai] |=
			*lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
		*lpte = 0;
	    }

	    /*
	     *	Remove the mapping from the pvlist for
	     *	this physical page.
	     */
	    {
		register pv_entry_t	pv_h, prev, cur;

		pv_h = pai_to_pvh(pai);
		if (pv_h->pmap == PMAP_NULL) {
		    panic("pmap_remove: null pv_list!");
		}
		if (pv_h->va == va && pv_h->pmap == pmap) {
		    /*
		     * Header is the pv_entry.  Copy the next one
		     * to header and free the next one (we cannot
		     * free the header)
		     */
		    cur = pv_h->next;
		    if (cur != PV_ENTRY_NULL) {
			*pv_h = *cur;
			PV_FREE(cur);
		    }
		    else {
			pv_h->pmap = PMAP_NULL;
		    }
		}
		else {
		    cur = pv_h;
		    do {
			prev = cur;
			if ((cur = prev->next) == PV_ENTRY_NULL) {
			    panic("pmap-remove: mapping not in pv_list!");
			}
		    } while (cur->va != va || cur->pmap != pmap);
		    prev->next = cur->next;
		    PV_FREE(cur);
		}
		UNLOCK_PVH(pai);
	    }
	}

	/*
	 *	Update the counts
	 */
	assert(pmap->stats.resident_count >= num_removed);
	pmap->stats.resident_count -= num_removed;
	assert(pmap->stats.wired_count >= num_unwired);
	pmap->stats.wired_count -= num_unwired;
}
/*
 *	Remove phys addr if mapped in specified map
 *
 */
void
pmap_remove_some_phys(
	__unused pmap_t		map,
	__unused ppnum_t	pn)
{

/* Implement to support working set code */

}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;
	vm_offset_t		s, e;
	vm_offset_t		orig_s;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	if (value_64bit(s64) || value_64bit(e64)) {
		panic("pmap_remove addr overflow");
	}

	orig_s = s = (vm_offset_t)low32(s64);
	e = (vm_offset_t)low32(e64);

	pde = pmap_pde(map, s);

	while (s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
		spte = &spte[ptenum(s)];
		epte = &spte[intel_btop(l-s)];
		pmap_remove_range(map, s, spte, epte);
	    }
	    s = l;
	    pde++;
	}

	PMAP_UPDATE_TLBS(map, orig_s, e);

	PMAP_READ_UNLOCK(map, spl);
}
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pv_entry_t		pv_h, prev;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	boolean_t		remove;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	phys = (pmap_paddr_t)i386_ptob(pn);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Determine the new protection.
	 */
	switch (prot) {
	    case VM_PROT_READ:
	    case VM_PROT_READ|VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	    case VM_PROT_ALL:
		return;		/* nothing to do */
	    default:
		remove = TRUE;
		break;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Walk down PV list, changing or removing all mappings.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {

	    prev = pv_e = pv_h;
	    do {
		register vm_offset_t va;

		pmap = pv_e->pmap;
		/*
		 * Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		va = pv_e->va;
		pte = pmap_pte(pmap, va);

		/*
		 * Consistency checks.
		 */
		/* assert(*pte & INTEL_PTE_VALID); XXX */
		/* assert(pte_to_phys(*pte) == phys); */

		/*
		 * Remove the mapping if new protection is NONE
		 * or if write-protecting a kernel mapping.
		 */
		if (remove || pmap == kernel_pmap) {
		    /*
		     * Remove the mapping, collecting any modify bits.
		     */
		    pmap_phys_attributes[pai] |=
			*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
		    *pte = 0;
		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

		    assert(pmap->stats.resident_count >= 1);
		    pmap->stats.resident_count--;

		    /*
		     * Remove the pv_entry.
		     */
		    if (pv_e == pv_h) {
			/*
			 * Fix up head later.
			 */
			pv_h->pmap = PMAP_NULL;
		    }
		    else {
			/*
			 * Delete this entry.
			 */
			prev->next = pv_e->next;
			PV_FREE(pv_e);
		    }
		}
		else {
		    /*
		     * Write-protect.
		     */
		    *pte &= ~INTEL_PTE_WRITE;
		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
		    /*
		     * Advance prev.
		     */
		    prev = pv_e;
		}

		simple_unlock(&pmap->lock);

	    } while ((pv_e = prev->next) != PV_ENTRY_NULL);

	    /*
	     * If pv_head mapping was removed, fix it up.
	     */
	    if (pv_h->pmap == PMAP_NULL) {
		pv_e = pv_h->next;
		if (pv_e != PV_ENTRY_NULL) {
		    *pv_h = *pv_e;
		    PV_FREE(pv_e);
		}
	    }
	}

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Disconnect all mappings for this page and return reference and change status
 *	in generic format.
 *
 */
unsigned int pmap_disconnect(
	ppnum_t pa)
{
	pmap_page_protect(pa, 0);		/* disconnect the page */
	return (pmap_get_refmod(pa));		/* return ref/chg status */
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
void
pmap_protect(
	pmap_t		map,
	vm_offset_t	s,
	vm_offset_t	e,
	vm_prot_t	prot)
{
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;
	spl_t			spl;
	vm_offset_t		orig_s = s;

	if (map == PMAP_NULL)
		return;

	/*
	 *	Determine the new protection.
	 */
	switch (prot) {
	    case VM_PROT_READ:
	    case VM_PROT_READ|VM_PROT_EXECUTE:
		break;
	    case VM_PROT_READ|VM_PROT_WRITE:
	    case VM_PROT_ALL:
		return;		/* nothing to do */
	    default:
		pmap_remove(map, (addr64_t)s, (addr64_t)e);
		return;
	}

	SPLVM(spl);
	simple_lock(&map->lock);

	pde = pmap_pde(map, s);
	while (s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
		spte = &spte[ptenum(s)];
		epte = &spte[intel_btop(l-s)];

		while (spte < epte) {
		    if (*spte & INTEL_PTE_VALID)
			*spte &= ~INTEL_PTE_WRITE;
		    spte++;
		}
	    }
	    s = l;
	    pde++;
	}

	PMAP_UPDATE_TLBS(map, orig_s, e);

	simple_unlock(&map->lock);
	SPLX(spl);
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(
	register pmap_t		pmap,
	vm_offset_t		v,
	ppnum_t			pn,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	register pt_entry_t	*pte;
	register pv_entry_t	pv_h;
	register int		pai;
	pv_entry_t		pv_e;
	pt_entry_t		template;
	spl_t			spl;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t)i386_ptob(pn);

	XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
	    /* ... */);

	assert(pn != vm_page_fictitious_addr);
	if (pmap_debug)
		printf("pmap(%x, %x)\n", v, pn);
	if (pmap == PMAP_NULL)
		return;

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
= PV_ENTRY_NULL
;
1690 PMAP_READ_LOCK(pmap
, spl
);
1693 * Expand pmap to include this pte. Assume that
1694 * pmap is always expanded to include enough hardware
1695 * pages to map one VM page.
1698 while ((pte
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
1700 * Must unlock to expand the pmap.
1702 PMAP_READ_UNLOCK(pmap
, spl
);
1704 pmap_expand(pmap
, v
);
1706 PMAP_READ_LOCK(pmap
, spl
);
1709 * Special case if the physical page is already mapped
1712 old_pa
= pte_to_pa(*pte
);
1715 * May be changing its wired attribute or protection
1718 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
1720 if(flags
& VM_MEM_NOT_CACHEABLE
) {
1721 if(!(flags
& VM_MEM_GUARDED
))
1722 template |= INTEL_PTE_PTA
;
1723 template |= INTEL_PTE_NCACHE
;
1726 if (pmap
!= kernel_pmap
)
1727 template |= INTEL_PTE_USER
;
1728 if (prot
& VM_PROT_WRITE
)
1729 template |= INTEL_PTE_WRITE
;
1731 template |= INTEL_PTE_WIRED
;
1733 pmap
->stats
.wired_count
++;
1736 if (iswired(*pte
)) {
1737 assert(pmap
->stats
.wired_count
>= 1);
1738 pmap
->stats
.wired_count
--;
1742 if (*pte
& INTEL_PTE_MOD
)
1743 template |= INTEL_PTE_MOD
;
1744 WRITE_PTE(pte
, template)
	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	SHARING_FAULTS complicates this slightly in that it cannot
	 *	replace the mapping, but must remove it (because adding the
	 *	pvlist entry for the new mapping may remove others), and
	 *	hence always enters the new mapping at step 3)
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

#if	DEBUG_PTE_PAGE
	    if (pmap != kernel_pmap)
		ptep_check(get_pte_page(pte));
#endif	/* DEBUG_PTE_PAGE */

	    /*
	     *	Don't do anything to pages outside valid memory here.
	     *	Instead convince the code that enters a new mapping
	     *	to overwrite the old one.
	     */

	    if (valid_page(i386_btop(old_pa))) {

		pai = pa_index(old_pa);
		LOCK_PVH(pai);

		assert(pmap->stats.resident_count >= 1);
		pmap->stats.resident_count--;
		if (iswired(*pte)) {
		    assert(pmap->stats.wired_count >= 1);
		    pmap->stats.wired_count--;
		}

		pmap_phys_attributes[pai] |=
			*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
		WRITE_PTE(pte, 0)

		/*
		 *	Remove the mapping from the pvlist for
		 *	this physical page.
		 */
		{
		    register pv_entry_t	prev, cur;

		    pv_h = pai_to_pvh(pai);
		    if (pv_h->pmap == PMAP_NULL) {
			panic("pmap_enter: null pv_list!");
		    }
		    if (pv_h->va == v && pv_h->pmap == pmap) {
			/*
			 * Header is the pv_entry.  Copy the next one
			 * to header and free the next one (we cannot
			 * free the header)
			 */
			cur = pv_h->next;
			if (cur != PV_ENTRY_NULL) {
			    *pv_h = *cur;
			    pv_e = cur;
			}
			else {
			    pv_h->pmap = PMAP_NULL;
			}
		    }
		    else {
			cur = pv_h;
			do {
			    prev = cur;
			    if ((cur = prev->next) == PV_ENTRY_NULL) {
				panic("pmap_enter: mapping not in pv_list!");
			    }
			} while (cur->va != v || cur->pmap != pmap);
			prev->next = cur->next;
			pv_e = cur;
		    }
		}
		UNLOCK_PVH(pai);
	    }
	    else {

		/*
		 *	old_pa is not managed.  Pretend it's zero so code
		 *	at Step 3) will enter new mapping (overwriting old
		 *	one).  Do removal part of accounting.
		 */
		old_pa = (pmap_paddr_t) 0;
		assert(pmap->stats.resident_count >= 1);
		pmap->stats.resident_count--;
		if (iswired(*pte)) {
		    assert(pmap->stats.wired_count >= 1);
		    pmap->stats.wired_count--;
		}
	    }
	}
	if (valid_page(i386_btop(pa))) {

	    /*
	     *	Step 2) Enter the mapping in the PV list for this
	     *	physical page.
	     */

	    pai = pa_index(pa);

#if SHARING_FAULTS
RetryPvList:
	    /*
	     * We can return here from the sharing fault code below
	     * in case we removed the only entry on the pv list and thus
	     * must enter the new one in the list header.
	     */
#endif /* SHARING_FAULTS */
	    LOCK_PVH(pai);
	    pv_h = pai_to_pvh(pai);

	    if (pv_h->pmap == PMAP_NULL) {
		/*
		 *	No mappings yet
		 */
		pv_h->va = v;
		pv_h->pmap = pmap;
		pv_h->next = PV_ENTRY_NULL;
	    }
	    else {
#if	DEBUG
		{
		    /*
		     * check that this mapping is not already there
		     * or there is no alias for this mapping in the same map
		     */
		    pv_entry_t	e = pv_h;
		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap && e->va == v)
			    panic("pmap_enter: already in pv_list");
			e = e->next;
		    }
		}
#endif	/* DEBUG */
#if	SHARING_FAULTS
		{
		    /*
		     * do sharing faults.
		     * if we find an entry on this pv list in the same address
		     * space, remove it.  we know there will not be more
		     * than one.
		     */
		    pv_entry_t	e = pv_h;
		    pt_entry_t	*opte;

		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap) {
			    /*
			     *	Remove it, drop pv list lock first.
			     */
			    UNLOCK_PVH(pai);

			    opte = pmap_pte(pmap, e->va);
			    assert(opte != PT_ENTRY_NULL);
			    /*
			     *	Invalidate the translation buffer,
			     *	then remove the mapping.
			     */
			    pmap_remove_range(pmap, e->va, opte,
					      opte + 1);
			    PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);

			    /*
			     * We could have removed the head entry,
			     * so there could be no more entries
			     * and so we have to use the pv head entry.
			     * so, go back to the top and try the entry
			     * again.
			     */
			    goto RetryPvList;
			}
			e = e->next;
		    }

		    /*
		     * check that this mapping is not already there
		     */
		    e = pv_h;
		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap)
			    panic("pmap_enter: alias in pv_list");
			e = e->next;
		    }
		}
#endif /* SHARING_FAULTS */
#if DEBUG_ALIAS
		{
		    /*
		     * check for aliases within the same address space.
		     */
		    pv_entry_t	e = pv_h;
		    vm_offset_t	rpc = get_rpc();

		    while (e != PV_ENTRY_NULL) {
			if (e->pmap == pmap) {
			    /*
			     * log this entry in the alias ring buffer
			     * if it's not there already.
			     */
			    struct pmap_alias *pma;
			    int ii, logit;

			    logit = TRUE;
			    for (ii = 0; ii < pmap_alias_index; ii++) {
				if (pmap_aliasbuf[ii].rpc == rpc) {
				    /* found it in the log already */
				    logit = FALSE;
				    break;
				}
			    }
			    if (logit) {
				pma = &pmap_aliasbuf[pmap_alias_index];
				pma->pmap = pmap;
				pma->va = v;
				pma->rpc = rpc;
				pma->cookie = PMAP_ALIAS_COOKIE;
				if (++pmap_alias_index >= PMAP_ALIAS_MAX)
				    panic("pmap_enter: exhausted alias log");
			    }
			}
			e = e->next;
		    }
		}
#endif /* DEBUG_ALIAS */
		/*
		 *	Add new pv_entry after header.
		 */
		if (pv_e == PV_ENTRY_NULL) {
		    PV_ALLOC(pv_e);
		    if (pv_e == PV_ENTRY_NULL) {
			panic("pmap no pv_e's");
		    }
		}
		pv_e->va = v;
		pv_e->pmap = pmap;
		pv_e->next = pv_h->next;
		pv_h->next = pv_e;
		/*
		 *	Remember that we used the pvlist entry.
		 */
		pv_e = PV_ENTRY_NULL;
	    }
	    UNLOCK_PVH(pai);
	}
	/*
	 *	Step 3) Enter and count the mapping.
	 */

	pmap->stats.resident_count++;

	/*
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;

	if(flags & VM_MEM_NOT_CACHEABLE) {
		if(!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}

	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		pmap->stats.wired_count++;
	}

	WRITE_PTE(pte, template)

Done:
	PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);

	if (pv_e != PV_ENTRY_NULL) {
	    PV_FREE(pv_e);
	}

	PMAP_READ_UNLOCK(pmap, spl);
}
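/*
 * Illustrative sketch, not part of the original source: a typical
 * pmap_enter() call, using the same argument order as the call made from
 * pmap_map() above (pmap, va, physical page number, protection, flags,
 * wired).  The function name example_enter() is hypothetical.
 */
#if 0
static void
example_enter(pmap_t pmap, vm_offset_t va, ppnum_t pn)
{
	pmap_enter(pmap, va, pn,
		   VM_PROT_READ | VM_PROT_WRITE,	/* protection */
		   0,					/* flags: normal, cacheable */
		   FALSE);				/* not wired */
}
#endif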
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	register pmap_t	map,
	vm_offset_t	v,
	boolean_t	wired)
{
	register pt_entry_t	*pte;
	spl_t			spl;

	/*
	 *	We must grab the pmap system lock because we may
	 *	change a pte_page queue.
	 */
	PMAP_READ_LOCK(map, spl);

	if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
	    /*
	     *	wiring down mapping
	     */
	    map->stats.wired_count++;
	    *pte++ |= INTEL_PTE_WIRED;
	}
	else if (!wired && iswired(*pte)) {
	    /*
	     *	unwiring mapping
	     */
	    assert(map->stats.wired_count >= 1);
	    map->stats.wired_count--;
	    *pte++ &= ~INTEL_PTE_WIRED;
	}

	PMAP_READ_UNLOCK(map, spl);
}
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	vm_offset_t	a32;
	ppnum_t		ppn;

	if (value_64bit(va))
		panic("pmap_find_phys 64 bit value");
	a32 = (vm_offset_t) low32(va);
	ptp = pmap_pte(pmap, a32);
	if (PT_ENTRY_NULL == ptp) {
		ppn = 0;
	} else {
		ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
	}
	return ppn;
}
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *	Change to shim for backwards compatibility but will not
 *	work for 64 bit systems.  Some old drivers that we cannot
 *	change need this.
 */
vm_offset_t
pmap_extract(
	register pmap_t	pmap,
	vm_offset_t	va)
{
	ppnum_t		ppn;
	vm_offset_t	vaddr;

	vaddr = (vm_offset_t)0;
	ppn = pmap_find_phys(pmap, (addr64_t)va);
	if (ppn) {
		vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK);
	}
	return (vaddr);
}
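/*
 * Illustrative sketch, not part of the original source: pmap_find_phys()
 * returns a physical page number, while pmap_extract() is the 32-bit shim
 * that returns a byte address (0 when nothing is mapped).  The function name
 * example_translate() is hypothetical.
 */
#if 0
static void
example_translate(pmap_t pmap, vm_offset_t va)
{
	ppnum_t		pn   = pmap_find_phys(pmap, (addr64_t) va);	/* page number, 0 if unmapped */
	vm_offset_t	phys = pmap_extract(pmap, va);			/* byte address, 0 if unmapped */

	(void) pn; (void) phys;
}
#endif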
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
void
pmap_expand(
	register pmap_t		map,
	register vm_offset_t	v)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	register int		i;
	spl_t			spl;
	ppnum_t			pn;

	if (map == kernel_pmap) {
	    pmap_growkernel(v);
	    return;
	}

	/*
	 *	Allocate a VM page for the level 2 page table entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdenum(map, v);
	vm_object_lock(map->pm_obj);
	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
	vm_page_lock_queues();
	vm_page_wire(m);
	inuse_ptepages_count++;
	vm_object_unlock(map->pm_obj);
	vm_page_unlock_queues();

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, v) != PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pdp = &map->dirbase[pdenum(map, v)];
	*pdp = pa_to_pte(pa)
		| INTEL_PTE_VALID
		| INTEL_PTE_USER
		| INTEL_PTE_WRITE;

	PMAP_READ_UNLOCK(map, spl);
	return;
}
/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(
	pmap_t		dst_pmap,
	pmap_t		src_pmap,
	vm_offset_t	dst_addr,
	vm_size_t	len,
	vm_offset_t	src_addr)
{
#ifdef	lint
	dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
#endif	/* lint */
}
/*
 *	pmap_sync_page_data_phys(ppnum_t pa)
 *
 *	Invalidates all of the instruction cache on a physical page and
 *	pushes any dirty data from the data cache for the same physical page
 *	Not required in i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 *	pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 *	Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;
	spl_t			spl;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_READ_LOCK(p, spl);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++)
	{
	   if (*pdp & INTEL_PTE_VALID) {
	      if(*pdp & INTEL_PTE_REF) {
		*pdp &= ~INTEL_PTE_REF;
	      } else {
		ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
		eptp = ptp + NPTEPG;

		/*
		 * If the pte page has any wired mappings, we cannot
		 * free it.
		 */
		wired = 0;
		{
		    register pt_entry_t *ptep;
		    for (ptep = ptp; ptep < eptp; ptep++) {
			if (iswired(*ptep)) {
			    wired = 1;
			    break;
			}
		    }
		}
		if (!wired) {
		    /*
		     * Remove the virtual addresses mapped by this pte page.
		     */
		    pmap_remove_range(p,
				pdetova(pdp - (pt_entry_t *)p->dirbase),
				ptp,
				eptp);

		    /*
		     * Invalidate the page directory pointer.
		     */
		    *pdp = 0x0;

		    PMAP_READ_UNLOCK(p, spl);

		    /*
		     * And free the pte page itself.
		     */
		    {
			register vm_page_t m;

			vm_object_lock(p->pm_obj);
			m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
			if (m == VM_PAGE_NULL)
			    panic("pmap_collect: pte page not in object");
			vm_page_lock_queues();
			vm_page_free(m);
			inuse_ptepages_count--;
			vm_page_unlock_queues();
			vm_object_unlock(p->pm_obj);
		    }

		    PMAP_READ_LOCK(p, spl);
		}
	      }
	   }
	}
	PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
	PMAP_READ_UNLOCK(p, spl);
	return;
}
/*
 *	Routine:	pmap_kernel
 *	Function:
 *		Returns the physical map handle for the kernel.
 */
pmap_t
pmap_kernel(void)
{
	return (kernel_pmap);
}

void
pmap_copy_page(src, dst)
	ppnum_t src;
	ppnum_t dst;
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}
/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t		pmap,
	__unused vm_offset_t	start_addr,
	__unused vm_offset_t	end_addr,
	__unused boolean_t	pageable)
{
#ifdef	lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif	/* lint */
}
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_entry_t		pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */

	PMAP_WRITE_LOCK(spl);
	phys = i386_ptob(pn);
	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Walk down PV list, clearing all modify or reference bits.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     *	There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 * Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */

		    /*
		     * Clear modify or reference bits.
		     */
		    *pte &= ~bits;

		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
		}
		simple_unlock(&pmap->lock);
	    }
	}

	pmap_phys_attributes[pai] &= ~bits;

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Check specified attribute bits.
 */
boolean_t
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_entry_t		pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return (FALSE);
	}

	/*
	 *	Lock the pmap system first, since we will be checking
	 *	several pmaps.
	 */

	PMAP_WRITE_LOCK(spl);
	phys = i386_ptob(pn);
	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	if (pmap_phys_attributes[pai] & bits) {
	    PMAP_WRITE_UNLOCK(spl);
	    return (TRUE);
	}

	/*
	 *	Walk down PV list, checking all mappings.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     *	There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 * Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */
		}

		/*
		 * Check modify or reference bits.
		 */
		{
		    if (*pte++ & bits) {
			simple_unlock(&pmap->lock);
			PMAP_WRITE_UNLOCK(spl);
			return (TRUE);
		    }
		}
		simple_unlock(&pmap->lock);
	    }
	}
	PMAP_WRITE_UNLOCK(spl);
	return (FALSE);
}
/*
 *	Set specified attribute bits.
 */
void
phys_attribute_set(
	ppnum_t		pn,
	int		bits)
{
	spl_t		spl;
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system and set the requested bits in
	 *	the phys attributes array.  Don't need to bother with
	 *	ptes because the test routine looks here first.
	 */
	phys = i386_ptob(pn);
	PMAP_WRITE_LOCK(spl);
	pmap_phys_attributes[pa_index(phys)] |= bits;
	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Set the modify bit on the specified physical page.
 */

void pmap_set_modify(
	       ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */

void
pmap_clear_modify(
		  ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */

boolean_t
pmap_is_modified(
		 ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED));
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */

void
pmap_clear_reference(
		     ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */

boolean_t
pmap_is_referenced(
		   ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_REFERENCED));
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pa)
{
	return (  ((phys_attribute_test(pa, PHYS_MODIFIED))?   VM_MEM_MODIFIED : 0)
		| ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pa, unsigned int mask)
{
	unsigned int	x86Mask;

	x86Mask = (  ((mask & VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
		   | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
	phys_attribute_clear(pa, x86Mask);
}
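/*
 * Illustrative sketch, not part of the original source: the round trip
 * between the VM layer's generic VM_MEM_* bits and the i386 PHYS_* attribute
 * bits, as implemented by pmap_get_refmod()/pmap_clear_refmod() just above.
 * The function name example_refmod() is hypothetical.
 */
#if 0
static void
example_refmod(ppnum_t pn)
{
	unsigned int refmod = pmap_get_refmod(pn);	/* VM_MEM_MODIFIED | VM_MEM_REFERENCED */

	if (refmod & VM_MEM_MODIFIED)
		pmap_clear_refmod(pn, VM_MEM_MODIFIED);	/* clears PHYS_MODIFIED underneath */
}
#endif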
/*
 *	Set the modify bit on the specified range
 *	of this map as requested.
 *
 *	This optimization stands only if each time the dirty bit
 *	in vm_page_t is tested, it is also tested in the pmap.
 */
void
pmap_modify_pages(
	pmap_t		map,
	vm_offset_t	s,
	vm_offset_t	e)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_offset_t		l;
	vm_offset_t		orig_s = s;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	pde = pmap_pde(map, s);
	while (s && s < e) {
	    l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
	    if (l > e)
		l = e;
	    if (*pde & INTEL_PTE_VALID) {
		spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
		if (l) {
		   spte = &spte[ptenum(s)];
		   epte = &spte[intel_btop(l-s)];
		} else {
		   epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
		   spte = &spte[ptenum(s)];
		}
		while (spte < epte) {
		    if (*spte & INTEL_PTE_VALID) {
			*spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
		    }
		    spte++;
		}
	    }
	    s = l;
	    pde++;
	}
	PMAP_UPDATE_TLBS(map, orig_s, e);
	PMAP_READ_UNLOCK(map, spl);
}
void
invalidate_icache(__unused vm_offset_t	addr,
		  __unused unsigned	cnt,
		  __unused int		phys)
{
	return;
}
void
flush_dcache(__unused vm_offset_t	addr,
	     __unused unsigned		count,
	     __unused int		phys)
{
	return;
}
/*
 *	TLB Coherence Code (TLB "shootdown" code)
 *
 * Threads that belong to the same task share the same address space and
 * hence share a pmap.  However, they may run on distinct cpus and thus
 * have distinct TLBs that cache page table entries.  In order to guarantee
 * the TLBs are consistent, whenever a pmap is changed, all threads that
 * are active in that pmap must have their TLB updated.  To keep track of
 * this information, the set of cpus that are currently using a pmap is
 * maintained within each pmap structure (cpus_using).  Pmap_activate() and
 * pmap_deactivate() add and remove, respectively, a cpu from this set.
 * Since the TLBs are not addressable over the bus, each processor must
 * flush its own TLB; a processor that needs to invalidate another TLB
 * needs to interrupt the processor that owns that TLB to signal the
 * update.
 *
 * Whenever a pmap is updated, the lock on that pmap is locked, and all
 * cpus using the pmap are signaled to invalidate.  All threads that need
 * to activate a pmap must wait for the lock to clear to await any updates
 * in progress before using the pmap.  They must ACQUIRE the lock to add
 * their cpu to the cpus_using set.  An implicit assumption made
 * throughout the TLB code is that all kernel code that runs at or higher
 * than splvm blocks out update interrupts, and that such code does not
 * touch pageable pages.
 *
 * A shootdown interrupt serves another function besides signaling a
 * processor to invalidate.  The interrupt routine (pmap_update_interrupt)
 * waits for both the pmap lock (and the kernel pmap lock) to clear,
 * preventing user code from making implicit pmap updates while the
 * sending processor is performing its update.  (This could happen via a
 * user data write reference that turns on the modify bit in the page
 * table.)  It must wait for any kernel updates that may have started
 * concurrently with a user pmap update because the IPC code changes
 * mappings.
 * Spinning on the VALUES of the locks is sufficient (rather than
 * having to acquire the locks) because any updates that occur subsequent
 * to finding the lock unlocked will be signaled via another interrupt.
 * (This assumes the interrupt is cleared before the low level interrupt code
 * calls pmap_update_interrupt().)
 *
 * The signaling processor must wait for any implicit updates in progress
 * to terminate before continuing with its update.  Thus it must wait for an
 * acknowledgement of the interrupt from each processor for which such
 * references could be made.  For maintaining this information, a set
 * cpus_active is used.  A cpu is in this set if and only if it can
 * use a pmap.  When pmap_update_interrupt() is entered, a cpu is removed from
 * this set; when all such cpus are removed, it is safe to update.
 *
 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
 * be at least at the priority of the interprocessor interrupt
 * (splip<=splvm).  Otherwise, A could grab a lock and be interrupted by a
 * kernel update; it would spin forever in pmap_update_interrupt() trying
 * to acquire the user pmap lock it had already acquired.  Furthermore A
 * must remove itself from cpus_active.  Otherwise, another cpu holding
 * the lock (B) could be in the process of sending an update signal to A,
 * and thus be waiting for A to remove itself from cpus_active.  If A is
 * spinning on the lock at priority this will never happen and a deadlock
 * will result.
 */
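
/*
 * Illustrative sketch (not compiled here): a minimal user-space model of
 * the shootdown handshake described above.  An "updater" holds the pmap
 * lock and waits for every other cpu to drop out of cpus_active; each
 * responder clears its bit, spins on the VALUE of the lock (it never
 * acquires it), "flushes", and then rejoins the active set.  All names
 * (model_*, NCPUS_MODEL) are hypothetical and assume a POSIX environment,
 * not the kernel interfaces used in this file.
 */
#if 0	/* illustrative model only */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

#define NCPUS_MODEL	4

static atomic_uint	model_cpus_active = (1u << NCPUS_MODEL) - 1;
static atomic_uint	model_pmap_lock;	/* 0 = unlocked */
static atomic_uint	model_flushes;

/* Responder: what pmap_update_interrupt() does on each signaled cpu. */
static void *
model_responder(void *arg)
{
	unsigned me = (unsigned)(unsigned long)arg;

	/* leave the active set so the updater may proceed */
	atomic_fetch_and(&model_cpus_active, ~(1u << me));

	/* spin on the lock VALUE; a later update would send another signal */
	while (atomic_load(&model_pmap_lock) != 0)
		;

	atomic_fetch_add(&model_flushes, 1);		/* "flush the TLB" */

	/* rejoin the active set */
	atomic_fetch_or(&model_cpus_active, 1u << me);
	return NULL;
}

int
main(void)
{
	pthread_t	t[NCPUS_MODEL - 1];
	unsigned	i;

	/* Updater (cpu 0): lock, signal, wait for the others to go inactive. */
	atomic_store(&model_pmap_lock, 1);
	for (i = 1; i < NCPUS_MODEL; i++)
		pthread_create(&t[i - 1], NULL, model_responder,
			       (void *)(unsigned long)i);

	while ((atomic_load(&model_cpus_active) & ~1u) != 0)
		;					/* all responders parked */

	/* ... the page table update would happen here ... */

	atomic_store(&model_pmap_lock, 0);		/* release: responders resume */

	for (i = 1; i < NCPUS_MODEL; i++)
		pthread_join(t[i - 1], NULL);
	printf("flushes acknowledged: %u\n", atomic_load(&model_flushes));
	return 0;
}
#endif	/* illustrative model only */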
/*
 *	Signal another CPU that it must flush its TLB
 */
void
signal_cpus(
	cpu_set		use_list,
	pmap_t		pmap,
	vm_offset_t	start_addr,
	vm_offset_t	end_addr)
{
	register int			which_cpu, j;
	register pmap_update_list_t	update_list_p;

	while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
	    which_cpu -= 1;	/* convert to 0 origin */

	    update_list_p = cpu_update_list(which_cpu);
	    simple_lock(&update_list_p->lock);

	    j = update_list_p->count;
	    if (j >= UPDATE_LIST_SIZE) {
		/*
		 *	list overflowed.  Change last item to
		 *	indicate overflow.
		 */
		update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
		update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
		update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
	    }
	    else {
		update_list_p->item[j].pmap  = pmap;
		update_list_p->item[j].start = start_addr;
		update_list_p->item[j].end   = end_addr;
		update_list_p->count = j+1;
	    }
	    cpu_update_needed(which_cpu) = TRUE;
	    simple_unlock(&update_list_p->lock);

	    /* if it's the kernel pmap, ignore cpus_idle */
	    if (((cpus_idle & (1 << which_cpu)) == 0) ||
		(pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
		i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);

	    use_list &= ~(1 << which_cpu);
	}
}
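
/*
 * Illustrative sketch (not compiled here) of the overflow policy used in
 * signal_cpus() above: when the fixed-size per-cpu update list fills, the
 * final slot is widened to cover the whole address range, so a request is
 * never dropped -- the receiver simply performs one conservative full-range
 * flush instead of a precise one.  The MODEL_* and model_* names are
 * hypothetical, not kernel interfaces.
 */
#if 0	/* illustrative sketch */
#include <stdio.h>
#include <stddef.h>

#define MODEL_LIST_SIZE	4
#define MODEL_MIN_ADDR	0x00000000ul
#define MODEL_MAX_ADDR	0xfffff000ul

struct model_item { unsigned long start, end; };

static struct model_item	model_list[MODEL_LIST_SIZE];
static size_t			model_count;

static void
model_record_flush(unsigned long start, unsigned long end)
{
	if (model_count >= MODEL_LIST_SIZE) {
		/* overflow: collapse the last slot into "flush everything" */
		model_list[MODEL_LIST_SIZE - 1].start = MODEL_MIN_ADDR;
		model_list[MODEL_LIST_SIZE - 1].end   = MODEL_MAX_ADDR;
	} else {
		model_list[model_count].start = start;
		model_list[model_count].end   = end;
		model_count++;
	}
}

int
main(void)
{
	unsigned long	va;
	size_t		i;

	for (va = 0x1000; va <= 0x6000; va += 0x1000)	/* six requests, four slots */
		model_record_flush(va, va + 0x1000);
	for (i = 0; i < model_count; i++)
		printf("slot %zu: 0x%lx..0x%lx\n",
		       i, model_list[i].start, model_list[i].end);
	return 0;
}
#endif	/* illustrative sketch */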
void
process_pmap_updates(
	register pmap_t		my_pmap)
{
	register int			my_cpu;
	register pmap_update_list_t	update_list_p;
	register int			j;
	register pmap_t			pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();
	update_list_p = cpu_update_list(my_cpu);
	simple_lock(&update_list_p->lock);

	for (j = 0; j < update_list_p->count; j++) {
	    pmap = update_list_p->item[j].pmap;
	    if (pmap == my_pmap ||
		pmap == kernel_pmap) {

		if (pmap->ref_count <= 0) {
			PMAP_CPU_CLR(pmap, my_cpu);
			PMAP_REAL(my_cpu) = kernel_pmap;
#ifdef PAE
			set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
#else
			set_cr3((unsigned int)kernel_pmap->pdirbase);
#endif
		} else
			INVALIDATE_TLB(pmap,
				       update_list_p->item[j].start,
				       update_list_p->item[j].end);
	    }
	}
	update_list_p->count = 0;
	cpu_update_needed(my_cpu) = FALSE;
	simple_unlock(&update_list_p->lock);
	mp_enable_preemption();
}
/*
 *	Interrupt routine for TBIA requested from other processor.
 *	This routine can also be called at all interrupts time if
 *	the cpu was idle.  Some driver interrupt routines might access
 *	newly allocated vm.  (This is the case for hd)
 */
void
pmap_update_interrupt(void)
{
	register int		my_cpu;
	spl_t			s;
	register pmap_t		my_pmap;

	mp_disable_preemption();
	my_cpu = cpu_number();

	/*
	 *	Raise spl to splvm (above splip) to block out pmap_extract
	 *	from IO code (which would put this cpu back in the active
	 *	set).
	 */
	s = splhigh();

	my_pmap = PMAP_REAL(my_cpu);

	if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
		my_pmap = kernel_pmap;

	do {
	    /*
	     *	Indicate that we're not using either user or kernel
	     *	pmap.
	     */
	    i_bit_clear(my_cpu, &cpus_active);

	    /*
	     *	Wait for any pmap updates in progress, on either user
	     *	or kernel pmap.
	     */
	    while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
		   *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
		LOOP_CHECK("pmap_update_interrupt", my_pmap);
	    }

	    process_pmap_updates(my_pmap);

	    i_bit_set(my_cpu, &cpus_active);

	} while (cpu_update_needed(my_cpu));

	splx(s);
	mp_enable_preemption();
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
		printf("modified, ");
	if (attr & PHYS_REFERENCED)
		printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
		printf(" mapped at\n");
	else
		printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
		if (pv_h->pmap)
			printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
	vm_offset_t	vaddr)
{
	db_printf("0x%x", kvtophys(vaddr));
}

/*
 *	Walk the page tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	int		x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
		dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
		db_printf("need a dirbase...\n");
		return;
	}
	dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDEPG; y++, pdep++) {
		if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
			continue;
		}
		pdecnt++;
		ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
		db_printf("dir[%4d]: 0x%x\n", y, *pdep);
		for (x = 0; x < NPTEPG; x++, ptep++) {
			if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
				continue;
			}
			ptecnt++;
			db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
				  x,
				  *ptep,
				  (y << 22) | (x << 12),
				  *ptep & ~INTEL_OFFMASK);
		}
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}

#endif /* MACH_KDB */
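
/*
 * Illustrative sketch (not compiled here): the va printed by db_show_vaddrs()
 * above is rebuilt from the 32-bit non-PAE decomposition -- 10 bits of
 * page-directory index, 10 bits of page-table index, 12 bits of page offset.
 * A minimal self-contained check of that arithmetic; the model_* names are
 * hypothetical.
 */
#if 0	/* illustrative sketch, assumes non-PAE 4KB pages */
#include <stdio.h>

static unsigned long
model_va(int pde_index, int pte_index)
{
	return ((unsigned long)pde_index << 22) | ((unsigned long)pte_index << 12);
}

int
main(void)
{
	/* directory slot 1, table slot 3 -> 0x00403000 */
	printf("va = 0x%08lx\n", model_va(1, 3));
	return 0;
}
#endif	/* illustrative sketch */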
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */
#ifdef MACH_BSD
/*
 *	BSD support routine to reassign virtual addresses.
 */
void
pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
{
	spl_t		spl;
	pt_entry_t	*pte, saved_pte;

	/* Lock the kernel map */
	PMAP_READ_LOCK(kernel_pmap, spl);

	while (size > 0) {
		pte = pmap_pte(kernel_pmap, from);
		if (pte == NULL)
			panic("pmap_pagemove from pte NULL");
		saved_pte = *pte;
		PMAP_READ_UNLOCK(kernel_pmap, spl);

		pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
			   VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);

		pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));

		PMAP_READ_LOCK(kernel_pmap, spl);
		pte = pmap_pte(kernel_pmap, to);
		if (pte == NULL)
			panic("pmap_pagemove 'to' pte NULL");

		*pte = saved_pte;

		from += PAGE_SIZE;
		to += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/* Get the processors to update the TLBs */
	PMAP_UPDATE_TLBS(kernel_pmap, from, from+size);
	PMAP_UPDATE_TLBS(kernel_pmap, to, to+size);

	PMAP_READ_UNLOCK(kernel_pmap, spl);
}
#endif /* MACH_BSD */
/* temporary workaround */
boolean_t
coredumpok(vm_map_t map, vm_offset_t va)
{
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) !=
		(INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
}
/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
#if GROW_KERNEL_FUNCTION_IMPLEMENTED
	struct pmap	*pmap;
	vm_offset_t	ptppaddr;
	ppnum_t		ppn;
	vm_page_t	nkpg;
	pd_entry_t	newpdir = 0;

	/*
	 * Serialize.
	 * Losers return to try again until the winner completes the work.
	 */
	if (kptobj == 0) panic("growkernel 0");
	if (!vm_object_lock_try(kptobj)) {
		return;
	}

	vm_page_lock_queues();

	/*
	 * If this is the first time thru, locate the end of the
	 * kernel page table entries and set nkpt to the current
	 * number of kernel page table pages
	 */
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;

		while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
		}
	}

	/*
	 * Now allocate and map the required number of page tables
	 */
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			continue; /* someone already filled this one */
		}

		nkpg = vm_page_alloc(kptobj, nkpt);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;
		ppn = nkpg->phys_page;
		pmap_zero_page(ppn);
		ptppaddr = i386_ptob(ppn);
		newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
					INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
		pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;

		simple_lock(&free_pmap_lock);
		for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
		     pmap != kernel_pmap;
		     pmap = (struct pmap *)pmap->pmap_link.next) {
			*pmap_pde(pmap, kernel_vm_end) = newpdir;
		}
		simple_unlock(&free_pmap_lock);
	}

	vm_page_unlock_queues();
	vm_object_unlock(kptobj);
#endif
}
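
/*
 * Illustrative sketch (not compiled here) of the boundary arithmetic used in
 * pmap_growkernel() above, assuming the classic non-PAE geometry
 * (PAGE_SIZE = 4096, NPTEPG = 1024, so one page directory entry spans 4MB).
 * Note the expression adds a full span before masking, so an already-aligned
 * address still advances to the next 4MB boundary.  The MODEL_* names are
 * hypothetical.
 */
#if 0	/* illustrative sketch */
#include <stdio.h>

#define MODEL_PAGE_SIZE	4096ul
#define MODEL_NPTEPG	1024ul
#define MODEL_PDE_SPAN	(MODEL_PAGE_SIZE * MODEL_NPTEPG)	/* 4MB */

static unsigned long
model_next_pde_boundary(unsigned long addr)
{
	return (addr + MODEL_PDE_SPAN) & ~(MODEL_PDE_SPAN - 1);
}

int
main(void)
{
	/* 0x00ab1234 -> 0x00c00000, and 0x00800000 (aligned) -> 0x00c00000 */
	printf("0x%lx -> 0x%lx\n", 0x00ab1234ul,
	       model_next_pde_boundary(0x00ab1234ul));
	printf("0x%lx -> 0x%lx\n", 0x00800000ul,
	       model_next_pde_boundary(0x00800000ul));
	return 0;
}
#endif	/* illustrative sketch */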
pt_entry_t *
pmap_mapgetpte(vm_map_t map, vm_offset_t v)
{
	return (pmap_pte(map->pmap, v));
}
boolean_t
phys_page_exists(
		 ppnum_t pn)
{
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return (TRUE);
	phys = (pmap_paddr_t) i386_ptob(pn);
	if (!pmap_valid_page(pn))
		return (FALSE);

	return TRUE;
}
void
mapping_free_prime(void)
{
	int		i;
	pv_entry_t	pv_e;

	for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		PV_FREE(pv_e);
	}
}
void
mapping_adjust(void)
{
	pv_entry_t	pv_e;
	int		i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}
	/* XXX rethink best way to do locking here */
	if (pv_free_count < PV_LOW_WATER_MARK) {
		for (i = 0; i < PV_ALLOC_CHUNK; i++) {
			pv_e = (pv_entry_t) zalloc(pv_list_zone);
			PV_FREE(pv_e);
		}
	}
	mappingrecurse = 0;
}
void
pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;

	for (i = 0; i < cnt; i++) {
		opte = pmap_pte(kernel_pmap, kernel_commpage);
		if (0 == opte) panic("kernel_commpage");
		npte = pmap_pte(kernel_pmap, user_commpage);
		if (0 == npte) panic("user_commpage");
		pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
		pte &= ~INTEL_PTE_WRITE;	// ensure read only
		WRITE_PTE_FAST(npte, pte);
		kernel_commpage += INTEL_PGBYTES;
		user_commpage += INTEL_PGBYTES;
	}
}
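
/*
 * Illustrative sketch (not compiled here) of the flag edit performed in
 * pmap_commpage_init() above: the user alias of a commpage PTE gains the
 * user and global bits and loses the write bit.  The bit values below are
 * the standard ia32 PTE bit positions, but the MODEL_* names are
 * hypothetical and independent of the INTEL_PTE_* macros used in this file.
 */
#if 0	/* illustrative sketch */
#include <stdio.h>

#define MODEL_PTE_WRITE		0x002u
#define MODEL_PTE_USER		0x004u
#define MODEL_PTE_GLOBAL	0x100u

int
main(void)
{
	unsigned int kernel_pte = 0xabcd0003u;	/* valid + writable frame */
	unsigned int user_pte;

	user_pte  = kernel_pte | MODEL_PTE_USER | MODEL_PTE_GLOBAL;
	user_pte &= ~MODEL_PTE_WRITE;		/* read-only alias */

	printf("kernel pte 0x%08x -> user pte 0x%08x\n", kernel_pte, user_pte);
	return 0;
}
#endif	/* illustrative sketch */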
static cpu_pmap_t		cpu_pmap_master;
static struct pmap_update_list	cpu_update_list_master;
struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	pmap_update_list_t	up;
	vm_offset_t		address;
	vm_map_entry_t		entry;

	if (is_boot_cpu) {
		cp = &cpu_pmap_master;
		up = &cpu_update_list_master;
	} else {
		/*
		 * The per-cpu pmap data structure itself.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			return NULL;
		}
		bzero((void *)cp, sizeof(cpu_pmap_t));

		/*
		 * The tlb flush update list.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &up, sizeof(*up));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			pmap_cpu_free(cp);
			return NULL;
		}

		/*
		 * The temporary windows used for copy/zero - see loose_ends.c
		 */
		for (i = 0; i < PMAP_NWINDOWS; i++) {
			ret = vm_map_find_space(kernel_map,
						&address, PAGE_SIZE, 0, &entry);
			if (ret != KERN_SUCCESS) {
				printf("pmap_cpu_alloc() "
				       "vm_map_find_space ret=%d\n", ret);
				pmap_cpu_free(cp);
				return NULL;
			}
			vm_map_unlock(kernel_map);

			cp->mapwindow[i].prv_CADDR = (caddr_t) address;
			cp->mapwindow[i].prv_CMAP = vtopte(address);
			* (int *) cp->mapwindow[i].prv_CMAP = 0;

			kprintf("pmap_cpu_alloc() "
				"window=%d CADDR=0x%x CMAP=0x%x\n",
				i, address, vtopte(address));
		}
	}

	/*
	 *	Set up the pmap request list
	 */
	cp->update_list = up;
	simple_lock_init(&up->lock, 0);
	up->count = 0;

	return cp;
}
void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
		if (cp->update_list != NULL)
			kfree((void *) cp->update_list,
			      sizeof(*cp->update_list));
		kfree((void *) cp, sizeof(cpu_pmap_t
));