/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
64 * Physical Map management code for Intel i386, i486, and i860.
66 * Manages physical address maps.
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
84 * this module may delay invalidate or reduced protection
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
#include <mach_ldebug.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/mp_slave_boot.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <kern/xpr.h>

#include <vm/vm_protos.h>
/*
 * Forward declarations for internal functions.
 */
extern void	pmap_remove_range(
			pmap_t		pmap,
			vm_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		pn,
			int		bits);

boolean_t	phys_attribute_test(
			ppnum_t		pn,
			int		bits);

void		phys_attribute_set(
			ppnum_t		pn,
			int		bits);

void		pmap_growkernel(
			vm_offset_t	addr);

void		pmap_set_reference(
			ppnum_t		pn);

pt_entry_t *	pmap_mapgetpte(
			vm_map_t	map,
			vm_offset_t	v);

boolean_t	phys_page_exists(
			ppnum_t		pn);

#ifndef	set_dirbase
void		set_dirbase(vm_offset_t	dirbase);
#endif	/* set_dirbase */

#define	iswired(pte)	((pte) & INTEL_PTE_WIRED)

#define	WRITE_PTE(pte_p, pte_entry)		*(pte_p) = (pte_entry);
#define	WRITE_PTE_FAST(pte_p, pte_entry)	*(pte_p) = (pte_entry);

#define value_64bit(value)	((value) & 0xFFFFFFFF00000000LL)
#define low32(x)		((unsigned int)((x) & 0x00000000ffffffffLL))
/*
 *	Private data structures.
 */

/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_entry_t; the list is the pv_table.
 */
typedef struct pv_entry {
	struct pv_entry	*next;		/* next pv_entry */
	pmap_t		pmap;		/* pmap where mapping lies */
	vm_offset_t	va;		/* virtual address for mapping */
} *pv_entry_t;

#define PV_ENTRY_NULL	((pv_entry_t) 0)

pv_entry_t	pv_head_table;		/* array of entries, one per page */

/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_list_zone if it becomes empty.
 */
pv_entry_t	pv_free_list;		/* free list at SPLVM */
decl_simple_lock_data(,pv_free_list_lock)
int		pv_free_count = 0;
#define	PV_LOW_WATER_MARK	5000
#define	PV_ALLOC_CHUNK		2000
thread_call_t	mapping_adjust_call;
static thread_call_data_t mapping_adjust_call_data;
int		mappingrecurse = 0;
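/*
 * Illustrative fragment (compiled out): every mapping of a managed
 * physical page can be visited by walking the pv_entry chain rooted at
 * that page's pv_head_table slot, located with the pa_index()/pai_to_pvh()
 * macros defined further below.  The local names here are illustrative only.
 */
#if 0	/* example only */
	int		pai  = pa_index(i386_ptob(pn));	/* pn: a managed page */
	pv_entry_t	pv_h = pai_to_pvh(pai);
	pv_entry_t	pv_e;

	if (pv_h->pmap != PMAP_NULL) {
		for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
			/* pv_e->pmap maps this page at virtual address pv_e->va */
		}
	}
#endif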
#define	PV_ALLOC(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	if ((pv_e = pv_free_list) != 0) { \
		pv_free_list = pv_e->next; \
		if (pv_free_count < PV_LOW_WATER_MARK) \
			if (hw_compare_and_store(0, 1, &mappingrecurse)) \
				thread_call_enter(mapping_adjust_call); \
	} \
	simple_unlock(&pv_free_list_lock); \
}

#define	PV_FREE(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	pv_e->next = pv_free_list; \
	pv_free_list = pv_e; \
	simple_unlock(&pv_free_list_lock); \
}
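/*
 * Sketch of the intended PV_ALLOC/PV_FREE pattern (compiled out): callers
 * try the SPLVM free list first and may fall back to the pv_list_zone when
 * the list is empty; entries go back with PV_FREE once unlinked.
 */
#if 0	/* example only */
	pv_entry_t	pv_e;

	PV_ALLOC(pv_e);
	if (pv_e == PV_ENTRY_NULL)
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
	/* ... link pv_e into a page's pv list ... */
	PV_FREE(pv_e);
#endif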
zone_t		pv_list_zone;		/* zone of pv_entry structures */

static zone_t	pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */
char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
pmap_paddr_t	vm_first_phys = (pmap_paddr_t) 0;
pmap_paddr_t	vm_last_phys  = (pmap_paddr_t) 0;
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

pmap_paddr_t	kernel_vm_end = (pmap_paddr_t)0;

#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
#if GROW_KERNEL_FUNCTION_IMPLEMENTED	/* not needed until growing kernel pmap */
static struct vm_object kptobj_object_store;
static vm_object_t	kptobj;
#endif

/*
 *	Index into pv_head table, its lock bits, and the modify/reference
 *	bits starting at vm_first_phys.
 */
#define pa_index(pa)		(i386_btop(pa - vm_first_phys))

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char	*pmap_phys_attributes;

/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD		/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF		/* page referenced */
#define PHYS_NCACHE	INTEL_PTE_NCACHE

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
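/*
 * Sketch of the index arithmetic used throughout this file (compiled out):
 * a physical address is turned into a page index relative to vm_first_phys,
 * which selects the page's pv_head entry, its lock bit, and its attribute
 * byte.  The local names are illustrative only.
 */
#if 0	/* example only */
	pmap_paddr_t	phys = i386_ptob(pn);		/* pn: a managed page */
	int		pai  = pa_index(phys);		/* index from vm_first_phys */
	pv_entry_t	pv_h = pai_to_pvh(pai);		/* head of its pv list */
	char		attr = pmap_phys_attributes[pai];	/* PHYS_MODIFIED etc. */

	lock_pvh_pai(pai);				/* bit lock in pv_lock_table */
	/* ... examine or edit pv_h and the attribute byte ... */
	unlock_pvh_pai(pai);
#endif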
/*
 *	Locking and TLB invalidation
 */

/*
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *	To protect against deadlock between these two cases, the pmap_lock
 *	is used.  There are three different locking protocols as a result:
 *
 *  1.  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
 *	the pmap.
 *
 *  2.  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
 *	lock on the pmap_lock (shared read), then lock the pmap
 *	and finally the pv_lists as needed [i.e. pmap lock before
 *	pv_list lock.]
 *
 *  3.  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
 *	Get a write lock on the pmap_lock (exclusive write); this
 *	also guarantees exclusive access to the pv_lists.  Lock the
 *	pmaps as needed.
 *
 *	At no time may any routine hold more than one pmap lock or more than
 *	one pv_list lock.  Because interrupt level routines can allocate
 *	mbufs and cause pmap_enter's, the pmap_lock and the lock on the
 *	kernel_pmap can only be held at splhigh.
 */

/*
 *	We raise the interrupt level to splvm, to block interprocessor
 *	interrupts during pmap operations.  We must take the CPU out of
 *	the cpus_active set while interrupts are blocked.
 */
#define SPLVM(spl)	{ \
	spl = splhigh(); \
	mp_disable_preemption(); \
	i_bit_clear(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
}

#define SPLX(spl)	{ \
	mp_disable_preemption(); \
	i_bit_set(cpu_number(), &cpus_active); \
	mp_enable_preemption(); \
	splx(spl); \
}

/*
 *	Lock on pmap system
 */
lock_t	pmap_system_lock;

#define PMAP_READ_LOCK(pmap, spl) {	\
	SPLVM(spl);			\
	lock_read(&pmap_system_lock);	\
	simple_lock(&(pmap)->lock);	\
}

#define PMAP_WRITE_LOCK(spl) {		\
	SPLVM(spl);			\
	lock_write(&pmap_system_lock);	\
}

#define PMAP_READ_UNLOCK(pmap, spl) {		\
	simple_unlock(&(pmap)->lock);		\
	lock_read_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_UNLOCK(spl) {		\
	lock_write_done(&pmap_system_lock);	\
	SPLX(spl);				\
}

#define PMAP_WRITE_TO_READ_LOCK(pmap) {		\
	simple_lock(&(pmap)->lock);		\
	lock_write_to_read(&pmap_system_lock);	\
}

#define LOCK_PVH(index)		lock_pvh_pai(index)
#define UNLOCK_PVH(index)	unlock_pvh_pai(index)
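/*
 * Sketch of locking protocols 2) and 3) described above (compiled out):
 * pmap-based operations take the system lock shared and then the pmap's
 * own lock; physical-page based operations take the system lock exclusive,
 * which also gives them the pv_lists.
 */
#if 0	/* example only */
	spl_t	spl;

	/* protocol 2: pmap-based (pmap_enter, pmap_remove, ...) */
	PMAP_READ_LOCK(pmap, spl);
	/* ... edit ptes; LOCK_PVH(pai)/UNLOCK_PVH(pai) around pv list edits ... */
	PMAP_READ_UNLOCK(pmap, spl);

	/* protocol 3: pv_list-based (pmap_page_protect, ...) */
	PMAP_WRITE_LOCK(spl);
	/* ... walk a pv list, taking each pmap's simple lock in turn ... */
	PMAP_WRITE_UNLOCK(spl);
#endif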
#if	USLOCK_DEBUG
extern int	max_lock_loops;
extern int	disableSerialOuput;
#define LOOP_VAR							\
	unsigned int	loop_count;					\
	loop_count = disableSerialOuput ? max_lock_loops		\
					: max_lock_loops * 100
#define LOOP_CHECK(msg, pmap)						\
	if (--loop_count == 0) {					\
		mp_disable_preemption();				\
		kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n",	\
			msg, cpu_number(), pmap, cpus_active);		\
		Debugger("deadlock detection");				\
		mp_enable_preemption();					\
		loop_count = max_lock_loops;				\
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */
#define PMAP_UPDATE_TLBS(pmap, s, e)					\
{									\
	cpu_set	cpu_mask;						\
	cpu_set	users;							\
	LOOP_VAR;							\
									\
	mp_disable_preemption();					\
	cpu_mask = 1 << cpu_number();					\
									\
	/* Since the pmap is locked, other updates are locked */	\
	/* out, and any pmap_activate has finished. */			\
									\
	/* find other cpus using the pmap */				\
	users = (pmap)->cpus_using & ~cpu_mask;				\
	if (users) {							\
		/* signal them, and wait for them to finish */		\
		/* using the pmap */					\
		signal_cpus(users, (pmap), (s), (e));			\
		while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
			LOOP_CHECK("PMAP_UPDATE_TLBS", pmap);		\
		}							\
	}								\
	/* invalidate our own TLB if pmap is in use */			\
	if ((pmap)->cpus_using & cpu_mask) {				\
		INVALIDATE_TLB((pmap), (s), (e));			\
	}								\
									\
	mp_enable_preemption();						\
}

#define MAX_TBIS_SIZE	32		/* > this -> TBIA */ /* XXX */

#define INVALIDATE_TLB(m, s, e) {	\
	flush_tlb();			\
}
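/*
 * Sketch of how PMAP_UPDATE_TLBS is used (compiled out): with the pmap
 * locked and its ptes already changed, the affected range is flushed here
 * and shot down on every other cpu currently using the pmap.
 */
#if 0	/* example only */
	*pte = 0;					/* e.g. tear down one mapping */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);	/* then flush the stale entries */
#endif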
/*
 *	Structures to keep track of pending TLB invalidations
 */
#define UPDATE_LIST_SIZE	4

struct pmap_update_item {
	pmap_t		pmap;		/* pmap to invalidate */
	vm_offset_t	start;		/* start address to invalidate */
	vm_offset_t	end;		/* end address to invalidate */
};

typedef	struct pmap_update_item	*pmap_update_item_t;

/*
 *	List of pmap updates.  If the list overflows,
 *	the last entry is changed to invalidate all.
 */
struct pmap_update_list {
	decl_simple_lock_data(,lock)
	int			count;
	struct pmap_update_item	item[UPDATE_LIST_SIZE];
};

typedef	struct pmap_update_list	*pmap_update_list_t;

extern void signal_cpus(
			cpu_set		use_list,
			pmap_t		pmap,
			vm_offset_t	start,
			vm_offset_t	end);

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
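/*
 * Sketch of the overflow rule stated above (compiled out): once a cpu's
 * list holds UPDATE_LIST_SIZE pending items, the last slot is widened to
 * cover everything, which is what signal_cpus does further below.
 */
#if 0	/* example only */
	if (update_list_p->count >= UPDATE_LIST_SIZE) {
		/* list overflowed: degrade to a full invalidate */
		update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
		update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
		update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
	} else {
		update_list_p->item[j].pmap  = pmap;
		update_list_p->item[j].start = start_addr;
		update_list_p->item[j].end   = end_addr;
		update_list_p->count = j + 1;
	}
#endif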
/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))
#define pmap_in_use(pmap, cpu)	(((pmap)->cpus_using & (1 << (cpu))) != 0)

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

decl_simple_lock_data(,free_pmap_lock)

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;	/* debugging */

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern	vm_offset_t	hole_start, hole_end;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1, DADDR2;

#if	DEBUG_ALIAS
#define PMAP_ALIAS_MAX 32
struct pmap_alias {
	vm_offset_t	rpc;
	pmap_t		pmap;
	vm_offset_t	va;
	int		cookie;
#define PMAP_ALIAS_COOKIE 0xdeadbeef
} pmap_aliasbuf[PMAP_ALIAS_MAX];
int pmap_alias_index = 0;
extern vm_offset_t get_rpc();

#endif	/* DEBUG_ALIAS */
#define pmap_pde(m, v)	(&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
#define pdir_pde(d, v)	(d[(vm_offset_t)(v) >> PDESHIFT])

static inline int
pmap_is_current(pmap_t pmap)
{
	return (pmap == kernel_pmap ||
		(pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
}
/*
 * return address of mapped pte for vaddr va in pmap pmap.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;

	pde = pmap_pde(pmap, va);
	if (*pde != 0) {
		if (pmap_is_current(pmap))
			return (vtopte(va));
		newpf = *pde & PG_FRAME;
		if (((*CM4) & PG_FRAME) != newpf) {
			*CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
			invlpg((u_int)CA4);
		}
		return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
	}
	return (0);
}

#define DEBUG_PTE_PAGE	0
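/*
 * Sketch of typical pmap_pte() use (compiled out), mirroring the pattern
 * in pmap_enter and pmap_change_wiring below: look up the pte mapping va,
 * then test or edit its bits.
 */
#if 0	/* example only */
	pt_entry_t	*pte;

	pte = pmap_pte(pmap, va);
	if (pte != PT_ENTRY_NULL && (*pte & INTEL_PTE_VALID)) {
		if (iswired(*pte)) {
			/* mapping exists and is wired */
		}
	}
#endif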
#if	DEBUG_PTE_PAGE
void
ptep_check(ptep_t ptep)
{
	register pt_entry_t	*pte, *epte;
	int			ctu, ctw;

	/* check the use and wired counts */
	if (ptep == PTE_PAGE_NULL)
		return;
	pte = pmap_pte(ptep->pmap, ptep->va);
	epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
	ctu = 0;
	ctw = 0;
	while (pte < epte) {
		if (*pte & INTEL_PTE_VALID) {
			ctu++;
			if (iswired(*pte))
				ctw++;
		}
		pte++;
	}

	if (ctu != ptep->use_count || ctw != ptep->wired_count) {
		printf("use %d wired %d - actual use %d wired %d\n",
			ptep->use_count, ptep->wired_count, ctu, ctw);
	}
}
#endif	/* DEBUG_PTE_PAGE */
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	register vm_prot_t	prot)
{
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap,
			virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return(virt);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys) (i.e., devices).
 *	Sets no-cache, A, D.
 *	Otherwise like pmap_map.
 */
vm_offset_t
pmap_map_bd(
	register vm_offset_t	virt,
	register vm_offset_t	start_addr,
	register vm_offset_t	end_addr,
	vm_prot_t		prot)
{
	register pt_entry_t	template;
	register pt_entry_t	*pte;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_NCACHE
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_VALID;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	/* XXX move pmap_pte out of loop, once one pte mapped, all are */
	while (start_addr < end_addr) {
		pte = pmap_pte(kernel_pmap, virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		WRITE_PTE_FAST(pte, template)
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}

	return(virt);
}
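/*
 * Sketch of a pmap_map_bd() call (compiled out).  The virtual and physical
 * values below are hypothetical placeholders for an early-boot device
 * mapping outside [vm_first_phys, vm_last_phys).
 */
#if 0	/* example only */
	vm_offset_t	dev_virt  = some_reserved_kernel_va;	/* hypothetical */
	vm_offset_t	dev_start = 0xfee00000;			/* hypothetical device range */
	vm_offset_t	dev_end   = 0xfee01000;

	pmap_map_bd(dev_virt, dev_start, dev_end, VM_PROT_READ | VM_PROT_WRITE);
#endif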
extern	char		*first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;
extern  vm_offset_t	etext;
extern  void		*sectHIBB;
extern  int		sectSizeHIB;

/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 *
 *	load_start:	PA where kernel was loaded
 *	avail_start	PA of first available physical page -
 *			   after kernel page tables
 *	avail_end	PA of last available physical page
 *	virtual_avail	VA of first available page -
 *			   after kernel page tables
 *	virtual_end	VA of last available page -
 *			   end of kernel address space
 *
 *	&start_text	start of kernel text
 *	&etext		end of kernel text
 */
void
pmap_bootstrap(
	__unused vm_offset_t	load_start)
{
	vm_offset_t	va;
	pt_entry_t	*pte;
	int		i;
	int		wpkernel, boot_arg;

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */
726 * The kernel's pmap is statically allocated so we don't
727 * have to use pmap_create, which is unlikely to work
728 * correctly at this part of the boot sequence.
731 kernel_pmap
= &kernel_pmap_store
;
733 kernel_pmap
->pmap_link
.next
= (queue_t
)kernel_pmap
; /* Set up anchor forward */
734 kernel_pmap
->pmap_link
.prev
= (queue_t
)kernel_pmap
; /* Set up anchor reverse */
736 kernel_pmap
->ref_count
= 1;
737 kernel_pmap
->pm_obj
= (vm_object_t
) NULL
;
738 kernel_pmap
->dirbase
= (pd_entry_t
*)((unsigned int)IdlePTD
| KERNBASE
);
739 kernel_pmap
->pdirbase
= (pd_entry_t
*)IdlePTD
;
741 kernel_pmap
->pm_pdpt
= (pd_entry_t
*)((unsigned int)IdlePDPT
| KERNBASE
);
742 kernel_pmap
->pm_ppdpt
= (vm_offset_t
)IdlePDPT
;
745 va
= (vm_offset_t
)kernel_pmap
->dirbase
;
746 /* setup self referential mapping(s) */
747 for (i
= 0; i
< NPGPTD
; i
++ ) {
749 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
750 * (pd_entry_t
*) (kernel_pmap
->dirbase
+ PTDPTDI
+ i
) =
751 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
752 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
754 kernel_pmap
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
760 virtual_avail
= (vm_offset_t
)VADDR(KPTDI
,0) + (vm_offset_t
)first_avail
;
761 virtual_end
= (vm_offset_t
)(VM_MAX_KERNEL_ADDRESS
);
764 * Reserve some special page table entries/VA space for temporary
767 #define SYSMAP(c, p, v, n) \
768 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
771 pte
= (pt_entry_t
*) pmap_pte(kernel_pmap
, va
);
774 * CMAP1/CMAP2 are used for zeroing and copying pages.
775 * CMAP3 is used for ml_phys_read/write.
777 SYSMAP(caddr_t
, CM1
, CA1
, 1)
778 * (pt_entry_t
*) CM1
= 0;
779 SYSMAP(caddr_t
, CM2
, CA2
, 1)
780 * (pt_entry_t
*) CM2
= 0;
781 SYSMAP(caddr_t
, CM3
, CA3
, 1)
782 * (pt_entry_t
*) CM3
= 0;
784 /* used by pmap_pte */
785 SYSMAP(caddr_t
, CM4
, CA4
, 1)
786 * (pt_entry_t
*) CM4
= 0;
788 /* DMAP user for debugger */
789 SYSMAP(caddr_t
, DMAP1
, DADDR1
, 1);
790 SYSMAP(caddr_t
, DMAP2
, DADDR2
, 1); /* XXX temporary - can remove */
793 lock_init(&pmap_system_lock
,
794 FALSE
, /* NOT a sleep lock */
800 if (PE_parse_boot_arg("debug", &boot_arg
)) {
801 if (boot_arg
& DB_PRT
) wpkernel
= 0;
802 if (boot_arg
& DB_NMI
) wpkernel
= 0;
805 /* remap kernel text readonly if not debugging or kprintfing */
811 for (myva
= i386_round_page(VM_MIN_KERNEL_ADDRESS
+ MP_BOOT
+ MP_BOOTSTACK
); myva
< etext
; myva
+= PAGE_SIZE
) {
812 if (myva
>= (vm_offset_t
)sectHIBB
&& myva
< ((vm_offset_t
)sectHIBB
+ sectSizeHIB
))
814 ptep
= pmap_pte(kernel_pmap
, myva
);
816 *ptep
&= ~INTEL_PTE_RW
;
821 simple_lock_init(&kernel_pmap
->lock
, 0);
822 simple_lock_init(&pv_free_list_lock
, 0);
824 /* invalidate user virtual addresses */
825 memset((char *)kernel_pmap
->dirbase
,
827 (KPTDI
) * sizeof(pd_entry_t
));
829 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
830 VADDR(KPTDI
,0), virtual_end
);
832 kprintf("Available physical space from 0x%llx to 0x%llx\n",
833 avail_start
, avail_end
);
834 printf("PAE enabled\n");
836 kprintf("Available physical space from 0x%x to 0x%x\n",
837 avail_start
, avail_end
);
846 *startp
= virtual_avail
;
851 * Initialize the pmap module.
852 * Called by vm_init, to initialize any structures that the pmap
853 * system needs to map virtual memory.
858 register long npages
;
860 register vm_size_t s
;
865 * Allocate memory for the pv_head_table and its lock bits,
866 * the modify bit array, and the pte_page table.
869 /* zero bias all these arrays now instead of off avail_start
870 so we cover all memory */
871 npages
= i386_btop(avail_end
);
872 s
= (vm_size_t
) (sizeof(struct pv_entry
) * npages
873 + pv_lock_table_size(npages
)
877 if (kmem_alloc_wired(kernel_map
, &addr
, s
) != KERN_SUCCESS
)
880 memset((char *)addr
, 0, s
);
883 * Allocate the structures first to preserve word-alignment.
885 pv_head_table
= (pv_entry_t
) addr
;
886 addr
= (vm_offset_t
) (pv_head_table
+ npages
);
888 pv_lock_table
= (char *) addr
;
889 addr
= (vm_offset_t
) (pv_lock_table
+ pv_lock_table_size(npages
));
891 pmap_phys_attributes
= (char *) addr
;
894 * Create the zone of physical maps,
895 * and of the physical-to-virtual entries.
897 s
= (vm_size_t
) sizeof(struct pmap
);
898 pmap_zone
= zinit(s
, 400*s
, 4096, "pmap"); /* XXX */
899 s
= (vm_size_t
) sizeof(struct pv_entry
);
900 pv_list_zone
= zinit(s
, 10000*s
, 4096, "pv_list"); /* XXX */
902 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
904 pdpt_zone
= zinit(s
, 400*s
, 4096, "pdpt"); /* XXX */
908 * Only now, when all of the data structures are allocated,
909 * can we set vm_first_phys and vm_last_phys. If we set them
910 * too soon, the kmem_alloc_wired above will try to use these
911 * data structures and blow up.
914 /* zero bias this now so we cover all memory */
916 vm_last_phys
= avail_end
;
918 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
919 kptobj
= &kptobj_object_store
;
920 _vm_object_allocate((vm_object_size_t
)NKPDE
, kptobj
);
921 kernel_pmap
->pm_obj
= kptobj
;
924 /* create pv entries for kernel pages mapped by low level
925 startup code. these have to exist so we can pmap_remove()
926 e.g. kext pages from the middle of our addr space */
928 vaddr
= (vm_offset_t
)VM_MIN_KERNEL_ADDRESS
;
929 for (ppn
= 0; ppn
< i386_btop(avail_start
) ; ppn
++ ) {
932 pv_e
= pai_to_pvh(ppn
);
935 pv_e
->pmap
= kernel_pmap
;
936 pv_e
->next
= PV_ENTRY_NULL
;
939 pmap_initialized
= TRUE
;
	 *	Initialize pmap cache.
944 pmap_cache_list
= PMAP_NULL
;
945 pmap_cache_count
= 0;
946 simple_lock_init(&pmap_cache_lock
, 0);
948 simple_lock_init(&free_pmap_lock
, 0);
954 x86_lowmem_free(void)
956 /* free lowmem pages back to the vm system. we had to defer doing this
957 until the vm system was fully up.
958 the actual pages that are released are determined by which
959 pages the memory sizing code puts into the region table */
961 ml_static_mfree((vm_offset_t
) i386_ptob(pmap_memory_regions
[0].base
)|VM_MIN_KERNEL_ADDRESS
,
962 (vm_size_t
) i386_ptob(pmap_memory_regions
[0].end
- pmap_memory_regions
[0].base
));
966 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
978 assert(pn
!= vm_page_fictitious_addr
);
979 phys
= (pmap_paddr_t
)i386_ptob(pn
);
980 if (!pmap_initialized
)
983 if (!pmap_valid_page(pn
))
986 PMAP_WRITE_LOCK(spl
);
988 pai
= pa_index(phys
);
989 pv_h
= pai_to_pvh(pai
);
991 result
= (pv_h
->pmap
== PMAP_NULL
);
992 PMAP_WRITE_UNLOCK(spl
);
998 * Create and return a physical map.
1000 * If the size specified for the map
1001 * is zero, the map is an actual physical
1002 * map, and may be referenced by the
1005 * If the size specified is non-zero,
1006 * the map will be used in software only, and
1007 * is bounded by that size.
1015 register pmap_t pro
;
1019 register vm_offset_t va
;
1022 * A software use-only map doesn't even need a map.
1029 p
= (pmap_t
) zalloc(pmap_zone
);
1031 panic("pmap_create zalloc");
1032 if (KERN_SUCCESS
!= kmem_alloc_wired(kernel_map
, (vm_offset_t
*)(&p
->dirbase
), NBPTD
))
1033 panic("pmap_create kmem_alloc_wired");
1035 p
->pm_hold
= (vm_offset_t
)zalloc(pdpt_zone
);
1036 if ((vm_offset_t
)NULL
== p
->pm_hold
) {
1037 panic("pdpt zalloc");
1039 p
->pm_pdpt
= (pdpt_entry_t
*) (( p
->pm_hold
+ 31) & ~31);
1040 p
->pm_ppdpt
= kvtophys((vm_offset_t
)p
->pm_pdpt
); /* XXX */
1042 if (NULL
== (p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPGPTD
*NPDEPG
))))
1043 panic("pmap_create vm_object_allocate");
1045 (void *)((unsigned int)IdlePTD
| KERNBASE
),
1047 va
= (vm_offset_t
)p
->dirbase
;
1048 p
->pdirbase
= (pd_entry_t
*)(kvtophys(va
));
1049 simple_lock_init(&p
->lock
, 0);
1051 /* setup self referential mapping(s) */
1052 for (i
= 0; i
< NPGPTD
; i
++ ) {
1054 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
1055 * (pd_entry_t
*) (p
->dirbase
+ PTDPTDI
+ i
) =
1056 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
1057 INTEL_PTE_MOD
| INTEL_PTE_WIRED
;
1059 p
->pm_pdpt
[i
] = pa
| INTEL_PTE_VALID
;
1064 p
->stats
.resident_count
= 0;
1065 p
->stats
.wired_count
= 0;
1069 /* insert new pmap at head of queue hanging off kernel_pmap */
1071 simple_lock(&free_pmap_lock
);
1072 p
->pmap_link
.next
= (queue_t
)kernel_pmap
->pmap_link
.next
;
1073 kernel_pmap
->pmap_link
.next
= (queue_t
)p
;
1075 pro
= (pmap_t
) p
->pmap_link
.next
;
1076 p
->pmap_link
.prev
= (queue_t
)pro
->pmap_link
.prev
;
1077 pro
->pmap_link
.prev
= (queue_t
)p
;
1080 simple_unlock(&free_pmap_lock
);
1088 * Retire the given physical map from service.
1089 * Should only be called if the map contains
1090 * no valid mappings.
1097 register pt_entry_t
*pdep
;
1100 register vm_page_t m
;
1102 register pmap_t pre
,pro
;
1109 simple_lock(&p
->lock
);
1112 register int my_cpu
;
1114 mp_disable_preemption();
1115 my_cpu
= cpu_number();
1118 * If some cpu is not using the physical pmap pointer that it
1119 * is supposed to be (see set_dirbase), we might be using the
1120 * pmap that is being destroyed! Make sure we are
1121 * physically on the right pmap:
1123 /* force pmap/cr3 update */
1126 VM_MAX_KERNEL_ADDRESS
);
1128 if (PMAP_REAL(my_cpu
) == p
) {
1129 PMAP_CPU_CLR(p
, my_cpu
);
1130 PMAP_REAL(my_cpu
) = kernel_pmap
;
1132 set_cr3((unsigned int)kernel_pmap
->pm_ppdpt
);
1134 set_cr3((unsigned int)kernel_pmap
->pdirbase
);
1137 mp_enable_preemption();
1139 simple_unlock(&p
->lock
);
1143 return; /* still in use */
1147 /* remove from pmap queue */
1149 simple_lock(&free_pmap_lock
);
1151 pre
= (pmap_t
)p
->pmap_link
.prev
;
1152 pre
->pmap_link
.next
= (queue_t
)p
->pmap_link
.next
;
1153 pro
= (pmap_t
)p
->pmap_link
.next
;
1154 pro
->pmap_link
.prev
= (queue_t
)p
->pmap_link
.prev
;
1156 simple_unlock(&free_pmap_lock
);
1161 * Free the memory maps, then the
1165 pdep
= (pt_entry_t
*)p
->dirbase
;
1167 while (pdep
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)]) {
1169 if (*pdep
& INTEL_PTE_VALID
) {
1170 ind
= pdep
- (pt_entry_t
*)&p
->dirbase
[0];
1171 vm_object_lock(p
->pm_obj
);
1172 m
= vm_page_lookup(p
->pm_obj
, (vm_object_offset_t
)ind
);
1173 if (m
== VM_PAGE_NULL
) {
1174 panic("pmap_destroy: pte page not in object");
1176 vm_page_lock_queues();
1178 inuse_ptepages_count
--;
1179 vm_object_unlock(p
->pm_obj
);
1180 vm_page_unlock_queues();
1183 * Clear pdes, this might be headed for the cache.
1193 vm_object_deallocate(p
->pm_obj
);
1194 kmem_free(kernel_map
, (vm_offset_t
)p
->dirbase
, NBPTD
);
1196 zfree(pdpt_zone
, (void *)p
->pm_hold
);
1198 zfree(pmap_zone
, p
);
1202 * Add a reference to the specified pmap.
1211 if (p
!= PMAP_NULL
) {
1213 simple_lock(&p
->lock
);
1215 simple_unlock(&p
->lock
);
1221 * Remove a range of hardware page-table entries.
1222 * The entries given are the first (inclusive)
1223 * and last (exclusive) entries for the VM pages.
1224 * The virtual address is the va for the first pte.
1226 * The pmap must be locked.
1227 * If the pmap is not the kernel pmap, the range must lie
1228 * entirely within one pte-page. This is NOT checked.
1229 * Assumes that the pte-page exists.
1240 register pt_entry_t
*cpte
;
1241 int num_removed
, num_unwired
;
1246 if (pmap
!= kernel_pmap
)
1247 ptep_check(get_pte_page(spte
));
1248 #endif /* DEBUG_PTE_PAGE */
1252 for (cpte
= spte
; cpte
< epte
;
1253 cpte
++, va
+= PAGE_SIZE
) {
1255 pa
= pte_to_pa(*cpte
);
1263 if (!valid_page(i386_btop(pa
))) {
1266 * Outside range of managed physical memory.
1267 * Just remove the mappings.
1269 register pt_entry_t
*lpte
= cpte
;
1279 * Get the modify and reference bits.
1282 register pt_entry_t
*lpte
;
1285 pmap_phys_attributes
[pai
] |=
1286 *lpte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1292 * Remove the mapping from the pvlist for
1293 * this physical page.
1296 register pv_entry_t pv_h
, prev
, cur
;
1298 pv_h
= pai_to_pvh(pai
);
1299 if (pv_h
->pmap
== PMAP_NULL
) {
1300 panic("pmap_remove: null pv_list!");
1302 if (pv_h
->va
== va
&& pv_h
->pmap
== pmap
) {
1304 * Header is the pv_entry. Copy the next one
1305 * to header and free the next one (we cannot
1309 if (cur
!= PV_ENTRY_NULL
) {
1314 pv_h
->pmap
= PMAP_NULL
;
1321 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1322 panic("pmap-remove: mapping not in pv_list!");
1324 } while (cur
->va
!= va
|| cur
->pmap
!= pmap
);
1325 prev
->next
= cur
->next
;
1335 assert(pmap
->stats
.resident_count
>= num_removed
);
1336 pmap
->stats
.resident_count
-= num_removed
;
1337 assert(pmap
->stats
.wired_count
>= num_unwired
);
1338 pmap
->stats
.wired_count
-= num_unwired
;
1342 * Remove phys addr if mapped in specified map
1346 pmap_remove_some_phys(
1347 __unused pmap_t map
,
1348 __unused ppnum_t pn
)
1351 /* Implement to support working set code */
1356 * Remove the given range of addresses
1357 * from the specified map.
1359 * It is assumed that the start and end are properly
1360 * rounded to the hardware page size.
1371 register pt_entry_t
*pde
;
1372 register pt_entry_t
*spte
, *epte
;
1377 if (map
== PMAP_NULL
)
1380 PMAP_READ_LOCK(map
, spl
);
1382 if (value_64bit(s64
) || value_64bit(e64
)) {
1383 panic("pmap_remove addr overflow");
1386 orig_s
= s
= (vm_offset_t
)low32(s64
);
1387 e
= (vm_offset_t
)low32(e64
);
1389 pde
= pmap_pde(map
, s
);
1392 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1395 if (*pde
& INTEL_PTE_VALID
) {
1396 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1397 spte
= &spte
[ptenum(s
)];
1398 epte
= &spte
[intel_btop(l
-s
)];
1399 pmap_remove_range(map
, s
, spte
, epte
);
1405 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1407 PMAP_READ_UNLOCK(map
, spl
);
1411 * Routine: pmap_page_protect
1414 * Lower the permission for all mappings to a given
1422 pv_entry_t pv_h
, prev
;
1423 register pv_entry_t pv_e
;
1424 register pt_entry_t
*pte
;
1426 register pmap_t pmap
;
1431 assert(pn
!= vm_page_fictitious_addr
);
1432 phys
= (pmap_paddr_t
)i386_ptob(pn
);
1433 if (!valid_page(pn
)) {
1435 * Not a managed page.
1441 * Determine the new protection.
1445 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1449 return; /* nothing to do */
1456 * Lock the pmap system first, since we will be changing
1460 PMAP_WRITE_LOCK(spl
);
1462 pai
= pa_index(phys
);
1463 pv_h
= pai_to_pvh(pai
);
1466 * Walk down PV list, changing or removing all mappings.
1467 * We do not have to lock the pv_list because we have
1468 * the entire pmap system locked.
1470 if (pv_h
->pmap
!= PMAP_NULL
) {
1474 register vm_offset_t va
;
1477 * Lock the pmap to block pmap_extract and similar routines.
1479 simple_lock(&pmap
->lock
);
1484 pte
= pmap_pte(pmap
, va
);
1487 * Consistency checks.
1489 /* assert(*pte & INTEL_PTE_VALID); XXX */
1490 /* assert(pte_to_phys(*pte) == phys); */
1495 * Remove the mapping if new protection is NONE
1496 * or if write-protecting a kernel mapping.
1498 if (remove
|| pmap
== kernel_pmap
) {
1500 * Remove the mapping, collecting any modify bits.
1503 pmap_phys_attributes
[pai
] |=
1504 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1506 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1509 assert(pmap
->stats
.resident_count
>= 1);
1510 pmap
->stats
.resident_count
--;
1513 * Remove the pv_entry.
1517 * Fix up head later.
1519 pv_h
->pmap
= PMAP_NULL
;
1523 * Delete this entry.
1525 prev
->next
= pv_e
->next
;
1534 *pte
&= ~INTEL_PTE_WRITE
;
1536 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
1543 simple_unlock(&pmap
->lock
);
1545 } while ((pv_e
= prev
->next
) != PV_ENTRY_NULL
);
1548 * If pv_head mapping was removed, fix it up.
1550 if (pv_h
->pmap
== PMAP_NULL
) {
1552 if (pv_e
!= PV_ENTRY_NULL
) {
1559 PMAP_WRITE_UNLOCK(spl
);
1567 * Disconnect all mappings for this page and return reference and change status
1568 * in generic format.
1571 unsigned int pmap_disconnect(
1574 pmap_page_protect(pa
, 0); /* disconnect the page */
1575 return (pmap_get_refmod(pa
)); /* return ref/chg status */
1579 * Set the physical protection on the
1580 * specified range of this map as requested.
1581 * Will not increase permissions.
1590 register pt_entry_t
*pde
;
1591 register pt_entry_t
*spte
, *epte
;
1594 vm_offset_t orig_s
= s
;
1597 if (map
== PMAP_NULL
)
1601 * Determine the new protection.
1605 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1607 case VM_PROT_READ
|VM_PROT_WRITE
:
1609 return; /* nothing to do */
1611 pmap_remove(map
, (addr64_t
)s
, (addr64_t
)e
);
1616 simple_lock(&map
->lock
);
1618 pde
= pmap_pde(map
, s
);
1620 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
1623 if (*pde
& INTEL_PTE_VALID
) {
1624 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
1625 spte
= &spte
[ptenum(s
)];
1626 epte
= &spte
[intel_btop(l
-s
)];
1628 while (spte
< epte
) {
1629 if (*spte
& INTEL_PTE_VALID
)
1630 *spte
&= ~INTEL_PTE_WRITE
;
1638 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
1640 simple_unlock(&map
->lock
);
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
1660 register pmap_t pmap
,
1667 register pt_entry_t
*pte
;
1668 register pv_entry_t pv_h
;
1671 pt_entry_t
template;
1673 pmap_paddr_t old_pa
;
1674 pmap_paddr_t pa
= (pmap_paddr_t
)i386_ptob(pn
);
1676 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1681 assert(pn
!= vm_page_fictitious_addr
);
1683 printf("pmap(%x, %x)\n", v
, pn
);
1684 if (pmap
== PMAP_NULL
)
	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
1694 pv_e
= PV_ENTRY_NULL
;
1696 PMAP_READ_LOCK(pmap
, spl
);
1699 * Expand pmap to include this pte. Assume that
1700 * pmap is always expanded to include enough hardware
1701 * pages to map one VM page.
1704 while ((pte
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
1706 * Must unlock to expand the pmap.
1708 PMAP_READ_UNLOCK(pmap
, spl
);
1710 pmap_expand(pmap
, v
);
1712 PMAP_READ_LOCK(pmap
, spl
);
1715 * Special case if the physical page is already mapped
1718 old_pa
= pte_to_pa(*pte
);
1721 * May be changing its wired attribute or protection
1724 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
1726 if(flags
& VM_MEM_NOT_CACHEABLE
) {
1727 if(!(flags
& VM_MEM_GUARDED
))
1728 template |= INTEL_PTE_PTA
;
1729 template |= INTEL_PTE_NCACHE
;
1732 if (pmap
!= kernel_pmap
)
1733 template |= INTEL_PTE_USER
;
1734 if (prot
& VM_PROT_WRITE
)
1735 template |= INTEL_PTE_WRITE
;
1737 template |= INTEL_PTE_WIRED
;
1739 pmap
->stats
.wired_count
++;
1742 if (iswired(*pte
)) {
1743 assert(pmap
->stats
.wired_count
>= 1);
1744 pmap
->stats
.wired_count
--;
1748 if (*pte
& INTEL_PTE_MOD
)
1749 template |= INTEL_PTE_MOD
;
1750 WRITE_PTE(pte
, template)
	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	SHARING_FAULTS complicates this slightly in that it cannot
	 *	replace the mapping, but must remove it (because adding the
	 *	pvlist entry for the new mapping may remove others), and
	 *	hence always enters the new mapping at step 3)
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */
1774 if (old_pa
!= (pmap_paddr_t
) 0) {
1778 if (pmap
!= kernel_pmap
)
1779 ptep_check(get_pte_page(pte
));
1780 #endif /* DEBUG_PTE_PAGE */
1783 * Don't do anything to pages outside valid memory here.
1784 * Instead convince the code that enters a new mapping
1785 * to overwrite the old one.
1788 if (valid_page(i386_btop(old_pa
))) {
1790 pai
= pa_index(old_pa
);
1793 assert(pmap
->stats
.resident_count
>= 1);
1794 pmap
->stats
.resident_count
--;
1795 if (iswired(*pte
)) {
1796 assert(pmap
->stats
.wired_count
>= 1);
1797 pmap
->stats
.wired_count
--;
1800 pmap_phys_attributes
[pai
] |=
1801 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1805 * Remove the mapping from the pvlist for
1806 * this physical page.
1809 register pv_entry_t prev
, cur
;
1811 pv_h
= pai_to_pvh(pai
);
1812 if (pv_h
->pmap
== PMAP_NULL
) {
1813 panic("pmap_enter: null pv_list!");
1815 if (pv_h
->va
== v
&& pv_h
->pmap
== pmap
) {
1817 * Header is the pv_entry. Copy the next one
1818 * to header and free the next one (we cannot
1822 if (cur
!= PV_ENTRY_NULL
) {
1827 pv_h
->pmap
= PMAP_NULL
;
1834 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1835 panic("pmap_enter: mapping not in pv_list!");
1837 } while (cur
->va
!= v
|| cur
->pmap
!= pmap
);
1838 prev
->next
= cur
->next
;
1847 * old_pa is not managed. Pretend it's zero so code
1848 * at Step 3) will enter new mapping (overwriting old
1849 * one). Do removal part of accounting.
1851 old_pa
= (pmap_paddr_t
) 0;
1852 assert(pmap
->stats
.resident_count
>= 1);
1853 pmap
->stats
.resident_count
--;
1854 if (iswired(*pte
)) {
1855 assert(pmap
->stats
.wired_count
>= 1);
1856 pmap
->stats
.wired_count
--;
1862 if (valid_page(i386_btop(pa
))) {
1865 * Step 2) Enter the mapping in the PV list for this
1875 * We can return here from the sharing fault code below
1876 * in case we removed the only entry on the pv list and thus
1877 * must enter the new one in the list header.
1879 #endif /* SHARING_FAULTS */
1881 pv_h
= pai_to_pvh(pai
);
1883 if (pv_h
->pmap
== PMAP_NULL
) {
1889 pv_h
->next
= PV_ENTRY_NULL
;
1895 * check that this mapping is not already there
1896 * or there is no alias for this mapping in the same map
1898 pv_entry_t e
= pv_h
;
1899 while (e
!= PV_ENTRY_NULL
) {
1900 if (e
->pmap
== pmap
&& e
->va
== v
)
1901 panic("pmap_enter: already in pv_list");
1909 * do sharing faults.
1910 * if we find an entry on this pv list in the same address
1911 * space, remove it. we know there will not be more
1914 pv_entry_t e
= pv_h
;
1917 while (e
!= PV_ENTRY_NULL
) {
1918 if (e
->pmap
== pmap
) {
1920 * Remove it, drop pv list lock first.
1924 opte
= pmap_pte(pmap
, e
->va
);
1925 assert(opte
!= PT_ENTRY_NULL
);
1927 * Invalidate the translation buffer,
1928 * then remove the mapping.
1930 pmap_remove_range(pmap
, e
->va
, opte
,
1932 PMAP_UPDATE_TLBS(pmap
, e
->va
, e
->va
+ PAGE_SIZE
);
			/*
			 * We could have removed the head entry,
			 * so there could be no more entries
			 * and so we have to use the pv head entry.
			 * So, go back to the top and try the entry
			 * again.
			 */
1947 * check that this mapping is not already there
1950 while (e
!= PV_ENTRY_NULL
) {
1951 if (e
->pmap
== pmap
)
1952 panic("pmap_enter: alias in pv_list");
1956 #endif /* SHARING_FAULTS */
1960 * check for aliases within the same address space.
1962 pv_entry_t e
= pv_h
;
1963 vm_offset_t rpc
= get_rpc();
1965 while (e
!= PV_ENTRY_NULL
) {
1966 if (e
->pmap
== pmap
) {
1968 * log this entry in the alias ring buffer
1969 * if it's not there already.
1971 struct pmap_alias
*pma
;
1975 for (ii
= 0; ii
< pmap_alias_index
; ii
++) {
1976 if (pmap_aliasbuf
[ii
].rpc
== rpc
) {
1977 /* found it in the log already */
1983 pma
= &pmap_aliasbuf
[pmap_alias_index
];
1987 pma
->cookie
= PMAP_ALIAS_COOKIE
;
1988 if (++pmap_alias_index
>= PMAP_ALIAS_MAX
)
1989 panic("pmap_enter: exhausted alias log");
1995 #endif /* DEBUG_ALIAS */
1997 * Add new pv_entry after header.
1999 if (pv_e
== PV_ENTRY_NULL
) {
2001 if (pv_e
== PV_ENTRY_NULL
) {
2002 panic("pmap no pv_e's");
2007 pv_e
->next
= pv_h
->next
;
2010 * Remember that we used the pvlist entry.
2012 pv_e
= PV_ENTRY_NULL
;
2018 * Step 3) Enter and count the mapping.
2021 pmap
->stats
.resident_count
++;
2024 * Build a template to speed up entering -
2025 * only the pfn changes.
2027 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2029 if(flags
& VM_MEM_NOT_CACHEABLE
) {
2030 if(!(flags
& VM_MEM_GUARDED
))
2031 template |= INTEL_PTE_PTA
;
2032 template |= INTEL_PTE_NCACHE
;
2035 if (pmap
!= kernel_pmap
)
2036 template |= INTEL_PTE_USER
;
2037 if (prot
& VM_PROT_WRITE
)
2038 template |= INTEL_PTE_WRITE
;
2040 template |= INTEL_PTE_WIRED
;
2041 pmap
->stats
.wired_count
++;
2044 WRITE_PTE(pte
, template)
2047 PMAP_UPDATE_TLBS(pmap
, v
, v
+ PAGE_SIZE
);
2049 if (pv_e
!= PV_ENTRY_NULL
) {
2053 PMAP_READ_UNLOCK(pmap
, spl
);
2057 * Routine: pmap_change_wiring
2058 * Function: Change the wiring attribute for a map/virtual-address
2060 * In/out conditions:
2061 * The mapping must already exist in the pmap.
2065 register pmap_t map
,
2069 register pt_entry_t
*pte
;
2074 * We must grab the pmap system lock because we may
2075 * change a pte_page queue.
2077 PMAP_READ_LOCK(map
, spl
);
2079 if ((pte
= pmap_pte(map
, v
)) == PT_ENTRY_NULL
)
2080 panic("pmap_change_wiring: pte missing");
2082 if (wired
&& !iswired(*pte
)) {
2084 * wiring down mapping
2086 map
->stats
.wired_count
++;
2087 *pte
++ |= INTEL_PTE_WIRED
;
2089 else if (!wired
&& iswired(*pte
)) {
2093 assert(map
->stats
.wired_count
>= 1);
2094 map
->stats
.wired_count
--;
2095 *pte
++ &= ~INTEL_PTE_WIRED
;
2098 PMAP_READ_UNLOCK(map
, spl
);
2107 pmap_find_phys(pmap_t pmap
, addr64_t va
)
2113 if (value_64bit(va
))
2114 panic("pmap_find_phys 64 bit value");
2115 a32
= (vm_offset_t
) low32(va
);
2116 ptp
= pmap_pte(pmap
, a32
);
2117 if (PT_ENTRY_NULL
== ptp
) {
2120 ppn
= (ppnum_t
) i386_btop(pte_to_pa(*ptp
));
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *		Change to shim for backwards compatibility but will not
 *		work for 64 bit systems.  Some old drivers that we cannot
 *		change need this.
 */
2137 register pmap_t pmap
,
2143 vaddr
= (vm_offset_t
)0;
2144 ppn
= pmap_find_phys(pmap
, (addr64_t
)va
);
2146 vaddr
= ((vm_offset_t
)i386_ptob(ppn
)) | (va
& INTEL_OFFMASK
);
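/*
 * Sketch of pmap_find_phys()/pmap_extract() use (compiled out): given a
 * pmap and a 32-bit virtual address, recover the page number or the full
 * physical address of an existing mapping.  some_kernel_va is hypothetical.
 */
#if 0	/* example only */
	ppnum_t		pn;
	vm_offset_t	pa;

	pn = pmap_find_phys(kernel_pmap, (addr64_t) some_kernel_va);
	pa = pmap_extract(kernel_pmap, some_kernel_va);	/* page address | offset */
#endif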
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
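/*
 * Sketch of the caller-side loop described above (compiled out): this is
 * the pattern pmap_enter uses, dropping its locks while the pmap grows.
 */
#if 0	/* example only */
	while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(pmap, spl);	/* must be unlocked to expand */
		pmap_expand(pmap, v);
		PMAP_READ_LOCK(pmap, spl);
	}
#endif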
2169 register pmap_t map
,
2170 register vm_offset_t v
)
2173 register vm_page_t m
;
2174 register pmap_paddr_t pa
;
2179 if (map
== kernel_pmap
) {
2185 * Allocate a VM page for the level 2 page table entries.
2187 while ((m
= vm_page_grab()) == VM_PAGE_NULL
)
2191 * put the page into the pmap's obj list so it
2192 * can be found later.
2197 vm_object_lock(map
->pm_obj
);
2198 vm_page_insert(m
, map
->pm_obj
, (vm_object_offset_t
)i
);
2199 vm_page_lock_queues();
2201 inuse_ptepages_count
++;
2202 vm_object_unlock(map
->pm_obj
);
2203 vm_page_unlock_queues();
2210 PMAP_READ_LOCK(map
, spl
);
2212 * See if someone else expanded us first
2214 if (pmap_pte(map
, v
) != PT_ENTRY_NULL
) {
2215 PMAP_READ_UNLOCK(map
, spl
);
2216 vm_object_lock(map
->pm_obj
);
2217 vm_page_lock_queues();
2219 inuse_ptepages_count
--;
2220 vm_page_unlock_queues();
2221 vm_object_unlock(map
->pm_obj
);
2226 * Set the page directory entry for this page table.
2227 * If we have allocated more than one hardware page,
2228 * set several page directory entries.
2231 pdp
= &map
->dirbase
[pdenum(map
, v
)];
2232 *pdp
= pa_to_pte(pa
)
2237 PMAP_READ_UNLOCK(map
, spl
);
2242 * Copy the range specified by src_addr/len
2243 * from the source map to the range dst_addr/len
2244 * in the destination map.
2246 * This routine is only advisory and need not do anything.
2253 vm_offset_t dst_addr
,
2255 vm_offset_t src_addr
)
2258 dst_pmap
++; src_pmap
++; dst_addr
++; len
++; src_addr
++;
2264 * pmap_sync_page_data_phys(ppnum_t pa)
2266 * Invalidates all of the instruction cache on a physical page and
2267 * pushes any dirty data from the data cache for the same physical page
2268 * Not required in i386.
2271 pmap_sync_page_data_phys(__unused ppnum_t pa
)
2277 * pmap_sync_page_attributes_phys(ppnum_t pa)
2279 * Write back and invalidate all cachelines on a physical page.
2282 pmap_sync_page_attributes_phys(ppnum_t pa
)
2284 cache_flush_page_phys(pa
);
2291 * Routine: pmap_collect
2293 * Garbage collects the physical map system for
2294 * pages which are no longer used.
2295 * Success need not be guaranteed -- that is, there
2296 * may well be pages which are not referenced, but
2297 * others may be collected.
2299 * Called by the pageout daemon when pages are scarce.
2305 register pt_entry_t
*pdp
, *ptp
;
2313 if (p
== kernel_pmap
)
2317 * Garbage collect map.
2319 PMAP_READ_LOCK(p
, spl
);
2321 for (pdp
= (pt_entry_t
*)p
->dirbase
;
2322 pdp
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)];
2325 if (*pdp
& INTEL_PTE_VALID
) {
2326 if(*pdp
& INTEL_PTE_REF
) {
2327 *pdp
&= ~INTEL_PTE_REF
;
2331 ptp
= pmap_pte(p
, pdetova(pdp
- (pt_entry_t
*)p
->dirbase
));
2332 eptp
= ptp
+ NPTEPG
;
2335 * If the pte page has any wired mappings, we cannot
2340 register pt_entry_t
*ptep
;
2341 for (ptep
= ptp
; ptep
< eptp
; ptep
++) {
2342 if (iswired(*ptep
)) {
2350 * Remove the virtual addresses mapped by this pte page.
2352 pmap_remove_range(p
,
2353 pdetova(pdp
- (pt_entry_t
*)p
->dirbase
),
2358 * Invalidate the page directory pointer.
2362 PMAP_READ_UNLOCK(p
, spl
);
2365 * And free the pte page itself.
2368 register vm_page_t m
;
2370 vm_object_lock(p
->pm_obj
);
2371 m
= vm_page_lookup(p
->pm_obj
,(vm_object_offset_t
)(pdp
- (pt_entry_t
*)&p
->dirbase
[0]));
2372 if (m
== VM_PAGE_NULL
)
2373 panic("pmap_collect: pte page not in object");
2374 vm_page_lock_queues();
2376 inuse_ptepages_count
--;
2377 vm_page_unlock_queues();
2378 vm_object_unlock(p
->pm_obj
);
2381 PMAP_READ_LOCK(p
, spl
);
2386 PMAP_UPDATE_TLBS(p
, VM_MIN_ADDRESS
, VM_MAX_ADDRESS
);
2387 PMAP_READ_UNLOCK(p
, spl
);
2393 * Routine: pmap_kernel
2395 * Returns the physical map handle for the kernel.
2401 return (kernel_pmap
);
2406 pmap_copy_page(src
, dst
)
2410 bcopy_phys((addr64_t
)i386_ptob(src
),
2411 (addr64_t
)i386_ptob(dst
),
2417 * Routine: pmap_pageable
2419 * Make the specified pages (by pmap, offset)
2420 * pageable (or not) as requested.
2422 * A page which is not pageable may not take
2423 * a fault; therefore, its page table entry
2424 * must remain valid for the duration.
2426 * This routine is merely advisory; pmap_enter
2427 * will specify that these pages are to be wired
2428 * down (or not) as appropriate.
2432 __unused pmap_t pmap
,
2433 __unused vm_offset_t start_addr
,
2434 __unused vm_offset_t end_addr
,
2435 __unused boolean_t pageable
)
2438 pmap
++; start_addr
++; end_addr
++; pageable
++;
2443 * Clear specified attribute bits.
2446 phys_attribute_clear(
2451 register pv_entry_t pv_e
;
2452 register pt_entry_t
*pte
;
2454 register pmap_t pmap
;
2458 assert(pn
!= vm_page_fictitious_addr
);
2459 if (!valid_page(pn
)) {
2461 * Not a managed page.
2467 * Lock the pmap system first, since we will be changing
2471 PMAP_WRITE_LOCK(spl
);
2472 phys
= i386_ptob(pn
);
2473 pai
= pa_index(phys
);
2474 pv_h
= pai_to_pvh(pai
);
2477 * Walk down PV list, clearing all modify or reference bits.
2478 * We do not have to lock the pv_list because we have
2479 * the entire pmap system locked.
2481 if (pv_h
->pmap
!= PMAP_NULL
) {
2483 * There are some mappings.
2485 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2489 * Lock the pmap to block pmap_extract and similar routines.
2491 simple_lock(&pmap
->lock
);
2494 register vm_offset_t va
;
2497 pte
= pmap_pte(pmap
, va
);
2501 * Consistency checks.
2503 assert(*pte
& INTEL_PTE_VALID
);
2504 /* assert(pte_to_phys(*pte) == phys); */
2508 * Clear modify or reference bits.
2512 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
2514 simple_unlock(&pmap
->lock
);
2519 pmap_phys_attributes
[pai
] &= ~bits
;
2521 PMAP_WRITE_UNLOCK(spl
);
2525 * Check specified attribute bits.
2528 phys_attribute_test(
2533 register pv_entry_t pv_e
;
2534 register pt_entry_t
*pte
;
2536 register pmap_t pmap
;
2540 assert(pn
!= vm_page_fictitious_addr
);
2541 if (!valid_page(pn
)) {
2543 * Not a managed page.
2549 * Lock the pmap system first, since we will be checking
2553 PMAP_WRITE_LOCK(spl
);
2554 phys
= i386_ptob(pn
);
2555 pai
= pa_index(phys
);
2556 pv_h
= pai_to_pvh(pai
);
2558 if (pmap_phys_attributes
[pai
] & bits
) {
2559 PMAP_WRITE_UNLOCK(spl
);
2564 * Walk down PV list, checking all mappings.
2565 * We do not have to lock the pv_list because we have
2566 * the entire pmap system locked.
2568 if (pv_h
->pmap
!= PMAP_NULL
) {
2570 * There are some mappings.
2572 for (pv_e
= pv_h
; pv_e
!= PV_ENTRY_NULL
; pv_e
= pv_e
->next
) {
2576 * Lock the pmap to block pmap_extract and similar routines.
2578 simple_lock(&pmap
->lock
);
2581 register vm_offset_t va
;
2584 pte
= pmap_pte(pmap
, va
);
2588 * Consistency checks.
2590 assert(*pte
& INTEL_PTE_VALID
);
2591 /* assert(pte_to_phys(*pte) == phys); */
2596 * Check modify or reference bits.
2599 if (*pte
++ & bits
) {
2600 simple_unlock(&pmap
->lock
);
2601 PMAP_WRITE_UNLOCK(spl
);
2605 simple_unlock(&pmap
->lock
);
2608 PMAP_WRITE_UNLOCK(spl
);
2613 * Set specified attribute bits.
2623 assert(pn
!= vm_page_fictitious_addr
);
2624 if (!valid_page(pn
)) {
2626 * Not a managed page.
2632 * Lock the pmap system and set the requested bits in
2633 * the phys attributes array. Don't need to bother with
2634 * ptes because the test routine looks here first.
2636 phys
= i386_ptob(pn
);
2637 PMAP_WRITE_LOCK(spl
);
2638 pmap_phys_attributes
[pa_index(phys
)] |= bits
;
2639 PMAP_WRITE_UNLOCK(spl
);
2643 * Set the modify bit on the specified physical page.
2646 void pmap_set_modify(
2649 phys_attribute_set(pn
, PHYS_MODIFIED
);
2653 * Clear the modify bits on the specified physical page.
2660 phys_attribute_clear(pn
, PHYS_MODIFIED
);
2666 * Return whether or not the specified physical page is modified
2667 * by any physical maps.
2674 return (phys_attribute_test(pn
, PHYS_MODIFIED
));
2678 * pmap_clear_reference:
2680 * Clear the reference bit on the specified physical page.
2684 pmap_clear_reference(
2687 phys_attribute_clear(pn
, PHYS_REFERENCED
);
2691 pmap_set_reference(ppnum_t pn
)
2693 phys_attribute_set(pn
, PHYS_REFERENCED
);
2697 * pmap_is_referenced:
2699 * Return whether or not the specified physical page is referenced
2700 * by any physical maps.
2707 return (phys_attribute_test(pn
, PHYS_REFERENCED
));
/*
 *	pmap_get_refmod(phys)
 *	returns the referenced and modified bits of the specified
 *	physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pa)
{
	return (   ((phys_attribute_test(pa, PHYS_MODIFIED))?   VM_MEM_MODIFIED : 0)
		 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
}

/*
 *	pmap_clear_refmod(phys, mask)
 *	clears the referenced and modified bits as specified by the mask
 *	of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pa, unsigned int mask)
{
	unsigned int x86Mask;

	x86Mask = (   ((mask & VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
		    | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
	phys_attribute_clear(pa, x86Mask);
}
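/*
 * Sketch of the bit translation performed by the two routines above
 * (compiled out): callers speak VM_MEM_* bits, the pmap layer stores the
 * PHYS_* pte bits.
 */
#if 0	/* example only */
	unsigned int refmod = pmap_get_refmod(pn);

	if (refmod & VM_MEM_MODIFIED) {
		/* page is dirty in some pmap; clear only the modify state */
		pmap_clear_refmod(pn, VM_MEM_MODIFIED);
	}
#endif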
2738 * Set the modify bit on the specified range
2739 * of this map as requested.
2741 * This optimization stands only if each time the dirty bit
2742 * in vm_page_t is tested, it is also tested in the pmap.
2751 register pt_entry_t
*pde
;
2752 register pt_entry_t
*spte
, *epte
;
2754 vm_offset_t orig_s
= s
;
2756 if (map
== PMAP_NULL
)
2759 PMAP_READ_LOCK(map
, spl
);
2761 pde
= pmap_pde(map
, s
);
2762 while (s
&& s
< e
) {
2763 l
= (s
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
-1);
2766 if (*pde
& INTEL_PTE_VALID
) {
2767 spte
= (pt_entry_t
*)pmap_pte(map
, (s
& ~(PDE_MAPPED_SIZE
-1)));
2769 spte
= &spte
[ptenum(s
)];
2770 epte
= &spte
[intel_btop(l
-s
)];
2772 epte
= &spte
[intel_btop(PDE_MAPPED_SIZE
)];
2773 spte
= &spte
[ptenum(s
)];
2775 while (spte
< epte
) {
2776 if (*spte
& INTEL_PTE_VALID
) {
2777 *spte
|= (INTEL_PTE_MOD
| INTEL_PTE_WRITE
);
2785 PMAP_UPDATE_TLBS(map
, orig_s
, e
);
2786 PMAP_READ_UNLOCK(map
, spl
);
2791 invalidate_icache(__unused vm_offset_t addr
,
2792 __unused
unsigned cnt
,
2798 flush_dcache(__unused vm_offset_t addr
,
2799 __unused
unsigned count
,
/*
 *	TLB Coherence Code (TLB "shootdown" code)
 *
 * Threads that belong to the same task share the same address space and
 * hence share a pmap.  However, they may run on distinct cpus and thus
 * have distinct TLBs that cache page table entries.  In order to guarantee
 * that the TLBs are consistent, whenever a pmap is changed, all threads that
 * are active in that pmap must have their TLBs updated.  To keep track of
 * this information, the set of cpus that are currently using a pmap is
 * maintained within each pmap structure (cpus_using).  Pmap_activate() and
 * pmap_deactivate() add and remove, respectively, a cpu from this set.
 * Since the TLBs are not addressable over the bus, each processor must
 * flush its own TLB; a processor that needs to invalidate another TLB
 * needs to interrupt the processor that owns that TLB to signal the
 * update.
 *
 * Whenever a pmap is updated, the lock on that pmap is taken, and all
 * cpus using the pmap are signaled to invalidate.  All threads that need
 * to activate a pmap must wait for the lock to clear to await any updates
 * in progress before using the pmap.  They must ACQUIRE the lock to add
 * their cpu to the cpus_using set.  An implicit assumption made
 * throughout the TLB code is that all kernel code that runs at or above
 * splvm blocks out update interrupts, and that such code does not
 * touch pageable pages.
 *
 * A shootdown interrupt serves another function besides signaling a
 * processor to invalidate.  The interrupt routine (pmap_update_interrupt)
 * waits for both the pmap lock and the kernel pmap lock to clear,
 * preventing user code from making implicit pmap updates while the
 * sending processor is performing its update.  (This could happen via a
 * user data write reference that turns on the modify bit in the page
 * table.)  It must wait for any kernel updates that may have started
 * concurrently with a user pmap update because the IPC code changes
 * mappings.
 * Spinning on the VALUES of the locks is sufficient (rather than
 * having to acquire the locks) because any updates that occur subsequent
 * to finding the lock unlocked will be signaled via another interrupt.
 * (This assumes the interrupt is cleared before the low level interrupt code
 * calls pmap_update_interrupt().)
 *
 * The signaling processor must wait for any implicit updates in progress
 * to terminate before continuing with its update.  Thus it must wait for an
 * acknowledgement of the interrupt from each processor for which such
 * references could be made.  For maintaining this information, a set
 * cpus_active is used.  A cpu is in this set if and only if it can
 * use a pmap.  When pmap_update_interrupt() is entered, a cpu is removed from
 * this set; when all such cpus are removed, it is safe to update.
 *
 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
 * be at least at the priority of the interprocessor interrupt
 * (splip <= splvm).  Otherwise, A could grab a lock and be interrupted by a
 * kernel update; it would spin forever in pmap_update_interrupt() trying
 * to acquire the user pmap lock it had already acquired.  Furthermore A
 * must remove itself from cpus_active.  Otherwise, another cpu holding
 * the lock (B) could be in the process of sending an update signal to A,
 * and thus be waiting for A to remove itself from cpus_active.  If A is
 * spinning on the lock at interrupt priority this will never happen and
 * a deadlock will result.
 */
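/*
 * A minimal sketch of the update side of the scheme described above,
 * assuming the PMAP_READ_LOCK / PMAP_UPDATE_TLBS / PMAP_READ_UNLOCK
 * macros used throughout this file.  example_update_range() is a
 * hypothetical helper, not part of this pmap; the block is compiled
 * out and serves only as documentation.
 */
#if 0
static void
example_update_range(pmap_t p, vm_offset_t start, vm_offset_t end)
{
    spl_t spl;

    PMAP_READ_LOCK(p, spl);          /* raise spl and take p's lock         */
    /* ... modify the page table entries covering [start, end) ... */
    PMAP_UPDATE_TLBS(p, start, end); /* flush the local TLB if needed and   */
                                     /* queue/signal every other cpu in     */
                                     /* p->cpus_using (see signal_cpus)     */
    PMAP_READ_UNLOCK(p, spl);        /* release the lock and restore spl    */
}
#endif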
/*
 *	Signal another CPU that it must flush its TLB
 */
void
signal_cpus(
    cpu_set        use_list,
    pmap_t         pmap,
    vm_offset_t    start_addr,
    vm_offset_t    end_addr)
{
    register int                 which_cpu, j;
    register pmap_update_list_t  update_list_p;

    while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
        which_cpu -= 1;    /* convert to 0 origin */

        update_list_p = cpu_update_list(which_cpu);
        simple_lock(&update_list_p->lock);

        j = update_list_p->count;
        if (j >= UPDATE_LIST_SIZE) {
            /*
             * list overflowed.  Change last item to
             * indicate overflow.
             */
            update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
            update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
            update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
        }
        else {
            update_list_p->item[j].pmap  = pmap;
            update_list_p->item[j].start = start_addr;
            update_list_p->item[j].end   = end_addr;
            update_list_p->count = j+1;
        }
        cpu_update_needed(which_cpu) = TRUE;
        simple_unlock(&update_list_p->lock);

        /* if it's the kernel pmap, ignore cpus_idle */
        if (((cpus_idle & (1 << which_cpu)) == 0) ||
            (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
        {
            i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
        }
        use_list &= ~(1 << which_cpu);
    }
}
void
process_pmap_updates(
    register pmap_t my_pmap)
{
    register int                 my_cpu;
    register pmap_update_list_t  update_list_p;
    register int                 j;
    register pmap_t              pmap;

    mp_disable_preemption();
    my_cpu = cpu_number();
    update_list_p = cpu_update_list(my_cpu);
    simple_lock(&update_list_p->lock);

    for (j = 0; j < update_list_p->count; j++) {
        pmap = update_list_p->item[j].pmap;
        if (pmap == my_pmap ||
            pmap == kernel_pmap) {

            if (pmap->ref_count <= 0) {
                PMAP_CPU_CLR(pmap, my_cpu);
                PMAP_REAL(my_cpu) = kernel_pmap;
#ifdef PAE
                set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
#else
                set_cr3((unsigned int)kernel_pmap->pdirbase);
#endif
            } else
                INVALIDATE_TLB(pmap,
                               update_list_p->item[j].start,
                               update_list_p->item[j].end);
        }
    }
    update_list_p->count = 0;
    cpu_update_needed(my_cpu) = FALSE;
    simple_unlock(&update_list_p->lock);
    mp_enable_preemption();
}
/*
 *	Interrupt routine for TBIA requested from other processor.
 *	This routine can also be called at interrupt time if the cpu
 *	was idle.  Some driver interrupt routines might access
 *	newly allocated vm.  (This is the case for hd.)
 */
void
pmap_update_interrupt(void)
{
    register int     my_cpu;
    spl_t            s;
    register pmap_t  my_pmap;

    mp_disable_preemption();
    my_cpu = cpu_number();

    /*
     *	Raise spl to splvm (above splip) to block out pmap_extract
     *	from IO code (which would put this cpu back in the active
     *	set).
     */
    s = splhigh();

    my_pmap = PMAP_REAL(my_cpu);

    if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
        my_pmap = kernel_pmap;

    do {
        /*
         *	Indicate that we're not using either the user or the
         *	kernel pmap.
         */
        i_bit_clear(my_cpu, &cpus_active);

        /*
         *	Wait for any pmap updates in progress, on either the
         *	user or the kernel pmap.
         */
        while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
               *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
            LOOP_CHECK("pmap_update_interrupt", my_pmap);
        }

        process_pmap_updates(my_pmap);

        i_bit_set(my_cpu, &cpus_active);

    } while (cpu_update_needed(my_cpu));

    splx(s);
    mp_enable_preemption();
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
    pv_entry_t  pv_h;
    int         pai;
    char        attr;

    pai = pa_index(pa);
    pv_h = pai_to_pvh(pai);

    attr = pmap_phys_attributes[pai];
    printf("phys page %x ", pa);
    if (attr & PHYS_MODIFIED)
        printf("modified, ");
    if (attr & PHYS_REFERENCED)
        printf("referenced, ");
    if (pv_h->pmap || pv_h->next)
        printf(" mapped at\n");
    else
        printf(" not mapped\n");
    for (; pv_h; pv_h = pv_h->next) {
        if (pv_h->pmap)
            printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
    }
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
    vm_offset_t vaddr)
{
    db_printf("0x%x", kvtophys(vaddr));
}

/*
 *	Walk the page tables.
 */
void
db_show_vaddrs(
    pt_entry_t *dirbase)
{
    pt_entry_t  *ptep, *pdep, tmp;
    int         x, y, pdecnt, ptecnt;

    if (dirbase == 0) {
        dirbase = kernel_pmap->dirbase;
    }
    if (dirbase == 0) {
        db_printf("need a dirbase...\n");
        return;
    }
    dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);

    db_printf("dirbase: 0x%x\n", dirbase);

    pdecnt = ptecnt = 0;
    pdep = &dirbase[0];
    for (y = 0; y < NPDEPG; y++, pdep++) {
        if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
            continue;
        }
        pdecnt++;
        ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
        db_printf("dir[%4d]: 0x%x\n", y, *pdep);
        for (x = 0; x < NPTEPG; x++, ptep++) {
            if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
                continue;
            }
            ptecnt++;
            db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
                      x,
                      *ptep,
                      (y << 22) | (x << 12),
                      *ptep & ~INTEL_OFFMASK);
        }
    }

    db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}
#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
    __unused pmap_t       pmap,
    __unused vm_offset_t  *listp,
    __unused int          space)
{
    return 0;
}
#endif /* MACH_VM_DEBUG */
#ifdef MACH_BSD
/*
 * BSD support routine to reassign virtual addresses.
 */
void
pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
{
    spl_t       spl;
    pt_entry_t  *pte, saved_pte;

    /* Lock the kernel map */
    PMAP_READ_LOCK(kernel_pmap, spl);

    while (size > 0) {
        pte = pmap_pte(kernel_pmap, from);
        if (pte == NULL)
            panic("pmap_pagemove from pte NULL");
        saved_pte = *pte;
        PMAP_READ_UNLOCK(kernel_pmap, spl);

        pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
                   VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);

        pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));

        PMAP_READ_LOCK(kernel_pmap, spl);
        pte = pmap_pte(kernel_pmap, to);
        if (pte == NULL)
            panic("pmap_pagemove 'to' pte NULL");

        *pte = saved_pte;

        from += PAGE_SIZE;
        to += PAGE_SIZE;
        size -= PAGE_SIZE;
    }

    /* Get the processors to update the TLBs */
    PMAP_UPDATE_TLBS(kernel_pmap, from, from+size);
    PMAP_UPDATE_TLBS(kernel_pmap, to, to+size);

    PMAP_READ_UNLOCK(kernel_pmap, spl);
}
#endif /* MACH_BSD */
/* temporary workaround */
boolean_t
coredumpok(vm_map_t map, vm_offset_t va)
{
    pt_entry_t  *ptep;

    ptep = pmap_pte(map->pmap, va);
    if (0 == ptep)
        return FALSE;
    return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) !=
            (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
}
/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
#if GROW_KERNEL_FUNCTION_IMPLEMENTED
    struct pmap   *pmap;
    vm_offset_t   ptppaddr;
    ppnum_t       ppn;
    vm_page_t     nkpg;
    pd_entry_t    newpdir = 0;

    /*
     * Serialize: losers return to try again until the winner
     * completes the work.
     */
    if (kptobj == 0) panic("growkernel 0");
    if (!vm_object_lock_try(kptobj)) {
        return;
    }

    vm_page_lock_queues();

    /*
     * If this is the first time through, locate the end of the
     * kernel page table entries and set nkpt to the current
     * number of kernel page table pages
     */
    if (kernel_vm_end == 0) {
        kernel_vm_end = KERNBASE;
        nkpt = 0;

        while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
            kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
            nkpt++;
        }
    }

    /*
     * Now allocate and map the required number of page tables
     */
    addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
    while (kernel_vm_end < addr) {
        if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
            kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
            continue;   /* someone already filled this one */
        }

        nkpg = vm_page_alloc(kptobj, nkpt);
        if (!nkpg)
            panic("pmap_growkernel: no memory to grow kernel");

        nkpt++;
        ppn = nkpg->phys_page;
        pmap_zero_page(ppn);
        ptppaddr = i386_ptob(ppn);
        newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
                                INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
        pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;

        /* propagate the new kernel pde into every pmap on the list */
        simple_lock(&free_pmap_lock);
        for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
             pmap != kernel_pmap;
             pmap = (struct pmap *)pmap->pmap_link.next) {
            *pmap_pde(pmap, kernel_vm_end) = newpdir;
        }
        simple_unlock(&free_pmap_lock);

        kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
    }

    vm_page_unlock_queues();
    vm_object_unlock(kptobj);
#endif /* GROW_KERNEL_FUNCTION_IMPLEMENTED */
}
pt_entry_t *
pmap_mapgetpte(vm_map_t map, vm_offset_t v)
{
    return pmap_pte(map->pmap, v);
}

boolean_t
phys_page_exists(
    ppnum_t pn)
{
    pmap_paddr_t phys;

    assert(pn != vm_page_fictitious_addr);

    if (!pmap_initialized)
        return (TRUE);
    phys = (pmap_paddr_t) i386_ptob(pn);
    if (!pmap_valid_page(pn))
        return (FALSE);

    return TRUE;
}
void
mapping_free_prime(void)
{
    int         i;
    pv_entry_t  pv_e;

    for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
        pv_e = (pv_entry_t) zalloc(pv_list_zone);
        PV_FREE(pv_e);
    }
}

void
mapping_adjust(void)
{
    pv_entry_t  pv_e;
    int         i;

    if (mapping_adjust_call == NULL) {
        thread_call_setup(&mapping_adjust_call_data,
                          (thread_call_func_t) mapping_adjust,
                          (thread_call_param_t) NULL);
        mapping_adjust_call = &mapping_adjust_call_data;
    }
    /* XXX rethink best way to do locking here */
    if (pv_free_count < PV_LOW_WATER_MARK) {
        for (i = 0; i < PV_ALLOC_CHUNK; i++) {
            pv_e = (pv_entry_t) zalloc(pv_list_zone);
            PV_FREE(pv_e);
        }
    }
    mappingrecurse = 0;
}
void
pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
    int         i;
    pt_entry_t  *opte, *npte;
    pt_entry_t  pte;

    for (i = 0; i < cnt; i++) {
        /* alias each kernel commpage mapping into the user commpage range,
         * user-accessible, global, and read only */
        opte = pmap_pte(kernel_pmap, kernel_commpage);
        if (0 == opte) panic("kernel_commpage");
        npte = pmap_pte(kernel_pmap, user_commpage);
        if (0 == npte) panic("user_commpage");
        pte = *opte | INTEL_PTE_USER | INTEL_PTE_GLOBAL;
        pte &= ~INTEL_PTE_WRITE;    /* ensure read only */
        WRITE_PTE_FAST(npte, pte);
        kernel_commpage += INTEL_PGBYTES;
        user_commpage += INTEL_PGBYTES;
    }
}
static cpu_pmap_t               cpu_pmap_master;
static struct pmap_update_list  cpu_update_list_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
    int                  ret;
    int                  i;
    cpu_pmap_t           *cp;
    pmap_update_list_t   up;
    vm_offset_t          address;
    vm_map_entry_t       entry;

    if (is_boot_cpu) {
        cp = &cpu_pmap_master;
        up = &cpu_update_list_master;
    } else {
        /*
         * The per-cpu pmap data structure itself.
         */
        ret = kmem_alloc(kernel_map,
                         (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
        if (ret != KERN_SUCCESS) {
            printf("pmap_cpu_alloc() failed ret=%d\n", ret);
            return NULL;
        }
        bzero((void *)cp, sizeof(cpu_pmap_t));

        /*
         * The tlb flush update list.
         */
        ret = kmem_alloc(kernel_map,
                         (vm_offset_t *) &up, sizeof(*up));
        if (ret != KERN_SUCCESS) {
            printf("pmap_cpu_alloc() failed ret=%d\n", ret);
            pmap_cpu_free(cp);
            return NULL;
        }

        /*
         * The temporary windows used for copy/zero - see loose_ends.c
         */
        for (i = 0; i < PMAP_NWINDOWS; i++) {
            ret = vm_map_find_space(kernel_map,
                                    &address, PAGE_SIZE, 0, &entry);
            if (ret != KERN_SUCCESS) {
                printf("pmap_cpu_alloc() "
                       "vm_map_find_space ret=%d\n", ret);
                pmap_cpu_free(cp);
                return NULL;
            }
            vm_map_unlock(kernel_map);

            cp->mapwindow[i].prv_CADDR = (caddr_t) address;
            cp->mapwindow[i].prv_CMAP = vtopte(address);
            * (int *) cp->mapwindow[i].prv_CMAP = 0;

            kprintf("pmap_cpu_alloc() "
                    "window=%d CADDR=0x%x CMAP=0x%x\n",
                    i, address, vtopte(address));
        }
    }

    /*
     * Set up the pmap request list
     */
    cp->update_list = up;
    simple_lock_init(&up->lock, 0);
    up->count = 0;

    return cp;
}

void
pmap_cpu_free(struct cpu_pmap *cp)
{
    if (cp != NULL && cp != &cpu_pmap_master) {
        if (cp->update_list != NULL)
            kfree((void *) cp->update_list,
                  sizeof(*cp->update_list));
        kfree((void *) cp, sizeof(cpu_pmap_t));