/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *		(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include <mach_ldebug.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>		/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/mp_slave_boot.h>
#include <i386/seg.h>
#include <i386/cpu_capabilities.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <kern/xpr.h>

#include <vm/vm_protos.h>

#include <i386/mp_desc.h>

#include <sys/kdebug.h>

#ifdef IWANTTODEBUG
#define POSTCODE_DELAY 1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */
/*
 *	Forward declarations for internal functions.
 */
void		pmap_expand_pml4(
			pmap_t		map,
			vm_map_offset_t	vaddr);

void		pmap_expand_pdpt(
			pmap_t		map,
			vm_map_offset_t	vaddr);

static void	pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	vaddr,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		pn,
			int		bits);

boolean_t	phys_attribute_test(
			ppnum_t		pn,
			int		bits);

void		phys_attribute_set(
			ppnum_t		pn,
			int		bits);

void		pmap_set_reference(
			ppnum_t		pn);

boolean_t	phys_page_exists(
			ppnum_t		pn);

void		dump_pmap(pmap_t);
void		dump_4GB_pdpt(pmap_t p);
void		dump_4GB_pdpt_thread(thread_t tp);

#define iswired(pte)	((pte) & INTEL_PTE_WIRED)

int nx_enabled = 1;		/* enable no-execute protection */
/*
 *	Private data structures.
 */

/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_entry_t; the list is the pv_table.
 */

typedef struct pv_entry {
	struct pv_entry	*next;		/* next pv_entry */
	pmap_t		pmap;		/* pmap where mapping lies */
	vm_map_offset_t	va;		/* virtual address for mapping */
} *pv_entry_t;

#define PV_ENTRY_NULL	((pv_entry_t) 0)

pv_entry_t	pv_head_table;		/* array of entries, one per page */
/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_list_zone if it becomes empty.
 */
pv_entry_t	pv_free_list;		/* free list at SPLVM */
decl_simple_lock_data(,pv_free_list_lock)
int pv_free_count = 0;
#define PV_LOW_WATER_MARK 5000
#define PV_ALLOC_CHUNK 2000
thread_call_t  mapping_adjust_call;
static thread_call_data_t  mapping_adjust_call_data;
int mappingrecurse = 0;

#define	PV_ALLOC(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	if ((pv_e = pv_free_list) != 0) { \
		pv_free_list = pv_e->next; \
		pv_free_count--; \
		if (pv_free_count < PV_LOW_WATER_MARK) \
			if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
				thread_call_enter(mapping_adjust_call); \
	} \
	simple_unlock(&pv_free_list_lock); \
}

#define	PV_FREE(pv_e) { \
	simple_lock(&pv_free_list_lock); \
	pv_e->next = pv_free_list; \
	pv_free_list = pv_e; \
	pv_free_count++; \
	simple_unlock(&pv_free_list_lock); \
}
zone_t		pv_list_zone;		/* zone of pv_entry structures */

static zone_t	pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
pmap_paddr_t	vm_first_phys = (pmap_paddr_t) 0;
pmap_paddr_t	vm_last_phys  = (pmap_paddr_t) 0;
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

static struct vm_object kptobj_object_store;
static vm_object_t kptobj;

/*
 *	Index into pv_head table, its lock bits, and the modify/reference
 *	bits starting at vm_first_phys.
 */

#define pa_index(pa)	(i386_btop(pa - vm_first_phys))

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char	*pmap_phys_attributes;

/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define PHYS_NCACHE	INTEL_PTE_NCACHE

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
uint64_t pde_mapped_size;
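
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): how the structures above fit together.  For a managed physical
 * page, pa_index() selects its pv_head_table slot, and the chained
 * pv_entry records name every (pmap, va) pair that currently maps the
 * page.  A real caller must hold the pmap system lock and the per-page
 * lock before walking the chain; this helper omits the locking purely to
 * keep the example short.
 */
static int
pv_count_mappings_example(pmap_paddr_t phys)
{
    pv_entry_t  pv_h;
    pv_entry_t  pv_e;
    int         count = 0;

    pv_h = pai_to_pvh(pa_index(phys));
    if (pv_h->pmap == PMAP_NULL)
        return 0;                       /* page currently has no mappings */

    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next)
        count++;                        /* one entry per (pmap, va) mapping */

    return count;
}
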
/*
 *	Locking and TLB invalidation
 */

/*
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *	To protect against deadlock between these two cases, the pmap_lock
 *	is used.  There are three different locking protocols as a result:
 *
 *	1.  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
 *	    the pmap.
 *
 *	2.  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
 *	    lock on the pmap_lock (shared read), then lock the pmap
 *	    and finally the pv_lists as needed [i.e. pmap lock before
 *	    pv_list lock.]
 *
 *	3.  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
 *	    Get a write lock on the pmap_lock (exclusive write); this
 *	    also guarantees exclusive access to the pv_lists.  Lock the
 *	    pmaps as needed.
 *
 *	At no time may any routine hold more than one pmap lock or more than
 *	one pv_list lock.  Because interrupt level routines can allocate
 *	mbufs and cause pmap_enter's, the pmap_lock and the lock on the
 *	kernel_pmap can only be held at splhigh.
 */

/*
 *	We raise the interrupt level to splvm, to block interprocessor
 *	interrupts during pmap operations.  We mark the cpu's cr3 inactive
 *	while interrupts are blocked.
 */
#define SPLVM(spl)	{ \
	spl = splhigh(); \
	CPU_CR3_MARK_INACTIVE(); \
}

#define SPLX(spl)	{ \
	if (current_cpu_datap()->cpu_tlb_invalid) \
		process_pmap_updates(); \
	CPU_CR3_MARK_ACTIVE(); \
	splx(spl); \
}

/*
 *	Lock on pmap system
 */
lock_t	pmap_system_lock;

#define PMAP_READ_LOCK(pmap, spl) { \
	SPLVM(spl); \
	lock_read(&pmap_system_lock); \
	simple_lock(&(pmap)->lock); \
}

#define PMAP_WRITE_LOCK(spl) { \
	SPLVM(spl); \
	lock_write(&pmap_system_lock); \
}

#define PMAP_READ_UNLOCK(pmap, spl) { \
	simple_unlock(&(pmap)->lock); \
	lock_read_done(&pmap_system_lock); \
	SPLX(spl); \
}

#define PMAP_WRITE_UNLOCK(spl) { \
	lock_write_done(&pmap_system_lock); \
	SPLX(spl); \
}

#define PMAP_WRITE_TO_READ_LOCK(pmap) { \
	simple_lock(&(pmap)->lock); \
	lock_write_to_read(&pmap_system_lock); \
}

#define LOCK_PVH(index)		lock_pvh_pai(index)

#define UNLOCK_PVH(index)	unlock_pvh_pai(index)
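
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the locking protocols described above, shown as skeletons
 * using the macros just defined.  A pmap-based operation (protocol 2)
 * takes the system lock shared and then the pmap's own lock; a
 * pv_list-based operation (protocol 3) takes the system lock exclusive,
 * which by itself guarantees exclusive access to every pv_list.
 */
static void
locking_protocol_example(pmap_t pmap, int pai)
{
    spl_t spl;

    /* protocol 2: pmap-based operation (pmap_enter, pmap_remove, ...) */
    PMAP_READ_LOCK(pmap, spl);
    LOCK_PVH(pai);                  /* pmap lock before pv_list lock */
    /* ... modify this pmap's mappings ... */
    UNLOCK_PVH(pai);
    PMAP_READ_UNLOCK(pmap, spl);

    /* protocol 3: pv_list-based operation (pmap_page_protect, ...) */
    PMAP_WRITE_LOCK(spl);
    /* ... walk a pv_list, locking each referenced pmap as needed ... */
    PMAP_WRITE_UNLOCK(spl);
}
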
#if	USLOCK_DEBUG
extern int	max_lock_loops;
extern int	disableSerialOuput;
#define LOOP_VAR \
	unsigned int	loop_count; \
	loop_count = disableSerialOuput ? max_lock_loops \
					: max_lock_loops*100
#define LOOP_CHECK(msg, pmap) \
	if (--loop_count == 0) { \
		mp_disable_preemption(); \
		kprintf("%s: cpu %d pmap %x\n", \
			msg, cpu_number(), pmap); \
		Debugger("deadlock detection"); \
		mp_enable_preemption(); \
		loop_count = max_lock_loops; \
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */
static void pmap_flush_tlbs(pmap_t pmap);

#define PMAP_UPDATE_TLBS(pmap, s, e) \
	pmap_flush_tlbs(pmap)

#define	MAX_TBIS_SIZE	32		/* > this -> TBIA */ /* XXX */

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

pd_entry_t	high_shared_pde;
pd_entry_t	commpage64_pde;

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;	/* debugging */

addr64_t	kernel64_cr3;
boolean_t	no_shared_cr3 = FALSE;	/* -no_shared_cr3 boot arg */

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern uint32_t	lowGlo;
extern void	*version;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

#if	DEBUG_ALIAS
#define PMAP_ALIAS_MAX 32
struct pmap_alias {
	vm_offset_t	rpc;
	pmap_t		pmap;
	vm_map_offset_t	va;
	int		cookie;
#define PMAP_ALIAS_COOKIE 0xdeadbeef
} pmap_aliasbuf[PMAP_ALIAS_MAX];
int pmap_alias_index = 0;
extern vm_offset_t get_rpc();

#endif  /* DEBUG_ALIAS */
/*
 * for legacy, returns the address of the pde entry.
 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
 * then returns the mapped address of the pde entry in that page
 */
pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t *pde;

	if (!cpu_64bit || (m == kernel_pmap)) {
		pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
	} else {
		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
		pde = pmap64_pde(m, v);
	}
	return pde;
}
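
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): how a virtual address decomposes for the lookup above.  In the
 * legacy/kernel case the pde for v is simply dirbase[v >> PDESHIFT], and
 * each pde covers PDE_MAPPED_SIZE bytes of virtual space; the remainder
 * selects a pte within that page table and an offset within the page.
 */
static void
va_decompose_example(vm_map_offset_t va)
{
    vm_map_offset_t pde_index   = va >> PDESHIFT;               /* which pde */
    vm_map_offset_t pde_offset  = va & (PDE_MAPPED_SIZE - 1);   /* offset under that pde */
    vm_map_offset_t page_offset = va & PAGE_MASK;               /* offset within the page */

    kprintf("va 0x%llx: pde %llu, pde offset 0x%llx, page offset 0x%llx\n",
            (uint64_t)va, (uint64_t)pde_index,
            (uint64_t)pde_offset, (uint64_t)page_offset);
}
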
/*
 * the single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
 * level of page table dynamic mapping).
 * this returns the address of the requested pml4 entry in the top level page.
 */
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	return ((pml4_entry_t *)pmap->pm_hold +
		((vm_offset_t)((vaddr >> PML4SHIFT) & (NPML4PG-1))));
}
/*
 * maps in the pml4 page, if any, containing the pdpt entry requested
 * and returns the address of the pdpt entry in that mapped page
 */
pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;
	int		i;

	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return (0);
	}

	pml4 = pmap64_pml4(pmap, vaddr);

	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {

		newpf = *pml4 & PG_FRAME;

		for (i = PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
					((vm_offset_t)((vaddr >> PDPTSHIFT) & (NPDPTPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pdpt_window_index++;
		if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
		return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr >> PDPTSHIFT) & (NPDPTPG-1))));
	}

	return (NULL);
}
/*
 * maps in the pdpt page, if any, containing the pde entry requested
 * and returns the address of the pde entry in that mapped page
 */
pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;
	int		i;

	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return (0);
	}

	/* if (vaddr & (1ULL << 63)) panic("neg addr");*/
	pdpt = pmap64_pdpt(pmap, vaddr);

	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {

		newpf = *pdpt & PG_FRAME;

		for (i = PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
					((vm_offset_t)((vaddr >> PDSHIFT) & (NPDPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pde_window_index++;
		if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
		return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr >> PDSHIFT) & (NPDPG-1))));
	}

	return (NULL);
}
/*
 * return address of mapped pte for vaddr va in pmap pmap.
 * must be called with pre-emption or interrupts disabled
 * if targeted pmap is not the kernel pmap
 * since we may be passing back a virtual address that is
 * associated with this cpu... pre-emption or interrupts
 * must remain disabled until the caller is done using
 * the pointer that was passed back.
 *
 * maps in the pde page, if any, containing the pte and returns
 * the address of the pte in that mapped page
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;
	int		i;

	pde = pmap_pde(pmap, vaddr);

	if (pde && ((*pde & INTEL_PTE_VALID))) {
		if (pmap == kernel_pmap) {
			return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
		}

		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

		newpf = *pde & PG_FRAME;

		for (i = PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
					((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
			}
		}

		current_cpu_datap()->cpu_pmap->pte_window_index++;
		if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
		return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
			((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
	}

	return (NULL);
}
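
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the usage contract described in the comment above pmap_pte().
 * For a non-kernel pmap the returned pointer refers to a per-cpu mapping
 * window, so preemption (or interrupts) must stay disabled from the
 * lookup until the caller has finished dereferencing the pointer.
 */
static pt_entry_t
read_pte_example(pmap_t pmap, vm_map_offset_t vaddr)
{
    pt_entry_t  *ptep;
    pt_entry_t  pte_value = 0;

    mp_disable_preemption();            /* keep the mapping window stable */
    ptep = pmap_pte(pmap, vaddr);
    if (ptep != PT_ENTRY_NULL)
        pte_value = *ptep;              /* safe only while not preempted */
    mp_enable_preemption();

    return pte_value;
}
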
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
			   (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return (virt);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys) (i.e., devices).
 *	Sets no-cache, A, D.
 *	Otherwise like pmap_map.
 */
vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
			template |= INTEL_PTE_PTA;
	}

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return (virt);
}
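
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): a hypothetical use of pmap_map_bd() to map one page of device
 * registers uncached into the kernel at early boot.  The addresses are
 * invented for the example.
 */
static vm_offset_t
map_device_page_example(vm_offset_t kernel_va, pmap_paddr_t device_pa)
{
    /* one page, read/write, uncached and guarded (device memory) */
    return pmap_map_bd(kernel_va,
                       (vm_map_offset_t)device_pa,
                       (vm_map_offset_t)(device_pa + PAGE_SIZE),
                       VM_PROT_READ | VM_PROT_WRITE,
                       VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED);
}
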
extern	char		*first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;
extern	vm_offset_t	etext;
extern	void		*sectHIBB;
extern	int		sectSizeHIB;
vm_offset_t
pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
{
	vm_offset_t	ve = pmap_index_to_virt(e);
	pt_entry_t	*ptep;
	pmap_paddr_t	pa;
	int		i;

	assert(0 == (va & PAGE_MASK));	/* expecting page aligned */
	ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);

	for (i = 0; i < sz; i++) {
		pa = (pmap_paddr_t) kvtophys(va);
		pmap_store_pte(ptep, (pa & PG_FRAME)
					| INTEL_PTE_VALID
					| INTEL_PTE_GLOBAL
					| INTEL_PTE_RW
					| INTEL_PTE_REF
					| INTEL_PTE_MOD);
		va += PAGE_SIZE;
		ptep++;
	}
	return ve;
}
vm_offset_t
pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
{
	enum high_fixed_addresses	a = e + HIGH_CPU_END * cpu;

	return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
}
void pmap_init_high_shared(void);

extern vm_offset_t gdtptr, idtptr;

extern uint32_t low_intstack;

extern struct fake_descriptor ldt_desc_pattern;
extern struct fake_descriptor tss_desc_pattern;

extern char hi_remap_text, hi_remap_etext;
extern char t_zero_div;

pt_entry_t *pte_unique_base;

void
pmap_init_high_shared(void)
{
	vm_offset_t haddr;
	struct __gdt_desc_struct gdt_desc = {0,0,0};
	struct __idt_desc_struct idt_desc = {0,0,0};
	struct i386_tss *ttss;

	kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
		HIGH_MEM_BASE, pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
	pte_unique_base = pmap_pte(kernel_pmap,
			(vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));

	if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
				HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
		panic("tramps too large");
	haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
					(vm_offset_t) &hi_remap_text, 3);
	kprintf("tramp: 0x%x, ", haddr);
	printf("hi mem tramps at 0x%x\n", haddr);

	/* map gdt up high and update ptr for reload */
	haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
					(vm_offset_t) master_gdt, 1);
	__asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
	gdt_desc.address = haddr;
	kprintf("GDT: 0x%x, ", haddr);

	/* map ldt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
					(vm_offset_t) master_ldt,
					HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
	kprintf("LDT: 0x%x, ", haddr);

	/* put new ldt addr into gdt */
	master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern;
	master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1);
	master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern;
	master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(USER_LDT)], 1);

	/* map idt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
					(vm_offset_t) master_idt, 1);
	__asm__ __volatile__("sidt %0" : "=m" (idt_desc));
	idt_desc.address = haddr;
	kprintf("IDT: 0x%x, ", haddr);

	/* remap ktss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
					(vm_offset_t) &master_ktss, 1);
	master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern;
	master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1);
	kprintf("KTSS: 0x%x, ", haddr);

#if	MACH_KDB
	/* remap dbtss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
					(vm_offset_t) &master_dbtss, 1);
	master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
	master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1);
	ttss = (struct i386_tss *)haddr;
	kprintf("DBTSS: 0x%x, ", haddr);
#endif	/* MACH_KDB */

	/* remap dftss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_dftss, 1);
	master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern;
	master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(DF_TSS)], 1);
	kprintf("DFTSS: 0x%x\n", haddr);

	/* remap mctss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_mctss, 1);
	master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern;
	master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr;
	fix_desc(&master_gdt[sel_idx(MC_TSS)], 1);
	kprintf("MCTSS: 0x%x\n", haddr);

	__asm__ __volatile__("lgdt %0": "=m" (gdt_desc));
	__asm__ __volatile__("lidt %0": "=m" (idt_desc));
	kprintf("gdt/idt reloaded, ");
	set_tr(KERNEL_TSS);
	kprintf("tr reset to KERNEL_TSS\n");
}
/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 *
 *	Parameters:
 *	load_start:	PA where kernel was loaded
 *	avail_start	PA of first available physical page -
 *			   after kernel page tables
 *	avail_end	PA of last available physical page
 *	virtual_avail	VA of first available page -
 *			   after kernel page tables
 *	virtual_end	VA of last available page -
 *			   end of kernel address space
 *
 *	&start_text	start of kernel text
 *	&etext		end of kernel text
 */

void
pmap_bootstrap(
	__unused vm_offset_t	load_start,
	boolean_t		IA32e)
{
	vm_offset_t	va;
	pt_entry_t	*pte;
	int		i;
	int		wpkernel, boot_arg;
	pd_entry_t	*pdpt;

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */
	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */

	kernel_pmap = &kernel_pmap_store;
	kernel_pmap->ref_count = 1;
	kernel_pmap->nx_enabled = FALSE;
	kernel_pmap->pm_64bit = 0;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
	kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
	pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE);
	kernel_pmap->pm_pdpt = pdpt;
	kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);

	va = (vm_offset_t)kernel_pmap->dirbase;
	/* setup self referential mapping(s) */
	for (i = 0; i < NPGPTD; i++, pdpt++) {
		pmap_paddr_t pa;
		pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
		pmap_store_pte(
			(pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
			(pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
				INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
		pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
	}

	cpu_64bit = IA32e;

	lo_kernel_cr3 = kernel_pmap->pm_cr3;
	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;

	/* save the value we stuff into created pmaps to share the gdts etc */
	high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
	/* make sure G bit is on for high shared pde entry */
	high_shared_pde |= INTEL_PTE_GLOBAL;
	pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);

	inuse_ptepages_count += NKPT;

	virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)

	va = virtual_avail;
	pte = vtopte(va);

	for (i = 0; i < PMAP_NWINDOWS; i++) {
		SYSMAP(caddr_t,
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
		       1);
		*current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
	}

	/* DMAP user for debugger */
	SYSMAP(caddr_t, DMAP1, DADDR1, 1);
	SYSMAP(caddr_t, DMAP2, DADDR2, 1);  /* XXX temporary - can remove */

	lock_init(&pmap_system_lock,
		  FALSE,		/* NOT a sleep lock */
		  0, 0);

	virtual_avail = va;

	wpkernel = 1;
	if (PE_parse_boot_arg("wpkernel", &boot_arg)) {
		if (boot_arg == 0)
			wpkernel = 0;
	}

	/* Remap kernel text readonly unless the "wpkernel" boot-arg is present
	 * and set to 0.
	 */
	if (wpkernel) {
		vm_offset_t	myva;
		pt_entry_t	*ptep;

		for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
			if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
				continue;
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep)
				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
		}
	}

	/* no matter what, kernel page zero is not accessible */
	pte = pmap_pte(kernel_pmap, 0);
	pmap_store_pte(pte, INTEL_PTE_INVALID);

	/* map lowmem global page into fixed addr 0x2000 */
	if (0 == (pte = pmap_pte(kernel_pmap, 0x2000)))
		panic("lowmem pte");

	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW);

	simple_lock_init(&kernel_pmap->lock, 0);
	simple_lock_init(&pv_free_list_lock, 0);

	pmap_init_high_shared();

	pde_mapped_size = PDE_MAPPED_SIZE;

	if (cpu_64bit) {
		pdpt_entry_t *ppdpt   = (pdpt_entry_t *)IdlePDPT;
		pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
		pdpt_entry_t *ppml4   = (pdpt_entry_t *)IdlePML4;
		int istate = ml_set_interrupts_enabled(FALSE);

		/*
		 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
		 * with page bits set for the correct IA-32e operation and so that
		 * the legacy-mode IdlePDPT is retained for slave processor start-up.
		 * This is necessary due to the incompatible use of page bits between
		 * 64-bit and legacy modes.
		 */
		kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
		kernel_pmap->pm_pml4 = IdlePML4;
		kernel_pmap->pm_pdpt = (pd_entry_t *)
					((unsigned int)IdlePDPT64 | KERNBASE);
#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
		pmap_store_pte(kernel_pmap->pm_pml4,
			       (uint32_t)IdlePDPT64 | PAGE_BITS);
		pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
		pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
		pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
		pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);

		/*
		 * The kernel is also mapped in the uber-space at the 4GB starting
		 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
		 */
		pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));

		kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
		cpu_IA32e_enable(current_cpu_datap());
		current_cpu_datap()->cpu_is64bit = TRUE;
		/* welcome to a 64 bit world */

		/* Re-initialize and load descriptors */
		cpu_desc_init64(&cpu_data_master, TRUE);
		cpu_desc_load64(&cpu_data_master);
		fast_syscall_init64();

		pde_mapped_size = 512*4096;

		ml_set_interrupts_enabled(istate);
	}
	kernel_pmap->pm_hold = kernel_pmap->pm_pml4;

	kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
			VADDR(KPTDI,0), virtual_end);
	printf("PAE enabled\n");
	if (cpu_64bit) {
		printf("64 bit mode enabled\n"); kprintf("64 bit mode enabled\n");
	}

	kprintf("Available physical space from 0x%llx to 0x%llx\n",
			avail_start, avail_end);

	/*
	 * By default for 64-bit users loaded at 4GB, share kernel mapping.
	 * But this may be overridden by the -no_shared_cr3 boot-arg.
	 */
	if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) {
		kprintf("Shared kernel address space disabled\n");
	}
}

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	register long		npages;
	vm_offset_t		addr;
	register vm_size_t	s;
	vm_map_offset_t		vaddr;
	ppnum_t			ppn;

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/* zero bias all these arrays now instead of off avail_start
	   so we cover all memory */
	npages = i386_btop(avail_end);
	s = (vm_size_t) (sizeof(struct pv_entry) * npages
				+ pv_lock_table_size(npages)
				+ npages);

	s = round_page(s);
	if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pmap_phys_attributes = (char *) addr;

	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	s = (vm_size_t) sizeof(struct pv_entry);
	pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
	s = 63;
	pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */

	/*
	 *	Only now, when all of the data structures are allocated,
	 *	can we set vm_first_phys and vm_last_phys.  If we set them
	 *	too soon, the kmem_alloc_wired above will try to use these
	 *	data structures and blow up.
	 */

	/* zero bias this now so we cover all memory */
	vm_first_phys = 0;
	vm_last_phys = avail_end;

	kptobj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
	kernel_pmap->pm_obj = kptobj;

	/* create pv entries for kernel pages mapped by low level
	   startup code.  these have to exist so we can pmap_remove()
	   e.g. kext pages from the middle of our addr space */

	vaddr = (vm_map_offset_t)0;
	for (ppn = 0; ppn < i386_btop(avail_start); ppn++) {
		pv_entry_t	pv_e;

		pv_e = pai_to_pvh(ppn);
		pv_e->va = vaddr;
		vaddr += PAGE_SIZE;
		kernel_pmap->stats.resident_count++;
		pv_e->pmap = kernel_pmap;
		pv_e->next = PV_ENTRY_NULL;
	}

	pmap_initialized = TRUE;

	/*
	 *	Initialize pmap cache.
	 */
	pmap_cache_list = PMAP_NULL;
	pmap_cache_count = 0;
	simple_lock_init(&pmap_cache_lock, 0);
}
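
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the sizing rule pmap_init() uses for its single wired
 * allocation above.  One pv_entry, one lock bit and one attribute byte
 * are reserved per physical page, all carved from the same block in that
 * order to preserve alignment.
 */
static vm_size_t
pv_table_size_example(pmap_paddr_t highest_phys_addr)
{
    long npages = i386_btop(highest_phys_addr);

    return (vm_size_t)(sizeof(struct pv_entry) * npages /* pv_head_table        */
                       + pv_lock_table_size(npages)     /* pv_lock_table         */
                       + npages);                       /* pmap_phys_attributes  */
}
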
void
x86_lowmem_free(void)
{
	/* free lowmem pages back to the vm system. we had to defer doing this
	   until the vm system was fully up.
	   the actual pages that are released are determined by which
	   pages the memory sizing code puts into the region table */

	ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base),
			(vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
}


#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
boolean_t
pmap_verify_free(
		 ppnum_t pn)
{
	pmap_paddr_t	phys;
	pv_entry_t	pv_h;
	int		pai;
	spl_t		spl;
	boolean_t	result;

	assert(pn != vm_page_fictitious_addr);
	phys = (pmap_paddr_t)i386_ptob(pn);
	if (!pmap_initialized)
		return (TRUE);

	if (!pmap_valid_page(pn))
		return (FALSE);

	PMAP_WRITE_LOCK(spl);

	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	result = (pv_h->pmap == PMAP_NULL);
	PMAP_WRITE_UNLOCK(spl);

	return (result);
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	    vm_map_size_t	sz,
	    boolean_t		is_64bit)
{
	register pmap_t		p;
	int			i;
	vm_offset_t		va;
	vm_size_t		size;
	pdpt_entry_t		*pdpt;
	pml4_entry_t		*pml4p;
	pd_entry_t		*pdp;
	pt_entry_t		template;

	size = (vm_size_t) sz;

	/*
	 *	A software use-only map doesn't even need a map.
	 */
	if (size != 0) {
		return (PMAP_NULL);
	}

	p = (pmap_t) zalloc(pmap_zone);
	if (PMAP_NULL == p)
		panic("pmap_create zalloc");

	/* init counts now since we'll be bumping some */
	simple_lock_init(&p->lock, 0);
	p->stats.resident_count = 0;
	p->stats.wired_count = 0;
	p->ref_count = 1;
	p->nx_enabled = 1;
	p->pm_64bit = is_64bit;
	p->pm_kernel_cr3 = FALSE;

	if (!cpu_64bit) {
		/* legacy 32 bit setup */
		/* in the legacy case the pdpt layer is hardwired to 4 entries and each
		 * entry covers 1GB of addr space */
		if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
			panic("pmap_create kmem_alloc_wired");
		p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
		if ((vm_offset_t)NULL == p->pm_hold) {
			panic("pdpt zalloc");
		}
		pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
			panic("pmap_create vm_object_allocate");

		memset((char *)p->dirbase, 0, NBPTD);

		va = (vm_offset_t)p->dirbase;
		p->pdirbase = kvtophys(va);

		template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
		for (i = 0; i < NPGPTD; i++, pdpt++) {
			pmap_paddr_t pa;
			pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
			pmap_store_pte(pdpt, pa | template);
		}

		/* map the high shared pde */
		pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);

	} else {
		/* 64 bit setup */

		/* alloc the pml4 page in kernel vm */
		if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
			panic("pmap_create kmem_alloc_wired pml4");

		memset((char *)p->pm_hold, 0, PAGE_SIZE);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);

		inuse_ptepages_count++;
		p->stats.resident_count++;
		p->stats.wired_count++;

		/* allocate the vm_objs to hold the pdpt, pde and pte pages */

		if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
			panic("pmap_create pte obj");

		/* uber space points to uber mapped kernel */
		pml4p = pmap64_pml4(p, 0ULL);
		pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);

		while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
			pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
		}
		pmap_store_pte(pdp, high_shared_pde);
	}

	return (p);
}
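
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the expected lifecycle of a pmap created above.  The page
 * number and address are invented for the example; the real caller is
 * the VM layer creating an address space for a task, and the prototypes
 * come from the pmap headers included at the top of this file.
 */
static void
pmap_lifecycle_example(ppnum_t some_page, boolean_t is_64bit)
{
    pmap_t          pmap;
    vm_map_offset_t va = 0x10000000ULL;     /* hypothetical user address */

    pmap = pmap_create(0, is_64bit);        /* size 0 => a real hardware map */

    /* establish a wired, writable mapping of one page */
    pmap_enter(pmap, va, some_page,
               VM_PROT_READ | VM_PROT_WRITE, 0, TRUE);

    /* ... the task runs ... */

    pmap_remove(pmap, (addr64_t)va, (addr64_t)(va + PAGE_SIZE));
    pmap_destroy(pmap);
}
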
void
pmap_set_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;

	assert(p->pm_64bit);

	/* Kernel-shared cr3 may be disabled by boot arg. */
	if (no_shared_cr3)
		return;

	/*
	 * Set the bottom 4 3rd-level pte's to be the kernel's.
	 */
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		pmap_expand_pml4(p, 0x0);
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
	pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
	pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
	pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));

	p->pm_kernel_cr3 = TRUE;
}
void
pmap_load_kernel_cr3(void)
{
	uint32_t kernel_cr3;

	assert(!ml_get_interrupts_enabled());

	/*
	 * Reload cr3 with the true kernel cr3.
	 * Note: kernel's pml4 resides below 4GB physical.
	 */
	kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
	set_cr3(kernel_cr3);
	current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
	current_cpu_datap()->cpu_task_map = TASK_MAP_32BIT;
	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}
void
pmap_clear_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;

	if (!p->pm_kernel_cr3)
		return;

	user_pdptp = pmap64_pdpt(p, 0x0);
	pmap_store_pte(user_pdptp+0, 0);
	pmap_store_pte(user_pdptp+1, 0);
	pmap_store_pte(user_pdptp+2, 0);
	pmap_store_pte(user_pdptp+3, 0);

	p->pm_kernel_cr3 = FALSE;

	pmap_load_kernel_cr3();
}
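
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): how the two routines above are meant to pair up for a 64-bit
 * task whose lowest 4GB may share the kernel's mappings.  Whether the
 * shared cr3 is used at all is governed by the -no_shared_cr3 boot-arg
 * checked in pmap_bootstrap().
 */
static void
shared_pagezero_example(pmap_t p)
{
    assert(p->pm_64bit);

    pmap_set_4GB_pagezero(p);   /* mirror kernel pdpt entries 0..3 */
    /* ... task executes with the shared low 4GB ... */
    pmap_clear_4GB_pagezero(p); /* drop them and reload the kernel cr3 */
}
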
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
void
pmap_destroy(
	register pmap_t	p)
{
	register pt_entry_t	*pdep;
	register vm_page_t	m;
	register int		c;
	spl_t			s;

	if (p == PMAP_NULL)
		return;

	SPLVM(s);
	simple_lock(&p->lock);
	c = --p->ref_count;
	if (c == 0) {
		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		PMAP_UPDATE_TLBS(p,
				 VM_MIN_ADDRESS,
				 VM_MAX_KERNEL_ADDRESS);
	}
	simple_unlock(&p->lock);
	SPLX(s);

	if (c != 0) {
		return;	/* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	if (!cpu_64bit) {
		pdep = (pt_entry_t *)p->dirbase;

		while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
			int ind;

			if (*pdep & INTEL_PTE_VALID) {
				ind = pdep - (pt_entry_t *)&p->dirbase[0];

				vm_object_lock(p->pm_obj);
				m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
				if (m == VM_PAGE_NULL) {
					panic("pmap_destroy: pte page not in object");
				}
				vm_page_lock_queues();
				vm_page_free(m);
				inuse_ptepages_count--;

				vm_object_unlock(p->pm_obj);
				vm_page_unlock_queues();

				/*
				 *	Clear pdes, this might be headed for the cache.
				 */
				pmap_store_pte(pdep, 0);
				pdep++;
			}
			else {
				pmap_store_pte(pdep, 0);
				pdep++;
			}
		}

		inuse_ptepages_count -= p->pm_obj->resident_page_count;

		vm_object_deallocate(p->pm_obj);
		kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
		zfree(pdpt_zone, (void *)p->pm_hold);
	} else {
		/* 64 bit */

		pmap_unmap_sharedpage(p);

		/* free 64 bit mode structs */
		inuse_ptepages_count--;
		kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);

		inuse_ptepages_count -= p->pm_obj_pml4->resident_page_count;
		vm_object_deallocate(p->pm_obj_pml4);

		inuse_ptepages_count -= p->pm_obj_pdpt->resident_page_count;
		vm_object_deallocate(p->pm_obj_pdpt);

		inuse_ptepages_count -= p->pm_obj->resident_page_count;
		vm_object_deallocate(p->pm_obj);
	}

	zfree(pmap_zone, p);
}
/*
 *	Add a reference to the specified pmap.
 */
void
pmap_reference(
	register pmap_t	p)
{
	spl_t	s;

	if (p != PMAP_NULL) {
		SPLVM(s);
		simple_lock(&p->lock);
		p->ref_count++;
		simple_unlock(&p->lock);
		SPLX(s);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
static void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	register pt_entry_t	*cpte;
	int			num_removed, num_unwired;
	int			pai;
	pmap_paddr_t		pa;

	num_removed = 0;
	num_unwired = 0;

	for (cpte = spte; cpte < epte;
	     cpte++, vaddr += PAGE_SIZE) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		if (iswired(*cpte))
			num_unwired++;

		if (!valid_page(i386_btop(pa))) {

			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			register pt_entry_t	*lpte = cpte;

			pmap_store_pte(lpte, 0);
			continue;
		}
		num_removed++;

		pai = pa_index(pa);
		LOCK_PVH(pai);

		/*
		 *	Get the modify and reference bits.
		 */
		{
			register pt_entry_t	*lpte;

			lpte = cpte;
			pmap_phys_attributes[pai] |=
				*lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(lpte, 0);
		}

		/*
		 *	Remove the mapping from the pvlist for
		 *	this physical page.
		 */
		{
			register pv_entry_t	pv_h, prev, cur;

			pv_h = pai_to_pvh(pai);
			if (pv_h->pmap == PMAP_NULL) {
				panic("pmap_remove: null pv_list!");
			}
			if (pv_h->va == vaddr && pv_h->pmap == pmap) {
				/*
				 * Header is the pv_entry.  Copy the next one
				 * to header and free the next one (we cannot
				 * free the header)
				 */
				cur = pv_h->next;
				if (cur != PV_ENTRY_NULL) {
					*pv_h = *cur;
					PV_FREE(cur);
				}
				else {
					pv_h->pmap = PMAP_NULL;
				}
			}
			else {
				cur = pv_h;
				do {
					prev = cur;
					if ((cur = prev->next) == PV_ENTRY_NULL) {
						panic("pmap_remove: mapping not in pv_list!");
					}
				} while (cur->va != vaddr || cur->pmap != pmap);
				prev->next = cur->next;
				PV_FREE(cur);
			}
			UNLOCK_PVH(pai);
		}
	}

	/*
	 *	Update the counts
	 */
	assert(pmap->stats.resident_count >= num_removed);
	pmap->stats.resident_count -= num_removed;
	assert(pmap->stats.wired_count >= num_unwired);
	pmap->stats.wired_count -= num_unwired;
}
/*
 *	Remove phys addr if mapped in specified map
 *
 */
void
pmap_remove_some_phys(
	__unused pmap_t		map,
	__unused ppnum_t	pn)
{

	/* Implement to support working set code */

}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	addr64_t		l64;
	addr64_t		orig_s64;

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_READ_LOCK(map, spl);

	orig_s64 = s64;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);
		if (pde && (*pde & INTEL_PTE_VALID)) {
			spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
			spte = &spte[ptenum(s64)];
			epte = &spte[intel_btop(l64-s64)];
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;
	}
	PMAP_UPDATE_TLBS(map, orig_s64, e64);

	PMAP_READ_UNLOCK(map, spl);
}
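
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the chunking arithmetic used by pmap_remove() above.  Each
 * iteration handles at most one pde's worth of address space, so the end
 * of the current chunk is the next pde_mapped_size boundary, clamped to
 * the caller's end address.
 */
static addr64_t
next_remove_chunk_example(addr64_t s64, addr64_t e64)
{
    addr64_t l64;

    l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
    if (l64 > e64)
        l64 = e64;
    return l64;         /* [s64, l64) lies under a single pde */
}
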
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pv_entry_t		pv_h, prev;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	boolean_t		remove;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);

	if (!valid_page(pn)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	/*
	 *	Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ|VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}
	phys = (pmap_paddr_t)i386_ptob(pn);
	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);

	/*
	 *	Walk down PV list, changing or removing all mappings.
	 *	We do not have to lock the pv_list because we have
	 *	the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {

		prev = pv_e = pv_h;

		do {
			register vm_map_offset_t vaddr;

			pmap = pv_e->pmap;
			/*
			 * Lock the pmap to block pmap_extract and similar routines.
			 */
			simple_lock(&pmap->lock);

			vaddr = pv_e->va;
			pte = pmap_pte(pmap, vaddr);
			if (0 == pte) {
				kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n", pmap, pn, vaddr);
				panic("pmap_page_protect");
			}
			/*
			 * Consistency checks.
			 */
			/* assert(*pte & INTEL_PTE_VALID); XXX */
			/* assert(pte_to_phys(*pte) == phys); */

			/*
			 * Remove the mapping if new protection is NONE
			 * or if write-protecting a kernel mapping.
			 */
			if (remove || pmap == kernel_pmap) {
				/*
				 * Remove the mapping, collecting any modify bits.
				 */
				pmap_store_pte(pte, *pte & ~INTEL_PTE_VALID);

				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

				pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);

				pmap_store_pte(pte, 0);

				//XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1);
				pmap->stats.resident_count--;

				/*
				 * Remove the pv_entry.
				 */
				if (pv_e == pv_h) {
					/*
					 * Fix up head later.
					 */
					pv_h->pmap = PMAP_NULL;
				}
				else {
					/*
					 * Delete this entry.
					 */
					prev->next = pv_e->next;
					PV_FREE(pv_e);
				}
			}
			else {
				/*
				 * Write-protect.
				 */
				pmap_store_pte(pte, *pte & ~INTEL_PTE_WRITE);

				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
				/*
				 * Advance prev.
				 */
				prev = pv_e;
			}

			simple_unlock(&pmap->lock);

		} while ((pv_e = prev->next) != PV_ENTRY_NULL);

		/*
		 * If pv_head mapping was removed, fix it up.
		 */
		if (pv_h->pmap == PMAP_NULL) {
			pv_e = pv_h->next;

			if (pv_e != PV_ENTRY_NULL) {
				*pv_h = *pv_e;
				PV_FREE(pv_e);
			}
		}
	}
	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Routine:	pmap_disconnect
 *
 *	Function:
 *		Disconnect all mappings for this page and return reference and change status
 *		in generic format.
 *
 */
unsigned int pmap_disconnect(
	ppnum_t pa)
{
	pmap_page_protect(pa, 0);		/* disconnect the page */
	return (pmap_get_refmod(pa));		/* return ref/chg status */
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
void
pmap_protect(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva,
	vm_prot_t	prot)
{
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_map_offset_t		lva;
	vm_map_offset_t		orig_sva;
	spl_t			spl;
	boolean_t		set_NX;

	if (map == PMAP_NULL)
		return;

	if (prot == VM_PROT_NONE) {
		pmap_remove(map, sva, eva);
		return;
	}

	if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
		set_NX = FALSE;
	else
		set_NX = TRUE;

	SPLVM(spl);
	simple_lock(&map->lock);

	orig_sva = sva;
	while (sva < eva) {
		lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
		if (lva > eva)
			lva = eva;
		pde = pmap_pde(map, sva);
		if (pde && (*pde & INTEL_PTE_VALID)) {
			spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
			spte = &spte[ptenum(sva)];
			epte = &spte[intel_btop(lva-sva)];

			while (spte < epte) {
				if (*spte & INTEL_PTE_VALID) {
					if (prot & VM_PROT_WRITE)
						pmap_store_pte(spte, *spte | INTEL_PTE_WRITE);
					else
						pmap_store_pte(spte, *spte & ~INTEL_PTE_WRITE);

					if (set_NX == TRUE)
						pmap_store_pte(spte, *spte | INTEL_PTE_NX);
					else
						pmap_store_pte(spte, *spte & ~INTEL_PTE_NX);
				}
				spte++;
			}
		}
		sva = lva;
	}
	PMAP_UPDATE_TLBS(map, orig_sva, eva);

	simple_unlock(&map->lock);
	SPLX(spl);
}
/* Map a (possibly) autogenned block */
void
pmap_map_block(
	pmap_t		pmap,
	addr64_t	va,
	ppnum_t		pa,
	uint32_t	size,
	vm_prot_t	prot,
	int		attr,
	__unused unsigned int	flags)
{
	uint32_t page;

	for (page = 0; page < size; page++) {
		pmap_enter(pmap, va, pa, prot, attr, TRUE);
		va += PAGE_SIZE;
		pa++;
	}
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	register pt_entry_t	*pte;
	register pv_entry_t	pv_h;
	register int		pai;
	pv_entry_t		pv_e;
	pt_entry_t		template;
	spl_t			spl;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t)i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;

	XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n",
	    current_thread(),
	    current_thread(),
	    pmap, vaddr, pn);

	assert(pn != vm_page_fictitious_addr);
	if (pmap_debug)
		printf("pmap(%qx, %x)\n", vaddr, pn);
	if (pmap == PMAP_NULL)
		return;

	if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
		set_NX = FALSE;
	else
		set_NX = TRUE;

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
	pv_e = PV_ENTRY_NULL;

	PMAP_READ_LOCK(pmap, spl);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */

	while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
		/*
		 *	Must unlock to expand the pmap.
		 */
		PMAP_READ_UNLOCK(pmap, spl);

		pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */

		PMAP_READ_LOCK(pmap, spl);
	}
	/*
	 *	Special case if the physical page is already mapped
	 *	at this address.
	 */
	old_pa = pte_to_pa(*pte);
	if (old_pa == pa) {
		/*
		 *	May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa) | INTEL_PTE_VALID;

		if (VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}

		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX == TRUE)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(*pte))
				pmap->stats.wired_count++;
		}
		else {
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				pmap->stats.wired_count--;
			}
		}

		if (*pte & INTEL_PTE_MOD)
			template |= INTEL_PTE_MOD;

		pmap_store_pte(pte, template);

		need_tlbflush = TRUE;
		goto Done;
	}

	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	SHARING FAULTS IS HORRIBLY BROKEN
	 *	SHARING_FAULTS complicates this slightly in that it cannot
	 *	replace the mapping, but must remove it (because adding the
	 *	pvlist entry for the new mapping may remove others), and
	 *	hence always enters the new mapping at step 3)
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */

		if (valid_page(i386_btop(old_pa))) {

			pai = pa_index(old_pa);
			LOCK_PVH(pai);

			assert(pmap->stats.resident_count >= 1);
			pmap->stats.resident_count--;
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				pmap->stats.wired_count--;
			}

			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);

			pmap_store_pte(pte, 0);
			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 */
			{
				register pv_entry_t prev, cur;

				pv_h = pai_to_pvh(pai);
				if (pv_h->pmap == PMAP_NULL) {
					panic("pmap_enter: null pv_list!");
				}
				if (pv_h->va == vaddr && pv_h->pmap == pmap) {
					/*
					 * Header is the pv_entry.  Copy the next one
					 * to header and free the next one (we cannot
					 * free the header)
					 */
					cur = pv_h->next;
					if (cur != PV_ENTRY_NULL) {
						*pv_h = *cur;
						PV_FREE(cur);
					}
					else {
						pv_h->pmap = PMAP_NULL;
					}
				}
				else {
					cur = pv_h;
					do {
						prev = cur;
						if ((cur = prev->next) == PV_ENTRY_NULL) {
							panic("pmap_enter: mapping not in pv_list!");
						}
					} while (cur->va != vaddr || cur->pmap != pmap);
					prev->next = cur->next;
					PV_FREE(cur);
				}
			}
			UNLOCK_PVH(pai);
		}
		else {

			/*
			 *	old_pa is not managed.  Pretend it's zero so code
			 *	at Step 3) will enter new mapping (overwriting old
			 *	one).  Do removal part of accounting.
			 */
			old_pa = (pmap_paddr_t) 0;

			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				pmap->stats.wired_count--;
			}
		}
		need_tlbflush = TRUE;
	}

	if (valid_page(i386_btop(pa))) {

		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */

		pai = pa_index(pa);

#if SHARING_FAULTS /* this is horribly broken , do not enable */
RetryPvList:
		/*
		 * We can return here from the sharing fault code below
		 * in case we removed the only entry on the pv list and thus
		 * must enter the new one in the list header.
		 */
#endif /* SHARING_FAULTS */
		LOCK_PVH(pai);
		pv_h = pai_to_pvh(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			pv_h->next = PV_ENTRY_NULL;
		}
		else {
#if	DEBUG
			{
				/*
				 * check that this mapping is not already there
				 * or there is no alias for this mapping in the same map
				 */
				pv_entry_t	e = pv_h;
				while (e != PV_ENTRY_NULL) {
					if (e->pmap == pmap && e->va == vaddr)
						panic("pmap_enter: already in pv_list");
					e = e->next;
				}
			}
#endif	/* DEBUG */
#if SHARING_FAULTS /* broken, do not enable */
			{
				/*
				 * do sharing faults.
				 * if we find an entry on this pv list in the same address
				 * space, remove it.  we know there will not be more
				 * than one.
				 */
				pv_entry_t	e = pv_h;
				pt_entry_t	*opte;

				while (e != PV_ENTRY_NULL) {
					if (e->pmap == pmap) {
						/*
						 *	Remove it, drop pv list lock first.
						 */
						UNLOCK_PVH(pai);

						opte = pmap_pte(pmap, e->va);
						assert(opte != PT_ENTRY_NULL);
						/*
						 *	Invalidate the translation buffer,
						 *	then remove the mapping.
						 */
						pmap_remove_range(pmap, e->va, opte,
								  opte + 1);

						PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);

						/*
						 * We could have removed the head entry,
						 * so there could be no more entries
						 * and so we have to use the pv head entry.
						 * so, go back to the top and try the entry
						 * again.
						 */
						goto RetryPvList;
					}
					e = e->next;
				}

				/*
				 * check that this mapping is not already there
				 */
				e = pv_h;
				while (e != PV_ENTRY_NULL) {
					if (e->pmap == pmap)
						panic("pmap_enter: alias in pv_list");
					e = e->next;
				}
			}
#endif /* SHARING_FAULTS */
#if DEBUG_ALIAS
			{
				/*
				 * check for aliases within the same address space.
				 */
				pv_entry_t	e = pv_h;
				vm_offset_t	rpc = get_rpc();

				while (e != PV_ENTRY_NULL) {
					if (e->pmap == pmap) {
						/*
						 * log this entry in the alias ring buffer
						 * if it's not there already.
						 */
						struct pmap_alias	*pma;
						int			ii, logit;

						logit = TRUE;
						for (ii = 0; ii < pmap_alias_index; ii++) {
							if (pmap_aliasbuf[ii].rpc == rpc) {
								/* found it in the log already */
								logit = FALSE;
								break;
							}
						}
						if (logit) {
							pma = &pmap_aliasbuf[pmap_alias_index];
							pma->pmap = pmap;
							pma->va = vaddr;
							pma->rpc = rpc;
							pma->cookie = PMAP_ALIAS_COOKIE;
							if (++pmap_alias_index >= PMAP_ALIAS_MAX)
								panic("pmap_enter: exhausted alias log");
						}
					}
					e = e->next;
				}
			}
#endif /* DEBUG_ALIAS */
			/*
			 *	Add new pv_entry after header.
			 */
			if (pv_e == PV_ENTRY_NULL) {
				PV_ALLOC(pv_e);
				if (pv_e == PV_ENTRY_NULL) {
					panic("pmap no pv_e's");
				}
			}
			pv_e->va = vaddr;
			pv_e->pmap = pmap;
			pv_e->next = pv_h->next;
			pv_h->next = pv_e;
			/*
			 *	Remember that we used the pvlist entry.
			 */
			pv_e = PV_ENTRY_NULL;
		}
		UNLOCK_PVH(pai);

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap->stats.resident_count++;
	}

	/*
	 * Step 3) Enter the mapping.
	 */

	/*
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}

	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	if (set_NX == TRUE)
		template |= INTEL_PTE_NX;

	if (wired) {
		template |= INTEL_PTE_WIRED;
		pmap->stats.wired_count++;
	}
	pmap_store_pte(pte, template);

Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pv_e != PV_ENTRY_NULL) {
		PV_FREE(pv_e);
	}

	PMAP_READ_UNLOCK(pmap, spl);
}
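
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): how the flags argument to pmap_enter() above selects the
 * caching attributes placed in the pte template.  VM_MEM_NOT_CACHEABLE
 * sets INTEL_PTE_NCACHE, and INTEL_PTE_PTA is added as well unless
 * VM_MEM_GUARDED is also passed.  The pmap, address and page number are
 * assumed to be supplied by the caller.
 */
static void
enter_uncached_example(pmap_t pmap, vm_map_offset_t va, ppnum_t pn)
{
    /* uncached, guarded (device-style) mapping, not wired */
    pmap_enter(pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE,
               VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED, FALSE);
}
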
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	register pmap_t	map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	register pt_entry_t	*pte;
	spl_t			spl;

	/*
	 *	We must grab the pmap system lock because we may
	 *	change a pte_page queue.
	 */
	PMAP_READ_LOCK(map, spl);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 *	wiring down mapping
		 */
		map->stats.wired_count++;
		pmap_store_pte(pte, *pte | INTEL_PTE_WIRED);
	}
	else if (!wired && iswired(*pte)) {
		/*
		 *	unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		map->stats.wired_count--;
		pmap_store_pte(pte, *pte & ~INTEL_PTE_WIRED);
	}

	PMAP_READ_UNLOCK(map, spl);
}
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	ppnum_t		ppn;

	mp_disable_preemption();

	ptp = pmap_pte(pmap, va);
	if (PT_ENTRY_NULL == ptp) {
		ppn = 0;
	} else {
		ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
	}
	mp_enable_preemption();

	return ppn;
}
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *	Change to shim for backwards compatibility but will not
 *	work for 64 bit systems.  Some old drivers that we cannot
 *	change need this.
 */
vm_offset_t
pmap_extract(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr)
{
	ppnum_t		ppn;
	vm_offset_t	paddr;

	paddr = (vm_offset_t)0;
	ppn = pmap_find_phys(pmap, vaddr);
	if (ppn) {
		paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK);
	}
	return (paddr);
}
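
#if 0
/*
 * Illustrative sketch only (not built): pmap_find_phys() is the 64-bit safe
 * lookup, while pmap_extract() above is a 32-bit compatibility shim.  A
 * caller needing a full-width physical address would rebuild it itself, as
 * sketched here.  "example_va_to_phys" is a hypothetical helper name.
 */
static addr64_t
example_va_to_phys(pmap_t pmap, addr64_t va)
{
    ppnum_t pn = pmap_find_phys(pmap, va);

    if (pn == 0)
        return (0);    /* no valid mapping at this address */
    /* physical page number back to a byte address, plus the page offset */
    return (((addr64_t)i386_ptob(pn)) | (va & INTEL_OFFMASK));
}
#endif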
void
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pml4_entry_t		*pml4p;

	if (kernel_pmap == map) panic("expand kernel pml4");

	spl = splhigh();
	pml4p = pmap64_pml4(map, vaddr);
	splx(spl);
	if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pml4idx(map, vaddr);

	vm_object_lock(map->pm_obj_pml4);

	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
		kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj_pml4, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);

	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj_pml4);
	inuse_ptepages_count++;
	map->stats.resident_count++;
	map->stats.wired_count++;

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj_pml4);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		map->stats.resident_count--;
		map->stats.wired_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj_pml4);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
void
pmap_expand_pdpt(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pdpt_entry_t		*pdptp;

	if (kernel_pmap == map) panic("expand kernel pdpt");

	spl = splhigh();
	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
		spl = splhigh();
	}
	splx(spl);

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdptidx(map, vaddr);

	vm_object_lock(map->pm_obj_pdpt);

	if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
		kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj_pdpt, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);

	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj_pdpt);
	inuse_ptepages_count++;
	map->stats.resident_count++;
	map->stats.wired_count++;

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj_pdpt);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		map->stats.resident_count--;
		map->stats.wired_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj_pdpt);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

	pmap_store_pte(pdptp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
void
pmap_expand(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;

	/*
	 * if not the kernel map (while we are still compat kernel mode)
	 * and we are 64 bit, propagate expand upwards
	 */
	if (cpu_64bit && (map != kernel_pmap)) {
		spl = splhigh();
		while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
			splx(spl);
			pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
			spl = splhigh();
		}
		splx(spl);
	} else {
		pdp = pmap_pde(map, vaddr);
	}

	/*
	 *	Allocate a VM page for the pde entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdeidx(map, vaddr);

	vm_object_lock(map->pm_obj);

	if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
		kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);
	inuse_ptepages_count++;

	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj);

		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj);
		return;
	}

	pdp = pmap_pde(map, vaddr); /* refetch while locked */

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pmap_store_pte(pdp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
/*
 * pmap_sync_page_data_phys(ppnum_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 * Not required in i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;
	spl_t			spl;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_READ_LOCK(p, spl);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++)
	{
	    if (*pdp & INTEL_PTE_VALID) {
		if (*pdp & INTEL_PTE_REF) {
		    pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
		} else {
		    ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
		    eptp = ptp + NPTEPG;

		    /*
		     * If the pte page has any wired mappings, we cannot
		     * free it.
		     */
		    wired = 0;
		    {
			register pt_entry_t *ptep;
			for (ptep = ptp; ptep < eptp; ptep++) {
			    if (iswired(*ptep)) {
				wired = 1;
				break;
			    }
			}
		    }
		    if (!wired) {
			/*
			 * Remove the virtual addresses mapped by this pte page.
			 */
			pmap_remove_range(p,
				pdetova(pdp - (pt_entry_t *)p->dirbase),
				ptp,
				eptp);

			/*
			 * Invalidate the page directory pointer.
			 */
			pmap_store_pte(pdp, 0x0);

			PMAP_READ_UNLOCK(p, spl);

			/*
			 * And free the pte page itself.
			 */
			{
			    register vm_page_t m;

			    vm_object_lock(p->pm_obj);
			    m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
			    if (m == VM_PAGE_NULL)
				panic("pmap_collect: pte page not in object");
			    vm_page_lock_queues();
			    vm_page_free(m);
			    inuse_ptepages_count--;
			    vm_page_unlock_queues();
			    vm_object_unlock(p->pm_obj);
			}

			PMAP_READ_LOCK(p, spl);
		    }
		}
	    }
	}
	PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);

	PMAP_READ_UNLOCK(p, spl);
	return;
}
void
pmap_copy_page(src, dst)
	ppnum_t src;
	ppnum_t dst;
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}
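
#if 0
/*
 * Illustrative sketch only (not built): duplicating the contents of one
 * physical page into a freshly grabbed one via pmap_copy_page().  Error and
 * cleanup handling is elided; "example_clone_page" is a hypothetical helper
 * name.
 */
static ppnum_t
example_clone_page(ppnum_t src)
{
    vm_page_t m;

    while ((m = vm_page_grab()) == VM_PAGE_NULL)
        VM_PAGE_WAIT();                  /* wait for a free page */
    pmap_copy_page(src, m->phys_page);   /* bcopy_phys() of one PAGE_SIZE page */
    return (m->phys_page);
}
#endif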
/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t			pmap,
	__unused vm_map_offset_t	start_addr,
	__unused vm_map_offset_t	end_addr,
	__unused boolean_t		pageable)
{
#ifdef	lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif	/* lint */
}
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	register pv_entry_t	pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);
	phys = i386_ptob(pn);
	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     * There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 * Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_map_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */

		    /*
		     * Clear modify or reference bits.
		     */
		    pmap_store_pte(pte, *pte & ~bits);
		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
		}
		simple_unlock(&pmap->lock);
	    }
	}

	pmap_phys_attributes[pai] &= ~bits;

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	register pv_entry_t	pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return (0);
	}

	phys = i386_ptob(pn);
	pai = pa_index(phys);
	/*
	 * super fast check... if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if (pmap_phys_attributes[pai] & bits)
	    return (bits);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Lock the pmap system first, since we will be checking
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);

	if (pmap_phys_attributes[pai] & bits) {
	    PMAP_WRITE_UNLOCK(spl);
	    return (bits);
	}

	/*
	 * Walk down PV list, checking all mappings.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
	    /*
	     * There are some mappings.
	     */
	    for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

		pmap = pv_e->pmap;
		/*
		 * Lock the pmap to block pmap_extract and similar routines.
		 */
		simple_lock(&pmap->lock);

		{
		    register vm_map_offset_t va;

		    va = pv_e->va;
		    pte = pmap_pte(pmap, va);

		    /*
		     * Consistency checks.
		     */
		    assert(*pte & INTEL_PTE_VALID);
		    /* assert(pte_to_phys(*pte) == phys); */
		}

		/*
		 * Check modify or reference bits.
		 */
		if (*pte++ & bits) {
		    simple_unlock(&pmap->lock);
		    PMAP_WRITE_UNLOCK(spl);
		    return (bits);
		}

		simple_unlock(&pmap->lock);
	    }
	}
	PMAP_WRITE_UNLOCK(spl);
	return (0);
}
/*
 *	Set specified attribute bits.
 */
void
phys_attribute_set(
	ppnum_t		pn,
	int		bits)
{
	spl_t		spl;
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
	    /*
	     *	Not a managed page.
	     */
	    return;
	}

	/*
	 *	Lock the pmap system and set the requested bits in
	 *	the phys attributes array.  Don't need to bother with
	 *	ptes because the test routine looks here first.
	 */
	phys = i386_ptob(pn);
	PMAP_WRITE_LOCK(spl);
	pmap_phys_attributes[pa_index(phys)] |= bits;
	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Set the modify bit on the specified physical page.
 */

void pmap_set_modify(
		     ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */

void
pmap_clear_modify(
		  ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */

boolean_t
pmap_is_modified(
		 ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED));
}
/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */

void
pmap_clear_reference(
		     ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */

boolean_t
pmap_is_referenced(
		   ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_REFERENCED));
}
/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pa)
{
	return (  ((phys_attribute_test(pa, PHYS_MODIFIED))?   VM_MEM_MODIFIED : 0)
		| ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
}
/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pa, unsigned int mask)
{
	unsigned int	x86Mask;

	x86Mask = (  ((mask & VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
		   | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
	phys_attribute_clear(pa, x86Mask);
}
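
#if 0
/*
 * Illustrative sketch only (not built): the VM layer speaks VM_MEM_MODIFIED /
 * VM_MEM_REFERENCED while the physical attribute array stores PHYS_MODIFIED /
 * PHYS_REFERENCED, so callers go through pmap_get_refmod()/pmap_clear_refmod()
 * for the translation.  Here a page's reference bit is cleared while its
 * modify state is reported; "example_test_and_clear_ref" is a hypothetical
 * helper name.
 */
static boolean_t
example_test_and_clear_ref(ppnum_t pn)
{
    unsigned int refmod = pmap_get_refmod(pn);

    if (refmod & VM_MEM_REFERENCED)
        pmap_clear_refmod(pn, VM_MEM_REFERENCED);   /* clears PHYS_REFERENCED only */
    return ((refmod & VM_MEM_MODIFIED) ? TRUE : FALSE);
}
#endif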
/*
 *	Set the modify bit on the specified range
 *	of this map as requested.
 *
 *	This optimization stands only if each time the dirty bit
 *	in vm_page_t is tested, it is also tested in the pmap.
 */
void
pmap_modify_pages(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_map_offset_t		lva;
	vm_map_offset_t		orig_sva;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	orig_sva = sva;
	while (sva && sva < eva) {
	    lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
	    if (lva > eva)
		lva = eva;
	    pde = pmap_pde(map, sva);
	    if (pde && (*pde & INTEL_PTE_VALID)) {
		spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
		if (lva) {
		    spte = &spte[ptenum(sva)];
		    epte = &spte[intel_btop(lva-sva)];
		} else {
		    epte = &spte[intel_btop(pde_mapped_size)];
		    spte = &spte[ptenum(sva)];
		}
		while (spte < epte) {
		    if (*spte & INTEL_PTE_VALID) {
			pmap_store_pte(spte, *spte
					| INTEL_PTE_MOD
					| INTEL_PTE_WRITE);
		    }
		    spte++;
		}
	    }
	    sva = lva;
	}
	PMAP_UPDATE_TLBS(map, orig_sva, eva);

	PMAP_READ_UNLOCK(map, spl);
}
void
invalidate_icache(__unused vm_offset_t	addr,
		  __unused unsigned	cnt,
		  __unused int		phys)
{
	return;
}

void
flush_dcache(__unused vm_offset_t	addr,
	     __unused unsigned		count,
	     __unused int		phys)
{
	return;
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
		printf("modified, ");
	if (attr & PHYS_REFERENCED)
		printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
		printf(" mapped at\n");
	else
		printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
		if (pv_h->pmap)
			printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t	*);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(
	vm_offset_t	vaddr)
{
	db_printf("0x%qx", kvtophys(vaddr));
}

/*
 *	Walk the pages tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	unsigned int	x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
		dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
		db_printf("need a dirbase...\n");
		return;
	}
	dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDEPG; y++, pdep++) {
		if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
			continue;
		}
		pdecnt++;
		ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
		db_printf("dir[%4d]: 0x%x\n", y, *pdep);
		for (x = 0; x < NPTEPG; x++, ptep++) {
			if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
				continue;
			}
			ptecnt++;
			db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
				x,
				*ptep,
				(y << 22) | (x << 12),
				*ptep & ~INTEL_OFFMASK);
		}
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}
#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif	/* MACH_VM_DEBUG */
/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
}
boolean_t
phys_page_exists(
		 ppnum_t pn)
{
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return (TRUE);
	phys = (pmap_paddr_t) i386_ptob(pn);
	if (!pmap_valid_page(pn))
		return (FALSE);

	return TRUE;
}
void
mapping_free_prime(void)
{
	int		i;
	pv_entry_t	pv_e;

	for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		PV_FREE(pv_e);
	}
}
void
mapping_adjust(void)
{
	pv_entry_t	pv_e;
	int		i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}
	/* XXX rethink best way to do locking here */
	if (pv_free_count < PV_LOW_WATER_MARK) {
		for (i = 0; i < PV_ALLOC_CHUNK; i++) {
			pv_e = (pv_entry_t) zalloc(pv_list_zone);
			PV_FREE(pv_e);
		}
	}
}
void
pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;

	for (i = 0; i < cnt; i++) {
		opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
		if (0 == opte) panic("kernel_commpage");
		pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
		pte &= ~INTEL_PTE_WRITE; // ensure read only
		npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
		if (0 == npte) panic("user_commpage");
		pmap_store_pte(npte, pte);
		kernel_commpage += INTEL_PGBYTES;
		user_commpage += INTEL_PGBYTES;
	}
}
#define PMAP_COMMPAGE64_CNT  (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];

void
pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
{
	spl_t		s;
	int		i;
	pt_entry_t	*kptep;

	s = splhigh();
	for (i = 0; i < cnt; i++) {
		kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
		if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte");
		pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
	}
	splx(s);
}
void
pmap_map_sharedpage(__unused task_t task, pmap_t p)
{
	pt_entry_t	*ptep;
	spl_t		s;
	int		i;

	if (!p->pm_64bit) return;

	/* setup high 64 bit commpage */
	s = splhigh();
	while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) {
		splx(s);
		pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS);
		s = splhigh();
	}

	for (i = 0; i < PMAP_COMMPAGE64_CNT; i++) {
		ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
		if (0 == ptep) panic("pmap_map_sharedpage");
		pmap_store_pte(ptep, pmap_commpage64_ptes[i]);
	}
	splx(s);
}
void
pmap_unmap_sharedpage(pmap_t pmap)
{
	spl_t		s;
	pt_entry_t	*ptep;
	int		i;

	if (!pmap->pm_64bit) return;

	s = splhigh();
	for (i = 0; i < PMAP_COMMPAGE64_CNT; i++) {
		ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
		if (ptep) pmap_store_pte(ptep, 0);
	}
	splx(s);
}
static cpu_pmap_t		cpu_pmap_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	vm_offset_t		address;
	vm_map_address_t	mapaddr;
	vm_map_entry_t		entry;
	pt_entry_t		*pte;

	if (is_boot_cpu) {
		cp = &cpu_pmap_master;
	} else {
		/*
		 * The per-cpu pmap data structure itself.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			return NULL;
		}
		bzero((void *)cp, sizeof(cpu_pmap_t));

		/*
		 * The temporary windows used for copy/zero - see loose_ends.c
		 */
		ret = vm_map_find_space(kernel_map,
			&mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() "
				"vm_map_find_space ret=%d\n", ret);
			pmap_cpu_free(cp);
			return NULL;
		}
		address = (vm_offset_t)mapaddr;

		for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
			while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
				pmap_expand(kernel_pmap, (vm_map_offset_t)address);

			cp->mapwindow[i].prv_CADDR = (caddr_t) address;
			cp->mapwindow[i].prv_CMAP = pte;
		}
		vm_map_unlock(kernel_map);
	}

	cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
	cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
	cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;

	return cp;
}
void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
		kfree((void *) cp, sizeof(cpu_pmap_t));
	}
}
mapwindow_t *
pmap_get_mapwindow(pt_entry_t pentry)
{
	mapwindow_t	*mp;
	int		i;
	boolean_t	istate;

	/*
	 * can be called from hardware interrupt context
	 * so we need to protect the lookup process
	 */
	istate = ml_set_interrupts_enabled(FALSE);

	/*
	 * Note: 0th map reserved for pmap_pte()
	 */
	for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
		mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];

		if (*mp->prv_CMAP == 0) {
			*mp->prv_CMAP = pentry;
			break;
		}
	}
	if (i >= PMAP_NWINDOWS)
		mp = NULL;

	(void) ml_set_interrupts_enabled(istate);

	return (mp);
}
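
#if 0
/*
 * Illustrative sketch only (not built): mapping a physical page through a
 * per-cpu temporary window.  The release step here simply clears the window's
 * CMAP entry again, mirroring the free test in pmap_get_mapwindow() above;
 * whether a dedicated release helper exists is not shown in this file, so
 * that part is an assumption, as is the "example_copy_from_phys" name.
 */
static void
example_copy_from_phys(pmap_paddr_t pa, void *dst, unsigned int len)
{
    mapwindow_t *mp;

    mp = pmap_get_mapwindow(pa_to_pte(pa) | INTEL_PTE_VALID);
    if (mp == NULL)
        return;                          /* no free window on this cpu */
    invlpg((uintptr_t)mp->prv_CADDR);    /* make the new mapping visible locally */
    bcopy(mp->prv_CADDR, dst, len);
    *mp->prv_CMAP = 0;                   /* hand the window back */
}
#endif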
/*
 *	kern_return_t pmap_nest(grand, subord, vstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	vstart = start of range in pmap to be inserted
 *	nstart = start of range in nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	on x86 this is very limited right now.  must be exactly 1 segment.
 *
 *	Note that we depend upon higher level VM locks to insure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {

	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i, need_flush;
	unsigned int	num_pde;
	spl_t		s;

	// do validity tests

	if (size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this for multiples of 256MB */
	if ((size >> 28) > 65536)  return KERN_INVALID_VALUE;	/* Max size we can nest is 16TB */
	if (vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
	if (nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}
	if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE);

	// prepopulate subord pmap pde's if necessary

	if (cpu_64bit) {
		s = splhigh();
		while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) {
			splx(s);
			pmap_expand(subord, nstart);
			s = splhigh();
		}
		splx(s);
	}

	PMAP_READ_LOCK(subord, s);
	nvaddr = (vm_map_offset_t)nstart;
	need_flush = 0;
	num_pde = size >> PDESHIFT;

	for (i = 0; i < num_pde; i++) {
		npde = pmap_pde(subord, nvaddr);
		if ((0 == npde) || (*npde++ & INTEL_PTE_VALID) == 0) {
			PMAP_READ_UNLOCK(subord, s);
			pmap_expand(subord, nvaddr); // pmap_expand handles races
			PMAP_READ_LOCK(subord, s);
			need_flush++;
		}
		nvaddr += NBPDE;
	}

	if (need_flush) {
		nvaddr = (vm_map_offset_t)nstart;
		PMAP_UPDATE_TLBS(subord, nvaddr, nvaddr + (1 << 28) - 1);
	}
	PMAP_READ_UNLOCK(subord, s);

	// copy pde's from subord pmap into grand pmap

	if (cpu_64bit) {
		s = splhigh();
		while (PD_ENTRY_NULL == (pde = pmap_pde(grand, vstart))) {
			splx(s);
			pmap_expand(grand, vstart);
			s = splhigh();
		}
		splx(s);
	}

	PMAP_READ_LOCK(grand, s);
	vaddr = (vm_map_offset_t)vstart;
	for (i = 0; i < num_pde; i++, pde++) {
		pd_entry_t tpde;

		npde = pmap_pde(subord, nstart);
		if (npde == 0) panic("pmap_nest: no npde, subord 0x%x nstart 0x%llx", subord, nstart);
		tpde = *npde;
		nstart += NBPDE;
		pde = pmap_pde(grand, vaddr);
		if (pde == 0) panic("pmap_nest: no pde, grand 0x%x vaddr 0x%llx", grand, vaddr);
		vaddr += NBPDE;
		pmap_store_pte(pde, tpde);
	}
	PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1 << 28) - 1);

	PMAP_READ_UNLOCK(grand, s);

	return KERN_SUCCESS;
}
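
#if 0
/*
 * Illustrative sketch only (not built): nesting exactly one 256MB segment of
 * "subord" into "grand", which is the only size this implementation accepts
 * (size must be 1 << 28 and every address 256MB aligned).
 * "example_nest_one_segment" is a hypothetical helper name.
 */
static kern_return_t
example_nest_one_segment(pmap_t grand, pmap_t subord, addr64_t va)
{
    uint64_t seg = 1ULL << 28;          /* 256MB */

    if (va & (seg - 1))
        return (KERN_INVALID_VALUE);    /* must be segment aligned */
    return (pmap_nest(grand, subord, va, va, seg));
}
#endif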
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap that we will unnest subord from
 *	vaddr = start of range in pmap to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 *	On the current PPC processors, this is limited to segment (256MB) aligned
 *	segment sized ranges.
 */

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) {

	spl_t		s;
	pd_entry_t	*pde;
	unsigned int	i;
	unsigned int	num_pde;

	PMAP_READ_LOCK(grand, s);

	// invalidate all pdes for segment at vaddr in pmap grand

	num_pde = (1 << 28) >> PDESHIFT;

	for (i = 0; i < num_pde; i++, pde++) {
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) panic("pmap_unnest: no pde, grand 0x%x vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		vaddr += NBPDE;
	}
	PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1 << 28) - 1);

	PMAP_READ_UNLOCK(grand, s);

	return KERN_SUCCESS;						/* Bye, bye, butterfly... */
}
void
pmap_switch(pmap_t tpmap)
{
	spl_t	s;
	int	my_cpu;

	s = splhigh();		/* Make sure interruptions are disabled */
	my_cpu = cpu_number();

	set_dirbase(tpmap, my_cpu);

	splx(s);
}
/*
 * disable no-execute capability on
 * the specified pmap
 */
void pmap_disable_NX(pmap_t pmap) {

	pmap->nx_enabled = 0;
}
void
pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
		  vm_size_t *alloc_size, int *collectable, int *exhaustable)
{
	*count      = inuse_ptepages_count;
	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
	*max_size   = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
	*elem_size  = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
}
vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
{
	enum high_fixed_addresses	a;

	a = e + HIGH_CPU_END * cpu;
	return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
}

vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
{
	return pmap_cpu_high_map_vaddr(cpu_number(), e);
}

vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
{
	enum high_fixed_addresses	a;
	vm_offset_t			vaddr;

	a = e + HIGH_CPU_END * cpu_number();
	vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
	*(pte_unique_base + a) = pte;

	/* TLB flush for this page for this cpu */
	invlpg((uintptr_t)vaddr);

	return vaddr;
}
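
#if 0
/*
 * Illustrative sketch only (not built): the per-cpu high fixed slots are laid
 * out as HIGH_FIXED_CPUS_BEGIN + (e + HIGH_CPU_END * cpu), i.e. one block of
 * HIGH_CPU_END slots per cpu.  This just restates the index arithmetic used
 * by pmap_cpu_high_map_vaddr() above; "example_high_cpu_index" is a
 * hypothetical helper name.
 */
static enum high_fixed_addresses
example_high_cpu_index(int cpu, enum high_cpu_types e)
{
    return (enum high_fixed_addresses)
        (HIGH_FIXED_CPUS_BEGIN + e + HIGH_CPU_END * cpu);
}
#endif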
/*
 * Called with pmap locked, we:
 *  - scan through per-cpu data to see which other cpus need to flush
 *  - send an IPI to each non-idle cpu to be flushed
 *  - wait for all to signal back that they are inactive or we see that
 *    they are in an interrupt handler or at a safe point
 *  - flush the local tlb if active for this pmap
 *  - return ... the caller will unlock the pmap
 */
void
pmap_flush_tlbs(pmap_t	pmap)
{
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpu_set		cpus_to_signal;
	unsigned int	my_cpu = cpu_number();
	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;

	assert(!ml_get_interrupts_enabled());

	/*
	 * Scan other cpus for matching active or task CR3.
	 * For idle cpus (with no active map) we mark them invalid but
	 * don't signal -- they'll check as they go busy.
	 * Note: for the kernel pmap we look for 64-bit shared address maps.
	 */
	cpus_to_signal = 0;
	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		if ((cpu_datap(cpu)->cpu_task_cr3   == pmap_cr3) ||
		    (cpu_datap(cpu)->cpu_active_cr3 == pmap_cr3) ||
		    ((pmap == kernel_pmap) &&
		     (!CPU_CR3_IS_ACTIVE(cpu) ||
		      cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
			__asm__ volatile("mfence");

			if (CPU_CR3_IS_ACTIVE(cpu)) {
				cpus_to_signal |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}

	if (cpus_to_signal) {
		KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0);

		deadline = mach_absolute_time() + LockTimeOut;
		/*
		 * Wait for those other cpus to acknowledge
		 */
		for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
			while ((cpus_to_signal & cpu_bit) != 0) {
				if (!cpu_datap(cpu)->cpu_running ||
				    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
				    !CPU_CR3_IS_ACTIVE(cpu)) {
					cpus_to_signal &= ~cpu_bit;
					break;
				}
				if (mach_absolute_time() > deadline)
					panic("pmap_flush_tlbs() "
					      "timeout pmap=%p cpus_to_signal=%p",
					      pmap, cpus_to_signal);
			}
			if (cpus_to_signal == 0)
				break;
		}
		KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0);
	}

	/*
	 * Flush local tlb if required.
	 * We need this flush even if the pmap being changed
	 * is the user map... in case we do a copyin/out
	 * before returning to user mode.
	 */
	if (flush_self)
		flush_tlb();
}
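
#if 0
/*
 * Illustrative sketch only (not built): the signalling loop above tracks
 * pending cpus in a one-bit-per-cpu mask, clearing each bit as that cpu is
 * seen to be flushed or safe.  The same bookkeeping in isolation;
 * "example_ack_cpu" is a hypothetical helper name.
 */
static cpu_set
example_ack_cpu(cpu_set pending, unsigned int cpu)
{
    return (pending & ~((cpu_set)1 << cpu));    /* drop this cpu from the wait set */
}
#endif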
void
process_pmap_updates(void)
{
	flush_tlb();

	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}
void
pmap_update_interrupt(void)
{
	KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0);

	assert(!ml_get_interrupts_enabled());

	process_pmap_updates();

	KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
unsigned int pmap_cache_attributes(ppnum_t pn) {

	if (!pmap_valid_page(pn))
		return (VM_WIMG_IO);

	return (VM_WIMG_COPYBACK);
}
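
#if 0
/*
 * Illustrative sketch only (not built): on i386 every managed page is treated
 * as copyback-cacheable and anything else (device space) as uncached I/O, so
 * a caller only needs to distinguish those two results.
 * "example_page_is_cacheable" is a hypothetical helper name.
 */
static boolean_t
example_page_is_cacheable(ppnum_t pn)
{
    return (pmap_cache_attributes(pn) == VM_WIMG_COPYBACK);
}
#endif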
4100 kprintf("pmap 0x%x\n",p
);
4102 kprintf(" pm_cr3 0x%llx\n",p
->pm_cr3
);
4103 kprintf(" pm_pml4 0x%x\n",p
->pm_pml4
);
4104 kprintf(" pm_pdpt 0x%x\n",p
->pm_pdpt
);
4106 kprintf(" pml4[0] 0x%llx\n",*p
->pm_pml4
);
4108 kprintf(" pdpt[%d] 0x%llx\n",i
, p
->pm_pdpt
[i
]);
4111 void pmap_dump_wrap(void)
4113 pmap_dump(current_cpu_datap()->cpu_active_thread
->task
->map
->pmap
);
void
dump_4GB_pdpt(pmap_t p)
{
	int		spl;
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;
	pdpt_entry_t	*pml4p;

	spl = splhigh();
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(p, 0x0);
		spl = splhigh();
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	if (kern_pdptp == NULL)
		panic("kern_pdptp == NULL");
	kprintf("dump_4GB_pdpt(%p)\n"
		"kern_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"user_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		p, kern_pdptp, kvtophys(kern_pdptp),
		kern_pdptp+0, *(kern_pdptp+0),
		kern_pdptp+1, *(kern_pdptp+1),
		kern_pdptp+2, *(kern_pdptp+2),
		kern_pdptp+3, *(kern_pdptp+3),
		kern_pdptp+4, *(kern_pdptp+4),
		user_pdptp, kvtophys(user_pdptp),
		user_pdptp+0, *(user_pdptp+0),
		user_pdptp+1, *(user_pdptp+1),
		user_pdptp+2, *(user_pdptp+2),
		user_pdptp+3, *(user_pdptp+3),
		user_pdptp+4, *(user_pdptp+4));
	kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		p->pm_cr3, p->pm_hold, p->pm_pml4);
	pml4p = (pdpt_entry_t *)p->pm_hold;
	if (pml4p == NULL)
		panic("user pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p+0),
		pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
	kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
	pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
	if (pml4p == NULL)
		panic("kern pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p+0),
		pml4p+511, *(pml4p+511));
	splx(spl);
}

void dump_4GB_pdpt_thread(thread_t tp)
{
	dump_4GB_pdpt(tp->map->pmap);
}