2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
28 * All Rights Reserved.
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 * Carnegie Mellon requests users of this software to return to
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 * (These guys wrote the Vax version)
58 * Physical Map management code for Intel i386, i486, and i860.
60 * Manages physical address maps.
62 * In addition to hardware address maps, this
63 * module is called upon to provide software-use-only
64 * maps which may or may not be stored in the same
65 * form as hardware maps. These pseudo-maps are
66 * used to store intermediate results from copy
67 * operations to and from address spaces.
69 * Since the information managed by this module is
70 * also stored by the logical address mapping module,
71 * this module may throw away valid virtual-to-physical
72 * mappings at almost any time. However, invalidations
73 * of virtual-to-physical mappings must be done as
76 * In order to cope with hardware architectures which
77 * make virtual-to-physical map invalidates expensive,
78 * this module may delay invalidate or reduced protection
79 * operations until such time as they are actually
80 * necessary. This module is given full information as
81 * to which processors are currently using which maps,
82 * and to when physical maps must be made correct.
88 #include <mach_ldebug.h>
90 #include <mach/machine/vm_types.h>
92 #include <mach/boolean.h>
93 #include <kern/thread.h>
94 #include <kern/zalloc.h>
96 #include <kern/lock.h>
97 #include <kern/kalloc.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_kern.h>
103 #include <mach/vm_param.h>
104 #include <mach/vm_prot.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_page.h>
108 #include <mach/machine/vm_param.h>
109 #include <machine/thread.h>
111 #include <kern/misc_protos.h> /* prototyping */
112 #include <i386/misc_protos.h>
114 #include <i386/cpuid.h>
115 #include <i386/cpu_data.h>
116 #include <i386/cpu_number.h>
117 #include <i386/machine_cpu.h>
118 #include <i386/mp_slave_boot.h>
119 #include <i386/seg.h>
120 #include <i386/cpu_capabilities.h>
123 #include <ddb/db_command.h>
124 #include <ddb/db_output.h>
125 #include <ddb/db_sym.h>
126 #include <ddb/db_print.h>
127 #endif /* MACH_KDB */
129 #include <kern/xpr.h>
131 #include <vm/vm_protos.h>
134 #include <i386/mp_desc.h>
136 #include <sys/kdebug.h>
141 #define POSTCODE_DELAY 1
142 #include <i386/postcode.h>
143 #endif /* IWANTTODEBUG */
146 * Forward declarations for internal functions.
148 void pmap_expand_pml4(
152 void pmap_expand_pdpt(
160 static void pmap_remove_range(
166 void phys_attribute_clear(
170 boolean_t
phys_attribute_test(
174 void phys_attribute_set(
178 void pmap_set_reference(
186 boolean_t
phys_page_exists(
190 void dump_pmap(pmap_t
);
191 void dump_4GB_pdpt(pmap_t p
);
192 void dump_4GB_pdpt_thread(thread_t tp
);
195 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
197 int nx_enabled
= 1; /* enable no-execute protection */
203 * Private data structures.
207 * For each vm_page_t, there is a list of all currently
208 * valid virtual mappings of that page. An entry is
209 * a pv_entry_t; the list is the pv_table.
212 typedef struct pv_entry
{
213 struct pv_entry
*next
; /* next pv_entry */
214 pmap_t pmap
; /* pmap where mapping lies */
215 vm_map_offset_t va
; /* virtual address for mapping */
218 #define PV_ENTRY_NULL ((pv_entry_t) 0)
220 pv_entry_t pv_head_table
; /* array of entries, one per page */
223 * pv_list entries are kept on a list that can only be accessed
224 * with the pmap system locked (at SPLVM, not in the cpus_active set).
225 * The list is refilled from the pv_list_zone if it becomes empty.
227 pv_entry_t pv_free_list
; /* free list at SPLVM */
228 decl_simple_lock_data(,pv_free_list_lock
)
229 int pv_free_count
= 0;
230 #define PV_LOW_WATER_MARK 5000
231 #define PV_ALLOC_CHUNK 2000
232 thread_call_t mapping_adjust_call
;
233 static thread_call_data_t mapping_adjust_call_data
;
234 int mappingrecurse
= 0;
236 #define PV_ALLOC(pv_e) { \
237 simple_lock(&pv_free_list_lock); \
238 if ((pv_e = pv_free_list) != 0) { \
239 pv_free_list = pv_e->next; \
241 if (pv_free_count < PV_LOW_WATER_MARK) \
242 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
243 thread_call_enter(mapping_adjust_call); \
245 simple_unlock(&pv_free_list_lock); \
248 #define PV_FREE(pv_e) { \
249 simple_lock(&pv_free_list_lock); \
250 pv_e->next = pv_free_list; \
251 pv_free_list = pv_e; \
253 simple_unlock(&pv_free_list_lock); \
256 zone_t pv_list_zone
; /* zone of pv_entry structures */
258 static zone_t pdpt_zone
;
261 * Each entry in the pv_head_table is locked by a bit in the
262 * pv_lock_table. The lock bits are accessed by the physical
263 * address of the page they lock.
266 char *pv_lock_table
; /* pointer to array of bits */
267 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
270 * First and last physical addresses that we maintain any information
271 * for. Initialized to zero so that pmap operations done before
272 * pmap_init won't touch any non-existent structures.
274 pmap_paddr_t vm_first_phys
= (pmap_paddr_t
) 0;
275 pmap_paddr_t vm_last_phys
= (pmap_paddr_t
) 0;
276 boolean_t pmap_initialized
= FALSE
;/* Has pmap_init completed? */
278 static struct vm_object kptobj_object_store
;
279 static vm_object_t kptobj
;
282 * Index into pv_head table, its lock bits, and the modify/reference
283 * bits starting at vm_first_phys.
286 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
288 #define pai_to_pvh(pai) (&pv_head_table[pai])
289 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
290 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
293 * Array of physical page attribites for managed pages.
294 * One byte per physical page.
296 char *pmap_phys_attributes
;
299 * Physical page attributes. Copy bits from PTE definition.
301 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
302 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
303 #define PHYS_NCACHE INTEL_PTE_NCACHE
306 * Amount of virtual memory mapped by one
307 * page-directory entry.
309 #define PDE_MAPPED_SIZE (pdetova(1))
310 uint64_t pde_mapped_size
;
313 * Locking and TLB invalidation
319 * There are two structures in the pmap module that need locking:
320 * the pmaps themselves, and the per-page pv_lists (which are locked
321 * by locking the pv_lock_table entry that corresponds to the pv_head
322 * for the list in question.) Most routines want to lock a pmap and
323 * then do operations in it that require pv_list locking -- however
324 * pmap_remove_all and pmap_copy_on_write operate on a physical page
325 * basis and want to do the locking in the reverse order, i.e. lock
326 * a pv_list and then go through all the pmaps referenced by that list.
327 * To protect against deadlock between these two cases, the pmap_lock
328 * is used. There are three different locking protocols as a result:
330 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
333 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
334 * lock on the pmap_lock (shared read), then lock the pmap
335 * and finally the pv_lists as needed [i.e. pmap lock before
338 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
339 * Get a write lock on the pmap_lock (exclusive write); this
340 * also guaranteees exclusive access to the pv_lists. Lock the
343 * At no time may any routine hold more than one pmap lock or more than
344 * one pv_list lock. Because interrupt level routines can allocate
345 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
346 * kernel_pmap can only be held at splhigh.
350 * We raise the interrupt level to splvm, to block interprocessor
351 * interrupts during pmap operations. We mark the cpu's cr3 inactive
352 * while interrupts are blocked.
354 #define SPLVM(spl) { \
356 CPU_CR3_MARK_INACTIVE(); \
359 #define SPLX(spl) { \
360 if (current_cpu_datap()->cpu_tlb_invalid) \
361 process_pmap_updates(); \
362 CPU_CR3_MARK_ACTIVE(); \
367 * Lock on pmap system
369 lock_t pmap_system_lock
;
371 #define PMAP_READ_LOCK(pmap, spl) { \
373 lock_read(&pmap_system_lock); \
374 simple_lock(&(pmap)->lock); \
377 #define PMAP_WRITE_LOCK(spl) { \
379 lock_write(&pmap_system_lock); \
382 #define PMAP_READ_UNLOCK(pmap, spl) { \
383 simple_unlock(&(pmap)->lock); \
384 lock_read_done(&pmap_system_lock); \
388 #define PMAP_WRITE_UNLOCK(spl) { \
389 lock_write_done(&pmap_system_lock); \
393 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
394 simple_lock(&(pmap)->lock); \
395 lock_write_to_read(&pmap_system_lock); \
398 #define LOCK_PVH(index) lock_pvh_pai(index)
400 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
403 extern int max_lock_loops
;
404 extern int disableSerialOuput
;
406 unsigned int loop_count; \
407 loop_count = disableSerialOuput ? max_lock_loops \
409 #define LOOP_CHECK(msg, pmap) \
410 if (--loop_count == 0) { \
411 mp_disable_preemption(); \
412 kprintf("%s: cpu %d pmap %x\n", \
413 msg, cpu_number(), pmap); \
414 Debugger("deadlock detection"); \
415 mp_enable_preemption(); \
416 loop_count = max_lock_loops; \
418 #else /* USLOCK_DEBUG */
420 #define LOOP_CHECK(msg, pmap)
421 #endif /* USLOCK_DEBUG */
424 static void pmap_flush_tlbs(pmap_t pmap
);
426 #define PMAP_UPDATE_TLBS(pmap, s, e) \
427 pmap_flush_tlbs(pmap)
430 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
433 pmap_memory_region_t pmap_memory_regions
[PMAP_MEMORY_REGIONS_SIZE
];
436 * Other useful macros.
438 #define current_pmap() (vm_map_pmap(current_thread()->map))
440 struct pmap kernel_pmap_store
;
443 pd_entry_t high_shared_pde
;
444 pd_entry_t commpage64_pde
;
446 struct zone
*pmap_zone
; /* zone of pmap structures */
448 int pmap_debug
= 0; /* flag for debugging prints */
450 unsigned int inuse_ptepages_count
= 0; /* debugging */
452 addr64_t kernel64_cr3
;
453 boolean_t no_shared_cr3
= FALSE
; /* -no_shared_cr3 boot arg */
456 * Pmap cache. Cache is threaded through ref_count field of pmap.
457 * Max will eventually be constant -- variable for experimentation.
459 int pmap_cache_max
= 32;
460 int pmap_alloc_chunk
= 8;
461 pmap_t pmap_cache_list
;
462 int pmap_cache_count
;
463 decl_simple_lock_data(,pmap_cache_lock
)
468 extern uint32_t lowGlo
;
469 extern void *version
;
471 pt_entry_t
*DMAP1
, *DMAP2
;
476 #define PMAP_ALIAS_MAX 32
482 #define PMAP_ALIAS_COOKIE 0xdeadbeef
483 } pmap_aliasbuf
[PMAP_ALIAS_MAX
];
484 int pmap_alias_index
= 0;
485 extern vm_offset_t
get_rpc();
487 #endif /* DEBUG_ALIAS */
490 * for legacy, returns the address of the pde entry.
491 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
492 * then returns the mapped address of the pde entry in that page
495 pmap_pde(pmap_t m
, vm_map_offset_t v
)
498 if (!cpu_64bit
|| (m
== kernel_pmap
)) {
499 pde
= (&((m
)->dirbase
[(vm_offset_t
)(v
) >> PDESHIFT
]));
502 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
503 pde
= pmap64_pde(m
, v
);
510 * the single pml4 page per pmap is allocated at pmap create time and exists
511 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
512 * level of page table dynamic mapping.
513 * this returns the address of the requested pml4 entry in the top level page.
517 pmap64_pml4(pmap_t pmap
, vm_map_offset_t vaddr
)
519 return ((pml4_entry_t
*)pmap
->pm_hold
+ ((vm_offset_t
)((vaddr
>>PML4SHIFT
)&(NPML4PG
-1))));
523 * maps in the pml4 page, if any, containing the pdpt entry requested
524 * and returns the address of the pdpt entry in that mapped page
527 pmap64_pdpt(pmap_t pmap
, vm_map_offset_t vaddr
)
534 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
535 if ((vaddr
> 0x00007FFFFFFFFFFFULL
) && (vaddr
< 0xFFFF800000000000ULL
)) {
539 pml4
= pmap64_pml4(pmap
, vaddr
);
541 if (pml4
&& ((*pml4
& INTEL_PTE_VALID
))) {
543 newpf
= *pml4
& PG_FRAME
;
546 for (i
=PMAP_PDPT_FIRST_WINDOW
; i
< PMAP_PDPT_FIRST_WINDOW
+PMAP_PDPT_NWINDOWS
; i
++) {
547 if (((*(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
)) & PG_FRAME
) == newpf
) {
548 return((pdpt_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
) +
549 ((vm_offset_t
)((vaddr
>>PDPTSHIFT
)&(NPDPTPG
-1))));
553 current_cpu_datap()->cpu_pmap
->pdpt_window_index
++;
554 if (current_cpu_datap()->cpu_pmap
->pdpt_window_index
> (PMAP_PDPT_FIRST_WINDOW
+PMAP_PDPT_NWINDOWS
-1))
555 current_cpu_datap()->cpu_pmap
->pdpt_window_index
= PMAP_PDPT_FIRST_WINDOW
;
557 (current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pdpt_window_index
].prv_CMAP
),
558 newpf
| INTEL_PTE_RW
| INTEL_PTE_VALID
);
559 invlpg((u_int
)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pdpt_window_index
].prv_CADDR
));
560 return ((pdpt_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pdpt_window_index
].prv_CADDR
) +
561 ((vm_offset_t
)((vaddr
>>PDPTSHIFT
)&(NPDPTPG
-1))));
568 * maps in the pdpt page, if any, containing the pde entry requested
569 * and returns the address of the pde entry in that mapped page
572 pmap64_pde(pmap_t pmap
, vm_map_offset_t vaddr
)
579 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
580 if ((vaddr
> 0x00007FFFFFFFFFFFULL
) && (vaddr
< 0xFFFF800000000000ULL
)) {
584 /* if (vaddr & (1ULL << 63)) panic("neg addr");*/
585 pdpt
= pmap64_pdpt(pmap
, vaddr
);
587 if (pdpt
&& ((*pdpt
& INTEL_PTE_VALID
))) {
589 newpf
= *pdpt
& PG_FRAME
;
591 for (i
=PMAP_PDE_FIRST_WINDOW
; i
< PMAP_PDE_FIRST_WINDOW
+PMAP_PDE_NWINDOWS
; i
++) {
592 if (((*(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
)) & PG_FRAME
) == newpf
) {
593 return((pd_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
) +
594 ((vm_offset_t
)((vaddr
>>PDSHIFT
)&(NPDPG
-1))));
598 current_cpu_datap()->cpu_pmap
->pde_window_index
++;
599 if (current_cpu_datap()->cpu_pmap
->pde_window_index
> (PMAP_PDE_FIRST_WINDOW
+PMAP_PDE_NWINDOWS
-1))
600 current_cpu_datap()->cpu_pmap
->pde_window_index
= PMAP_PDE_FIRST_WINDOW
;
602 (current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pde_window_index
].prv_CMAP
),
603 newpf
| INTEL_PTE_RW
| INTEL_PTE_VALID
);
604 invlpg((u_int
)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pde_window_index
].prv_CADDR
));
605 return ((pd_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pde_window_index
].prv_CADDR
) +
606 ((vm_offset_t
)((vaddr
>>PDSHIFT
)&(NPDPG
-1))));
615 * return address of mapped pte for vaddr va in pmap pmap.
616 * must be called with pre-emption or interrupts disabled
617 * if targeted pmap is not the kernel pmap
618 * since we may be passing back a virtual address that is
619 * associated with this cpu... pre-emption or interrupts
620 * must remain disabled until the caller is done using
621 * the pointer that was passed back .
623 * maps the pde page, if any, containing the pte in and returns
624 * the address of the pte in that mapped page
627 pmap_pte(pmap_t pmap
, vm_map_offset_t vaddr
)
634 pde
= pmap_pde(pmap
,vaddr
);
636 if (pde
&& ((*pde
& INTEL_PTE_VALID
))) {
637 if (pmap
== kernel_pmap
) {
638 return (vtopte(vaddr
)); /* compat kernel still has pte's mapped */
641 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
643 newpf
= *pde
& PG_FRAME
;
645 for (i
=PMAP_PTE_FIRST_WINDOW
; i
< PMAP_PTE_FIRST_WINDOW
+PMAP_PTE_NWINDOWS
; i
++) {
646 if (((*(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
)) & PG_FRAME
) == newpf
) {
647 return((pt_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
) +
648 ((vm_offset_t
)i386_btop(vaddr
) & (NPTEPG
-1)));
652 current_cpu_datap()->cpu_pmap
->pte_window_index
++;
653 if (current_cpu_datap()->cpu_pmap
->pte_window_index
> (PMAP_PTE_FIRST_WINDOW
+PMAP_PTE_NWINDOWS
-1))
654 current_cpu_datap()->cpu_pmap
->pte_window_index
= PMAP_PTE_FIRST_WINDOW
;
656 (current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pte_window_index
].prv_CMAP
),
657 newpf
| INTEL_PTE_RW
| INTEL_PTE_VALID
);
658 invlpg((u_int
)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pte_window_index
].prv_CADDR
));
659 return ((pt_entry_t
*)(current_cpu_datap()->cpu_pmap
->mapwindow
[current_cpu_datap()->cpu_pmap
->pte_window_index
].prv_CADDR
) +
660 ((vm_offset_t
)i386_btop(vaddr
) & (NPTEPG
-1)));
668 * Map memory at initialization. The physical addresses being
669 * mapped are not managed and are never unmapped.
671 * For now, VM is already on, we only need to map the
677 vm_map_offset_t start_addr
,
678 vm_map_offset_t end_addr
,
685 while (start_addr
< end_addr
) {
686 pmap_enter(kernel_pmap
, (vm_map_offset_t
)virt
,
687 (ppnum_t
) i386_btop(start_addr
), prot
, flags
, FALSE
);
695 * Back-door routine for mapping kernel VM at initialization.
696 * Useful for mapping memory outside the range
697 * Sets no-cache, A, D.
698 * [vm_first_phys, vm_last_phys) (i.e., devices).
699 * Otherwise like pmap_map.
704 vm_map_offset_t start_addr
,
705 vm_map_offset_t end_addr
,
712 template = pa_to_pte(start_addr
)
718 if(flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)) {
719 template |= INTEL_PTE_NCACHE
;
720 if(!(flags
& (VM_MEM_GUARDED
| VM_WIMG_USE_DEFAULT
)))
721 template |= INTEL_PTE_PTA
;
724 if (prot
& VM_PROT_WRITE
)
725 template |= INTEL_PTE_WRITE
;
727 while (start_addr
< end_addr
) {
728 pte
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)virt
);
729 if (pte
== PT_ENTRY_NULL
) {
730 panic("pmap_map_bd: Invalid kernel address\n");
732 pmap_store_pte(pte
, template);
733 pte_increment_pa(template);
735 start_addr
+= PAGE_SIZE
;
742 extern char *first_avail
;
743 extern vm_offset_t virtual_avail
, virtual_end
;
744 extern pmap_paddr_t avail_start
, avail_end
;
745 extern vm_offset_t etext
;
746 extern void *sectHIBB
;
747 extern int sectSizeHIB
;
751 pmap_high_shared_remap(enum high_fixed_addresses e
, vm_offset_t va
, int sz
)
753 vm_offset_t ve
= pmap_index_to_virt(e
);
758 assert(0 == (va
& PAGE_MASK
)); /* expecting page aligned */
759 ptep
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)ve
);
761 for (i
=0; i
< sz
; i
++) {
762 pa
= (pmap_paddr_t
) kvtophys(va
);
763 pmap_store_pte(ptep
, (pa
& PG_FRAME
)
776 pmap_cpu_high_shared_remap(int cpu
, enum high_cpu_types e
, vm_offset_t va
, int sz
)
778 enum high_fixed_addresses a
= e
+ HIGH_CPU_END
* cpu
;
779 return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN
+ a
, va
, sz
);
782 void pmap_init_high_shared(void);
784 extern vm_offset_t gdtptr
, idtptr
;
786 extern uint32_t low_intstack
;
788 extern struct fake_descriptor ldt_desc_pattern
;
789 extern struct fake_descriptor tss_desc_pattern
;
791 extern char hi_remap_text
, hi_remap_etext
;
792 extern char t_zero_div
;
794 pt_entry_t
*pte_unique_base
;
797 pmap_init_high_shared(void)
801 struct __gdt_desc_struct gdt_desc
= {0,0,0};
802 struct __idt_desc_struct idt_desc
= {0,0,0};
804 struct i386_tss
*ttss
;
807 kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
808 HIGH_MEM_BASE
,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN
));
809 pte_unique_base
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN
));
811 if (i386_btop(&hi_remap_etext
- &hi_remap_text
+ 1) >
812 HIGH_FIXED_TRAMPS_END
- HIGH_FIXED_TRAMPS
+ 1)
813 panic("tramps too large");
814 haddr
= pmap_high_shared_remap(HIGH_FIXED_TRAMPS
,
815 (vm_offset_t
) &hi_remap_text
, 3);
816 kprintf("tramp: 0x%x, ",haddr
);
817 printf("hi mem tramps at 0x%x\n",haddr
);
818 /* map gdt up high and update ptr for reload */
819 haddr
= pmap_high_shared_remap(HIGH_FIXED_GDT
,
820 (vm_offset_t
) master_gdt
, 1);
821 __asm__
__volatile__("sgdt %0": "=m" (gdt_desc
): :"memory");
822 gdt_desc
.address
= haddr
;
823 kprintf("GDT: 0x%x, ",haddr
);
824 /* map ldt up high */
825 haddr
= pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN
,
826 (vm_offset_t
) master_ldt
,
827 HIGH_FIXED_LDT_END
- HIGH_FIXED_LDT_BEGIN
+ 1);
828 kprintf("LDT: 0x%x, ",haddr
);
829 /* put new ldt addr into gdt */
830 master_gdt
[sel_idx(KERNEL_LDT
)] = ldt_desc_pattern
;
831 master_gdt
[sel_idx(KERNEL_LDT
)].offset
= (vm_offset_t
) haddr
;
832 fix_desc(&master_gdt
[sel_idx(KERNEL_LDT
)], 1);
833 master_gdt
[sel_idx(USER_LDT
)] = ldt_desc_pattern
;
834 master_gdt
[sel_idx(USER_LDT
)].offset
= (vm_offset_t
) haddr
;
835 fix_desc(&master_gdt
[sel_idx(USER_LDT
)], 1);
837 /* map idt up high */
838 haddr
= pmap_high_shared_remap(HIGH_FIXED_IDT
,
839 (vm_offset_t
) master_idt
, 1);
840 __asm__
__volatile__("sidt %0" : "=m" (idt_desc
));
841 idt_desc
.address
= haddr
;
842 kprintf("IDT: 0x%x, ", haddr
);
843 /* remap ktss up high and put new high addr into gdt */
844 haddr
= pmap_high_shared_remap(HIGH_FIXED_KTSS
,
845 (vm_offset_t
) &master_ktss
, 1);
846 master_gdt
[sel_idx(KERNEL_TSS
)] = tss_desc_pattern
;
847 master_gdt
[sel_idx(KERNEL_TSS
)].offset
= (vm_offset_t
) haddr
;
848 fix_desc(&master_gdt
[sel_idx(KERNEL_TSS
)], 1);
849 kprintf("KTSS: 0x%x, ",haddr
);
851 /* remap dbtss up high and put new high addr into gdt */
852 haddr
= pmap_high_shared_remap(HIGH_FIXED_DBTSS
,
853 (vm_offset_t
) &master_dbtss
, 1);
854 master_gdt
[sel_idx(DEBUG_TSS
)] = tss_desc_pattern
;
855 master_gdt
[sel_idx(DEBUG_TSS
)].offset
= (vm_offset_t
) haddr
;
856 fix_desc(&master_gdt
[sel_idx(DEBUG_TSS
)], 1);
857 ttss
= (struct i386_tss
*)haddr
;
858 kprintf("DBTSS: 0x%x, ",haddr
);
859 #endif /* MACH_KDB */
861 /* remap dftss up high and put new high addr into gdt */
862 haddr
= pmap_high_shared_remap(HIGH_FIXED_DFTSS
,
863 (vm_offset_t
) &master_dftss
, 1);
864 master_gdt
[sel_idx(DF_TSS
)] = tss_desc_pattern
;
865 master_gdt
[sel_idx(DF_TSS
)].offset
= (vm_offset_t
) haddr
;
866 fix_desc(&master_gdt
[sel_idx(DF_TSS
)], 1);
867 kprintf("DFTSS: 0x%x\n",haddr
);
869 /* remap mctss up high and put new high addr into gdt */
870 haddr
= pmap_high_shared_remap(HIGH_FIXED_DFTSS
,
871 (vm_offset_t
) &master_mctss
, 1);
872 master_gdt
[sel_idx(MC_TSS
)] = tss_desc_pattern
;
873 master_gdt
[sel_idx(MC_TSS
)].offset
= (vm_offset_t
) haddr
;
874 fix_desc(&master_gdt
[sel_idx(MC_TSS
)], 1);
875 kprintf("MCTSS: 0x%x\n",haddr
);
877 __asm__
__volatile__("lgdt %0": "=m" (gdt_desc
));
878 __asm__
__volatile__("lidt %0": "=m" (idt_desc
));
879 kprintf("gdt/idt reloaded, ");
881 kprintf("tr reset to KERNEL_TSS\n");
886 * Bootstrap the system enough to run with virtual memory.
887 * Map the kernel's code and data, and allocate the system page table.
888 * Called with mapping OFF. Page_size must already be set.
891 * load_start: PA where kernel was loaded
892 * avail_start PA of first available physical page -
893 * after kernel page tables
894 * avail_end PA of last available physical page
895 * virtual_avail VA of first available page -
896 * after kernel page tables
897 * virtual_end VA of last available page -
898 * end of kernel address space
900 * &start_text start of kernel text
901 * &etext end of kernel text
906 __unused vm_offset_t load_start
,
912 int wpkernel
, boot_arg
;
915 vm_last_addr
= VM_MAX_KERNEL_ADDRESS
; /* Set the highest address
918 * The kernel's pmap is statically allocated so we don't
919 * have to use pmap_create, which is unlikely to work
920 * correctly at this part of the boot sequence.
924 kernel_pmap
= &kernel_pmap_store
;
925 kernel_pmap
->ref_count
= 1;
926 kernel_pmap
->nx_enabled
= FALSE
;
927 kernel_pmap
->pm_64bit
= 0;
928 kernel_pmap
->pm_obj
= (vm_object_t
) NULL
;
929 kernel_pmap
->dirbase
= (pd_entry_t
*)((unsigned int)IdlePTD
| KERNBASE
);
930 kernel_pmap
->pdirbase
= (pmap_paddr_t
)((int)IdlePTD
);
931 pdpt
= (pd_entry_t
*)((unsigned int)IdlePDPT
| KERNBASE
);
932 kernel_pmap
->pm_pdpt
= pdpt
;
933 kernel_pmap
->pm_cr3
= (pmap_paddr_t
)((int)IdlePDPT
);
935 va
= (vm_offset_t
)kernel_pmap
->dirbase
;
936 /* setup self referential mapping(s) */
937 for (i
= 0; i
< NPGPTD
; i
++, pdpt
++) {
939 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
941 (pd_entry_t
*) (kernel_pmap
->dirbase
+ PTDPTDI
+ i
),
942 (pa
& PG_FRAME
) | INTEL_PTE_VALID
| INTEL_PTE_RW
| INTEL_PTE_REF
|
943 INTEL_PTE_MOD
| INTEL_PTE_WIRED
) ;
944 pmap_store_pte(pdpt
, pa
| INTEL_PTE_VALID
);
949 lo_kernel_cr3
= kernel_pmap
->pm_cr3
;
950 current_cpu_datap()->cpu_kernel_cr3
= (addr64_t
) kernel_pmap
->pm_cr3
;
952 /* save the value we stuff into created pmaps to share the gdts etc */
953 high_shared_pde
= *pmap_pde(kernel_pmap
, HIGH_MEM_BASE
);
954 /* make sure G bit is on for high shared pde entry */
955 high_shared_pde
|= INTEL_PTE_GLOBAL
;
956 pmap_store_pte(pmap_pde(kernel_pmap
, HIGH_MEM_BASE
), high_shared_pde
);
959 inuse_ptepages_count
+= NKPT
;
961 virtual_avail
= (vm_offset_t
)VADDR(KPTDI
,0) + (vm_offset_t
)first_avail
;
962 virtual_end
= (vm_offset_t
)(VM_MAX_KERNEL_ADDRESS
);
965 * Reserve some special page table entries/VA space for temporary
968 #define SYSMAP(c, p, v, n) \
969 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
974 for (i
=0; i
<PMAP_NWINDOWS
; i
++) {
976 (current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
),
977 (current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
),
979 *current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
= 0;
982 /* DMAP user for debugger */
983 SYSMAP(caddr_t
, DMAP1
, DADDR1
, 1);
984 SYSMAP(caddr_t
, DMAP2
, DADDR2
, 1); /* XXX temporary - can remove */
987 lock_init(&pmap_system_lock
,
988 FALSE
, /* NOT a sleep lock */
994 if (PE_parse_boot_arg("wpkernel", &boot_arg
)) {
999 /* Remap kernel text readonly unless the "wpkernel" boot-arg is present
1007 for (myva
= i386_round_page(MP_BOOT
+ MP_BOOTSTACK
); myva
< etext
; myva
+= PAGE_SIZE
) {
1008 if (myva
>= (vm_offset_t
)sectHIBB
&& myva
< ((vm_offset_t
)sectHIBB
+ sectSizeHIB
))
1010 ptep
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)myva
);
1012 pmap_store_pte(ptep
, *ptep
& ~INTEL_PTE_RW
);
1016 /* no matter what, kernel page zero is not accessible */
1017 pte
= pmap_pte(kernel_pmap
, 0);
1018 pmap_store_pte(pte
, INTEL_PTE_INVALID
);
1020 /* map lowmem global page into fixed addr 0x2000 */
1021 if (0 == (pte
= pmap_pte(kernel_pmap
,0x2000))) panic("lowmem pte");
1023 pmap_store_pte(pte
, kvtophys((vm_offset_t
)&lowGlo
)|INTEL_PTE_VALID
|INTEL_PTE_REF
|INTEL_PTE_MOD
|INTEL_PTE_WIRED
|INTEL_PTE_RW
);
1026 simple_lock_init(&kernel_pmap
->lock
, 0);
1027 simple_lock_init(&pv_free_list_lock
, 0);
1029 pmap_init_high_shared();
1031 pde_mapped_size
= PDE_MAPPED_SIZE
;
1034 pdpt_entry_t
*ppdpt
= (pdpt_entry_t
*)IdlePDPT
;
1035 pdpt_entry_t
*ppdpt64
= (pdpt_entry_t
*)IdlePDPT64
;
1036 pdpt_entry_t
*ppml4
= (pdpt_entry_t
*)IdlePML4
;
1037 int istate
= ml_set_interrupts_enabled(FALSE
);
1040 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
1041 * with page bits set for the correct IA-32e operation and so that
1042 * the legacy-mode IdlePDPT is retained for slave processor start-up.
1043 * This is necessary due to the incompatible use of page bits between
1044 * 64-bit and legacy modes.
1046 kernel_pmap
->pm_cr3
= (pmap_paddr_t
)((int)IdlePML4
); /* setup in start.s for us */
1047 kernel_pmap
->pm_pml4
= IdlePML4
;
1048 kernel_pmap
->pm_pdpt
= (pd_entry_t
*)
1049 ((unsigned int)IdlePDPT64
| KERNBASE
);
1050 #define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
1051 pmap_store_pte(kernel_pmap
->pm_pml4
,
1052 (uint32_t)IdlePDPT64
| PAGE_BITS
);
1053 pmap_store_pte((ppdpt64
+0), *(ppdpt
+0) | PAGE_BITS
);
1054 pmap_store_pte((ppdpt64
+1), *(ppdpt
+1) | PAGE_BITS
);
1055 pmap_store_pte((ppdpt64
+2), *(ppdpt
+2) | PAGE_BITS
);
1056 pmap_store_pte((ppdpt64
+3), *(ppdpt
+3) | PAGE_BITS
);
1059 * The kernel is also mapped in the uber-sapce at the 4GB starting
1060 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
1062 pmap_store_pte((ppml4
+KERNEL_UBER_PML4_INDEX
), *(ppml4
+0));
1064 kernel64_cr3
= (addr64_t
) kernel_pmap
->pm_cr3
;
1065 cpu_IA32e_enable(current_cpu_datap());
1066 current_cpu_datap()->cpu_is64bit
= TRUE
;
1067 /* welcome to a 64 bit world */
1069 /* Re-initialize and load descriptors */
1070 cpu_desc_init64(&cpu_data_master
, TRUE
);
1071 cpu_desc_load64(&cpu_data_master
);
1072 fast_syscall_init64();
1074 pde_mapped_size
= 512*4096 ;
1076 ml_set_interrupts_enabled(istate
);
1079 kernel_pmap
->pm_hold
= kernel_pmap
->pm_pml4
;
1081 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
1082 VADDR(KPTDI
,0), virtual_end
);
1083 printf("PAE enabled\n");
1085 printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
1087 kprintf("Available physical space from 0x%llx to 0x%llx\n",
1088 avail_start
, avail_end
);
1091 * By default for 64-bit users loaded at 4GB, share kernel mapping.
1092 * But this may be overridden by the -no_shared_cr3 boot-arg.
1094 if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3
)) {
1095 kprintf("Shared kernel address space disabled\n");
1101 vm_offset_t
*startp
,
1104 *startp
= virtual_avail
;
1105 *endp
= virtual_end
;
1109 * Initialize the pmap module.
1110 * Called by vm_init, to initialize any structures that the pmap
1111 * system needs to map virtual memory.
1116 register long npages
;
1118 register vm_size_t s
;
1119 vm_map_offset_t vaddr
;
1123 * Allocate memory for the pv_head_table and its lock bits,
1124 * the modify bit array, and the pte_page table.
1127 /* zero bias all these arrays now instead of off avail_start
1128 so we cover all memory */
1129 npages
= i386_btop(avail_end
);
1130 s
= (vm_size_t
) (sizeof(struct pv_entry
) * npages
1131 + pv_lock_table_size(npages
)
1135 if (kmem_alloc_wired(kernel_map
, &addr
, s
) != KERN_SUCCESS
)
1138 memset((char *)addr
, 0, s
);
1141 * Allocate the structures first to preserve word-alignment.
1143 pv_head_table
= (pv_entry_t
) addr
;
1144 addr
= (vm_offset_t
) (pv_head_table
+ npages
);
1146 pv_lock_table
= (char *) addr
;
1147 addr
= (vm_offset_t
) (pv_lock_table
+ pv_lock_table_size(npages
));
1149 pmap_phys_attributes
= (char *) addr
;
1152 * Create the zone of physical maps,
1153 * and of the physical-to-virtual entries.
1155 s
= (vm_size_t
) sizeof(struct pmap
);
1156 pmap_zone
= zinit(s
, 400*s
, 4096, "pmap"); /* XXX */
1157 s
= (vm_size_t
) sizeof(struct pv_entry
);
1158 pv_list_zone
= zinit(s
, 10000*s
, 4096, "pv_list"); /* XXX */
1160 pdpt_zone
= zinit(s
, 400*s
, 4096, "pdpt"); /* XXX */
1163 * Only now, when all of the data structures are allocated,
1164 * can we set vm_first_phys and vm_last_phys. If we set them
1165 * too soon, the kmem_alloc_wired above will try to use these
1166 * data structures and blow up.
1169 /* zero bias this now so we cover all memory */
1171 vm_last_phys
= avail_end
;
1173 kptobj
= &kptobj_object_store
;
1174 _vm_object_allocate((vm_object_size_t
)NKPDE
, kptobj
);
1175 kernel_pmap
->pm_obj
= kptobj
;
1177 /* create pv entries for kernel pages mapped by low level
1178 startup code. these have to exist so we can pmap_remove()
1179 e.g. kext pages from the middle of our addr space */
1181 vaddr
= (vm_map_offset_t
)0;
1182 for (ppn
= 0; ppn
< i386_btop(avail_start
) ; ppn
++ ) {
1185 pv_e
= pai_to_pvh(ppn
);
1188 kernel_pmap
->stats
.resident_count
++;
1189 pv_e
->pmap
= kernel_pmap
;
1190 pv_e
->next
= PV_ENTRY_NULL
;
1193 pmap_initialized
= TRUE
;
1196 * Initializie pmap cache.
1198 pmap_cache_list
= PMAP_NULL
;
1199 pmap_cache_count
= 0;
1200 simple_lock_init(&pmap_cache_lock
, 0);
1204 x86_lowmem_free(void)
1206 /* free lowmem pages back to the vm system. we had to defer doing this
1207 until the vm system was fully up.
1208 the actual pages that are released are determined by which
1209 pages the memory sizing code puts into the region table */
1211 ml_static_mfree((vm_offset_t
) i386_ptob(pmap_memory_regions
[0].base
),
1212 (vm_size_t
) i386_ptob(pmap_memory_regions
[0].end
- pmap_memory_regions
[0].base
));
1216 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1228 assert(pn
!= vm_page_fictitious_addr
);
1229 phys
= (pmap_paddr_t
)i386_ptob(pn
);
1230 if (!pmap_initialized
)
1233 if (!pmap_valid_page(pn
))
1236 PMAP_WRITE_LOCK(spl
);
1238 pai
= pa_index(phys
);
1239 pv_h
= pai_to_pvh(pai
);
1241 result
= (pv_h
->pmap
== PMAP_NULL
);
1242 PMAP_WRITE_UNLOCK(spl
);
1248 * Create and return a physical map.
1250 * If the size specified for the map
1251 * is zero, the map is an actual physical
1252 * map, and may be referenced by the
1255 * If the size specified is non-zero,
1256 * the map will be used in software only, and
1257 * is bounded by that size.
1269 pml4_entry_t
*pml4p
;
1275 size
= (vm_size_t
) sz
;
1278 * A software use-only map doesn't even need a map.
1285 p
= (pmap_t
) zalloc(pmap_zone
);
1287 panic("pmap_create zalloc");
1289 /* init counts now since we'll be bumping some */
1290 simple_lock_init(&p
->lock
, 0);
1291 p
->stats
.resident_count
= 0;
1292 p
->stats
.wired_count
= 0;
1295 p
->pm_64bit
= is_64bit
;
1296 p
->pm_kernel_cr3
= FALSE
;
1299 /* legacy 32 bit setup */
1300 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
1301 * entry covers 1GB of addr space */
1302 if (KERN_SUCCESS
!= kmem_alloc_wired(kernel_map
, (vm_offset_t
*)(&p
->dirbase
), NBPTD
))
1303 panic("pmap_create kmem_alloc_wired");
1304 p
->pm_hold
= (vm_offset_t
)zalloc(pdpt_zone
);
1305 if ((vm_offset_t
)NULL
== p
->pm_hold
) {
1306 panic("pdpt zalloc");
1308 pdpt
= (pdpt_entry_t
*) (( p
->pm_hold
+ 31) & ~31);
1309 p
->pm_cr3
= (pmap_paddr_t
)kvtophys((vm_offset_t
)pdpt
);
1310 if (NULL
== (p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPGPTD
*NPTDPG
))))
1311 panic("pmap_create vm_object_allocate");
1313 memset((char *)p
->dirbase
, 0, NBPTD
);
1315 va
= (vm_offset_t
)p
->dirbase
;
1316 p
->pdirbase
= kvtophys(va
);
1318 template = cpu_64bit
? INTEL_PTE_VALID
|INTEL_PTE_RW
|INTEL_PTE_USER
|INTEL_PTE_REF
: INTEL_PTE_VALID
;
1319 for (i
= 0; i
< NPGPTD
; i
++, pdpt
++) {
1321 pa
= (pmap_paddr_t
) kvtophys(va
+ i386_ptob(i
));
1322 pmap_store_pte(pdpt
, pa
| template);
1325 /* map the high shared pde */
1326 pmap_store_pte(pmap_pde(p
, HIGH_MEM_BASE
), high_shared_pde
);
1332 /* alloc the pml4 page in kernel vm */
1333 if (KERN_SUCCESS
!= kmem_alloc_wired(kernel_map
, (vm_offset_t
*)(&p
->pm_hold
), PAGE_SIZE
))
1334 panic("pmap_create kmem_alloc_wired pml4");
1336 memset((char *)p
->pm_hold
, 0, PAGE_SIZE
);
1337 p
->pm_cr3
= (pmap_paddr_t
)kvtophys((vm_offset_t
)p
->pm_hold
);
1339 inuse_ptepages_count
++;
1340 p
->stats
.resident_count
++;
1341 p
->stats
.wired_count
++;
1343 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
1345 if (NULL
== (p
->pm_obj_pml4
= vm_object_allocate((vm_object_size_t
)(NPML4PGS
))))
1346 panic("pmap_create pdpt obj");
1348 if (NULL
== (p
->pm_obj_pdpt
= vm_object_allocate((vm_object_size_t
)(NPDPTPGS
))))
1349 panic("pmap_create pdpt obj");
1351 if (NULL
== (p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPDEPGS
))))
1352 panic("pmap_create pte obj");
1354 /* uber space points to uber mapped kernel */
1356 pml4p
= pmap64_pml4(p
, 0ULL);
1357 pmap_store_pte((pml4p
+KERNEL_UBER_PML4_INDEX
),*kernel_pmap
->pm_pml4
);
1359 while ((pdp
= pmap64_pde(p
, (uint64_t)HIGH_MEM_BASE
)) == PD_ENTRY_NULL
) {
1361 pmap_expand_pdpt(p
, (uint64_t)HIGH_MEM_BASE
); /* need room for another pde entry */
1364 pmap_store_pte(pdp
, high_shared_pde
);
1374 pmap_set_4GB_pagezero(pmap_t p
)
1377 pdpt_entry_t
*user_pdptp
;
1378 pdpt_entry_t
*kern_pdptp
;
1380 assert(p
->pm_64bit
);
1382 /* Kernel-shared cr3 may be disabled by boot arg. */
1387 * Set the bottom 4 3rd-level pte's to be the kernel's.
1390 while ((user_pdptp
= pmap64_pdpt(p
, 0x0)) == PDPT_ENTRY_NULL
) {
1392 pmap_expand_pml4(p
, 0x0);
1395 kern_pdptp
= kernel_pmap
->pm_pdpt
;
1396 pmap_store_pte(user_pdptp
+0, *(kern_pdptp
+0));
1397 pmap_store_pte(user_pdptp
+1, *(kern_pdptp
+1));
1398 pmap_store_pte(user_pdptp
+2, *(kern_pdptp
+2));
1399 pmap_store_pte(user_pdptp
+3, *(kern_pdptp
+3));
1401 p
->pm_kernel_cr3
= TRUE
;
1408 pmap_load_kernel_cr3(void)
1410 uint32_t kernel_cr3
;
1412 assert(!ml_get_interrupts_enabled());
1415 * Reload cr3 with the true kernel cr3.
1416 * Note: kernel's pml4 resides below 4GB physical.
1418 kernel_cr3
= current_cpu_datap()->cpu_kernel_cr3
;
1419 set_cr3(kernel_cr3
);
1420 current_cpu_datap()->cpu_active_cr3
= kernel_cr3
;
1421 current_cpu_datap()->cpu_task_map
= TASK_MAP_32BIT
;
1422 current_cpu_datap()->cpu_tlb_invalid
= FALSE
;
1423 __asm__
volatile("mfence");
1427 pmap_clear_4GB_pagezero(pmap_t p
)
1430 pdpt_entry_t
*user_pdptp
;
1433 if (!p
->pm_kernel_cr3
)
1437 user_pdptp
= pmap64_pdpt(p
, 0x0);
1438 pmap_store_pte(user_pdptp
+0, 0);
1439 pmap_store_pte(user_pdptp
+1, 0);
1440 pmap_store_pte(user_pdptp
+2, 0);
1441 pmap_store_pte(user_pdptp
+3, 0);
1443 p
->pm_kernel_cr3
= FALSE
;
1445 pmap_load_kernel_cr3();
1451 * Retire the given physical map from service.
1452 * Should only be called if the map contains
1453 * no valid mappings.
1463 register pt_entry_t
*pdep
;
1464 register vm_page_t m
;
1470 simple_lock(&p
->lock
);
1474 * If some cpu is not using the physical pmap pointer that it
1475 * is supposed to be (see set_dirbase), we might be using the
1476 * pmap that is being destroyed! Make sure we are
1477 * physically on the right pmap:
1481 VM_MAX_KERNEL_ADDRESS
);
1484 simple_unlock(&p
->lock
);
1488 return; /* still in use */
1492 * Free the memory maps, then the
1498 pdep
= (pt_entry_t
*)p
->dirbase
;
1500 while (pdep
< (pt_entry_t
*)&p
->dirbase
[(UMAXPTDI
+1)]) {
1503 if (*pdep
& INTEL_PTE_VALID
) {
1504 ind
= pdep
- (pt_entry_t
*)&p
->dirbase
[0];
1506 vm_object_lock(p
->pm_obj
);
1507 m
= vm_page_lookup(p
->pm_obj
, (vm_object_offset_t
)ind
);
1508 if (m
== VM_PAGE_NULL
) {
1509 panic("pmap_destroy: pte page not in object");
1511 vm_page_lock_queues();
1513 inuse_ptepages_count
--;
1515 vm_object_unlock(p
->pm_obj
);
1516 vm_page_unlock_queues();
1519 * Clear pdes, this might be headed for the cache.
1521 pmap_store_pte(pdep
, 0);
1525 pmap_store_pte(pdep
, 0);
1531 inuse_ptepages_count
-= p
->pm_obj
->resident_page_count
;
1533 vm_object_deallocate(p
->pm_obj
);
1534 kmem_free(kernel_map
, (vm_offset_t
)p
->dirbase
, NBPTD
);
1535 zfree(pdpt_zone
, (void *)p
->pm_hold
);
1540 pmap_unmap_sharedpage(p
);
1542 /* free 64 bit mode structs */
1543 inuse_ptepages_count
--;
1544 kmem_free(kernel_map
, (vm_offset_t
)p
->pm_hold
, PAGE_SIZE
);
1546 inuse_ptepages_count
-= p
->pm_obj_pml4
->resident_page_count
;
1547 vm_object_deallocate(p
->pm_obj_pml4
);
1549 inuse_ptepages_count
-= p
->pm_obj_pdpt
->resident_page_count
;
1550 vm_object_deallocate(p
->pm_obj_pdpt
);
1552 inuse_ptepages_count
-= p
->pm_obj
->resident_page_count
;
1553 vm_object_deallocate(p
->pm_obj
);
1557 zfree(pmap_zone
, p
);
1561 * Add a reference to the specified pmap.
1570 if (p
!= PMAP_NULL
) {
1572 simple_lock(&p
->lock
);
1574 simple_unlock(&p
->lock
);
1580 * Remove a range of hardware page-table entries.
1581 * The entries given are the first (inclusive)
1582 * and last (exclusive) entries for the VM pages.
1583 * The virtual address is the va for the first pte.
1585 * The pmap must be locked.
1586 * If the pmap is not the kernel pmap, the range must lie
1587 * entirely within one pte-page. This is NOT checked.
1588 * Assumes that the pte-page exists.
1594 vm_map_offset_t vaddr
,
1598 register pt_entry_t
*cpte
;
1599 int num_removed
, num_unwired
;
1606 for (cpte
= spte
; cpte
< epte
;
1607 cpte
++, vaddr
+= PAGE_SIZE
) {
1609 pa
= pte_to_pa(*cpte
);
1616 if (!valid_page(i386_btop(pa
))) {
1619 * Outside range of managed physical memory.
1620 * Just remove the mappings.
1622 register pt_entry_t
*lpte
= cpte
;
1624 pmap_store_pte(lpte
, 0);
1633 * Get the modify and reference bits.
1636 register pt_entry_t
*lpte
;
1639 pmap_phys_attributes
[pai
] |=
1640 *lpte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1641 pmap_store_pte(lpte
, 0);
1646 * Remove the mapping from the pvlist for
1647 * this physical page.
1650 register pv_entry_t pv_h
, prev
, cur
;
1652 pv_h
= pai_to_pvh(pai
);
1653 if (pv_h
->pmap
== PMAP_NULL
) {
1654 panic("pmap_remove: null pv_list!");
1656 if (pv_h
->va
== vaddr
&& pv_h
->pmap
== pmap
) {
1658 * Header is the pv_entry. Copy the next one
1659 * to header and free the next one (we cannot
1663 if (cur
!= PV_ENTRY_NULL
) {
1668 pv_h
->pmap
= PMAP_NULL
;
1675 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
1676 panic("pmap-remove: mapping not in pv_list!");
1678 } while (cur
->va
!= vaddr
|| cur
->pmap
!= pmap
);
1679 prev
->next
= cur
->next
;
1689 assert(pmap
->stats
.resident_count
>= num_removed
);
1690 pmap
->stats
.resident_count
-= num_removed
;
1691 assert(pmap
->stats
.wired_count
>= num_unwired
);
1692 pmap
->stats
.wired_count
-= num_unwired
;
1696 * Remove phys addr if mapped in specified map
1700 pmap_remove_some_phys(
1701 __unused pmap_t map
,
1702 __unused ppnum_t pn
)
1705 /* Implement to support working set code */
1710 * Remove the given range of addresses
1711 * from the specified map.
1713 * It is assumed that the start and end are properly
1714 * rounded to the hardware page size.
1725 register pt_entry_t
*pde
;
1726 register pt_entry_t
*spte
, *epte
;
1730 if (map
== PMAP_NULL
|| s64
== e64
)
1733 PMAP_READ_LOCK(map
, spl
);
1738 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
-1);
1741 pde
= pmap_pde(map
, s64
);
1742 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1743 spte
= (pt_entry_t
*)pmap_pte(map
, (s64
& ~(pde_mapped_size
-1)));
1744 spte
= &spte
[ptenum(s64
)];
1745 epte
= &spte
[intel_btop(l64
-s64
)];
1746 pmap_remove_range(map
, s64
, spte
, epte
);
1751 PMAP_UPDATE_TLBS(map
, orig_s64
, e64
);
1753 PMAP_READ_UNLOCK(map
, spl
);
1757 * Routine: pmap_page_protect
1760 * Lower the permission for all mappings to a given
1768 pv_entry_t pv_h
, prev
;
1769 register pv_entry_t pv_e
;
1770 register pt_entry_t
*pte
;
1772 register pmap_t pmap
;
1777 assert(pn
!= vm_page_fictitious_addr
);
1779 if (!valid_page(pn
)) {
1781 * Not a managed page.
1787 * Determine the new protection.
1791 case VM_PROT_READ
|VM_PROT_EXECUTE
:
1795 return; /* nothing to do */
1800 phys
= (pmap_paddr_t
)i386_ptob(pn
);
1801 pai
= pa_index(phys
);
1802 pv_h
= pai_to_pvh(pai
);
1806 * Lock the pmap system first, since we will be changing
1809 PMAP_WRITE_LOCK(spl
);
1812 * Walk down PV list, changing or removing all mappings.
1813 * We do not have to lock the pv_list because we have
1814 * the entire pmap system locked.
1816 if (pv_h
->pmap
!= PMAP_NULL
) {
1821 register vm_map_offset_t vaddr
;
1825 * Lock the pmap to block pmap_extract and similar routines.
1827 simple_lock(&pmap
->lock
);
1830 pte
= pmap_pte(pmap
, vaddr
);
1832 kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n",pmap
, pn
, vaddr
);
1833 panic("pmap_page_protect");
1836 * Consistency checks.
1838 /* assert(*pte & INTEL_PTE_VALID); XXX */
1839 /* assert(pte_to_phys(*pte) == phys); */
1843 * Remove the mapping if new protection is NONE
1844 * or if write-protecting a kernel mapping.
1846 if (remove
|| pmap
== kernel_pmap
) {
1848 * Remove the mapping, collecting any modify bits.
1850 pmap_store_pte(pte
, *pte
& ~INTEL_PTE_VALID
);
1852 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1854 pmap_phys_attributes
[pai
] |= *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1856 pmap_store_pte(pte
, 0);
1859 //XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1);
1860 pmap
->stats
.resident_count
--;
1863 * Remove the pv_entry.
1867 * Fix up head later.
1869 pv_h
->pmap
= PMAP_NULL
;
1873 * Delete this entry.
1875 prev
->next
= pv_e
->next
;
1882 pmap_store_pte(pte
, *pte
& ~INTEL_PTE_WRITE
);
1884 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1891 simple_unlock(&pmap
->lock
);
1893 } while ((pv_e
= prev
->next
) != PV_ENTRY_NULL
);
1896 * If pv_head mapping was removed, fix it up.
1898 if (pv_h
->pmap
== PMAP_NULL
) {
1901 if (pv_e
!= PV_ENTRY_NULL
) {
1907 PMAP_WRITE_UNLOCK(spl
);
1915 * Disconnect all mappings for this page and return reference and change status
1916 * in generic format.
1919 unsigned int pmap_disconnect(
1922 pmap_page_protect(pa
, 0); /* disconnect the page */
1923 return (pmap_get_refmod(pa
)); /* return ref/chg status */
1927 * Set the physical protection on the
1928 * specified range of this map as requested.
1929 * Will not increase permissions.
1934 vm_map_offset_t sva
,
1935 vm_map_offset_t eva
,
1938 register pt_entry_t
*pde
;
1939 register pt_entry_t
*spte
, *epte
;
1940 vm_map_offset_t lva
;
1941 vm_map_offset_t orig_sva
;
1945 if (map
== PMAP_NULL
)
1948 if (prot
== VM_PROT_NONE
) {
1949 pmap_remove(map
, sva
, eva
);
1953 if ( (prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !map
->nx_enabled
)
1959 simple_lock(&map
->lock
);
1963 lva
= (sva
+ pde_mapped_size
) & ~(pde_mapped_size
-1);
1966 pde
= pmap_pde(map
, sva
);
1967 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1968 spte
= (pt_entry_t
*)pmap_pte(map
, (sva
& ~(pde_mapped_size
-1)));
1969 spte
= &spte
[ptenum(sva
)];
1970 epte
= &spte
[intel_btop(lva
-sva
)];
1972 while (spte
< epte
) {
1973 if (*spte
& INTEL_PTE_VALID
) {
1975 if (prot
& VM_PROT_WRITE
)
1976 pmap_store_pte(spte
, *spte
| INTEL_PTE_WRITE
);
1978 pmap_store_pte(spte
, *spte
& ~INTEL_PTE_WRITE
);
1981 pmap_store_pte(spte
, *spte
| INTEL_PTE_NX
);
1983 pmap_store_pte(spte
, *spte
& ~INTEL_PTE_NX
);
1992 PMAP_UPDATE_TLBS(map
, orig_sva
, eva
);
1994 simple_unlock(&map
->lock
);
1998 /* Map a (possibly) autogenned block */
2007 __unused
unsigned int flags
)
2011 for (page
= 0; page
< size
; page
++) {
2012 pmap_enter(pmap
, va
, pa
, prot
, attr
, TRUE
);
2020 * Insert the given physical page (p) at
2021 * the specified virtual address (v) in the
2022 * target physical map with the protection requested.
2024 * If specified, the page will be wired down, meaning
2025 * that the related pte cannot be reclaimed.
2027 * NB: This is the only routine which MAY NOT lazy-evaluate
2028 * or lose information. That is, this routine must actually
2029 * insert this page into the given map NOW.
2033 register pmap_t pmap
,
2034 vm_map_offset_t vaddr
,
2040 register pt_entry_t
*pte
;
2041 register pv_entry_t pv_h
;
2044 pt_entry_t
template;
2046 pmap_paddr_t old_pa
;
2047 pmap_paddr_t pa
= (pmap_paddr_t
)i386_ptob(pn
);
2048 boolean_t need_tlbflush
= FALSE
;
2051 XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n",
2056 assert(pn
!= vm_page_fictitious_addr
);
2058 printf("pmap(%qx, %x)\n", vaddr
, pn
);
2059 if (pmap
== PMAP_NULL
)
2062 if ( (prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
2068 * Must allocate a new pvlist entry while we're unlocked;
2069 * zalloc may cause pageout (which will lock the pmap system).
2070 * If we determine we need a pvlist entry, we will unlock
2071 * and allocate one. Then we will retry, throughing away
2072 * the allocated entry later (if we no longer need it).
2074 pv_e
= PV_ENTRY_NULL
;
2076 PMAP_READ_LOCK(pmap
, spl
);
2079 * Expand pmap to include this pte. Assume that
2080 * pmap is always expanded to include enough hardware
2081 * pages to map one VM page.
2084 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
2086 * Must unlock to expand the pmap.
2088 PMAP_READ_UNLOCK(pmap
, spl
);
2090 pmap_expand(pmap
, vaddr
); /* going to grow pde level page(s) */
2092 PMAP_READ_LOCK(pmap
, spl
);
2095 * Special case if the physical page is already mapped
2098 old_pa
= pte_to_pa(*pte
);
2101 * May be changing its wired attribute or protection
2104 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2106 if(VM_MEM_NOT_CACHEABLE
== (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
))) {
2107 if(!(flags
& VM_MEM_GUARDED
))
2108 template |= INTEL_PTE_PTA
;
2109 template |= INTEL_PTE_NCACHE
;
2112 if (pmap
!= kernel_pmap
)
2113 template |= INTEL_PTE_USER
;
2114 if (prot
& VM_PROT_WRITE
)
2115 template |= INTEL_PTE_WRITE
;
2118 template |= INTEL_PTE_NX
;
2121 template |= INTEL_PTE_WIRED
;
2123 pmap
->stats
.wired_count
++;
2126 if (iswired(*pte
)) {
2127 assert(pmap
->stats
.wired_count
>= 1);
2128 pmap
->stats
.wired_count
--;
2132 if (*pte
& INTEL_PTE_MOD
)
2133 template |= INTEL_PTE_MOD
;
2135 pmap_store_pte(pte
, template);
2138 need_tlbflush
= TRUE
;
2143 * Outline of code from here:
2144 * 1) If va was mapped, update TLBs, remove the mapping
2145 * and remove old pvlist entry.
2146 * 2) Add pvlist entry for new mapping
2147 * 3) Enter new mapping.
2149 * SHARING FAULTS IS HORRIBLY BROKEN
2150 * SHARING_FAULTS complicates this slightly in that it cannot
2151 * replace the mapping, but must remove it (because adding the
2152 * pvlist entry for the new mapping may remove others), and
2153 * hence always enters the new mapping at step 3)
2155 * If the old physical page is not managed step 1) is skipped
2156 * (except for updating the TLBs), and the mapping is
2157 * overwritten at step 3). If the new physical page is not
2158 * managed, step 2) is skipped.
2161 if (old_pa
!= (pmap_paddr_t
) 0) {
2164 * Don't do anything to pages outside valid memory here.
2165 * Instead convince the code that enters a new mapping
2166 * to overwrite the old one.
2169 if (valid_page(i386_btop(old_pa
))) {
2171 pai
= pa_index(old_pa
);
2174 assert(pmap
->stats
.resident_count
>= 1);
2175 pmap
->stats
.resident_count
--;
2176 if (iswired(*pte
)) {
2177 assert(pmap
->stats
.wired_count
>= 1);
2178 pmap
->stats
.wired_count
--;
2181 pmap_phys_attributes
[pai
] |=
2182 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
2184 pmap_store_pte(pte
, 0);
2186 * Remove the mapping from the pvlist for
2187 * this physical page.
2190 register pv_entry_t prev
, cur
;
2192 pv_h
= pai_to_pvh(pai
);
2193 if (pv_h
->pmap
== PMAP_NULL
) {
2194 panic("pmap_enter: null pv_list!");
2196 if (pv_h
->va
== vaddr
&& pv_h
->pmap
== pmap
) {
2198 * Header is the pv_entry. Copy the next one
2199 * to header and free the next one (we cannot
2203 if (cur
!= PV_ENTRY_NULL
) {
2208 pv_h
->pmap
= PMAP_NULL
;
2215 if ((cur
= prev
->next
) == PV_ENTRY_NULL
) {
2216 panic("pmap_enter: mapping not in pv_list!");
2218 } while (cur
->va
!= vaddr
|| cur
->pmap
!= pmap
);
2219 prev
->next
= cur
->next
;
2228 * old_pa is not managed. Pretend it's zero so code
2229 * at Step 3) will enter new mapping (overwriting old
2230 * one). Do removal part of accounting.
2232 old_pa
= (pmap_paddr_t
) 0;
2234 if (iswired(*pte
)) {
2235 assert(pmap
->stats
.wired_count
>= 1);
2236 pmap
->stats
.wired_count
--;
2239 need_tlbflush
= TRUE
;
2243 if (valid_page(i386_btop(pa
))) {
2246 * Step 2) Enter the mapping in the PV list for this
2253 #if SHARING_FAULTS /* this is horribly broken , do not enable */
2256 * We can return here from the sharing fault code below
2257 * in case we removed the only entry on the pv list and thus
2258 * must enter the new one in the list header.
2260 #endif /* SHARING_FAULTS */
2262 pv_h
= pai_to_pvh(pai
);
2264 if (pv_h
->pmap
== PMAP_NULL
) {
2270 pv_h
->next
= PV_ENTRY_NULL
;
2276 * check that this mapping is not already there
2277 * or there is no alias for this mapping in the same map
2279 pv_entry_t e
= pv_h
;
2280 while (e
!= PV_ENTRY_NULL
) {
2281 if (e
->pmap
== pmap
&& e
->va
== vaddr
)
2282 panic("pmap_enter: already in pv_list");
2287 #if SHARING_FAULTS /* broken, do not enable */
2290 * do sharing faults.
2291 * if we find an entry on this pv list in the same address
2292 * space, remove it. we know there will not be more
2295 pv_entry_t e
= pv_h
;
2298 while (e
!= PV_ENTRY_NULL
) {
2299 if (e
->pmap
== pmap
) {
2301 * Remove it, drop pv list lock first.
2305 opte
= pmap_pte(pmap
, e
->va
);
2306 assert(opte
!= PT_ENTRY_NULL
);
2308 * Invalidate the translation buffer,
2309 * then remove the mapping.
2311 pmap_remove_range(pmap
, e
->va
, opte
,
2314 PMAP_UPDATE_TLBS(pmap
, e
->va
, e
->va
+ PAGE_SIZE
);
2317 * We could have remove the head entry,
2318 * so there could be no more entries
2319 * and so we have to use the pv head entry.
2320 * so, go back to the top and try the entry
2329 * check that this mapping is not already there
2332 while (e
!= PV_ENTRY_NULL
) {
2333 if (e
->pmap
== pmap
)
2334 panic("pmap_enter: alias in pv_list");
2338 #endif /* SHARING_FAULTS */
2342 * check for aliases within the same address space.
2344 pv_entry_t e
= pv_h
;
2345 vm_offset_t rpc
= get_rpc();
2347 while (e
!= PV_ENTRY_NULL
) {
2348 if (e
->pmap
== pmap
) {
2350 * log this entry in the alias ring buffer
2351 * if it's not there already.
2353 struct pmap_alias
*pma
;
2357 for (ii
= 0; ii
< pmap_alias_index
; ii
++) {
2358 if (pmap_aliasbuf
[ii
].rpc
== rpc
) {
2359 /* found it in the log already */
2365 pma
= &pmap_aliasbuf
[pmap_alias_index
];
2369 pma
->cookie
= PMAP_ALIAS_COOKIE
;
2370 if (++pmap_alias_index
>= PMAP_ALIAS_MAX
)
2371 panic("pmap_enter: exhausted alias log");
2377 #endif /* DEBUG_ALIAS */
2379 * Add new pv_entry after header.
2381 if (pv_e
== PV_ENTRY_NULL
) {
2383 if (pv_e
== PV_ENTRY_NULL
) {
2384 panic("pmap no pv_e's");
2389 pv_e
->next
= pv_h
->next
;
2392 * Remember that we used the pvlist entry.
2394 pv_e
= PV_ENTRY_NULL
;
2399 * only count the mapping
2400 * for 'managed memory'
2402 pmap
->stats
.resident_count
++;
2406 * Step 3) Enter the mapping.
2411 * Build a template to speed up entering -
2412 * only the pfn changes.
2414 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2416 if(flags
& VM_MEM_NOT_CACHEABLE
) {
2417 if(!(flags
& VM_MEM_GUARDED
))
2418 template |= INTEL_PTE_PTA
;
2419 template |= INTEL_PTE_NCACHE
;
2422 if (pmap
!= kernel_pmap
)
2423 template |= INTEL_PTE_USER
;
2424 if (prot
& VM_PROT_WRITE
)
2425 template |= INTEL_PTE_WRITE
;
2428 template |= INTEL_PTE_NX
;
2431 template |= INTEL_PTE_WIRED
;
2432 pmap
->stats
.wired_count
++;
2434 pmap_store_pte(pte
, template);
2437 if (need_tlbflush
== TRUE
)
2438 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
2440 if (pv_e
!= PV_ENTRY_NULL
) {
2444 PMAP_READ_UNLOCK(pmap
, spl
);
2448 * Routine: pmap_change_wiring
2449 * Function: Change the wiring attribute for a map/virtual-address
2451 * In/out conditions:
2452 * The mapping must already exist in the pmap.
2456 register pmap_t map
,
2457 vm_map_offset_t vaddr
,
2460 register pt_entry_t
*pte
;
2465 * We must grab the pmap system lock because we may
2466 * change a pte_page queue.
2468 PMAP_READ_LOCK(map
, spl
);
2470 if ((pte
= pmap_pte(map
, vaddr
)) == PT_ENTRY_NULL
)
2471 panic("pmap_change_wiring: pte missing");
2473 if (wired
&& !iswired(*pte
)) {
2475 * wiring down mapping
2477 map
->stats
.wired_count
++;
2478 pmap_store_pte(pte
, *pte
| INTEL_PTE_WIRED
);
2481 else if (!wired
&& iswired(*pte
)) {
2485 assert(map
->stats
.wired_count
>= 1);
2486 map
->stats
.wired_count
--;
2487 pmap_store_pte(pte
, *pte
& ~INTEL_PTE_WIRED
);
2491 PMAP_READ_UNLOCK(map
, spl
);
2500 pmap_find_phys(pmap_t pmap
, addr64_t va
)
2505 mp_disable_preemption();
2507 ptp
= pmap_pte(pmap
, va
);
2508 if (PT_ENTRY_NULL
== ptp
) {
2511 ppn
= (ppnum_t
) i386_btop(pte_to_pa(*ptp
));
2513 mp_enable_preemption();
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *	Change to shim for backwards compatibility but will not
 *	work for 64 bit systems.  Some old drivers that we cannot
 *	change need this.
 */
vm_offset_t
pmap_extract(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr)
{
	ppnum_t		ppn;
	vm_offset_t	paddr;

	paddr = (vm_offset_t)0;
	ppn = pmap_find_phys(pmap, vaddr);
	if (ppn != 0)
		paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK);

	return (paddr);
}
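
/*
 * Illustrative use only (not part of the original source): a 32-bit caller
 * can look up the physical address behind a kernel virtual address with
 *
 *	vm_offset_t pa = pmap_extract(kernel_pmap, vaddr);
 *
 * Per the comment above this is only a compatibility shim; new code should
 * prefer pmap_find_phys(), which returns a page number and is safe for
 * 64-bit physical addresses.
 */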
void
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	pml4_entry_t		*pml4p;

	if (kernel_pmap == map) panic("expand kernel pml4");

	pml4p = pmap64_pml4(map, vaddr);
	if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pa = i386_ptob(m->phys_page);
	i = pml4idx(map, vaddr);

	vm_object_lock(map->pm_obj_pml4);

	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
		kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj_pml4, vaddr, i);
	}
	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj_pml4);
	inuse_ptepages_count++;
	map->stats.resident_count++;
	map->stats.wired_count++;

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj_pml4);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		map->stats.resident_count--;
		map->stats.wired_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj_pml4);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
void
pmap_expand_pdpt(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	pdpt_entry_t		*pdptp;

	if (kernel_pmap == map) panic("expand kernel pdpt");

	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
	}

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pa = i386_ptob(m->phys_page);
	i = pdptidx(map, vaddr);

	vm_object_lock(map->pm_obj_pdpt);

	if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
		kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj_pdpt, vaddr, i);
	}
	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj_pdpt);
	inuse_ptepages_count++;
	map->stats.resident_count++;
	map->stats.wired_count++;

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj_pdpt);
		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;
		map->stats.resident_count--;
		map->stats.wired_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj_pdpt);
		return;
	}

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */

	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

	pmap_store_pte(pdptp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
void
pmap_expand(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;

	/*
	 * if not the kernel map (while we are still compat kernel mode)
	 * and we are 64 bit, propagate expand upwards
	 */
	if (cpu_64bit && (map != kernel_pmap)) {
		while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
			pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
		}
	} else {
		pdp = pmap_pde(map, vaddr);
	}

	/*
	 *	Allocate a VM page for the pde entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pa = i386_ptob(m->phys_page);
	i = pdeidx(map, vaddr);

	vm_object_lock(map->pm_obj);

	if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
		kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
			map, map->pm_obj, vaddr, i);
	}
	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);

	vm_page_lock_queues();
	vm_page_wire(m);
	inuse_ptepages_count++;

	vm_page_unlock_queues();
	vm_object_unlock(map->pm_obj);

	PMAP_READ_LOCK(map, spl);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, spl);
		vm_object_lock(map->pm_obj);

		vm_page_lock_queues();
		vm_page_free(m);
		inuse_ptepages_count--;

		vm_page_unlock_queues();
		vm_object_unlock(map->pm_obj);
		return;
	}

	pdp = pmap_pde(map, vaddr); /* refetch while locked */

	/*
	 *	Set the page directory entry for this page table.
	 *	If we have allocated more than one hardware page,
	 *	set several page directory entries.
	 */
	pmap_store_pte(pdp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_READ_UNLOCK(map, spl);

	return;
}
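
/*
 * Illustrative use only (not part of the original source): as the comment
 * above requires, callers invoke pmap_expand() in a retry loop with the
 * pmap unlocked, e.g.
 *
 *	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL)
 *		pmap_expand(map, vaddr);
 *
 * The loop terminates because page tables are never shrunk; this is the
 * same pattern used by pmap_cpu_alloc() and pmap_map_sharedpage() below.
 */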
/*
 * pmap_sync_page_data_phys(ppnum_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 * Not required in i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;
	spl_t			spl;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_READ_LOCK(p, spl);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++)
	{
	   if (*pdp & INTEL_PTE_VALID) {
	      if (*pdp & INTEL_PTE_REF) {
		pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
	      } else {
		ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
		eptp = ptp + NPTEPG;

		/*
		 * If the pte page has any wired mappings, we cannot
		 * free it.
		 */
		wired = 0;
		{
		    register pt_entry_t *ptep;
		    for (ptep = ptp; ptep < eptp; ptep++) {
			if (iswired(*ptep)) {
			    wired = 1;
			    break;
			}
		    }
		}
		if (!wired) {
		    /*
		     * Remove the virtual addresses mapped by this pte page.
		     */
		    pmap_remove_range(p,
				pdetova(pdp - (pt_entry_t *)p->dirbase),
				ptp,
				eptp);

		    /*
		     * Invalidate the page directory pointer.
		     */
		    pmap_store_pte(pdp, 0x0);

		    PMAP_READ_UNLOCK(p, spl);

		    /*
		     * And free the pte page itself.
		     */
		    {
			register vm_page_t m;

			vm_object_lock(p->pm_obj);
			m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
			if (m == VM_PAGE_NULL)
			    panic("pmap_collect: pte page not in object");
			vm_page_lock_queues();
			vm_page_free(m);
			inuse_ptepages_count--;
			vm_page_unlock_queues();
			vm_object_unlock(p->pm_obj);
		    }

		    PMAP_READ_LOCK(p, spl);
		}
	      }
	   }
	}
	PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);

	PMAP_READ_UNLOCK(p, spl);
	return;
}
void
pmap_copy_page(src, dst)
	ppnum_t src;
	ppnum_t dst;
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}

/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t			pmap,
	__unused vm_map_offset_t	start_addr,
	__unused vm_map_offset_t	end_addr,
	__unused boolean_t		pageable)
{
#ifdef	lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif	/* lint */
}
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t	pn,
	int	bits)
{
	register pv_entry_t	pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	/*
	 *	Lock the pmap system first, since we will be changing
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);
	phys = i386_ptob(pn);
	pai = pa_index(phys);
	pv_h = pai_to_pvh(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

			pmap = pv_e->pmap;
			/*
			 * Lock the pmap to block pmap_extract and similar routines.
			 */
			simple_lock(&pmap->lock);

			{
				register vm_map_offset_t va;

				va = pv_e->va;
				pte = pmap_pte(pmap, va);

				/*
				 * Consistency checks.
				 */
				assert(*pte & INTEL_PTE_VALID);
				/* assert(pte_to_phys(*pte) == phys); */

				/*
				 * Clear modify or reference bits.
				 */
				pmap_store_pte(pte, *pte & ~bits);
				PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
			}
			simple_unlock(&pmap->lock);
		}
	}

	pmap_phys_attributes[pai] &= ~bits;

	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Check specified attribute bits.
 */
boolean_t
phys_attribute_test(
	ppnum_t	pn,
	int	bits)
{
	register pv_entry_t	pv_h;
	register pv_entry_t	pv_e;
	register pt_entry_t	*pte;
	int			pai;
	register pmap_t		pmap;
	spl_t			spl;
	pmap_paddr_t		phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
		/*
		 *	Not a managed page.
		 */
		return (FALSE);
	}

	phys = i386_ptob(pn);
	pai = pa_index(phys);
	/*
	 * super fast check...  if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if (pmap_phys_attributes[pai] & bits)
		return (TRUE);
	pv_h = pai_to_pvh(pai);

	/*
	 *	Lock the pmap system first, since we will be checking
	 *	several pmaps.
	 */
	PMAP_WRITE_LOCK(spl);

	if (pmap_phys_attributes[pai] & bits) {
		PMAP_WRITE_UNLOCK(spl);
		return (TRUE);
	}

	/*
	 * Walk down PV list, checking all mappings.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {

			pmap = pv_e->pmap;
			/*
			 * Lock the pmap to block pmap_extract and similar routines.
			 */
			simple_lock(&pmap->lock);

			{
				register vm_map_offset_t va;

				va = pv_e->va;
				pte = pmap_pte(pmap, va);

				/*
				 * Consistency checks.
				 */
				assert(*pte & INTEL_PTE_VALID);
				/* assert(pte_to_phys(*pte) == phys); */

				/*
				 * Check modify or reference bits.
				 */
				if (*pte++ & bits) {
					simple_unlock(&pmap->lock);
					PMAP_WRITE_UNLOCK(spl);
					return (TRUE);
				}
			}
			simple_unlock(&pmap->lock);
		}
	}
	PMAP_WRITE_UNLOCK(spl);
	return (FALSE);
}
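
/*
 * Note (editorial, not in the original source): the routine above uses a
 * double-checked pattern -- the attribute array is tested once without the
 * lock (the "super fast check") and again under PMAP_WRITE_LOCK() before
 * the PV list walk, so callers such as pmap_is_modified() normally avoid
 * taking the pmap system lock once a bit has already been collected.
 */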
/*
 *	Set specified attribute bits.
 */
void
phys_attribute_set(
	ppnum_t	pn,
	int	bits)
{
	spl_t		spl;
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);
	if (!valid_page(pn)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	/*
	 * Lock the pmap system and set the requested bits in
	 * the phys attributes array.  Don't need to bother with
	 * ptes because the test routine looks here first.
	 */
	phys = i386_ptob(pn);
	PMAP_WRITE_LOCK(spl);
	pmap_phys_attributes[pa_index(phys)] |= bits;
	PMAP_WRITE_UNLOCK(spl);
}
/*
 *	Set the modify bit on the specified physical page.
 */
void pmap_set_modify(
	ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED));
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_REFERENCED));
}
/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pa)
{
	return (  ((phys_attribute_test(pa, PHYS_MODIFIED))?   VM_MEM_MODIFIED : 0)
		| ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pa, unsigned int mask)
{
	unsigned int	x86Mask;

	x86Mask = (   ((mask & VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
		    | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
	phys_attribute_clear(pa, x86Mask);
}
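
/*
 * Illustrative use only (not part of the original source): the VM layer can
 * clear both bits for a page in one call, e.g.
 *
 *	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
 *
 * which the routine above translates into PHYS_MODIFIED|PHYS_REFERENCED
 * before handing the work to phys_attribute_clear().
 */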
/*
 *	Set the modify bit on the specified range
 *	of this map as requested.
 *
 *	This optimization stands only if each time the dirty bit
 *	in vm_page_t is tested, it is also tested in the pmap.
 */
void
pmap_modify_pages(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva)
{
	spl_t			spl;
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_map_offset_t		lva;
	vm_map_offset_t		orig_sva;

	if (map == PMAP_NULL)
		return;

	PMAP_READ_LOCK(map, spl);

	orig_sva = sva;
	while (sva && sva < eva) {
	    lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
	    if (lva > eva)
		lva = eva;
	    pde = pmap_pde(map, sva);
	    if (pde && (*pde & INTEL_PTE_VALID)) {
	      spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
	      if (lva < eva) {
		spte = &spte[ptenum(sva)];
		epte = &spte[intel_btop(lva-sva)];
	      } else {
		epte = &spte[intel_btop(pde_mapped_size)];
		spte = &spte[ptenum(sva)];
	      }
	      while (spte < epte) {
		if (*spte & INTEL_PTE_VALID) {
		    pmap_store_pte(spte, *spte
					| INTEL_PTE_MOD
					| INTEL_PTE_WRITE);
		}
		spte++;
	      }
	    }
	    sva = lva;
	}
	PMAP_UPDATE_TLBS(map, orig_sva, eva);

	PMAP_READ_UNLOCK(map, spl);
}
void
invalidate_icache(__unused vm_offset_t	addr,
		  __unused unsigned	cnt,
		  __unused int		phys)
{
	return;
}

void
flush_dcache(__unused vm_offset_t	addr,
	     __unused unsigned		count,
	     __unused int		phys)
{
	return;
}
#if	MACH_KDB

/* show phys page mappings and attributes */

extern void	db_show_page(pmap_paddr_t pa);

void
db_show_page(pmap_paddr_t pa)
{
	pv_entry_t	pv_h;
	int		pai;
	char		attr;

	pai = pa_index(pa);
	pv_h = pai_to_pvh(pai);

	attr = pmap_phys_attributes[pai];
	printf("phys page %x ", pa);
	if (attr & PHYS_MODIFIED)
		printf("modified, ");
	if (attr & PHYS_REFERENCED)
		printf("referenced, ");
	if (pv_h->pmap || pv_h->next)
		printf(" mapped at\n");
	else
		printf(" not mapped\n");
	for (; pv_h; pv_h = pv_h->next)
		if (pv_h->pmap)
			printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
}

#endif /* MACH_KDB */
#if	MACH_KDB
void db_kvtophys(vm_offset_t);
void db_show_vaddrs(pt_entry_t  *);

/*
 *	print out the results of kvtophys(arg)
 */
void
db_kvtophys(vm_offset_t vaddr)
{
	db_printf("0x%qx", kvtophys(vaddr));
}

/*
 *	Walk the pages tables.
 */
void
db_show_vaddrs(
	pt_entry_t	*dirbase)
{
	pt_entry_t	*ptep, *pdep, tmp;
	unsigned int	x, y, pdecnt, ptecnt;

	if (dirbase == 0) {
		dirbase = kernel_pmap->dirbase;
	}
	if (dirbase == 0) {
		db_printf("need a dirbase...\n");
		return;
	}
	dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);

	db_printf("dirbase: 0x%x\n", dirbase);

	pdecnt = ptecnt = 0;
	pdep = &dirbase[0];
	for (y = 0; y < NPDEPG; y++, pdep++) {
		if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
			continue;
		}
		pdecnt++;
		ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
		db_printf("dir[%4d]: 0x%x\n", y, *pdep);
		for (x = 0; x < NPTEPG; x++, ptep++) {
			if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
				continue;
			}
			ptecnt++;
			db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
				x,
				*ptep,
				(y << 22) | (x << 12),
				*ptep & ~INTEL_OFFMASK);
		}
	}

	db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
}
#endif /* MACH_KDB */
#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */
/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
#if 0
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
#else
	return TRUE;
#endif
}

boolean_t
phys_page_exists(
		 ppnum_t pn)
{
	pmap_paddr_t	phys;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return (TRUE);
	phys = (pmap_paddr_t) i386_ptob(pn);
	if (!pmap_valid_page(pn))
		return (FALSE);

	return TRUE;
}
void
mapping_free_prime()
{
	int		i;
	pv_entry_t	pv_e;

	for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		PV_FREE(pv_e);
	}
}

void
mapping_adjust()
{
	pv_entry_t	pv_e;
	int		i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}
	/* XXX  rethink best way to do locking here */
	if (pv_free_count < PV_LOW_WATER_MARK) {
		for (i = 0; i < PV_ALLOC_CHUNK; i++) {
			pv_e = (pv_entry_t) zalloc(pv_list_zone);
			PV_FREE(pv_e);
		}
	}
	mappingrecurse = 0;
}
void
pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;

	for (i = 0; i < cnt; i++) {
		opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
		if (0 == opte) panic("kernel_commpage");
		pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
		pte &= ~INTEL_PTE_WRITE;  // ensure read only
		npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
		if (0 == npte) panic("user_commpage");
		pmap_store_pte(npte, pte);
		kernel_commpage += INTEL_PGBYTES;
		user_commpage += INTEL_PGBYTES;
	}
}
#define PMAP_COMMPAGE64_CNT  (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];

void
pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
{
	spl_t		s;
	int		i;
	pt_entry_t	*kptep;

	s = splhigh();
	for (i = 0; i < cnt; i++) {
		kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
		if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte");
		pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
	}
	splx(s);
}
void
pmap_map_sharedpage(__unused task_t task, pmap_t p)
{
	pt_entry_t	*ptep;
	spl_t		s;
	int		i;

	if (!p->pm_64bit) return;
	/* setup high 64 bit commpage */
	s = splhigh();
	while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) {
		splx(s);
		pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS);
		s = splhigh();
	}

	for (i = 0; i < PMAP_COMMPAGE64_CNT; i++) {
		ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
		if (0 == ptep) panic("pmap_map_sharedpage");
		pmap_store_pte(ptep, pmap_commpage64_ptes[i]);
	}
	splx(s);
}
void
pmap_unmap_sharedpage(pmap_t pmap)
{
	pt_entry_t	*ptep;
	spl_t		s;
	int		i;

	if (!pmap->pm_64bit) return;
	s = splhigh();
	for (i = 0; i < PMAP_COMMPAGE64_CNT; i++) {
		ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
		if (ptep) pmap_store_pte(ptep, 0);
	}
	splx(s);
}
static cpu_pmap_t	cpu_pmap_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	vm_offset_t		address;
	vm_map_address_t	mapaddr;
	vm_map_entry_t		entry;
	pt_entry_t		*pte;

	if (is_boot_cpu) {
		cp = &cpu_pmap_master;
	} else {
		/*
		 * The per-cpu pmap data structure itself.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			return NULL;
		}
		bzero((void *)cp, sizeof(cpu_pmap_t));

		/*
		 * The temporary windows used for copy/zero - see loose_ends.c
		 */
		ret = vm_map_find_space(kernel_map,
		    &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() "
			       "vm_map_find_space ret=%d\n", ret);
			return NULL;
		}
		address = (vm_offset_t)mapaddr;

		for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
			while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
				pmap_expand(kernel_pmap, (vm_map_offset_t)address);

			cp->mapwindow[i].prv_CADDR = (caddr_t) address;
			cp->mapwindow[i].prv_CMAP = pte;
		}
		vm_map_unlock(kernel_map);
	}

	cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
	cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
	cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;

	return cp;
}
void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
		kfree((void *) cp, sizeof(cpu_pmap_t));
	}
}
mapwindow_t *
pmap_get_mapwindow(pt_entry_t pentry)
{
	mapwindow_t	*mp;
	int		i;
	boolean_t	istate;

	/*
	 * can be called from hardware interrupt context
	 * so we need to protect the lookup process
	 */
	istate = ml_set_interrupts_enabled(FALSE);

	/*
	 * Note: 0th map reserved for pmap_pte()
	 */
	for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
		mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];

		if (*mp->prv_CMAP == 0) {
			*mp->prv_CMAP = pentry;
			break;
		}
	}
	if (i >= PMAP_NWINDOWS)
		mp = NULL;
	(void) ml_set_interrupts_enabled(istate);

	return (mp);
}
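
/*
 * Illustrative use only (not part of the original source): a copy/zero
 * routine maps a physical page through one of the per-cpu windows roughly
 * as follows, assuming the matching release path simply clears prv_CMAP
 * when the access is complete:
 *
 *	mapwindow_t *mp = pmap_get_mapwindow(pa_to_pte(pa)
 *				| INTEL_PTE_VALID | INTEL_PTE_WRITE);
 *	bcopy(src, mp->prv_CADDR, PAGE_SIZE);	-- access via the window
 *	*mp->prv_CMAP = 0;			-- release (assumed)
 *
 * See the copy/zero helpers in loose_ends.c, as noted in pmap_cpu_alloc().
 */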
/*
 *	kern_return_t pmap_nest(grand, subord, vstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	vstart  = start of range in pmap to be inserted
 *	nstart  = start of range in nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *	On x86 this is very limited right now: it must be exactly 1 segment.
 *
 *	Note that we depend upon higher level VM locks to insure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde,*npde;
	pd_entry_t	tpde;
	unsigned int	i, need_flush;
	unsigned int	num_pde;
	spl_t		s;

	// do validity tests

	if (size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this for multiples of 256MB */
	if ((size >> 28) > 65536)  return KERN_INVALID_VALUE;	/* Max size we can nest is 16TB */
	if (vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
	if (nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}
	if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE);

	// prepopulate subord pmap pde's if necessary

	while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) {
		pmap_expand(subord, nstart);
	}

	PMAP_READ_LOCK(subord,s);
	nvaddr = (vm_map_offset_t)nstart;
	need_flush = 0;
	num_pde = size >> PDESHIFT;

	for (i=0;i<num_pde;i++) {
		npde = pmap_pde(subord, nvaddr);
		if ((0 == npde) || (*npde++ & INTEL_PTE_VALID) == 0) {
			PMAP_READ_UNLOCK(subord,s);
			pmap_expand(subord, nvaddr); // pmap_expand handles races
			PMAP_READ_LOCK(subord,s);
			need_flush++;
		}
		nvaddr += NBPDE;
	}

	if (need_flush) {
		nvaddr = (vm_map_offset_t)nstart;
		PMAP_UPDATE_TLBS(subord, nvaddr, nvaddr + (1 << 28) -1 );
	}
	PMAP_READ_UNLOCK(subord,s);

	// copy pde's from subord pmap into grand pmap

	while (PD_ENTRY_NULL == (pde = pmap_pde(grand, vstart))) {
		pmap_expand(grand, vstart);
	}

	PMAP_READ_LOCK(grand,s);
	vaddr = (vm_map_offset_t)vstart;
	for (i=0;i<num_pde;i++,pde++) {
		npde = pmap_pde(subord, nstart);
		if (npde == 0) panic("pmap_nest: no npde, subord 0x%x nstart 0x%llx", subord, nstart);
		tpde = *npde;
		nstart += NBPDE;
		pde = pmap_pde(grand, vaddr);
		if (pde == 0) panic("pmap_nest: no pde, grand 0x%x vaddr 0x%llx", grand, vaddr);
		vaddr += NBPDE;
		pmap_store_pte(pde, tpde);
	}
	PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1 << 28) -1 );

	PMAP_READ_UNLOCK(grand,s);

	return KERN_SUCCESS;
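}

/*
 * Illustrative use only (not part of the original source): nesting a single
 * 256MB shared segment at the same address in both pmaps would look like
 *
 *	kr = pmap_nest(grand, subord, vstart, vstart, (uint64_t)1 << 28);
 *
 * Per the checks above, vstart/nstart must be 256MB aligned and the size
 * must currently be exactly one segment (1 << 28 bytes).
 */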
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand  = the pmap that the nested pmap will be removed from
 *	vaddr  = start of range in pmap to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 *	On the current PPC processors, this is limited to segment (256MB) aligned
 *	segment sized ranges.
 */
kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr) {
	pd_entry_t	*pde;
	unsigned int	i;
	unsigned int	num_pde;
	spl_t		s;

	PMAP_READ_LOCK(grand,s);

	// invalidate all pdes for segment at vaddr in pmap grand

	num_pde = (1<<28) >> PDESHIFT;

	for (i=0;i<num_pde;i++,pde++) {
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) panic("pmap_unnest: no pde, grand 0x%x vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		vaddr += NBPDE;
	}
	PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1<<28) -1 );

	PMAP_READ_UNLOCK(grand,s);

	return KERN_SUCCESS;						/* Bye, bye, butterfly... */
}
void
pmap_switch(pmap_t tpmap)
{
	spl_t	s;
	int	my_cpu;

	s = splhigh();		/* Make sure interruptions are disabled */
	my_cpu = cpu_number();

	set_dirbase(tpmap, my_cpu);

	splx(s);
}

/*
 * disable no-execute capability on
 * the specified pmap
 */
void pmap_disable_NX(pmap_t pmap) {
	pmap->nx_enabled = 0;
}
void
pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
		  vm_size_t *alloc_size, int *collectable, int *exhaustable)
{
	*count      = inuse_ptepages_count;
	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
	*max_size   = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
	*elem_size  = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
}
vm_offset_t
pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
{
	enum high_fixed_addresses	a;

	a = e + HIGH_CPU_END * cpu;
	return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
}

vm_offset_t
pmap_high_map_vaddr(enum high_cpu_types e)
{
	return pmap_cpu_high_map_vaddr(cpu_number(), e);
}

vm_offset_t
pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
{
	enum high_fixed_addresses	a;
	vm_offset_t			vaddr;

	a = e + HIGH_CPU_END * cpu_number();
	vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
	*(pte_unique_base + a) = pte;

	/* TLB flush for this page for this cpu */
	invlpg((uintptr_t)vaddr);

	return  vaddr;
}
/*
 * Called with pmap locked, we:
 *  - scan through per-cpu data to see which other cpus need to flush
 *  - send an IPI to each non-idle cpu to be flushed
 *  - wait for all to signal back that they are inactive or we see that
 *    they are in an interrupt handler or at a safe point
 *  - flush the local tlb if active for this pmap
 *  - return ... the caller will unlock the pmap
 */
void
pmap_flush_tlbs(pmap_t	pmap)
{
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpu_set		cpus_to_signal;
	unsigned int	my_cpu = cpu_number();
	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;

	assert(!ml_get_interrupts_enabled());

	/*
	 * Scan other cpus for matching active or task CR3.
	 * For idle cpus (with no active map) we mark them invalid but
	 * don't signal -- they'll check as they go busy.
	 * Note: for the kernel pmap we look for 64-bit shared address maps.
	 */
	cpus_to_signal = 0;
	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		if ((cpu_datap(cpu)->cpu_task_cr3   == pmap_cr3) ||
		    (cpu_datap(cpu)->cpu_active_cr3 == pmap_cr3) ||
		    ((pmap == kernel_pmap) &&
		     (!CPU_CR3_IS_ACTIVE(cpu) ||
		      cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
			__asm__ volatile("mfence");

			if (CPU_CR3_IS_ACTIVE(cpu)) {
				cpus_to_signal |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}

	if (cpus_to_signal) {
		KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0);

		deadline = mach_absolute_time() + LockTimeOut;
		/*
		 * Wait for those other cpus to acknowledge
		 */
		for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
			while ((cpus_to_signal & cpu_bit) != 0) {
				if (!cpu_datap(cpu)->cpu_running ||
				    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
				    !CPU_CR3_IS_ACTIVE(cpu)) {
					cpus_to_signal &= ~cpu_bit;
					break;
				}
				if (mach_absolute_time() > deadline)
					panic("pmap_flush_tlbs() "
					      "timeout pmap=%p cpus_to_signal=%p",
					      pmap, cpus_to_signal);
				cpu_pause();
			}
			if (cpus_to_signal == 0)
				break;
		}
		KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0);
	}

	/*
	 * Flush local tlb if required.
	 * We need this flush even if the pmap being changed
	 * is the user map... in case we do a copyin/out
	 * before returning to user mode.
	 */
	if (flush_self)
		flush_tlb();
}
void
process_pmap_updates(void)
{
	flush_tlb();

	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}

void
pmap_update_interrupt(void)
{
	KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0);

	assert(!ml_get_interrupts_enabled());

	process_pmap_updates();

	KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

unsigned int pmap_cache_attributes(ppnum_t pn) {

	if (!pmap_valid_page(pn))
		return (VM_WIMG_IO);

	return (VM_WIMG_COPYBACK);
}
4092 kprintf("pmap 0x%x\n",p
);
4094 kprintf(" pm_cr3 0x%llx\n",p
->pm_cr3
);
4095 kprintf(" pm_pml4 0x%x\n",p
->pm_pml4
);
4096 kprintf(" pm_pdpt 0x%x\n",p
->pm_pdpt
);
4098 kprintf(" pml4[0] 0x%llx\n",*p
->pm_pml4
);
4100 kprintf(" pdpt[%d] 0x%llx\n",i
, p
->pm_pdpt
[i
]);
4103 void pmap_dump_wrap(void)
4105 pmap_dump(current_cpu_datap()->cpu_active_thread
->task
->map
->pmap
);
void
dump_4GB_pdpt(pmap_t p)
{
	spl_t		spl;
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;
	pdpt_entry_t	*pml4p;

	spl = splhigh();
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(p, 0x0);
		spl = splhigh();
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	if (kern_pdptp == NULL)
		panic("kern_pdptp == NULL");
	kprintf("dump_4GB_pdpt(%p)\n"
		"kern_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"user_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		p, kern_pdptp, kvtophys(kern_pdptp),
		kern_pdptp+0, *(kern_pdptp+0),
		kern_pdptp+1, *(kern_pdptp+1),
		kern_pdptp+2, *(kern_pdptp+2),
		kern_pdptp+3, *(kern_pdptp+3),
		kern_pdptp+4, *(kern_pdptp+4),
		user_pdptp, kvtophys(user_pdptp),
		user_pdptp+0, *(user_pdptp+0),
		user_pdptp+1, *(user_pdptp+1),
		user_pdptp+2, *(user_pdptp+2),
		user_pdptp+3, *(user_pdptp+3),
		user_pdptp+4, *(user_pdptp+4));
	kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		p->pm_cr3, p->pm_hold, p->pm_pml4);
	pml4p = (pdpt_entry_t *)p->pm_hold;
	if (pml4p == NULL)
		panic("user pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p+0),
		pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
	kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
	pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
	if (pml4p == NULL)
		panic("kern pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p+0),
		pml4p+511, *(pml4p+511));
	splx(spl);
}

void dump_4GB_pdpt_thread(thread_t tp)
{
	dump_4GB_pdpt(tp->map->pmap);
}