/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include <mach_ldebug.h>

#include <libkern/OSAtomic.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/queue.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>
#include <x86_64/lowglobals.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/seg.h>
#include <i386/serial_io.h>
#include <i386/cpu_capabilities.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/tsc.h>
#include <i386/pmap_internal.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <vm/vm_protos.h>

#include <i386/mp_desc.h>
/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
#ifdef	DEBUGINTERRUPTS
#define pmap_intr_assert() {							\
	if (processor_avail_count > 1 && !ml_get_interrupts_enabled())		\
		panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);	\
}
#else
#define pmap_intr_assert()
#endif

#ifdef IWANTTODEBUG
#define POSTCODE_DELAY 1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */

boolean_t	pmap_trace = FALSE;

#define DBG(x...)	kprintf("DBG: " x)

boolean_t	no_shared_cr3 = DEBUG;		/* TRUE for DEBUG by default */
/*
 * Forward declarations for internal functions.
 */

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		pn,
			int		bits);

int		phys_attribute_test(
			ppnum_t		pn,
			int		bits);

void		phys_attribute_set(
			ppnum_t		pn,
			int		bits);

void		pmap_set_reference(
			ppnum_t		pn);

boolean_t	phys_page_exists(
			ppnum_t		pn);
int		nx_enabled = 1;			/* enable no-execute protection */
int		allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
int		allow_stack_exec = 0;		/* No apps may execute from the stack by default */

const boolean_t cpu_64bit  = TRUE;		/* Mais oui! */
/*
 * when spinning through pmap_remove
 * ensure that we don't spend too much
 * time with preemption disabled.
 * I'm setting the current threshold
 * to 20us
 */
#define MAX_PREEMPTION_LATENCY_NS 20000

uint64_t max_preemption_latency_tsc = 0;

/*
 *	Private data structures.
 */
/*
 *	For each vm_page_t, there is a list of all currently
 *	valid virtual mappings of that page.  An entry is
 *	a pv_rooted_entry_t; the list is the pv_table.
 *
 *	N.B.  with the new combo rooted/hashed scheme it is
 *	only possible to remove individual non-rooted entries
 *	if they are found via the hashed chains as there is no
 *	way to unlink the singly linked hashed entries if navigated to
 *	via the queue list off the rooted entries.  Think of it as
 *	hash/walk/pull, keeping track of the prev pointer while walking
 *	the singly linked hash list.  All of this is to save memory and
 *	keep both types of pv_entries as small as possible.
 */
/*

PV HASHING Changes - JK 1/2007

Pve's establish physical to virtual mappings.  These are used for aliasing of a
physical page to (potentially many) virtual addresses within pmaps.  In the
previous implementation the structure of the pv_entries (each 16 bytes in size) was

typedef struct pv_entry {
    struct pv_entry_t    next;

An initial array of these is created at boot time, one per physical page of
memory, indexed by the physical page number.  Additionally, a pool of entries
is created from a pv_zone to be used as needed by pmap_enter() when it is
creating new mappings.  Originally, we kept this pool around because the code
in pmap_enter() was unable to block if it needed an entry and none were
available - we'd panic.  Some time ago I restructured the pmap_enter() code
so that for user pmaps it can block while zalloc'ing a pv structure and restart,
removing a panic from the code (in the case of the kernel pmap we cannot block
and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
The pool has not been removed since there is a large performance gain keeping
freed pv's around for reuse and not suffering the overhead of zalloc for every
new pv we need.

As pmap_enter() created new mappings it linked the new pve's for them off the
fixed pv array for that ppn (off the next pointer).  These pve's are accessed
for several operations, one of them being address space teardown.  In that case,
we basically do this

	for (every page/pte in the space) {
		calc pve_ptr from the ppn in the pte
		for (every pv in the list for the ppn) {
			if (this pv is for this pmap/vaddr) {
				...

The problem arose when we were running, say 8000 (or even 2000) apache or
other processes and one or all terminate.  The list hanging off each pv array
entry could have thousands of entries.  We were continuously linearly searching
each of these lists as we stepped through the address space we were tearing
down.  Because of the locks we hold, likely taking a cache miss for each node,
and interrupt disabling for MP issues the system became completely unresponsive
for many seconds while we did this.

Realizing that pve's are accessed in two distinct ways (linearly running the
list by ppn for operations like pmap_page_protect and finding and
modifying/removing a single pve as part of pmap_enter processing) has led to
modifying the pve structures and databases.

There are now two types of pve structures.  A "rooted" structure which is
basically the original structure accessed in an array by ppn, and a "hashed"
structure accessed on a hash list via a hash of [pmap, vaddr].  These have been
designed with the two goals of minimizing wired memory and making the lookup of
a ppn faster.  Since a vast majority of pages in the system are not aliased
and hence represented by a single pv entry I've kept the rooted entry size as
small as possible because there is one of these dedicated for every physical
page of memory.  The hashed pve's are larger due to the addition of the hash
link and the ppn entry needed for matching while running the hash list to find
the entry we are looking for.  This way, only systems that have lots of
aliasing (like 2000+ httpd procs) will pay the extra memory price.  Both
structures have the same first three fields allowing some simplification in
the code.

They have these shapes

typedef struct pv_rooted_entry {
	...
} *pv_rooted_entry_t;

typedef struct pv_hashed_entry {
	...
	struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

The main flow difference is that the code is now aware of the rooted entry and
the hashed entries.  Code that runs the pv list still starts with the rooted
entry and then continues down the qlink onto the hashed entries.  Code that is
looking up a specific pv entry first checks the rooted entry and then hashes
and runs the hash list for the match.  The hash list lengths are much smaller
than the original pv lists that contained all aliases for the specific ppn.

*/
typedef struct pv_rooted_entry {
	/* first three entries must match pv_hashed_entry_t */
	queue_head_t	qlink;
	vm_map_offset_t	va;		/* virtual address for mapping */
	pmap_t		pmap;		/* pmap where mapping lies */
} *pv_rooted_entry_t;

#define PV_ROOTED_ENTRY_NULL	((pv_rooted_entry_t) 0)

pv_rooted_entry_t	pv_head_table;	/* array of entries, one per page */

typedef struct pv_hashed_entry {
	/* first three entries must match pv_rooted_entry_t */
	queue_head_t	qlink;
	vm_map_offset_t	va;
	pmap_t		pmap;
	ppnum_t		ppn;
	struct pv_hashed_entry *nexth;
} *pv_hashed_entry_t;

#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)

#define NPVHASH 4095   /* MUST BE 2^N - 1 */
pv_hashed_entry_t     *pv_hash_table;  /* hash lists */

uint32_t npvhash = 0;

//#define PV_DEBUG 1		/* uncomment to enable some PV debugging code */
#if PV_DEBUG
#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
#else
#define CHK_NPVHASH(x)
#endif

pv_hashed_entry_t	pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t	pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(,pv_hashed_free_list_lock)
decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
decl_simple_lock_data(,pv_hash_table_lock)

int			pv_hashed_free_count = 0;
int			pv_hashed_kern_free_count = 0;
#define PV_HASHED_LOW_WATER_MARK 5000
#define PV_HASHED_KERN_LOW_WATER_MARK 100
#define PV_HASHED_ALLOC_CHUNK 2000
#define PV_HASHED_KERN_ALLOC_CHUNK 50
thread_call_t		mapping_adjust_call;
static thread_call_data_t mapping_adjust_call_data;
uint32_t		mappingrecurse = 0;
#define PV_HASHED_ALLOC(pvh_e) {					\
	simple_lock(&pv_hashed_free_list_lock);				\
	if ((pvh_e = pv_hashed_free_list) != 0) {			\
		pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
		pv_hashed_free_count--;					\
		if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)	\
			if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
				thread_call_enter(mapping_adjust_call);	\
	}								\
	simple_unlock(&pv_hashed_free_list_lock);			\
}

#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {			\
	simple_lock(&pv_hashed_free_list_lock);				\
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;	\
	pv_hashed_free_list = pvh_eh;					\
	pv_hashed_free_count += pv_cnt;					\
	simple_unlock(&pv_hashed_free_list_lock);			\
}

#define PV_HASHED_KERN_ALLOC(pvh_e) {					\
	simple_lock(&pv_hashed_kern_free_list_lock);			\
	if ((pvh_e = pv_hashed_kern_free_list) != 0) {			\
		pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
		pv_hashed_kern_free_count--;				\
		if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\
			if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
				thread_call_enter(mapping_adjust_call);	\
	}								\
	simple_unlock(&pv_hashed_kern_free_list_lock);			\
}

#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {		\
	simple_lock(&pv_hashed_kern_free_list_lock);			\
	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;	\
	pv_hashed_kern_free_list = pvh_eh;				\
	pv_hashed_kern_free_count += pv_cnt;				\
	simple_unlock(&pv_hashed_kern_free_list_lock);			\
}
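
/*
 * Illustrative sketch (not part of the original code): the intended
 * alloc/free pairing for the hashed pv pools.  A caller grabs an entry
 * with PV_HASHED_ALLOC() and, if it turns out to be unneeded, returns it
 * as a one-element chain (head == tail, count == 1) via
 * PV_HASHED_FREE_LIST().  The kernel-only variants behave the same way
 * against the kernel pool.
 */
#if 0	/* example only */
static void
pv_pool_usage_example(void)
{
	pv_hashed_entry_t	pvh_e;

	PV_HASHED_ALLOC(pvh_e);		/* may come back NULL if the pool is empty */
	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		/* ... use pvh_e, or hand it back unchanged: */
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
}
#endif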
zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */

static zone_t	pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)

char	*pv_hash_lock_table;
#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

static struct vm_object kptobj_object_store;
static struct vm_object kpml4obj_object_store;
static struct vm_object kpdptobj_object_store;

/*
 *	Index into pv_head table, its lock bits, and the modify/reference and managed bits
 */

#define pa_index(pa)		(i386_btop(pa))
#define ppn_to_pai(ppn)		((int)ppn)

#define pai_to_pvh(pai)		(&pv_head_table[pai])
#define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
#define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
static inline uint32_t
pvhashidx(pmap_t pmap, vm_offset_t va)
{
	return ((uint32_t)(uint64_t)pmap ^
		((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
	       npvhash;
}

#define pvhash(idx)		(&pv_hash_table[idx])

#define lock_hash_hash(hash)	bit_lock(hash, (void *)pv_hash_lock_table)
#define unlock_hash_hash(hash)	bit_unlock(hash, (void *)pv_hash_lock_table)
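
/*
 * Illustrative sketch (not part of the original code) of the two-step
 * lookup described in the PV HASHING comment above: the rooted entry for
 * a ppn is checked first, and only aliased mappings require hashing
 * [pmap, vaddr] and walking the singly linked nexth chain.  Locking is
 * omitted here for brevity.
 */
#if 0	/* example only */
static pv_hashed_entry_t
pv_lookup_example(pmap_t pmap, vm_map_offset_t va, ppnum_t ppn)
{
	pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_hashed_entry_t	pvh_e;

	if (pv_h->pmap == pmap && pv_h->va == va)
		return (pv_hashed_entry_t) pv_h;	/* unaliased: rooted entry */

	for (pvh_e = *pvhash(pvhashidx(pmap, va));
	     pvh_e != PV_HASHED_ENTRY_NULL;
	     pvh_e = pvh_e->nexth) {
		if (pvh_e->pmap == pmap && pvh_e->va == va && pvh_e->ppn == ppn)
			return pvh_e;			/* aliased: hashed entry */
	}
	return PV_HASHED_ENTRY_NULL;
}
#endif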
/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char		*pmap_phys_attributes;
unsigned int	last_managed_page = 0;
#define IS_MANAGED_PAGE(x)				\
	((unsigned int)(x) <= last_managed_page &&	\
	 (pmap_phys_attributes[x] & PHYS_MANAGED))
/*
 *	Physical page attributes.  Copy bits from PTE definition.
 */
#define	PHYS_MODIFIED	INTEL_PTE_MOD	/* page modified */
#define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
#define PHYS_MANAGED	INTEL_PTE_VALID /* page is managed */

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols: (changed 2/2007 JK)
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *
 *	The system wide pmap lock has been removed.  Now, paths take a lock
 *	on the pmap before changing its 'shape' and the reverse order lockers
 *	(coming in by phys ppn) take a lock on the corresponding pv and then
 *	retest to be sure nothing changed during the window before they locked
 *	and can then run up/down the pv lists holding the list lock.  This also
 *	lets the pmap layer run (nearly completely) interrupt enabled, unlike
 *	previously.
 */

#define LOCK_PVH(index)	{		\
	mp_disable_preemption();	\
	lock_pvh_pai(index);		\
}

#define UNLOCK_PVH(index) {		\
	unlock_pvh_pai(index);		\
	mp_enable_preemption();		\
}

#define LOCK_PV_HASH(hash)	lock_hash_hash(hash)
#define UNLOCK_PV_HASH(hash)	unlock_hash_hash(hash)
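
/*
 * Illustrative sketch (not part of the original code) of the reverse-order
 * locking pattern described above: a physical-side path takes the pv lock
 * for the page first, then re-checks that the rooted entry still describes
 * a live mapping before walking the list.  The pmap itself is not locked
 * here; names and structure are only meant to mirror the protocol.
 */
#if 0	/* example only */
static void
pv_reverse_lock_example(ppnum_t pn)
{
	int			pai  = ppn_to_pai(pn);
	pv_rooted_entry_t	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);
	if (pv_h->pmap != PMAP_NULL) {
		/* the window has been re-validated; walk pv_h->qlink here */
	}
	UNLOCK_PVH(pai);
}
#endif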
unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;

pd_entry_t	high_shared_pde;
pd_entry_t	commpage64_pde;

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;

addr64_t	kernel64_cr3;

/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

pt_entry_t	*DMAP1, *DMAP2;
/*
 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
 * properly deals with the anchor.
 * must be called with the hash locked, does not unlock it
 */
static inline void
pmap_pvh_unlink(pv_hashed_entry_t pvh)
{
	pv_hashed_entry_t	curh;
	pv_hashed_entry_t	*pprevh;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh->pmap, pvh->va);

	pprevh = pvhash(pvhash_idx);
	if (NULL == *pprevh)
		panic("pvh_unlink null anchor"); /* JK DEBUG */
	curh = *pprevh;

	while (PV_HASHED_ENTRY_NULL != curh) {
		if (pvh == curh)
			break;
		pprevh = &curh->nexth;
		curh = curh->nexth;
	}
	if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
	*pprevh = pvh->nexth;
	return;
}
static inline void
pv_hash_add(pv_hashed_entry_t	pvh_e,
	    pv_rooted_entry_t	pv_h)
{
	pv_hashed_entry_t	*hashp;
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
	LOCK_PV_HASH(pvhash_idx);
	insque(&pvh_e->qlink, &pv_h->qlink);
	hashp = pvhash(pvhash_idx);
	if (NULL == hashp)
		panic("pv_hash_add(%p) null hash bucket", pvh_e);
	pvh_e->nexth = *hashp;
	*hashp = pvh_e;
	UNLOCK_PV_HASH(pvhash_idx);
}

static inline void
pv_hash_remove(pv_hashed_entry_t pvh_e)
{
	int			pvhash_idx;

	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
	LOCK_PV_HASH(pvhash_idx);
	remque(&pvh_e->qlink);
	pmap_pvh_unlink(pvh_e);
	UNLOCK_PV_HASH(pvhash_idx);
}
659 * Remove pv list entry.
660 * Called with pv_head_table entry locked.
661 * Returns pv entry to be freed (or NULL).
663 static inline pv_hashed_entry_t
664 pmap_pv_remove(pmap_t pmap
,
665 vm_map_offset_t vaddr
,
668 pv_hashed_entry_t pvh_e
;
669 pv_rooted_entry_t pv_h
;
670 pv_hashed_entry_t
*pprevh
;
674 pvh_e
= PV_HASHED_ENTRY_NULL
;
675 pv_h
= pai_to_pvh(ppn_to_pai(ppn
));
676 if (pv_h
->pmap
== PMAP_NULL
)
677 panic("pmap_pv_remove(%p,%llu,%u): null pv_list!",
680 if (pv_h
->va
== vaddr
&& pv_h
->pmap
== pmap
) {
682 * Header is the pv_rooted_entry.
683 * We can't free that. If there is a queued
684 * entry after this one we remove that
685 * from the ppn queue, we remove it from the hash chain
686 * and copy it to the rooted entry. Then free it instead.
688 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
689 if (pv_h
!= (pv_rooted_entry_t
) pvh_e
) {
			 * Entry queued to root, remove this from hash
			 * and install as new root.
695 pvhash_idx
= pvhashidx(pvh_e
->pmap
, pvh_e
->va
);
696 LOCK_PV_HASH(pvhash_idx
);
697 remque(&pvh_e
->qlink
);
698 pprevh
= pvhash(pvhash_idx
);
699 if (PV_HASHED_ENTRY_NULL
== *pprevh
) {
700 panic("pmap_pv_remove(%p,%llu,%u): "
701 "empty hash, removing rooted",
704 pmap_pvh_unlink(pvh_e
);
705 UNLOCK_PV_HASH(pvhash_idx
);
706 pv_h
->pmap
= pvh_e
->pmap
;
707 pv_h
->va
= pvh_e
->va
; /* dispose of pvh_e */
709 /* none queued after rooted */
710 pv_h
->pmap
= PMAP_NULL
;
711 pvh_e
= PV_HASHED_ENTRY_NULL
;
715 * not removing rooted pv. find it on hash chain, remove from
716 * ppn queue and hash chain and free it
719 pvhash_idx
= pvhashidx(pmap
, vaddr
);
720 LOCK_PV_HASH(pvhash_idx
);
721 pprevh
= pvhash(pvhash_idx
);
722 if (PV_HASHED_ENTRY_NULL
== *pprevh
) {
723 panic("pmap_pv_remove(%p,%llu,%u): empty hash",
727 pmap_pv_hashlist_walks
++;
729 while (PV_HASHED_ENTRY_NULL
!= pvh_e
) {
731 if (pvh_e
->pmap
== pmap
&&
732 pvh_e
->va
== vaddr
&&
735 pprevh
= &pvh_e
->nexth
;
736 pvh_e
= pvh_e
->nexth
;
738 if (PV_HASHED_ENTRY_NULL
== pvh_e
)
739 panic("pmap_pv_remove(%p,%llu,%u): pv not on hash",
741 pmap_pv_hashlist_cnts
+= pv_cnt
;
742 if (pmap_pv_hashlist_max
< pv_cnt
)
743 pmap_pv_hashlist_max
= pv_cnt
;
744 *pprevh
= pvh_e
->nexth
;
745 remque(&pvh_e
->qlink
);
746 UNLOCK_PV_HASH(pvhash_idx
);
/*
 * for legacy, returns the address of the pde entry.
 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
 * then returns the mapped address of the pde entry in that page
 */
pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t	*pde;

	if (m == kernel_pmap)
		pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
	else
		pde = pmap64_pde(m, v);

	return pde;
}

/*
 * the single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap. we allocate this page in kernel vm.
 * this returns the address of the requested pml4 entry in the top level page.
 */
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
}

/*
 * maps in the pml4 page, if any, containing the pdpt entry requested
 * and returns the address of the pdpt entry in that mapped page
 */
pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;

	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
	    (vaddr < 0xFFFF800000000000ULL)) {
		return (0);
	}

	pml4 = pmap64_pml4(pmap, vaddr);
	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
		newpf = *pml4 & PG_FRAME;
		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
	}
	return (NULL);
}

/*
 * maps in the pdpt page, if any, containing the pde entry requested
 * and returns the address of the pde entry in that mapped page
 */
pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;

	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
	    (vaddr < 0xFFFF800000000000ULL)) {
		return (0);
	}

	pdpt = pmap64_pdpt(pmap, vaddr);

	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
		newpf = *pdpt & PG_FRAME;
		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
			[(vaddr >> PDSHIFT) & (NPDPG-1)];
	}
	return (NULL);
}

/*
 * return address of mapped pte for vaddr va in pmap pmap.
 *
 * physically maps the pde page, if any, containing the pte in and returns
 * the address of the pte in that mapped page
 *
 * In case the pde maps a superpage, return the pde, which, in this case
 * is the actual page table entry.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;

	pde = pmap_pde(pmap, vaddr);

	if (pde && ((*pde & INTEL_PTE_VALID))) {
		if (*pde & INTEL_PTE_PS)
			return pde;
		newpf = *pde & PG_FRAME;
		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
			[i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
	}
	return (NULL);
}
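
/*
 * Illustrative sketch (not part of the original code): how a caller is
 * expected to use the walk above.  pmap_pte() maps in the page-table page
 * for vaddr (or returns the PDE itself for a superpage), so the physical
 * frame can be read directly from the returned entry.
 */
#if 0	/* example only */
static pmap_paddr_t
pmap_walk_example(pmap_t pmap, vm_map_offset_t vaddr)
{
	pt_entry_t *ptep = pmap_pte(pmap, vaddr);

	if (ptep == PT_ENTRY_NULL || !(*ptep & INTEL_PTE_VALID))
		return 0;		/* no translation present */
	return pte_to_pa(*ptep);	/* 4K frame (or 2MB frame if superpage) */
}
#endif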
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
			   (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return(virt);
}

/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	Sets no-cache, A, D.
 *	Otherwise like pmap_map.
 */
vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
			template |= INTEL_PTE_PTA;
	}
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);

		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return(virt);
}
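
/*
 * Illustrative usage sketch (not part of the original code): a boot-time
 * caller would typically hand pmap_map_bd() a device-register range and a
 * kernel virtual cursor, asking for an uncached read/write mapping.  The
 * physical addresses below are placeholders, not real hardware addresses.
 */
#if 0	/* example only */
	vm_offset_t	io_virt = virtual_avail;		/* hypothetical VA cursor */

	io_virt = pmap_map_bd(io_virt,
			      (vm_map_offset_t)0xFE000000ULL,	/* placeholder phys start */
			      (vm_map_offset_t)0xFE001000ULL,	/* placeholder phys end */
			      VM_PROT_READ | VM_PROT_WRITE,
			      VM_MEM_NOT_CACHEABLE);
#endif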
941 extern char *first_avail
;
942 extern vm_offset_t virtual_avail
, virtual_end
;
943 extern pmap_paddr_t avail_start
, avail_end
;
944 extern vm_offset_t sHIB
;
945 extern vm_offset_t eHIB
;
946 extern vm_offset_t stext
;
947 extern vm_offset_t etext
;
948 extern vm_offset_t sdata
;
954 * Here early in the life of a processor (from cpu_mode_init()).
955 * Ensure global page feature is disabled.
957 set_cr4(get_cr4() &~ CR4_PGE
);
960 * Initialize the per-cpu, TLB-related fields.
962 current_cpu_datap()->cpu_kernel_cr3
= kernel_pmap
->pm_cr3
;
963 current_cpu_datap()->cpu_active_cr3
= kernel_pmap
->pm_cr3
;
964 current_cpu_datap()->cpu_tlb_invalid
= FALSE
;
970 * Bootstrap the system enough to run with virtual memory.
971 * Map the kernel's code and data, and allocate the system page table.
972 * Called with mapping OFF. Page_size must already be set.
977 __unused vm_offset_t load_start
,
978 __unused boolean_t IA32e
)
980 #if NCOPY_WINDOWS > 0
987 vm_last_addr
= VM_MAX_KERNEL_ADDRESS
; /* Set the highest address
990 * The kernel's pmap is statically allocated so we don't
991 * have to use pmap_create, which is unlikely to work
992 * correctly at this part of the boot sequence.
995 kernel_pmap
= &kernel_pmap_store
;
996 kernel_pmap
->ref_count
= 1;
997 kernel_pmap
->nx_enabled
= FALSE
;
998 kernel_pmap
->pm_task_map
= TASK_MAP_64BIT
;
999 kernel_pmap
->pm_obj
= (vm_object_t
) NULL
;
1000 kernel_pmap
->dirbase
= (pd_entry_t
*)((uintptr_t)IdlePTD
);
1001 kernel_pmap
->pm_pdpt
= (pd_entry_t
*) ((uintptr_t)IdlePDPT
);
1002 kernel_pmap
->pm_pml4
= IdlePML4
;
1003 kernel_pmap
->pm_cr3
= (uintptr_t)ID_MAP_VTOP(IdlePML4
);
1006 current_cpu_datap()->cpu_kernel_cr3
= (addr64_t
) kernel_pmap
->pm_cr3
;
1009 OSAddAtomic(NKPT
, &inuse_ptepages_count
);
1011 virtual_avail
= (vm_offset_t
)(VM_MIN_KERNEL_ADDRESS
) + (vm_offset_t
)first_avail
;
1012 virtual_end
= (vm_offset_t
)(VM_MAX_KERNEL_ADDRESS
);
1014 #if NCOPY_WINDOWS > 0
1016 * Reserve some special page table entries/VA space for temporary
1019 #define SYSMAP(c, p, v, n) \
1020 v = (c)va; va += ((n)*INTEL_PGBYTES);
1024 for (i
=0; i
<PMAP_NWINDOWS
; i
++) {
1026 kprintf("trying to do SYSMAP idx %d %p\n", i
,
1027 current_cpu_datap());
1028 kprintf("cpu_pmap %p\n", current_cpu_datap()->cpu_pmap
);
1029 kprintf("mapwindow %p\n", current_cpu_datap()->cpu_pmap
->mapwindow
);
1030 kprintf("two stuff %p %p\n",
1031 (void *)(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
),
1032 (void *)(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
));
1035 (current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
),
1036 (current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CADDR
),
1038 current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
=
1039 &(current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP_store
);
1040 *current_cpu_datap()->cpu_pmap
->mapwindow
[i
].prv_CMAP
= 0;
1043 /* DMAP user for debugger */
1044 SYSMAP(caddr_t
, DMAP1
, DADDR1
, 1);
1045 SYSMAP(caddr_t
, DMAP2
, DADDR2
, 1); /* XXX temporary - can remove */
1050 if (PE_parse_boot_argn("npvhash", &npvhash
, sizeof (npvhash
))) {
1051 if (0 != ((npvhash
+ 1) & npvhash
)) {
1052 kprintf("invalid hash %d, must be ((2^N)-1), "
1053 "using default %d\n", npvhash
, NPVHASH
);
1060 printf("npvhash=%d\n", npvhash
);
1062 simple_lock_init(&kernel_pmap
->lock
, 0);
1063 simple_lock_init(&pv_hashed_free_list_lock
, 0);
1064 simple_lock_init(&pv_hashed_kern_free_list_lock
, 0);
1065 simple_lock_init(&pv_hash_table_lock
,0);
1069 kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n",
1070 (long)KERNEL_BASE
, (long)virtual_end
);
1071 kprintf("Available physical space from 0x%llx to 0x%llx\n",
1072 avail_start
, avail_end
);
1075 * The -no_shared_cr3 boot-arg is a debugging feature (set by default
1076 * in the DEBUG kernel) to force the kernel to switch to its own map
1077 * (and cr3) when control is in kernelspace. The kernel's map does not
1078 * include (i.e. share) userspace so wild references will cause
1079 * a panic. Only copyin and copyout are exempt from this.
1081 (void) PE_parse_boot_argn("-no_shared_cr3",
1082 &no_shared_cr3
, sizeof (no_shared_cr3
));
1084 kprintf("Kernel not sharing user map\n");
1087 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace
, sizeof (pmap_trace
))) {
1088 kprintf("Kernel traces for pmap operations enabled\n");
1090 #endif /* PMAP_TRACES */
1095 vm_offset_t
*startp
,
1098 *startp
= virtual_avail
;
1099 *endp
= virtual_end
;
1103 * Initialize the pmap module.
1104 * Called by vm_init, to initialize any structures that the pmap
1105 * system needs to map virtual memory.
1113 vm_map_offset_t vaddr
;
1117 kernel_pmap
->pm_obj_pml4
= &kpml4obj_object_store
;
1118 _vm_object_allocate((vm_object_size_t
)NPML4PGS
, &kpml4obj_object_store
);
1120 kernel_pmap
->pm_obj_pdpt
= &kpdptobj_object_store
;
1121 _vm_object_allocate((vm_object_size_t
)NPDPTPGS
, &kpdptobj_object_store
);
1123 kernel_pmap
->pm_obj
= &kptobj_object_store
;
1124 _vm_object_allocate((vm_object_size_t
)NPDEPGS
, &kptobj_object_store
);
1127 * Allocate memory for the pv_head_table and its lock bits,
1128 * the modify bit array, and the pte_page table.
1132 * zero bias all these arrays now instead of off avail_start
1133 * so we cover all memory
1136 npages
= i386_btop(avail_end
);
1137 s
= (vm_size_t
) (sizeof(struct pv_rooted_entry
) * npages
1138 + (sizeof (struct pv_hashed_entry_t
*) * (npvhash
+1))
1139 + pv_lock_table_size(npages
)
1140 + pv_hash_lock_table_size((npvhash
+1))
1144 if (kernel_memory_allocate(kernel_map
, &addr
, s
, 0,
1145 KMA_KOBJECT
| KMA_PERMANENT
)
1149 memset((char *)addr
, 0, s
);
1152 if (0 == npvhash
) panic("npvhash not initialized");
1156 * Allocate the structures first to preserve word-alignment.
1158 pv_head_table
= (pv_rooted_entry_t
) addr
;
1159 addr
= (vm_offset_t
) (pv_head_table
+ npages
);
1161 pv_hash_table
= (pv_hashed_entry_t
*)addr
;
1162 addr
= (vm_offset_t
) (pv_hash_table
+ (npvhash
+ 1));
1164 pv_lock_table
= (char *) addr
;
1165 addr
= (vm_offset_t
) (pv_lock_table
+ pv_lock_table_size(npages
));
1167 pv_hash_lock_table
= (char *) addr
;
1168 addr
= (vm_offset_t
) (pv_hash_lock_table
+ pv_hash_lock_table_size((npvhash
+1)));
1170 pmap_phys_attributes
= (char *) addr
;
1172 ppnum_t last_pn
= i386_btop(avail_end
);
1174 pmap_memory_region_t
*pmptr
= pmap_memory_regions
;
1175 for (i
= 0; i
< pmap_memory_region_count
; i
++, pmptr
++) {
1176 if (pmptr
->type
!= kEfiConventionalMemory
)
1179 for (pn
= pmptr
->base
; pn
<= pmptr
->end
; pn
++) {
1181 pmap_phys_attributes
[pn
] |= PHYS_MANAGED
;
1182 if (pn
> last_managed_page
)
1183 last_managed_page
= pn
;
1189 * Create the zone of physical maps,
1190 * and of the physical-to-virtual entries.
1192 s
= (vm_size_t
) sizeof(struct pmap
);
1193 pmap_zone
= zinit(s
, 400*s
, 4096, "pmap"); /* XXX */
1194 s
= (vm_size_t
) sizeof(struct pv_hashed_entry
);
1195 pv_hashed_list_zone
= zinit(s
, 10000*s
, 4096, "pv_list"); /* XXX */
1197 pdpt_zone
= zinit(s
, 400*s
, 4096, "pdpt"); /* XXX */
1200 /* create pv entries for kernel pages mapped by low level
1201 startup code. these have to exist so we can pmap_remove()
1202 e.g. kext pages from the middle of our addr space */
1204 vaddr
= (vm_map_offset_t
) VM_MIN_KERNEL_ADDRESS
;
1205 for (ppn
= 0; ppn
< i386_btop(avail_start
); ppn
++) {
1206 pv_rooted_entry_t pv_e
;
1208 pv_e
= pai_to_pvh(ppn
);
1211 pv_e
->pmap
= kernel_pmap
;
1212 queue_init(&pv_e
->qlink
);
1214 pmap_initialized
= TRUE
;
1217 * Initialize pmap cache.
1219 pmap_cache_list
= PMAP_NULL
;
1220 pmap_cache_count
= 0;
1221 simple_lock_init(&pmap_cache_lock
, 0);
1223 max_preemption_latency_tsc
= tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS
, tscFCvtn2t
);
1226 * Ensure the kernel's PML4 entry exists for the basement
1227 * before this is shared with any user.
1229 pmap_expand_pml4(kernel_pmap
, KERNEL_BASEMENT
);
 * this function is only used for debugging from the vm layer
1240 pv_rooted_entry_t pv_h
;
1244 assert(pn
!= vm_page_fictitious_addr
);
1246 if (!pmap_initialized
)
1249 if (pn
== vm_page_guard_addr
)
1252 pai
= ppn_to_pai(pn
);
1253 if (!IS_MANAGED_PAGE(pai
))
1255 pv_h
= pai_to_pvh(pn
);
1256 result
= (pv_h
->pmap
== PMAP_NULL
);
1263 vm_map_offset_t va_start
,
1264 vm_map_offset_t va_end
)
1266 vm_map_offset_t offset
;
1269 if (pmap
== PMAP_NULL
) {
1274 * Check the resident page count
1275 * - if it's zero, the pmap is completely empty.
1276 * This short-circuit test prevents a virtual address scan which is
1277 * painfully slow for 64-bit spaces.
1278 * This assumes the count is correct
1279 * .. the debug kernel ought to be checking perhaps by page table walk.
1281 if (pmap
->stats
.resident_count
== 0)
1284 for (offset
= va_start
;
1286 offset
+= PAGE_SIZE_64
) {
1287 phys_page
= pmap_find_phys(pmap
, offset
);
1289 kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
1290 "page %d at 0x%llx\n",
1291 pmap
, va_start
, va_end
, phys_page
, offset
);
1301 * Create and return a physical map.
1303 * If the size specified for the map
1304 * is zero, the map is an actual physical
1305 * map, and may be referenced by the
1308 * If the size specified is non-zero,
1309 * the map will be used in software only, and
1310 * is bounded by that size.
1320 pml4_entry_t
*kpml4
;
1322 PMAP_TRACE(PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_START
,
1323 (uint32_t) (sz
>>32), (uint32_t) sz
, is_64bit
, 0, 0);
1325 size
= (vm_size_t
) sz
;
1328 * A software use-only map doesn't even need a map.
1335 p
= (pmap_t
) zalloc(pmap_zone
);
1337 panic("pmap_create zalloc");
1339 /* init counts now since we'll be bumping some */
1340 simple_lock_init(&p
->lock
, 0);
1341 p
->stats
.resident_count
= 0;
1342 p
->stats
.resident_max
= 0;
1343 p
->stats
.wired_count
= 0;
1346 p
->pm_shared
= FALSE
;
1348 p
->pm_task_map
= is_64bit
? TASK_MAP_64BIT
: TASK_MAP_32BIT
;;
1350 /* alloc the pml4 page in kernel vm */
1351 if (KERN_SUCCESS
!= kmem_alloc_kobject(kernel_map
, (vm_offset_t
*)(&p
->pm_pml4
), PAGE_SIZE
))
1352 panic("pmap_create kmem_alloc_kobject pml4");
1354 memset((char *)p
->pm_pml4
, 0, PAGE_SIZE
);
1355 p
->pm_cr3
= (pmap_paddr_t
)kvtophys((vm_offset_t
)p
->pm_pml4
);
1357 OSAddAtomic(1, &inuse_ptepages_count
);
1359 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
1361 p
->pm_obj_pml4
= vm_object_allocate((vm_object_size_t
)(NPML4PGS
));
1362 if (NULL
== p
->pm_obj_pml4
)
		panic("pmap_create pml4 obj");
1365 p
->pm_obj_pdpt
= vm_object_allocate((vm_object_size_t
)(NPDPTPGS
));
1366 if (NULL
== p
->pm_obj_pdpt
)
1367 panic("pmap_create pdpt obj");
1369 p
->pm_obj
= vm_object_allocate((vm_object_size_t
)(NPDEPGS
));
1370 if (NULL
== p
->pm_obj
)
1371 panic("pmap_create pte obj");
	/* All pmaps share the kernel's pml4 */
1374 pml4
= pmap64_pml4(p
, 0ULL);
1375 kpml4
= kernel_pmap
->pm_pml4
;
1376 pml4
[KERNEL_PML4_INDEX
] = kpml4
[KERNEL_PML4_INDEX
];
1377 pml4
[KERNEL_KEXTS_INDEX
] = kpml4
[KERNEL_KEXTS_INDEX
];
1378 pml4
[KERNEL_PHYSMAP_INDEX
] = kpml4
[KERNEL_PHYSMAP_INDEX
];
1380 PMAP_TRACE(PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_START
,
1381 p
, is_64bit
, 0, 0, 0);
1387 * Retire the given physical map from service.
1388 * Should only be called if the map contains
1389 * no valid mappings.
1401 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_START
,
1410 * If some cpu is not using the physical pmap pointer that it
1411 * is supposed to be (see set_dirbase), we might be using the
1412 * pmap that is being destroyed! Make sure we are
1413 * physically on the right pmap:
1415 PMAP_UPDATE_TLBS(p
, 0x0ULL
, 0xFFFFFFFFFFFFF000ULL
);
1421 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_END
,
1423 return; /* still in use */
1427 * Free the memory maps, then the
1430 int inuse_ptepages
= 0;
1433 kmem_free(kernel_map
, (vm_offset_t
)p
->pm_pml4
, PAGE_SIZE
);
1435 inuse_ptepages
+= p
->pm_obj_pml4
->resident_page_count
;
1436 vm_object_deallocate(p
->pm_obj_pml4
);
1438 inuse_ptepages
+= p
->pm_obj_pdpt
->resident_page_count
;
1439 vm_object_deallocate(p
->pm_obj_pdpt
);
1441 inuse_ptepages
+= p
->pm_obj
->resident_page_count
;
1442 vm_object_deallocate(p
->pm_obj
);
1444 OSAddAtomic(-inuse_ptepages
, &inuse_ptepages_count
);
1446 zfree(pmap_zone
, p
);
1448 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_END
,
1453 * Add a reference to the specified pmap.
1457 pmap_reference(pmap_t p
)
1459 if (p
!= PMAP_NULL
) {
1467 * Remove a range of hardware page-table entries.
1468 * The entries given are the first (inclusive)
1469 * and last (exclusive) entries for the VM pages.
1470 * The virtual address is the va for the first pte.
1472 * The pmap must be locked.
1473 * If the pmap is not the kernel pmap, the range must lie
1474 * entirely within one pte-page. This is NOT checked.
1475 * Assumes that the pte-page exists.
1481 vm_map_offset_t start_vaddr
,
1486 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1487 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1488 pv_hashed_entry_t pvh_e
;
1490 int num_removed
, num_unwired
, num_found
;
1493 vm_map_offset_t vaddr
;
1499 /* invalidate the PTEs first to "freeze" them */
1500 for (cpte
= spte
, vaddr
= start_vaddr
;
1502 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1504 pa
= pte_to_pa(*cpte
);
1514 if (!IS_MANAGED_PAGE(pai
)) {
1516 * Outside range of managed physical memory.
1517 * Just remove the mappings.
1519 pmap_store_pte(cpte
, 0);
1523 /* invalidate the PTE */
1524 pmap_update_pte(cpte
, *cpte
, (*cpte
& ~INTEL_PTE_VALID
));
1527 if (num_found
== 0) {
1528 /* nothing was changed: we're done */
1532 /* propagate the invalidates to other CPUs */
1534 PMAP_UPDATE_TLBS(pmap
, start_vaddr
, vaddr
);
1536 for (cpte
= spte
, vaddr
= start_vaddr
;
1538 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1540 pa
= pte_to_pa(*cpte
);
1548 pa
= pte_to_pa(*cpte
);
1556 * Get the modify and reference bits, then
1557 * nuke the entry in the page table
1559 /* remember reference and change */
1560 pmap_phys_attributes
[pai
] |=
1561 (char) (*cpte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
1562 /* completely invalidate the PTE */
1563 pmap_store_pte(cpte
, 0);
1566 * Remove the mapping from the pvlist for this physical page.
1568 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
) pai
);
1572 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1573 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1576 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1583 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1584 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1591 if (pmap
->stats
.resident_count
< num_removed
)
1592 panic("pmap_remove_range: resident_count");
1594 assert(pmap
->stats
.resident_count
>= num_removed
);
1595 OSAddAtomic(-num_removed
, &pmap
->stats
.resident_count
);
1598 if (pmap
->stats
.wired_count
< num_unwired
)
1599 panic("pmap_remove_range: wired_count");
1601 assert(pmap
->stats
.wired_count
>= num_unwired
);
1602 OSAddAtomic(-num_unwired
, &pmap
->stats
.wired_count
);
1608 * Remove phys addr if mapped in specified map
1612 pmap_remove_some_phys(
1613 __unused pmap_t map
,
1614 __unused ppnum_t pn
)
1617 /* Implement to support working set code */
1622 * Remove the given range of addresses
1623 * from the specified map.
1625 * It is assumed that the start and end are properly
1626 * rounded to the hardware page size.
1635 pt_entry_t
*spte
, *epte
;
1641 if (map
== PMAP_NULL
|| s64
== e64
)
1644 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
1646 (uint32_t) (s64
>> 32), s64
,
1647 (uint32_t) (e64
>> 32), e64
);
1654 * Check that address range in the kernel does not overlap the stacks.
1655 * We initialize local static min/max variables once to avoid making
1656 * 2 function calls for every remove. Note also that these functions
1657 * both return 0 before kernel stacks have been initialized, and hence
1658 * the panic is not triggered in this case.
1660 if (map
== kernel_pmap
) {
1661 static vm_offset_t kernel_stack_min
= 0;
1662 static vm_offset_t kernel_stack_max
= 0;
1664 if (kernel_stack_min
== 0) {
1665 kernel_stack_min
= min_valid_stack_address();
1666 kernel_stack_max
= max_valid_stack_address();
1668 if ((kernel_stack_min
<= s64
&& s64
< kernel_stack_max
) ||
1669 (kernel_stack_min
< e64
&& e64
<= kernel_stack_max
))
1670 panic("pmap_remove() attempted in kernel stack");
1675 * The values of kernel_stack_min and kernel_stack_max are no longer
1676 * relevant now that we allocate kernel stacks in the kernel map,
1677 * so the old code above no longer applies. If we wanted to check that
1678 * we weren't removing a mapping of a page in a kernel stack we'd
1679 * mark the PTE with an unused bit and check that here.
1684 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1687 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1690 pde
= pmap_pde(map
, s64
);
1692 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1693 if (*pde
& INTEL_PTE_PS
) {
1695 * If we're removing a superpage, pmap_remove_range()
1696 * must work on level 2 instead of level 1; and we're
1697 * only passing a single level 2 entry instead of a
1701 epte
= spte
+1; /* excluded */
1703 spte
= pmap_pte(map
, (s64
& ~(pde_mapped_size
- 1)));
1704 spte
= &spte
[ptenum(s64
)];
1705 epte
= &spte
[intel_btop(l64
- s64
)];
1707 pmap_remove_range(map
, s64
, spte
, epte
);
1712 if (s64
< e64
&& rdtsc64() >= deadline
) {
1715 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1721 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
,
1727 * Routine: pmap_page_protect
1730 * Lower the permission for all mappings to a given
1738 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1739 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1740 pv_hashed_entry_t nexth
;
1742 pv_rooted_entry_t pv_h
;
1743 pv_rooted_entry_t pv_e
;
1744 pv_hashed_entry_t pvh_e
;
1751 assert(pn
!= vm_page_fictitious_addr
);
1752 if (pn
== vm_page_guard_addr
)
1755 pai
= ppn_to_pai(pn
);
1757 if (!IS_MANAGED_PAGE(pai
)) {
1759 * Not a managed page.
1763 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
,
1767 * Determine the new protection.
1771 case VM_PROT_READ
| VM_PROT_EXECUTE
:
1775 return; /* nothing to do */
1781 pv_h
= pai_to_pvh(pai
);
1787 * Walk down PV list, if any, changing or removing all mappings.
1789 if (pv_h
->pmap
== PMAP_NULL
)
1793 pvh_e
= (pv_hashed_entry_t
) pv_e
; /* cheat */
1796 vm_map_offset_t vaddr
;
1800 pte
= pmap_pte(pmap
, vaddr
);
1802 panic("pmap_page_protect() "
1803 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1806 nexth
= (pv_hashed_entry_t
) queue_next(&pvh_e
->qlink
);
1809 * Remove the mapping if new protection is NONE
1810 * or if write-protecting a kernel mapping.
1812 if (remove
|| pmap
== kernel_pmap
) {
1814 * Remove the mapping, collecting dirty bits.
1816 pmap_update_pte(pte
, *pte
, *pte
& ~INTEL_PTE_VALID
);
1817 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1818 pmap_phys_attributes
[pai
] |=
1819 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1820 pmap_store_pte(pte
, 0);
1823 if (pmap
->stats
.resident_count
< 1)
1824 panic("pmap_page_protect: resident_count");
1826 assert(pmap
->stats
.resident_count
>= 1);
1827 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
1830 * Deal with the pv_rooted_entry.
1835 * Fix up head later.
1837 pv_h
->pmap
= PMAP_NULL
;
1840 * Delete this entry.
1842 pv_hash_remove(pvh_e
);
1843 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1846 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1854 pmap_update_pte(pte
, *pte
, *pte
& ~INTEL_PTE_WRITE
);
1855 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1858 } while ((pv_e
= (pv_rooted_entry_t
) nexth
) != pv_h
);
1862 * If pv_head mapping was removed, fix it up.
1864 if (pv_h
->pmap
== PMAP_NULL
) {
1865 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
1867 if (pvh_e
!= (pv_hashed_entry_t
) pv_h
) {
1868 pv_hash_remove(pvh_e
);
1869 pv_h
->pmap
= pvh_e
->pmap
;
1870 pv_h
->va
= pvh_e
->va
;
1871 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1874 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1879 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1880 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1885 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
,
1895 * Disconnect all mappings for this page and return reference and change status
1896 * in generic format.
1899 unsigned int pmap_disconnect(
1902 pmap_page_protect(pa
, 0); /* disconnect the page */
1903 return (pmap_get_refmod(pa
)); /* return ref/chg status */
1907 * Set the physical protection on the
1908 * specified range of this map as requested.
1909 * Will not increase permissions.
1914 vm_map_offset_t sva
,
1915 vm_map_offset_t eva
,
1919 pt_entry_t
*spte
, *epte
;
1920 vm_map_offset_t lva
;
1921 vm_map_offset_t orig_sva
;
1927 if (map
== PMAP_NULL
)
1930 if (prot
== VM_PROT_NONE
) {
1931 pmap_remove(map
, sva
, eva
);
1934 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT
) | DBG_FUNC_START
,
1936 (uint32_t) (sva
>> 32), (uint32_t) sva
,
1937 (uint32_t) (eva
>> 32), (uint32_t) eva
);
1939 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !map
->nx_enabled
)
1948 lva
= (sva
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1951 pde
= pmap_pde(map
, sva
);
1952 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1953 if (*pde
& INTEL_PTE_PS
) {
1956 epte
= spte
+1; /* excluded */
1958 spte
= pmap_pte(map
, (sva
& ~(pde_mapped_size
- 1)));
1959 spte
= &spte
[ptenum(sva
)];
1960 epte
= &spte
[intel_btop(lva
- sva
)];
1963 for (; spte
< epte
; spte
++) {
1964 if (!(*spte
& INTEL_PTE_VALID
))
1967 if (prot
& VM_PROT_WRITE
)
1968 pmap_update_pte(spte
, *spte
,
1969 *spte
| INTEL_PTE_WRITE
);
1971 pmap_update_pte(spte
, *spte
,
1972 *spte
& ~INTEL_PTE_WRITE
);
1975 pmap_update_pte(spte
, *spte
,
1976 *spte
| INTEL_PTE_NX
);
1978 pmap_update_pte(spte
, *spte
,
1979 *spte
& ~INTEL_PTE_NX
);
1987 PMAP_UPDATE_TLBS(map
, orig_sva
, eva
);
1991 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT
) | DBG_FUNC_END
,
1996 /* Map a (possibly) autogenned block */
2005 __unused
unsigned int flags
)
2010 if (attr
& VM_MEM_SUPERPAGE
)
2011 cur_page_size
= SUPERPAGE_SIZE
;
2013 cur_page_size
= PAGE_SIZE
;
2015 for (page
= 0; page
< size
; page
+=cur_page_size
/PAGE_SIZE
) {
2016 pmap_enter(pmap
, va
, pa
, prot
, attr
, TRUE
);
2017 va
+= cur_page_size
;
2018 pa
+=cur_page_size
/PAGE_SIZE
;
2024 * Insert the given physical page (p) at
2025 * the specified virtual address (v) in the
2026 * target physical map with the protection requested.
2028 * If specified, the page will be wired down, meaning
2029 * that the related pte cannot be reclaimed.
2031 * NB: This is the only routine which MAY NOT lazy-evaluate
2032 * or lose information. That is, this routine must actually
2033 * insert this page into the given map NOW.
2037 register pmap_t pmap
,
2038 vm_map_offset_t vaddr
,
2045 pv_rooted_entry_t pv_h
;
2047 pv_hashed_entry_t pvh_e
;
2048 pv_hashed_entry_t pvh_new
;
2049 pt_entry_t
template;
2050 pmap_paddr_t old_pa
;
2051 pmap_paddr_t pa
= (pmap_paddr_t
) i386_ptob(pn
);
2052 boolean_t need_tlbflush
= FALSE
;
2055 boolean_t old_pa_locked
;
2056 boolean_t superpage
= flags
& VM_MEM_SUPERPAGE
;
2057 vm_object_t delpage_pm_obj
= NULL
;
2058 int delpage_pde_index
= 0;
2062 assert(pn
!= vm_page_fictitious_addr
);
2064 kprintf("pmap_enter(%p,%llu,%u)\n", pmap
, vaddr
, pn
);
2065 if (pmap
== PMAP_NULL
)
2067 if (pn
== vm_page_guard_addr
)
2070 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
2072 (uint32_t) (vaddr
>> 32), (uint32_t) vaddr
,
2075 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
	 * Must allocate a new pvlist entry while we're unlocked;
	 * zalloc may cause pageout (which will lock the pmap system).
	 * If we determine we need a pvlist entry, we will unlock
	 * and allocate one. Then we will retry, throwing away
	 * the allocated entry later (if we no longer need it).
2088 pvh_new
= PV_HASHED_ENTRY_NULL
;
2090 pvh_e
= PV_HASHED_ENTRY_NULL
;
2095 * Expand pmap to include this pte. Assume that
2096 * pmap is always expanded to include enough hardware
2097 * pages to map one VM page.
2100 while ((pte
= pmap64_pde(pmap
, vaddr
)) == PD_ENTRY_NULL
) {
2101 /* need room for another pde entry */
2103 pmap_expand_pdpt(pmap
, vaddr
);
2107 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
2109 * Must unlock to expand the pmap
2110 * going to grow pde level page(s)
2113 pmap_expand(pmap
, vaddr
);
2118 if (superpage
&& *pte
&& !(*pte
& INTEL_PTE_PS
)) {
2120 * There is still an empty page table mapped that
2121 * was used for a previous base page mapping.
2122 * Remember the PDE and the PDE index, so that we
2123 * can free the page at the end of this function.
2125 delpage_pde_index
= (int)pdeidx(pmap
, vaddr
);
2126 delpage_pm_obj
= pmap
->pm_obj
;
2130 old_pa
= pte_to_pa(*pte
);
2131 pai
= pa_index(old_pa
);
2132 old_pa_locked
= FALSE
;
2135 * if we have a previous managed page, lock the pv entry now. after
2136 * we lock it, check to see if someone beat us to the lock and if so
2139 if ((0 != old_pa
) && IS_MANAGED_PAGE(pai
)) {
2141 old_pa_locked
= TRUE
;
2142 old_pa
= pte_to_pa(*pte
);
2144 UNLOCK_PVH(pai
); /* another path beat us to it */
2145 old_pa_locked
= FALSE
;
2150 * Special case if the incoming physical page is already mapped
2156 * May be changing its wired attribute or protection
2159 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2161 if (VM_MEM_NOT_CACHEABLE
==
2162 (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
))) {
2163 if (!(flags
& VM_MEM_GUARDED
))
2164 template |= INTEL_PTE_PTA
;
2165 template |= INTEL_PTE_NCACHE
;
2167 if (pmap
!= kernel_pmap
)
2168 template |= INTEL_PTE_USER
;
2169 if (prot
& VM_PROT_WRITE
)
2170 template |= INTEL_PTE_WRITE
;
2173 template |= INTEL_PTE_NX
;
2176 template |= INTEL_PTE_WIRED
;
2179 &pmap
->stats
.wired_count
);
2181 if (iswired(*pte
)) {
2182 assert(pmap
->stats
.wired_count
>= 1);
2184 &pmap
->stats
.wired_count
);
2187 if (superpage
) /* this path can not be used */
2188 template |= INTEL_PTE_PS
; /* to change the page size! */
2190 /* store modified PTE and preserve RC bits */
2191 pmap_update_pte(pte
, *pte
,
2192 template | (*pte
& (INTEL_PTE_REF
| INTEL_PTE_MOD
)));
2193 if (old_pa_locked
) {
2195 old_pa_locked
= FALSE
;
2197 need_tlbflush
= TRUE
;
2202 * Outline of code from here:
2203 * 1) If va was mapped, update TLBs, remove the mapping
2204 * and remove old pvlist entry.
2205 * 2) Add pvlist entry for new mapping
2206 * 3) Enter new mapping.
2208 * If the old physical page is not managed step 1) is skipped
2209 * (except for updating the TLBs), and the mapping is
2210 * overwritten at step 3). If the new physical page is not
2211 * managed, step 2) is skipped.
2214 if (old_pa
!= (pmap_paddr_t
) 0) {
2217 * Don't do anything to pages outside valid memory here.
2218 * Instead convince the code that enters a new mapping
2219 * to overwrite the old one.
2222 /* invalidate the PTE */
2223 pmap_update_pte(pte
, *pte
, (*pte
& ~INTEL_PTE_VALID
));
2224 /* propagate invalidate everywhere */
2225 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
2226 /* remember reference and change */
2227 oattr
= (char) (*pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
2228 /* completely invalidate the PTE */
2229 pmap_store_pte(pte
, 0);
2231 if (IS_MANAGED_PAGE(pai
)) {
2233 if (pmap
->stats
.resident_count
< 1)
2234 panic("pmap_enter: resident_count");
2236 assert(pmap
->stats
.resident_count
>= 1);
2238 &pmap
->stats
.resident_count
);
2240 if (iswired(*pte
)) {
2242 if (pmap
->stats
.wired_count
< 1)
2243 panic("pmap_enter: wired_count");
2245 assert(pmap
->stats
.wired_count
>= 1);
2247 &pmap
->stats
.wired_count
);
2249 pmap_phys_attributes
[pai
] |= oattr
;
2252 * Remove the mapping from the pvlist for
2253 * this physical page.
2254 * We'll end up with either a rooted pv or a
2257 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
) pai
);
2262 * old_pa is not managed.
2263 * Do removal part of accounting.
2266 if (iswired(*pte
)) {
2267 assert(pmap
->stats
.wired_count
>= 1);
2269 &pmap
->stats
.wired_count
);
2275 * if we had a previously managed paged locked, unlock it now
2277 if (old_pa_locked
) {
2279 old_pa_locked
= FALSE
;
2282 pai
= pa_index(pa
); /* now working with new incoming phys page */
2283 if (IS_MANAGED_PAGE(pai
)) {
2286 * Step 2) Enter the mapping in the PV list for this
2289 pv_h
= pai_to_pvh(pai
);
2293 if (pv_h
->pmap
== PMAP_NULL
) {
2295 * No mappings yet, use rooted pv
2299 queue_init(&pv_h
->qlink
);
2302 * Add new pv_hashed_entry after header.
2304 if ((PV_HASHED_ENTRY_NULL
== pvh_e
) && pvh_new
) {
2306 pvh_new
= PV_HASHED_ENTRY_NULL
;
2307 } else if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
2308 PV_HASHED_ALLOC(pvh_e
);
2309 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
2311 * the pv list is empty. if we are on
2312 * the kernel pmap we'll use one of
2313 * the special private kernel pv_e's,
2314 * else, we need to unlock
2315 * everything, zalloc a pv_e, and
2316 * restart bringing in the pv_e with
2319 if (kernel_pmap
== pmap
) {
2320 PV_HASHED_KERN_ALLOC(pvh_e
);
2324 pvh_new
= (pv_hashed_entry_t
) zalloc(pv_hashed_list_zone
);
2329 if (PV_HASHED_ENTRY_NULL
== pvh_e
)
2330 panic("pvh_e exhaustion");
2335 pv_hash_add(pvh_e
, pv_h
);
2338 * Remember that we used the pvlist entry.
2340 pvh_e
= PV_HASHED_ENTRY_NULL
;
2344 * only count the mapping
2345 * for 'managed memory'
2347 OSAddAtomic(+1, & pmap
->stats
.resident_count
);
2348 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
2349 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
2353 * Step 3) Enter the mapping.
2355 * Build a template to speed up entering -
2356 * only the pfn changes.
2358 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
2360 if (flags
& VM_MEM_NOT_CACHEABLE
) {
2361 if (!(flags
& VM_MEM_GUARDED
))
2362 template |= INTEL_PTE_PTA
;
2363 template |= INTEL_PTE_NCACHE
;
2365 if (pmap
!= kernel_pmap
)
2366 template |= INTEL_PTE_USER
;
2367 if (prot
& VM_PROT_WRITE
)
2368 template |= INTEL_PTE_WRITE
;
2370 template |= INTEL_PTE_NX
;
2372 template |= INTEL_PTE_WIRED
;
2373 OSAddAtomic(+1, & pmap
->stats
.wired_count
);
2376 template |= INTEL_PTE_PS
;
2377 pmap_store_pte(pte
, template);
2380 * if this was a managed page we delayed unlocking the pv until here
2381 * to prevent pmap_page_protect et al from finding it until the pte
2384 if (IS_MANAGED_PAGE(pai
)) {
2388 if (need_tlbflush
== TRUE
)
2389 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
2391 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
2392 PV_HASHED_FREE_LIST(pvh_e
, pvh_e
, 1);
2394 if (pvh_new
!= PV_HASHED_ENTRY_NULL
) {
2395 PV_HASHED_KERN_FREE_LIST(pvh_new
, pvh_new
, 1);
2399 if (delpage_pm_obj
) {
2402 vm_object_lock(delpage_pm_obj
);
2403 m
= vm_page_lookup(delpage_pm_obj
, delpage_pde_index
);
2404 if (m
== VM_PAGE_NULL
)
2405 panic("pmap_enter: pte page not in object");
2407 OSAddAtomic(-1, &inuse_ptepages_count
);
2408 vm_object_unlock(delpage_pm_obj
);
2411 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t			map,
	vm_map_offset_t		vaddr,
	boolean_t		wired)
{
	pt_entry_t	*pte;

	PMAP_LOCK(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
	}

	PMAP_UNLOCK(map);
}
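
/*
 * Illustrative sketch (not compiled): the intended usage pattern for
 * pmap_change_wiring above.  The mapping must already exist; wiring it
 * increments map->stats.wired_count and unwiring decrements it.  The
 * helper name and its caller-supplied "map"/"va" are hypothetical.
 */
#if 0
static void
wire_then_unwire_example(pmap_t map, vm_map_offset_t va)
{
	int before = map->stats.wired_count;

	pmap_change_wiring(map, va, TRUE);	/* sets INTEL_PTE_WIRED */
	assert(map->stats.wired_count == before + 1);

	pmap_change_wiring(map, va, FALSE);	/* clears INTEL_PTE_WIRED */
	assert(map->stats.wired_count == before);
}
#endif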
void
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	ppnum_t			pn;
	pml4_entry_t		*pml4p;

	DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pml4idx(map, vaddr);

	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pml4);

	PMAP_LOCK(map);

	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pml4);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		return;
	}

	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
		panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pml4, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj_pml4);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
			| INTEL_PTE_VALID
			| INTEL_PTE_USER
			| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);
}
void
pmap_expand_pdpt(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	ppnum_t			pn;
	pdpt_entry_t		*pdptp;

	DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);

	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		pmap_expand_pml4(map, vaddr);
	}

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdptidx(map, vaddr);

	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pdpt);

	PMAP_LOCK(map);

	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pdpt);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		return;
	}

	if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
		panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pdpt, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj_pdpt);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

	pmap_store_pte(pdptp, pa_to_pte(pa)
			| INTEL_PTE_VALID
			| INTEL_PTE_USER
			| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);
}
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
void
pmap_expand(
	pmap_t		map,
	vm_map_offset_t	vaddr)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	ppnum_t			pn;

	/*
	 * For the kernel, the virtual address must be in or above the basement
	 * which is for kexts and is in the 512GB immediately below the kernel.
	 * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
	 */
	if (map == kernel_pmap &&
	    !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))
		panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);

	while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
		/* need room for another pde entry */
		pmap_expand_pdpt(map, vaddr);
	}

	/*
	 *	Allocate a VM page for the pde entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdeidx(map, vaddr);

	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj);

	PMAP_LOCK(map);

	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		return;
	}

	if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
		panic("pmap_expand: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj, vaddr, i);
	}

	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pdp = pmap_pde(map, vaddr);
	pmap_store_pte(pdp, pa_to_pte(pa)
			| INTEL_PTE_VALID
			| INTEL_PTE_USER
			| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);
}
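
/*
 * Illustrative sketch (not compiled): the caller-side loop described in the
 * pmap_expand comment above -- keep expanding until the pte for "vaddr"
 * exists.  Because page tables are never shrunk, the loop terminates.  The
 * helper name is hypothetical; pmap_enter uses this same pattern.
 */
#if 0
static void
ensure_pte_exists(pmap_t map, vm_map_offset_t vaddr)
{
	/* per the comment above, the pmap must be unlocked across the call */
	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL)
		pmap_expand(map, vaddr);
}
#endif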
/* On K64 machines with more than 32GB of memory, pmap_steal_memory
 * will allocate past the 1GB of pre-expanded virtual kernel area. This
 * function allocates all the page tables using memory from the same pool
 * that pmap_steal_memory uses, rather than calling vm_page_grab (which
 * isn't available yet). */
void
pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) {
	ppnum_t		pn;
	pt_entry_t	*pte;

	PMAP_LOCK(pmap);

	if (pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
		if (!pmap_next_page_k64(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pml4(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	if (pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
		if (!pmap_next_page_k64(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pdpt(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	if (pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
		if (!pmap_next_page_k64(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pde(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	PMAP_UNLOCK(pmap);
}
/*
 * pmap_sync_page_data_phys(ppnum_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 * Not required in i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}
#ifdef CURRENTLY_UNUSED_AND_UNTESTED

/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_LOCK(p);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++) {
		if (*pdp & INTEL_PTE_VALID) {
			if (*pdp & INTEL_PTE_REF) {
				pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
			} else {
				ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
				eptp = ptp + NPTEPG;

				/*
				 * If the pte page has any wired mappings, we cannot
				 * free it.
				 */
				wired = 0;
				{
					register pt_entry_t *ptep;
					for (ptep = ptp; ptep < eptp; ptep++) {
						if (iswired(*ptep)) {
							wired = 1;
							break;
						}
					}
				}
				if (!wired) {
					/*
					 * Remove the virtual addresses mapped by this pte page.
					 */
					pmap_remove_range(p,
					    pdetova(pdp - (pt_entry_t *)p->dirbase),
					    ptp,
					    eptp);

					/*
					 * Invalidate the page directory pointer.
					 */
					pmap_store_pte(pdp, 0x0);

					PMAP_UNLOCK(p);

					/*
					 * And free the pte page itself.
					 */
					{
						register vm_page_t m;

						vm_object_lock(p->pm_obj);

						m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
						if (m == VM_PAGE_NULL)
							panic("pmap_collect: pte page not in object");

						VM_PAGE_FREE(m);

						OSAddAtomic(-1, &inuse_ptepages_count);

						vm_object_unlock(p->pm_obj);
					}

					PMAP_LOCK(p);
				}
			}
		}
	}

	PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
	PMAP_UNLOCK(p);
	return;
}
#endif
void
pmap_copy_page(ppnum_t src, ppnum_t dst)
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}
/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t			pmap,
	__unused vm_map_offset_t	start_addr,
	__unused vm_map_offset_t	end_addr,
	__unused boolean_t		pageable)
{
#ifdef lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif	/* lint */
}
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	vm_map_offset_t		va;

	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * Clear modify and/or reference bits.
			 */
			pte = pmap_pte(pmap, va);
			pmap_update_pte(pte, *pte, (*pte & ~bits));
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	pmap_phys_attributes[pai] &= ~bits;

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;
	vm_map_offset_t		va;

	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return 0;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 * Not a managed page.
		 */
		return 0;
	}

	/*
	 * super fast check... if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return bits;

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;

	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the attributes we've asked for
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * first make sure any processor actively
			 * using this pmap, flushes its TLB state
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			/*
			 * pick up modify and/or reference bits from mapping
			 */
			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}

	UNLOCK_PVH(pai);
	return (attributes);
}
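
/*
 * Illustrative sketch (not compiled): the "super fast check" used by
 * phys_attribute_test above -- consult the already-collected bits without
 * any locking, and fall back to taking the PV lock and walking the
 * mappings only when the cheap test fails.  The helper name is
 * hypothetical; "wanted" is any subset of PHYS_MODIFIED | PHYS_REFERENCED.
 */
#if 0
static boolean_t
attribute_bits_already_collected(int pai, int wanted)
{
	if ((pmap_phys_attributes[pai] & wanted) == wanted)
		return TRUE;	/* fast path: no lock, no PV walk needed */
	return FALSE;		/* slow path: lock the PV list and recheck */
}
#endif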
/*
 *	Set specified attribute bits.
 */
void
phys_attribute_set(
	ppnum_t		pn,
	int		bits)
{
	int		pai;

	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/* Not a managed page. */
		return;
	}

	LOCK_PVH(pai);
	pmap_phys_attributes[pai] |= bits;
	UNLOCK_PVH(pai);
}
/*
 *	Set the modify bit on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_MODIFIED))
		return TRUE;

	return FALSE;
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_REFERENCED))
		return TRUE;

	return FALSE;
}
/*
 *	pmap_get_refmod(phys)
 *	returns the referenced and modified bits of the specified
 *	physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pn)
{
	int		refmod;
	unsigned int	retval = 0;

	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);

	if (refmod & PHYS_MODIFIED)
		retval |= VM_MEM_MODIFIED;
	if (refmod & PHYS_REFERENCED)
		retval |= VM_MEM_REFERENCED;

	return (retval);
}

/*
 *	pmap_clear_refmod(phys, mask)
 *	clears the referenced and modified bits as specified by the mask
 *	of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pn, unsigned int mask)
{
	unsigned int	x86Mask;

	x86Mask = (((mask & VM_MEM_MODIFIED) ? PHYS_MODIFIED : 0)
		   | ((mask & VM_MEM_REFERENCED) ? PHYS_REFERENCED : 0));
	phys_attribute_clear(pn, x86Mask);
}
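
/*
 * Illustrative sketch (not compiled): how a VM-layer caller might sample
 * and then reset the referenced/modified state of a physical page with the
 * two routines above.  The helper name and "pn" are hypothetical.
 */
#if 0
static void
refmod_sample_and_clear(ppnum_t pn)
{
	unsigned int refmod = pmap_get_refmod(pn);

	if (refmod & VM_MEM_MODIFIED) {
		/* the page is dirty and would need cleaning before reuse */
	}
	/* clear both bits so the next sample reflects only new activity */
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
}
#endif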
void
invalidate_icache(__unused vm_offset_t	addr,
		  __unused unsigned	cnt,
		  __unused int		phys)
{
	return;
}

void
flush_dcache(__unused vm_offset_t	addr,
	     __unused unsigned		count,
	     __unused int		phys)
{
	return;
}
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
{
	thread_t thread = current_thread();

	if (current_map() == kernel_map)
		return KERN_FAILURE;
	else if (get_cr3() != thread->map->pmap->pm_cr3)
		return KERN_FAILURE;
	else if (thread->machine.specFlags & CopyIOActive)
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}

kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */
#include <mach_vm_debug.h>
#if MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */
/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
#if 0
	pt_entry_t	*ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
#else
	return TRUE;
#endif
}
boolean_t
phys_page_exists(ppnum_t pn)
{
	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return TRUE;

	if (pn == vm_page_guard_addr)
		return FALSE;

	if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
		return FALSE;

	return TRUE;
}
void
mapping_free_prime(void)
{
	int			i;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}
void
mapping_adjust(void)
{
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;
	int			i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
		for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pv_cnt++;
		}
		PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
	}

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
		for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pv_cnt++;
		}
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
	}
}
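
/*
 * Illustrative sketch (not compiled): the chunk-replenish pattern used by
 * mapping_free_prime() and mapping_adjust() above -- zalloc a batch of
 * pv_hashed entries, thread them into one chain through qlink.next, then
 * hand the whole chain (head, tail, count) to the free list in a single
 * call.  The helper name and "chunk" parameter are hypothetical.
 */
#if 0
static void
replenish_pv_free_list(int chunk)
{
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;	/* chain head */
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;	/* chain tail */
	int			pv_cnt = 0;
	int			i;

	for (i = 0; i < chunk; i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;	/* push on head */
		pvh_eh = pvh_e;
		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;		/* first entry becomes the tail */
		pv_cnt++;
	}
	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}
#endif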
void
pmap_switch(pmap_t tpmap)
{
	spl_t	s;

	s = splhigh();		/* Make sure interruptions are disabled */
	set_dirbase(tpmap, current_thread());
	splx(s);
}

/*
 * disable no-execute capability on
 * the specified pmap
 */
void
pmap_disable_NX(pmap_t pmap)
{
	pmap->nx_enabled = 0;
}
void
pt_fake_zone_info(
	int		*count,
	vm_size_t	*cur_size,
	vm_size_t	*max_size,
	vm_size_t	*elem_size,
	vm_size_t	*alloc_size,
	int		*collectable,
	int		*exhaustable)
{
	*count      = inuse_ptepages_count;
	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
	*max_size   = PAGE_SIZE * (inuse_ptepages_count +
				   vm_page_inactive_count +
				   vm_page_active_count +
				   vm_page_free_count);
	*elem_size  = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
}
void
pmap_cpuset_NMIPI(cpu_set cpu_mask) {
	unsigned int	cpu, cpu_bit;
	uint64_t	deadline;

	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (cpu_mask & cpu_bit)
			cpu_NMI_interrupt(cpu);
	}
	deadline = mach_absolute_time() + (LockTimeOut);
	while (mach_absolute_time() < deadline)
		cpu_pause();
}
/*
 *	Called with pmap locked, we:
 *	 - scan through per-cpu data to see which other cpus need to flush
 *	 - send an IPI to each non-idle cpu to be flushed
 *	 - wait for all to signal back that they are inactive or we see that
 *	   they are at a safe point (idle).
 *	 - flush the local tlb if active for this pmap
 *	 - return ... the caller will unlock the pmap
 */
void
pmap_flush_tlbs(pmap_t pmap)
{
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpu_set		cpus_to_signal;
	unsigned int	my_cpu = cpu_number();
	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;

	assert((processor_avail_count < 2) ||
	       (ml_get_interrupts_enabled() && get_preemption_level() != 0));

	/*
	 * Scan other cpus for matching active or task CR3.
	 * For idle cpus (with no active map) we mark them invalid but
	 * don't signal -- they'll check as they go busy.
	 */
	cpus_to_signal = 0;
	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		uint64_t cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
		uint64_t cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);

		if ((pmap_cr3 == cpu_task_cr3) ||
		    (pmap_cr3 == cpu_active_cr3) ||
		    (pmap->pm_shared) ||
		    (pmap == kernel_pmap)) {
			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
			__asm__ volatile("mfence");

			/*
			 * We don't need to signal processors which will flush
			 * lazily at the idle state or kernel boundary.
			 * For example, if we're invalidating the kernel pmap,
			 * processors currently in userspace don't need to flush
			 * their TLBs until the next time they enter the kernel.
			 * Alterations to the address space of a task active
			 * on a remote processor result in a signal, to
			 * account for copy operations. (There may be room
			 * for optimization in such cases).
			 * The order of the loads below with respect
			 * to the store to the "cpu_tlb_invalid" field above
			 * is important--hence the barrier.
			 */
			if (CPU_CR3_IS_ACTIVE(cpu) &&
			    (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
			     (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
				cpus_to_signal |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}
	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
		   pmap, cpus_to_signal, flush_self, 0, 0);

	/*
	 * Flush local tlb if required.
	 * Do this now to overlap with other processors responding.
	 */
	if (flush_self)
		flush_tlb();

	if (cpus_to_signal) {
		cpu_set	cpus_to_respond = cpus_to_signal;

		deadline = mach_absolute_time() + LockTimeOut;
		/*
		 * Wait for those other cpus to acknowledge
		 */
		while (cpus_to_respond != 0) {
			if (mach_absolute_time() > deadline) {
				if (mp_recent_debugger_activity())
					continue;
				if (!panic_active()) {
					pmap_tlb_flush_timeout = TRUE;
					pmap_cpuset_NMIPI(cpus_to_respond);
				}
				panic("pmap_flush_tlbs() timeout: "
				      "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
				      pmap, cpus_to_respond);
			}

			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
				if ((cpus_to_respond & cpu_bit) != 0) {
					if (!cpu_datap(cpu)->cpu_running ||
					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
					    !CPU_CR3_IS_ACTIVE(cpu)) {
						cpus_to_respond &= ~cpu_bit;
					}
					cpu_pause();
				}
				if (cpus_to_respond == 0)
					break;
			}
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
		   pmap, cpus_to_signal, flush_self, 0, 0);
}
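
/*
 * Illustrative sketch (not compiled): the ordering pmap_flush_tlbs relies
 * on when deciding whether a remote cpu needs an explicit signal.  The
 * store to cpu_tlb_invalid must be visible before the loads of the remote
 * cpu's CR3 state, hence the mfence between them.  The helper name is
 * hypothetical; the body mirrors the scan loop above.
 */
#if 0
static boolean_t
remote_cpu_needs_signal(unsigned int cpu, pmap_paddr_t pmap_cr3)
{
	/* 1. publish the flush request ... */
	cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
	/* 2. ... fence so the store is visible before the checks below ... */
	__asm__ volatile("mfence");
	/* 3. ... then decide whether the cpu is actively using this pmap */
	return CPU_CR3_IS_ACTIVE(cpu) &&
	       (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
		pmap_cr3 == CPU_GET_TASK_CR3(cpu));
}
#endif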
void
process_pmap_updates(void)
{
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	flush_tlb();

	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}
void
pmap_update_interrupt(void)
{
	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
		   0, 0, 0, 0, 0);

	process_pmap_updates();

	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
unsigned int
pmap_cache_attributes(ppnum_t pn)
{
	return IS_MANAGED_PAGE(ppn_to_pai(pn)) ? VM_WIMG_COPYBACK