1c79356b 1/*
c910b4d9 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.c
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
63 *
64 * Physical Map management code for Intel i386, i486, and i860.
65 *
66 * Manages physical address maps.
67 *
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
74 *
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
80 * requested.
81 *
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
84 * this module may delay invalidate or reduced protection
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
89 */
90
91#include <string.h>
92#include <norma_vm.h>
93#include <mach_kdb.h>
94#include <mach_ldebug.h>
95
96#include <libkern/OSAtomic.h>
97
98#include <mach/machine/vm_types.h>
99
100#include <mach/boolean.h>
101#include <kern/thread.h>
102#include <kern/zalloc.h>
2d21ac55 103#include <kern/queue.h>
104
105#include <kern/lock.h>
91447636 106#include <kern/kalloc.h>
107#include <kern/spl.h>
108
109#include <vm/pmap.h>
110#include <vm/vm_map.h>
111#include <vm/vm_kern.h>
112#include <mach/vm_param.h>
113#include <mach/vm_prot.h>
114#include <vm/vm_object.h>
115#include <vm/vm_page.h>
116
117#include <mach/machine/vm_param.h>
118#include <machine/thread.h>
119
120#include <kern/misc_protos.h> /* prototyping */
121#include <i386/misc_protos.h>
122
123#include <i386/cpuid.h>
91447636 124#include <i386/cpu_data.h>
125#include <i386/cpu_number.h>
126#include <i386/machine_cpu.h>
91447636 127#include <i386/mp_slave_boot.h>
0c530ab8 128#include <i386/seg.h>
2d21ac55 129#include <i386/serial_io.h>
0c530ab8 130#include <i386/cpu_capabilities.h>
131#include <i386/machine_routines.h>
132#include <i386/proc_reg.h>
133#include <i386/tsc.h>
134
135#if MACH_KDB
136#include <ddb/db_command.h>
137#include <ddb/db_output.h>
138#include <ddb/db_sym.h>
139#include <ddb/db_print.h>
140#endif /* MACH_KDB */
141
142#include <vm/vm_protos.h>
143
144#include <i386/mp.h>
145#include <i386/mp_desc.h>
146
147#include <sys/kdebug.h>
148
149/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */
150#ifdef DEBUGINTERRUPTS
151#define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);}
152#else
153#define pmap_intr_assert()
154#endif
155
156#ifdef IWANTTODEBUG
157#undef DEBUG
158#define DEBUG 1
159#define POSTCODE_DELAY 1
160#include <i386/postcode.h>
161#endif /* IWANTTODEBUG */
1c79356b 162
163//#define PMAP_TRACES 1
164#ifdef PMAP_TRACES
165boolean_t pmap_trace = FALSE;
166#define PMAP_TRACE(x,a,b,c,d,e) \
167 if (pmap_trace) { \
168 KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e); \
169 }
170#else
171#define PMAP_TRACE(x,a,b,c,d,e) KERNEL_DEBUG(x,a,b,c,d,e)
172#endif /* PMAP_TRACES */
173
174/*
175 * Forward declarations for internal functions.
176 */
177void pmap_expand_pml4(
178 pmap_t map,
179 vm_map_offset_t v);
180
181void pmap_expand_pdpt(
182 pmap_t map,
183 vm_map_offset_t v);
184
2d21ac55 185void pmap_remove_range(
1c79356b 186 pmap_t pmap,
0c530ab8 187 vm_map_offset_t va,
188 pt_entry_t *spte,
189 pt_entry_t *epte);
190
91447636 191void phys_attribute_clear(
2d21ac55 192 ppnum_t phys,
193 int bits);
194
195int phys_attribute_test(
196 ppnum_t phys,
197 int bits);
198
91447636 199void phys_attribute_set(
2d21ac55 200 ppnum_t phys,
201 int bits);
202
203void pmap_set_reference(
204 ppnum_t pn);
205
206void pmap_movepage(
207 unsigned long from,
208 unsigned long to,
209 vm_size_t size);
210
211boolean_t phys_page_exists(
212 ppnum_t pn);
1c79356b 213
2d21ac55 214
215#ifdef PMAP_DEBUG
216void dump_pmap(pmap_t);
217void dump_4GB_pdpt(pmap_t p);
218void dump_4GB_pdpt_thread(thread_t tp);
219#endif
1c79356b 220
221#define iswired(pte) ((pte) & INTEL_PTE_WIRED)
222
0c530ab8 223int nx_enabled = 1; /* enable no-execute protection */
224#ifdef CONFIG_EMBEDDED
225int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */
226#else
2d21ac55 227int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */
4a3eedf9 228#endif
2d21ac55 229int allow_stack_exec = 0; /* No apps may execute from the stack by default */
230
231int cpu_64bit = 0;
1c79356b 232
233/*
234 * when spinning through pmap_remove
235 * ensure that we don't spend too much
236 * time with preemption disabled.
237 * I'm setting the current threshold
238 * to 20us
239 */
240#define MAX_PREEMPTION_LATENCY_NS 20000
241
242uint64_t max_preemption_latency_tsc = 0;
243
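/*
 * Illustrative sketch (not part of the original file): how the 20us
 * threshold above is meant to be applied.  pmap_init() converts
 * MAX_PREEMPTION_LATENCY_NS into TSC ticks with tmrCvt(..., tscFCvtn2t);
 * a long-running loop such as pmap_remove() can then compare an elapsed
 * rdtsc64() delta against max_preemption_latency_tsc and briefly yield.
 * The helper name below is hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_preemption_budget_exceeded(uint64_t start_tsc)
{
	return ((rdtsc64() - start_tsc) > max_preemption_latency_tsc);
}
#endif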
55e303ae 244
245/*
246 * Private data structures.
247 */
248
249/*
250 * For each vm_page_t, there is a list of all currently
251 * valid virtual mappings of that page. An entry is
252 * a pv_rooted_entry_t; the list is the pv_table.
253 *
254 * N.B. with the new combo rooted/hashed scheme it is
255 * only possible to remove individual non-rooted entries
256 * if they are found via the hashed chains as there is no
257 * way to unlink the singly linked hashed entries if navigated to
258 * via the queue list off the rooted entries. Think of it as
259 * hash/walk/pull, keeping track of the prev pointer while walking
260 * the singly linked hash list. All of this is to save memory and
261 * keep both types of pv_entries as small as possible.
262 */
263
264/*
265
266PV HASHING Changes - JK 1/2007
267
268Pve's establish physical to virtual mappings. These are used for aliasing of a
269physical page to (potentially many) virtual addresses within pmaps. In the previous
270implementation the structure of the pv_entries (each 16 bytes in size) was
271
1c79356b 272typedef struct pv_entry {
273 struct pv_entry_t next;
274 pmap_t pmap;
275 vm_map_offset_t va;
276} *pv_entry_t;
277
278An initial array of these is created at boot time, one per physical page of memory,
279indexed by the physical page number. Additionally, a pool of entries is created from a
280pv_zone to be used as needed by pmap_enter() when it is creating new mappings.
281Originally, we kept this pool around because the code in pmap_enter() was unable to
282block if it needed an entry and none were available - we'd panic. Some time ago I
283restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing
284a pv structure and restart, removing a panic from the code (in the case of the kernel
285pmap we cannot block and still panic, so, we keep a separate hot pool for use only on
286kernel pmaps). The pool has not been removed since there is a large performance gain
287keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
288
289As pmap_enter() created new mappings it linked the new pve's for them off the fixed
290pv array for that ppn (off the next pointer). These pve's are accessed for several
291operations, one of them being address space teardown. In that case, we basically do this
292
293 for (every page/pte in the space) {
294 calc pve_ptr from the ppn in the pte
295 for (every pv in the list for the ppn) {
296 if (this pv is for this pmap/vaddr) {
297 do housekeeping
298 unlink/free the pv
299 }
300 }
301 }
302
303The problem arose when we were running, say 8000 (or even 2000) apache or other processes
304and one or all terminate. The list hanging off each pv array entry could have thousands of
305entries. We were continuously linearly searching each of these lists as we stepped through
306the address space we were tearing down. Because of the locks we hold, likely taking a cache
307miss for each node, and interrupt disabling for MP issues the system became completely
308unresponsive for many seconds while we did this.
309
310Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn
311for operations like pmap_page_protect and finding and modifying/removing a single pve as
312part of pmap_enter processing) has led to modifying the pve structures and databases.
313
314There are now two types of pve structures. A "rooted" structure which is basically the
315 original structure accessed in an array by ppn, and a "hashed" structure accessed on a
316hash list via a hash of [pmap, vaddr]. These have been designed with the two goals of
317minimizing wired memory and making the lookup of a ppn faster. Since a vast majority of
318pages in the system are not aliased and hence represented by a single pv entry I've kept
319the rooted entry size as small as possible because there is one of these dedicated for
320every physical page of memory. The hashed pve's are larger due to the addition of the hash
321link and the ppn entry needed for matching while running the hash list to find the entry we
322are looking for. This way, only systems that have lots of aliasing (like 2000+ httpd procs)
323will pay the extra memory price. Both structures have the same first three fields allowing
324some simplification in the code.
325
326They have these shapes
327
328typedef struct pv_rooted_entry {
329 queue_head_t qlink;
330 vm_map_offset_t va;
331 pmap_t pmap;
332} *pv_rooted_entry_t;
333
334
335typedef struct pv_hashed_entry {
336 queue_head_t qlink;
337 vm_map_offset_t va;
338 pmap_t pmap;
339 ppnum_t ppn;
340 struct pv_hashed_entry *nexth;
341} *pv_hashed_entry_t;
342
343The main flow difference is that the code is now aware of the rooted entry and the hashed
344entries. Code that runs the pv list still starts with the rooted entry and then continues
345down the qlink onto the hashed entries. Code that is looking up a specific pv entry first
346checks the rooted entry and then hashes and runs the hash list for the match. The hash list
347lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
1c79356b 348
349*/
350
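/*
 * Illustrative sketch (not part of the original file) of the lookup flow
 * described above: check the rooted entry for [pmap, vaddr] first, then
 * hash and walk the singly linked chain.  It relies on the pv structures
 * and the pvhash()/pvhashidx()/LOCK_PV_HASH() definitions that follow
 * below; the helper name is hypothetical and the real work is done inline
 * in pmap_enter() and friends.
 */
#if 0	/* example only */
static pv_hashed_entry_t
example_pv_lookup(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t ppn)
{
	pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_hashed_entry_t	pvh_e;
	int			pvhash_idx;

	if (pv_h->pmap == pmap && pv_h->va == vaddr)
		return ((pv_hashed_entry_t) pv_h);	/* unaliased page: rooted hit */

	pvhash_idx = pvhashidx(pmap, vaddr);
	LOCK_PV_HASH(pvhash_idx);
	for (pvh_e = *pvhash(pvhash_idx);
	     pvh_e != PV_HASHED_ENTRY_NULL;
	     pvh_e = pvh_e->nexth) {
		if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn)
			break;				/* found the aliased mapping */
	}
	UNLOCK_PV_HASH(pvhash_idx);
	return (pvh_e);
}
#endif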
351typedef struct pv_rooted_entry { /* first three entries must match pv_hashed_entry_t */
352 queue_head_t qlink;
353 vm_map_offset_t va; /* virtual address for mapping */
354 pmap_t pmap; /* pmap where mapping lies */
355} *pv_rooted_entry_t;
356
357#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0)
358
359pv_rooted_entry_t pv_head_table; /* array of entries, one per page */
360
361typedef struct pv_hashed_entry { /* first three entries must match pv_rooted_entry_t */
362 queue_head_t qlink;
363 vm_map_offset_t va;
364 pmap_t pmap;
365 ppnum_t ppn;
366 struct pv_hashed_entry *nexth;
367} *pv_hashed_entry_t;
368
369#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
370
371#define NPVHASH 4095 /* MUST BE 2^N - 1 */
372pv_hashed_entry_t *pv_hash_table; /* hash lists */
373
374uint32_t npvhash = 0;
375
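/*
 * Illustrative check (not part of the original file): npvhash must be of
 * the form (2^N)-1 so that pvhashidx() can mask rather than divide, e.g.
 * 4095 == 0x0FFF.  pmap_bootstrap() performs the same ((n+1) & n) test.
 */
#if 0	/* example only */
static boolean_t
example_npvhash_is_valid(uint32_t n)
{
	return (n != 0 && ((n + 1) & n) == 0);
}
#endif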
376/* #define PV_DEBUG 1 uncomment to enable some PV debugging code */
377#ifdef PV_DEBUG
378#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
379#else
380#define CHK_NPVHASH()
381#endif
382
383/*
384 * pv_list entries are kept on a list that can only be accessed
385 * with the pmap system locked (at SPLVM, not in the cpus_active set).
2d21ac55 386 * The list is refilled from the pv_hashed_list_zone if it becomes empty.
1c79356b 387 */
388pv_rooted_entry_t pv_free_list = PV_ROOTED_ENTRY_NULL; /* free list at SPLVM */
389pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
390pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
391decl_simple_lock_data(,pv_hashed_free_list_lock)
392decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
393decl_simple_lock_data(,pv_hash_table_lock)
394
91447636 395int pv_free_count = 0;
396int pv_hashed_free_count = 0;
397int pv_kern_free_count = 0;
398int pv_hashed_kern_free_count = 0;
399#define PV_HASHED_LOW_WATER_MARK 5000
400#define PV_HASHED_KERN_LOW_WATER_MARK 100
401#define PV_HASHED_ALLOC_CHUNK 2000
402#define PV_HASHED_KERN_ALLOC_CHUNK 50
403thread_call_t mapping_adjust_call;
404static thread_call_data_t mapping_adjust_call_data;
405uint32_t mappingrecurse = 0;
406
407#define PV_HASHED_ALLOC(pvh_e) { \
408 simple_lock(&pv_hashed_free_list_lock); \
409 if ((pvh_e = pv_hashed_free_list) != 0) { \
410 pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
411 pv_hashed_free_count--; \
412 if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
413 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
414 thread_call_enter(mapping_adjust_call); \
415 } \
416 simple_unlock(&pv_hashed_free_list_lock); \
417}
418
419#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \
420 simple_lock(&pv_hashed_free_list_lock); \
421 pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list; \
422 pv_hashed_free_list = pvh_eh; \
423 pv_hashed_free_count += pv_cnt; \
424 simple_unlock(&pv_hashed_free_list_lock); \
425}
426
427#define PV_HASHED_KERN_ALLOC(pvh_e) { \
428 simple_lock(&pv_hashed_kern_free_list_lock); \
429 if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
430 pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
431 pv_hashed_kern_free_count--; \
432 if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
0c530ab8 433 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
91447636 434 thread_call_enter(mapping_adjust_call); \
1c79356b 435 } \
2d21ac55 436 simple_unlock(&pv_hashed_kern_free_list_lock); \
437}
438
439#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \
440 simple_lock(&pv_hashed_kern_free_list_lock); \
441 pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list; \
442 pv_hashed_kern_free_list = pvh_eh; \
443 pv_hashed_kern_free_count += pv_cnt; \
444 simple_unlock(&pv_hashed_kern_free_list_lock); \
445}
446
2d21ac55 447zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */
1c79356b 448
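/*
 * Illustrative sketch (not part of the original file) of how a consumer
 * such as pmap_enter() is expected to use the macros above: try the free
 * list first and fall back to zalloc() from pv_hashed_list_zone (user
 * pmaps only, since zalloc may block).  The helper name is hypothetical.
 */
#if 0	/* example only */
static pv_hashed_entry_t
example_pvh_alloc(void)
{
	pv_hashed_entry_t pvh_e;

	PV_HASHED_ALLOC(pvh_e);
	if (pvh_e == PV_HASHED_ENTRY_NULL)
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
	return (pvh_e);
}
#endif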
91447636 449static zone_t pdpt_zone;
91447636 450
451/*
452 * Each entry in the pv_head_table is locked by a bit in the
453 * pv_lock_table. The lock bits are accessed by the physical
454 * address of the page they lock.
455 */
456
457char *pv_lock_table; /* pointer to array of bits */
458#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
459
460char *pv_hash_lock_table;
461#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
462
463/*
464 * First and last physical addresses that we maintain any information
465 * for. Initialized to zero so that pmap operations done before
466 * pmap_init won't touch any non-existent structures.
467 */
468boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
469
470static struct vm_object kptobj_object_store;
471static vm_object_t kptobj;
91447636 472
1c79356b 473/*
2d21ac55 474 * Index into pv_head table, its lock bits, and the modify/reference and managed bits
475 */
476
477#define pa_index(pa) (i386_btop(pa))
478#define ppn_to_pai(ppn) ((int)ppn)
479
480#define pai_to_pvh(pai) (&pv_head_table[pai])
481#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
482#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
483
484#define pvhashidx(pmap, va) (((uint32_t)pmap ^ ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash)
485#define pvhash(idx) (&pv_hash_table[idx])
486
487#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table)
488#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
489
490/*
491 * Array of physical page attributes for managed pages.
492 * One byte per physical page.
493 */
494char *pmap_phys_attributes;
2d21ac55 495unsigned int last_managed_page = 0;
496
497/*
498 * Physical page attributes. Copy bits from PTE definition.
499 */
500#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
501#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
2d21ac55 502#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
503
504/*
505 * Amount of virtual memory mapped by one
506 * page-directory entry.
507 */
508#define PDE_MAPPED_SIZE (pdetova(1))
0c530ab8 509uint64_t pde_mapped_size;
1c79356b 510
511/*
512 * Locking and TLB invalidation
513 */
514
515/*
2d21ac55 516 * Locking Protocols: (changed 2/2007 JK)
517 *
518 * There are two structures in the pmap module that need locking:
519 * the pmaps themselves, and the per-page pv_lists (which are locked
520 * by locking the pv_lock_table entry that corresponds to the pv_head
521 * for the list in question.) Most routines want to lock a pmap and
522 * then do operations in it that require pv_list locking -- however
523 * pmap_remove_all and pmap_copy_on_write operate on a physical page
524 * basis and want to do the locking in the reverse order, i.e. lock
525 * a pv_list and then go through all the pmaps referenced by that list.
1c79356b 526 *
527 * The system wide pmap lock has been removed. Now, paths take a lock
528 * on the pmap before changing its 'shape' and the reverse order lockers
529 * (coming in by phys ppn) take a lock on the corresponding pv and then
530 * retest to be sure nothing changed during the window before they locked
531 * and can then run up/down the pv lists holding the list lock. This also
532 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
533 * previously.
1c79356b 534 */
1c79356b 535
1c79356b 536/*
2d21ac55 537 * pmap locking
1c79356b 538 */
1c79356b 539
2d21ac55 540#define PMAP_LOCK(pmap) { \
541 simple_lock(&(pmap)->lock); \
542}
543
2d21ac55 544#define PMAP_UNLOCK(pmap) { \
1c79356b 545 simple_unlock(&(pmap)->lock); \
546}
547
548/*
549 * PV locking
550 */
551
552#define LOCK_PVH(index) { \
553 mp_disable_preemption(); \
554 lock_pvh_pai(index); \
555}
556
557#define UNLOCK_PVH(index) { \
558 unlock_pvh_pai(index); \
559 mp_enable_preemption(); \
560}
561
562/*
563 * PV hash locking
564 */
1c79356b 565
566#define LOCK_PV_HASH(hash) lock_hash_hash(hash)
567
568#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
1c79356b 569
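/*
 * Illustrative sketch (not part of the original file) of the two lock
 * orders described in the locking-protocol comment above: the usual path
 * takes the pmap lock and then the pv_head lock; reverse-order callers
 * (arriving by physical page) take the pv lock first and must re-check
 * the mapping before trusting it.  The helper and its arguments are
 * hypothetical.
 */
#if 0	/* example only */
static void
example_lock_orders(pmap_t pmap, int pai)
{
	/* forward order: pmap, then pv */
	PMAP_LOCK(pmap);
	LOCK_PVH(pai);
	/* ... modify the mapping ... */
	UNLOCK_PVH(pai);
	PMAP_UNLOCK(pmap);

	/* reverse order: pv first, then re-validate during the window */
	LOCK_PVH(pai);
	if (pv_head_table[pai].pmap == pmap) {
		/* mapping unchanged; safe to walk the pv list */
	}
	UNLOCK_PVH(pai);
}
#endif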
570#if USLOCK_DEBUG
571extern int max_lock_loops;
572#define LOOP_VAR \
573 unsigned int loop_count; \
2d21ac55 574 loop_count = disable_serial_output ? max_lock_loops \
91447636 575 : max_lock_loops*100
55e303ae 576#define LOOP_CHECK(msg, pmap) \
91447636 577 if (--loop_count == 0) { \
55e303ae 578 mp_disable_preemption(); \
579 kprintf("%s: cpu %d pmap %x\n", \
580 msg, cpu_number(), pmap); \
581 Debugger("deadlock detection"); \
582 mp_enable_preemption(); \
91447636 583 loop_count = max_lock_loops; \
584 }
585#else /* USLOCK_DEBUG */
586#define LOOP_VAR
587#define LOOP_CHECK(msg, pmap)
588#endif /* USLOCK_DEBUG */
1c79356b 589
21362eb3 590
0c530ab8 591static void pmap_flush_tlbs(pmap_t pmap);
21362eb3 592
593#define PMAP_UPDATE_TLBS(pmap, s, e) \
594 pmap_flush_tlbs(pmap)
6601e61a 595
6601e61a 596
0c530ab8 597#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
21362eb3 598
1c79356b 599
91447636 600pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
601
602/*
603 * Other useful macros.
604 */
91447636 605#define current_pmap() (vm_map_pmap(current_thread()->map))
606
607struct pmap kernel_pmap_store;
608pmap_t kernel_pmap;
609
610pd_entry_t high_shared_pde;
611pd_entry_t commpage64_pde;
91447636 612
613struct zone *pmap_zone; /* zone of pmap structures */
614
615int pmap_debug = 0; /* flag for debugging prints */
91447636 616
2d21ac55 617unsigned int inuse_ptepages_count = 0;
1c79356b 618
619addr64_t kernel64_cr3;
620boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */
621
622/*
623 * Pmap cache. Cache is threaded through ref_count field of pmap.
624 * Max will eventually be constant -- variable for experimentation.
625 */
626int pmap_cache_max = 32;
627int pmap_alloc_chunk = 8;
628pmap_t pmap_cache_list;
629int pmap_cache_count;
630decl_simple_lock_data(,pmap_cache_lock)
631
632extern char end;
633
91447636 634static int nkpt;
0c530ab8 635extern uint32_t lowGlo;
636
637pt_entry_t *DMAP1, *DMAP2;
638caddr_t DADDR1;
639caddr_t DADDR2;
1c79356b 640
641static inline
642void pmap_pvh_unlink(pv_hashed_entry_t pv);
643
644/*
645 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
646 * properly deals with the anchor.
647 * must be called with the hash locked, does not unlock it
648 */
649
650static inline
651void pmap_pvh_unlink(pv_hashed_entry_t pvh)
652{
653 pv_hashed_entry_t curh;
654 pv_hashed_entry_t *pprevh;
655 int pvhash_idx;
656
657 CHK_NPVHASH();
658 pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
1c79356b 659
660 pprevh = pvhash(pvhash_idx);
661
662#if PV_DEBUG
663 if (NULL == *pprevh) panic("pvh_unlink null anchor"); /* JK DEBUG */
664#endif
665 curh = *pprevh;
666
667 while (PV_HASHED_ENTRY_NULL != curh) {
668 if (pvh == curh)
669 break;
670 pprevh = &curh->nexth;
671 curh = curh->nexth;
672 }
673 if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
674 *pprevh = pvh->nexth;
675 return;
676}
1c79356b 677
678/*
679 * for legacy, returns the address of the pde entry.
680 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
681 * then returns the mapped address of the pde entry in that page
682 */
683pd_entry_t *
684pmap_pde(pmap_t m, vm_map_offset_t v)
4452a7af 685{
686 pd_entry_t *pde;
687 if (!cpu_64bit || (m == kernel_pmap)) {
688 pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
689 } else {
690 assert(m);
691 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
692 pde = pmap64_pde(m, v);
693 }
694 return pde;
695}
696
89b3af67 697
4452a7af 698/*
699 * the single pml4 page per pmap is allocated at pmap create time and exists
700 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
701 * level of page table dynamic mapping).
702 * this returns the address of the requested pml4 entry in the top level page.
4452a7af 703 */
704static inline
705pml4_entry_t *
706pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
707{
708 return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
709}
710
711/*
712 * maps in the pml4 page, if any, containing the pdpt entry requested
713 * and returns the address of the pdpt entry in that mapped page
714 */
715pdpt_entry_t *
716pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
717{
718 pml4_entry_t newpf;
719 pml4_entry_t *pml4;
720 int i;
721
722 assert(pmap);
723 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
724 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
725 return(0);
4452a7af 726 }
727
728 pml4 = pmap64_pml4(pmap, vaddr);
729
730 if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
731
732 newpf = *pml4 & PG_FRAME;
733
734
735 for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
736 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
737 return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
738 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
739 }
740 }
741
742 current_cpu_datap()->cpu_pmap->pdpt_window_index++;
743 if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
744 current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
745 pmap_store_pte(
746 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
747 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
748 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
749 return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
750 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
751 }
752
2d21ac55 753 return (NULL);
754}
755
756/*
757 * maps in the pdpt page, if any, containing the pde entry requested
758 * and returns the address of the pde entry in that mapped page
759 */
760pd_entry_t *
761pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
4452a7af 762{
763 pdpt_entry_t newpf;
764 pdpt_entry_t *pdpt;
765 int i;
4452a7af 766
767 assert(pmap);
768 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
769 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
770 return(0);
771 }
772
773 /* if (vaddr & (1ULL << 63)) panic("neg addr");*/
774 pdpt = pmap64_pdpt(pmap, vaddr);
775
776 if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
777
778 newpf = *pdpt & PG_FRAME;
779
780 for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
781 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
782 return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
783 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
784 }
4452a7af 785 }
786
787 current_cpu_datap()->cpu_pmap->pde_window_index++;
788 if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
789 current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
790 pmap_store_pte(
791 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
792 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
793 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
794 return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
795 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
21362eb3 796 }
4452a7af 797
2d21ac55 798 return (NULL);
799}
800
801/*
802 * Because the page tables (top 3 levels) are mapped into per cpu windows,
803 * callers must either disable interrupts or disable preemption before calling
804 * one of the pte mapping routines (e.g. pmap_pte()) as the returned vaddr
805 * is in one of those mapped windows and that cannot be allowed to change until
806 * the caller is done using the returned pte pointer. When done, the caller
807 * restores interrupts or preemption to its previous state after which point the
808 * vaddr for the returned pte can no longer be used
809 */
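/*
 * Illustrative sketch (not part of the original file) of the rule above:
 * keep preemption (or interrupts) disabled for as long as the pointer
 * returned by pmap_pte() is dereferenced, since it may point into one of
 * this cpu's mapping windows.  The helper name is hypothetical.
 */
#if 0	/* example only */
static pt_entry_t
example_read_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pt_entry_t	*ptep;
	pt_entry_t	pte = 0;

	mp_disable_preemption();
	ptep = pmap_pte(pmap, vaddr);
	if (ptep != PT_ENTRY_NULL)
		pte = *ptep;	/* window stays valid while preemption is off */
	mp_enable_preemption();
	return (pte);
}
#endif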
810
811
812/*
813 * return address of mapped pte for vaddr va in pmap pmap.
814 * must be called with pre-emption or interrupts disabled
815 * if targeted pmap is not the kernel pmap
816 * since we may be passing back a virtual address that is
817 * associated with this cpu... pre-emption or interrupts
818 * must remain disabled until the caller is done using
819 * the pointer that was passed back .
820 *
821 * maps the pde page, if any, containing the pte in and returns
822 * the address of the pte in that mapped page
823 */
824pt_entry_t *
825pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
826{
827 pd_entry_t *pde;
828 pd_entry_t newpf;
829 int i;
830
831 assert(pmap);
832 pde = pmap_pde(pmap,vaddr);
833
834 if (pde && ((*pde & INTEL_PTE_VALID))) {
835 if (pmap == kernel_pmap)
836 return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
837#if TESTING
838 if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
839 panic("pmap_pte: unsafe call");
840#endif
841 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
842
843 newpf = *pde & PG_FRAME;
844
845 for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
846 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
847 return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
848 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
849 }
850 }
851
852 current_cpu_datap()->cpu_pmap->pte_window_index++;
853 if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
854 current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
855 pmap_store_pte(
856 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
857 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
858 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
859 return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
860 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
6601e61a 861 }
0c530ab8 862
2d21ac55 863 return(NULL);
1c79356b 864}
2d21ac55 865
866
867/*
868 * Map memory at initialization. The physical addresses being
869 * mapped are not managed and are never unmapped.
870 *
871 * For now, VM is already on, we only need to map the
872 * specified memory.
873 */
874vm_offset_t
875pmap_map(
876 vm_offset_t virt,
877 vm_map_offset_t start_addr,
878 vm_map_offset_t end_addr,
879 vm_prot_t prot,
880 unsigned int flags)
1c79356b 881{
0c530ab8 882 int ps;
883
884 ps = PAGE_SIZE;
91447636 885 while (start_addr < end_addr) {
886 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
887 (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
1c79356b 888 virt += ps;
91447636 889 start_addr += ps;
890 }
891 return(virt);
892}
893
894/*
895 * Back-door routine for mapping kernel VM at initialization.
896 * Useful for mapping memory outside the managed physical range.
897 * Sets no-cache, A, D.
898 * Otherwise like pmap_map.
899 */
900vm_offset_t
901pmap_map_bd(
902 vm_offset_t virt,
903 vm_map_offset_t start_addr,
904 vm_map_offset_t end_addr,
905 vm_prot_t prot,
906 unsigned int flags)
1c79356b 907{
908 pt_entry_t template;
909 pt_entry_t *pte;
2d21ac55 910 spl_t spl;
1c79356b 911
91447636 912 template = pa_to_pte(start_addr)
913 | INTEL_PTE_REF
914 | INTEL_PTE_MOD
915 | INTEL_PTE_WIRED
916 | INTEL_PTE_VALID;
917
918 if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
919 template |= INTEL_PTE_NCACHE;
920 if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
921 template |= INTEL_PTE_PTA;
922 }
923
924 if (prot & VM_PROT_WRITE)
925 template |= INTEL_PTE_WRITE;
926
91447636 927 while (start_addr < end_addr) {
2d21ac55 928 spl = splhigh();
0c530ab8 929 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
91447636 930 if (pte == PT_ENTRY_NULL) {
1c79356b 931 panic("pmap_map_bd: Invalid kernel address\n");
91447636 932 }
0c530ab8 933 pmap_store_pte(pte, template);
2d21ac55 934 splx(spl);
935 pte_increment_pa(template);
936 virt += PAGE_SIZE;
91447636 937 start_addr += PAGE_SIZE;
938 }
939
55e303ae 940 flush_tlb();
941 return(virt);
942}
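/*
 * Illustrative sketch (not part of the original file) contrasting the two
 * boot-time mapping helpers above: pmap_map() goes through pmap_enter()
 * for ordinary memory, while pmap_map_bd() writes the PTEs directly and
 * suits ranges outside managed memory (e.g. a device aperture, mapped
 * non-cacheable).  All addresses below are placeholders.
 */
#if 0	/* example only */
static void
example_boot_mappings(vm_offset_t va)
{
	/* ordinary wired mapping of a physical range */
	va = pmap_map(va, 0x100000, 0x200000,
		      VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT);

	/* uncached back-door mapping, e.g. for a device aperture */
	va = pmap_map_bd(va, 0xFEC00000, 0xFEC01000,
			 VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}
#endif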
943
944extern char *first_avail;
945extern vm_offset_t virtual_avail, virtual_end;
946extern pmap_paddr_t avail_start, avail_end;
947extern vm_offset_t etext;
948extern void *sectHIBB;
949extern int sectSizeHIB;
1c79356b 950
951void
952pmap_cpu_init(void)
953{
954 /*
955 * Here early in the life of a processor (from cpu_mode_init()).
956 * If we're not in 64-bit mode, enable the global TLB feature.
957 * Note: regardless of mode we continue to set the global attribute
958 * bit in ptes for all (32-bit) global pages such as the commpage.
959 */
960 if (!cpu_64bit) {
961 set_cr4(get_cr4() | CR4_PGE);
962 }
963
964 /*
965 * Initialize the per-cpu, TLB-related fields.
966 */
967 current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
968 current_cpu_datap()->cpu_tlb_invalid = FALSE;
969}
970
971vm_offset_t
972pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
973{
974 vm_offset_t ve = pmap_index_to_virt(e);
975 pt_entry_t *ptep;
976 pmap_paddr_t pa;
977 int i;
2d21ac55 978 spl_t s;
979
980 assert(0 == (va & PAGE_MASK)); /* expecting page aligned */
2d21ac55 981 s = splhigh();
982 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);
983
984 for (i=0; i< sz; i++) {
985 pa = (pmap_paddr_t) kvtophys(va);
986 pmap_store_pte(ptep, (pa & PG_FRAME)
987 | INTEL_PTE_VALID
988 | INTEL_PTE_GLOBAL
989 | INTEL_PTE_RW
990 | INTEL_PTE_REF
991 | INTEL_PTE_MOD);
992 va+= PAGE_SIZE;
993 ptep++;
994 }
2d21ac55 995 splx(s);
996 return ve;
997}
998
999vm_offset_t
1000pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
1001{
1002 enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
1003 return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
1004}
1005
1006void pmap_init_high_shared(void);
1007
1008extern vm_offset_t gdtptr, idtptr;
1009
1010extern uint32_t low_intstack;
1011
1012extern struct fake_descriptor ldt_desc_pattern;
1013extern struct fake_descriptor tss_desc_pattern;
1014
1015extern char hi_remap_text, hi_remap_etext;
1016extern char t_zero_div;
1017
1018pt_entry_t *pte_unique_base;
1019
1020void
1021pmap_init_high_shared(void)
1022{
1023
1024 vm_offset_t haddr;
1025 struct __gdt_desc_struct gdt_desc = {0,0,0};
1026 struct __idt_desc_struct idt_desc = {0,0,0};
2d21ac55 1027 spl_t s;
1028#if MACH_KDB
1029 struct i386_tss *ttss;
1030#endif
1031
1032 kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
1033 HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
2d21ac55 1034 s = splhigh();
0c530ab8 1035 pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
2d21ac55 1036 splx(s);
1037
1038 if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
1039 HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
1040 panic("tramps too large");
1041 haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
1042 (vm_offset_t) &hi_remap_text, 3);
1043 kprintf("tramp: 0x%x, ",haddr);
1044 printf("hi mem tramps at 0x%x\n",haddr);
1045 /* map gdt up high and update ptr for reload */
1046 haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
1047 (vm_offset_t) master_gdt, 1);
1048 __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
1049 gdt_desc.address = haddr;
1050 kprintf("GDT: 0x%x, ",haddr);
1051 /* map ldt up high */
1052 haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
1053 (vm_offset_t) master_ldt,
1054 HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
1055 kprintf("LDT: 0x%x, ",haddr);
1056 /* put new ldt addr into gdt */
1057 master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern;
1058 master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr;
1059 fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1);
1060 master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern;
1061 master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr;
1062 fix_desc(&master_gdt[sel_idx(USER_LDT)], 1);
1063
1064 /* map idt up high */
1065 haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
1066 (vm_offset_t) master_idt, 1);
1067 __asm__ __volatile__("sidt %0" : "=m" (idt_desc));
1068 idt_desc.address = haddr;
1069 kprintf("IDT: 0x%x, ", haddr);
1070 /* remap ktss up high and put new high addr into gdt */
1071 haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
1072 (vm_offset_t) &master_ktss, 1);
1073 master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern;
1074 master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr;
1075 fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1);
1076 kprintf("KTSS: 0x%x, ",haddr);
1077#if MACH_KDB
1078 /* remap dbtss up high and put new high addr into gdt */
1079 haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
1080 (vm_offset_t) &master_dbtss, 1);
1081 master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
1082 master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr;
1083 fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1);
1084 ttss = (struct i386_tss *)haddr;
1085 kprintf("DBTSS: 0x%x, ",haddr);
1086#endif /* MACH_KDB */
1087
1088 /* remap dftss up high and put new high addr into gdt */
1089 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
1090 (vm_offset_t) &master_dftss, 1);
1091 master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern;
1092 master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr;
1093 fix_desc(&master_gdt[sel_idx(DF_TSS)], 1);
1094 kprintf("DFTSS: 0x%x\n",haddr);
1095
1096 /* remap mctss up high and put new high addr into gdt */
1097 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
1098 (vm_offset_t) &master_mctss, 1);
1099 master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern;
1100 master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr;
1101 fix_desc(&master_gdt[sel_idx(MC_TSS)], 1);
1102 kprintf("MCTSS: 0x%x\n",haddr);
1103
1104 __asm__ __volatile__("lgdt %0": "=m" (gdt_desc));
1105 __asm__ __volatile__("lidt %0": "=m" (idt_desc));
1106 kprintf("gdt/idt reloaded, ");
1107 set_tr(KERNEL_TSS);
1108 kprintf("tr reset to KERNEL_TSS\n");
1109}
1110
1111
1112/*
1113 * Bootstrap the system enough to run with virtual memory.
1114 * Map the kernel's code and data, and allocate the system page table.
1115 * Called with mapping OFF. Page_size must already be set.
1116 *
1117 * Parameters:
1118 * load_start: PA where kernel was loaded
1119 * avail_start PA of first available physical page -
1120 * after kernel page tables
1121 * avail_end PA of last available physical page
1122 * virtual_avail VA of first available page -
1123 * after kernel page tables
1124 * virtual_end VA of last available page -
1125 * end of kernel address space
1126 *
1127 * &start_text start of kernel text
1128 * &etext end of kernel text
1129 */
1130
1131void
1132pmap_bootstrap(
1133 __unused vm_offset_t load_start,
1134 boolean_t IA32e)
1c79356b 1135{
1136 vm_offset_t va;
1137 pt_entry_t *pte;
1138 int i;
1139 int wpkernel, boot_arg;
0c530ab8 1140 pdpt_entry_t *pdpt;
2d21ac55 1141 spl_t s;
1c79356b 1142
1143 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
1144 * known to VM */
1145 /*
1146 * The kernel's pmap is statically allocated so we don't
1147 * have to use pmap_create, which is unlikely to work
1148 * correctly at this part of the boot sequence.
1149 */
1150
0c530ab8 1151
1c79356b 1152 kernel_pmap = &kernel_pmap_store;
91447636 1153 kernel_pmap->ref_count = 1;
0c530ab8 1154 kernel_pmap->nx_enabled = FALSE;
2d21ac55 1155 kernel_pmap->pm_task_map = TASK_MAP_32BIT;
1156 kernel_pmap->pm_obj = (vm_object_t) NULL;
1157 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
1158 kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
1159 pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
1160 kernel_pmap->pm_pdpt = pdpt;
1161 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);
1c79356b 1162
1163 va = (vm_offset_t)kernel_pmap->dirbase;
1164 /* setup self referential mapping(s) */
0c530ab8 1165 for (i = 0; i< NPGPTD; i++, pdpt++) {
1166 pmap_paddr_t pa;
1167 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1168 pmap_store_pte(
1169 (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
91447636 1170 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
1171 INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
1172 pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
91447636 1173 }
1c79356b 1174
1175 cpu_64bit = IA32e;
1176
1177 lo_kernel_cr3 = kernel_pmap->pm_cr3;
1178 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
1179
1180 /* save the value we stuff into created pmaps to share the gdts etc */
1181 high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
1182 /* make sure G bit is on for high shared pde entry */
1183 high_shared_pde |= INTEL_PTE_GLOBAL;
2d21ac55 1184 s = splhigh();
0c530ab8 1185 pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
2d21ac55 1186 splx(s);
0c530ab8 1187
91447636 1188 nkpt = NKPT;
0c530ab8 1189 inuse_ptepages_count += NKPT;
1c79356b 1190
1191 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
1192 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
1193
1194 /*
1195 * Reserve some special page table entries/VA space for temporary
1196 * mapping of pages.
1c79356b 1197 */
91447636 1198#define SYSMAP(c, p, v, n) \
0c530ab8 1199 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
1200
1201 va = virtual_avail;
0c530ab8 1202 pte = vtopte(va);
6601e61a 1203
1204 for (i=0; i<PMAP_NWINDOWS; i++) {
1205 SYSMAP(caddr_t,
1206 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
1207 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
1208 1);
1209 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
1210 }
1c79356b 1211
1212 /* DMAP user for debugger */
1213 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
1214 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
1c79356b 1215
91447636 1216 virtual_avail = va;
1c79356b 1217
593a1d5f 1218 if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
1219 if (0 != ((npvhash+1) & npvhash)) {
1220 kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
1221 npvhash = NPVHASH;
1222 }
1223 } else {
1224 npvhash = NPVHASH;
1225 }
1226 printf("npvhash=%d\n",npvhash);
1227
91447636 1228 wpkernel = 1;
593a1d5f 1229 if (PE_parse_boot_argn("wpkernel", &boot_arg, sizeof (boot_arg))) {
1230 if (boot_arg == 0)
1231 wpkernel = 0;
1232 }
1233
1234 s = splhigh();
1235
0c530ab8 1236 /* Remap kernel text readonly unless the "wpkernel" boot-arg is present
2d21ac55 1237 * and set to 0.
0c530ab8 1238 */
1239 if (wpkernel)
1240 {
1241 vm_offset_t myva;
1242 pt_entry_t *ptep;
1243
0c530ab8 1244 for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
1245 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
1246 continue;
0c530ab8 1247 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
91447636 1248 if (ptep)
0c530ab8 1249 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
91447636 1250 }
91447636 1251 }
1c79356b 1252
1253 /* no matter what, kernel page zero is not accessible */
1254 pte = pmap_pte(kernel_pmap, 0);
1255 pmap_store_pte(pte, INTEL_PTE_INVALID);
1256
1257 /* map lowmem global page into fixed addr 0x2000 */
1258 if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte");
2d21ac55 1259 assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); /* make sure it is defined on page boundary */
0c530ab8 1260 pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW);
2d21ac55 1261 splx(s);
1262 flush_tlb();
1263
91447636 1264 simple_lock_init(&kernel_pmap->lock, 0);
1265 simple_lock_init(&pv_hashed_free_list_lock, 0);
1266 simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
1267 simple_lock_init(&pv_hash_table_lock,0);
1c79356b 1268
2d21ac55 1269 pmap_init_high_shared();
1270
1271 pde_mapped_size = PDE_MAPPED_SIZE;
1272
1273 if (cpu_64bit) {
1274 pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT;
1275 pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
1276 pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
1277 int istate = ml_set_interrupts_enabled(FALSE);
1278
1279 /*
1280 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
1281 * with page bits set for the correct IA-32e operation and so that
1282 * the legacy-mode IdlePDPT is retained for slave processor start-up.
1283 * This is necessary due to the incompatible use of page bits between
1284 * 64-bit and legacy modes.
1285 */
1286 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
1287 kernel_pmap->pm_pml4 = IdlePML4;
1288 kernel_pmap->pm_pdpt = (pd_entry_t *)
1289 ((unsigned int)IdlePDPT64 | KERNBASE );
1290#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
1291 pmap_store_pte(kernel_pmap->pm_pml4,
1292 (uint32_t)IdlePDPT64 | PAGE_BITS);
1293 pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
1294 pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
1295 pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
1296 pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);
1297
1298 /*
1299 * The kernel is also mapped in the uber-space at the 4GB starting
1300 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
1301 */
1302 pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));
1303
1304 kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
0c530ab8 1305
2d21ac55 1306 /* Re-initialize descriptors and prepare to switch modes */
0c530ab8 1307 cpu_desc_init64(&cpu_data_master, TRUE);
1308 current_cpu_datap()->cpu_is64bit = TRUE;
1309 current_cpu_datap()->cpu_active_cr3 = kernel64_cr3;
1310
1311 pde_mapped_size = 512*4096 ;
1312
1313 ml_set_interrupts_enabled(istate);
0c530ab8 1314 }
1315
1316 /* Set 64-bit mode if required. */
1317 cpu_mode_init(&cpu_data_master);
1318
0c530ab8 1319 kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;
1c79356b 1320
1321 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
1322 VADDR(KPTDI,0), virtual_end);
6601e61a 1323 printf("PAE enabled\n");
1324 if (cpu_64bit){
1325 printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
1326
1327 kprintf("Available physical space from 0x%llx to 0x%llx\n",
6601e61a 1328 avail_start, avail_end);
1329
1330 /*
1331 * By default for 64-bit users loaded at 4GB, share kernel mapping.
1332 * But this may be overridden by the -no_shared_cr3 boot-arg.
1333 */
593a1d5f 1334 if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
0c530ab8 1335 kprintf("Shared kernel address space disabled\n");
1336 }
1337
1338#ifdef PMAP_TRACES
593a1d5f 1339 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
1340 kprintf("Kernel traces for pmap operations enabled\n");
1341 }
1342#endif /* PMAP_TRACES */
1343}
1344
1345void
1346pmap_virtual_space(
1347 vm_offset_t *startp,
1348 vm_offset_t *endp)
1349{
1350 *startp = virtual_avail;
1351 *endp = virtual_end;
1352}
1353
1354/*
1355 * Initialize the pmap module.
1356 * Called by vm_init, to initialize any structures that the pmap
1357 * system needs to map virtual memory.
1358 */
1359void
1360pmap_init(void)
1361{
1362 register long npages;
1363 vm_offset_t addr;
1364 register vm_size_t s;
0c530ab8 1365 vm_map_offset_t vaddr;
2d21ac55 1366 ppnum_t ppn;
1367
1368 /*
1369 * Allocate memory for the pv_head_table and its lock bits,
1370 * the modify bit array, and the pte_page table.
1371 */
1372
1373 /*
1374 * zero bias all these arrays now instead of off avail_start
1375 * so we cover all memory
1376 */
1377
91447636 1378 npages = i386_btop(avail_end);
1379 s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
1380 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
1381 + pv_lock_table_size(npages)
1382 + pv_hash_lock_table_size((npvhash+1))
1383 + npages);
1384
1385 s = round_page(s);
1386 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
1387 panic("pmap_init");
1388
1389 memset((char *)addr, 0, s);
1390
1391#if PV_DEBUG
1392 if (0 == npvhash) panic("npvhash not initialized");
1393#endif
1394
1395 /*
1396 * Allocate the structures first to preserve word-alignment.
1397 */
2d21ac55 1398 pv_head_table = (pv_rooted_entry_t) addr;
1399 addr = (vm_offset_t) (pv_head_table + npages);
1400
1401 pv_hash_table = (pv_hashed_entry_t *)addr;
1402 addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));
1403
1404 pv_lock_table = (char *) addr;
1405 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1406
1407 pv_hash_lock_table = (char *) addr;
1408 addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));
1409
1c79356b 1410 pmap_phys_attributes = (char *) addr;
1411 {
1412 unsigned int i;
1413 unsigned int pn;
1414 ppnum_t last_pn;
1415 pmap_memory_region_t *pmptr = pmap_memory_regions;
1416
1417 last_pn = i386_btop(avail_end);
1418
1419 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
1420 if (pmptr->type == kEfiConventionalMemory) {
1421 for (pn = pmptr->base; pn <= pmptr->end; pn++) {
1422 if (pn < last_pn) {
1423 pmap_phys_attributes[pn] |= PHYS_MANAGED;
1424
1425 if (pn > last_managed_page)
1426 last_managed_page = pn;
1427 }
1428 }
1429 }
1430 }
1431 }
1432
1433 /*
1434 * Create the zone of physical maps,
1435 * and of the physical-to-virtual entries.
1436 */
1437 s = (vm_size_t) sizeof(struct pmap);
1438 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
1439 s = (vm_size_t) sizeof(struct pv_hashed_entry);
1440 pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
1441 s = 63;
1442 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
55e303ae 1443
91447636 1444 kptobj = &kptobj_object_store;
2d21ac55 1445 _vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj);
91447636 1446 kernel_pmap->pm_obj = kptobj;
1447
1448 /* create pv entries for kernel pages mapped by low level
1449 startup code. these have to exist so we can pmap_remove()
1450 e.g. kext pages from the middle of our addr space */
1451
0c530ab8 1452 vaddr = (vm_map_offset_t)0;
91447636 1453 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
2d21ac55 1454 pv_rooted_entry_t pv_e;
1455
1456 pv_e = pai_to_pvh(ppn);
1457 pv_e->va = vaddr;
1458 vaddr += PAGE_SIZE;
1459 pv_e->pmap = kernel_pmap;
2d21ac55 1460 queue_init(&pv_e->qlink);
1461 }
1462
1463 pmap_initialized = TRUE;
1464
1465 /*
2d21ac55 1466 * Initialize pmap cache.
1467 */
1468 pmap_cache_list = PMAP_NULL;
1469 pmap_cache_count = 0;
91447636 1470 simple_lock_init(&pmap_cache_lock, 0);
1471
1472 max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
1473
1474}
1475
1476void
1477x86_lowmem_free(void)
1478{
1479 /* free lowmem pages back to the vm system. we had to defer doing this
1480 until the vm system was fully up.
1481 the actual pages that are released are determined by which
1482 pages the memory sizing code puts into the region table */
1c79356b 1483
0c530ab8 1484 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base),
1485 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
1486}
1487
1488
2d21ac55 1489#define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )
1c79356b 1490
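/*
 * Illustrative sketch (not part of the original file): the attribute byte
 * for a page only means something once the page is managed.  The real
 * work is done by phys_attribute_test() and friends declared above; this
 * helper name is hypothetical and ignores bits still live in the PTEs.
 */
#if 0	/* example only */
static boolean_t
example_page_is_dirty(ppnum_t pn)
{
	int pai = ppn_to_pai(pn);

	if (!managed_page(pai))
		return (FALSE);
	return ((pmap_phys_attributes[pai] & PHYS_MODIFIED) ? TRUE : FALSE);
}
#endif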
1491/*
1492 * this function is only used for debugging from the vm layer
1493 */
1494boolean_t
1495pmap_verify_free(
55e303ae 1496 ppnum_t pn)
1c79356b 1497{
2d21ac55 1498 pv_rooted_entry_t pv_h;
1c79356b 1499 int pai;
1500 boolean_t result;
1501
55e303ae 1502 assert(pn != vm_page_fictitious_addr);
2d21ac55 1503
1504 if (!pmap_initialized)
1505 return(TRUE);
1506
1507 if (pn == vm_page_guard_addr)
1508 return TRUE;
1c79356b 1509
1510 pai = ppn_to_pai(pn);
1511 if (!managed_page(pai))
1512 return(FALSE);
1513 pv_h = pai_to_pvh(pn);
1514 result = (pv_h->pmap == PMAP_NULL);
1515 return(result);
1516}
1c79356b 1517
1518boolean_t
1519pmap_is_empty(
1520 pmap_t pmap,
1521 vm_map_offset_t vstart,
1522 vm_map_offset_t vend)
1523{
1524 vm_map_offset_t offset;
1525 ppnum_t phys_page;
1c79356b 1526
1527 if (pmap == PMAP_NULL) {
1528 return TRUE;
1529 }
1530 for (offset = vstart;
1531 offset < vend;
1532 offset += PAGE_SIZE_64) {
1533 phys_page = pmap_find_phys(pmap, offset);
1534 if (phys_page) {
1535 if (pmap != kernel_pmap &&
1536 pmap->pm_task_map == TASK_MAP_32BIT &&
1537 offset >= HIGH_MEM_BASE) {
1538 /*
1539 * The "high_shared_pde" is used to share
1540 * the entire top-most 2MB of address space
1541 * between the kernel and all 32-bit tasks.
1542 * So none of this can be removed from 32-bit
1543 * tasks.
1544 * Let's pretend there's nothing up
1545 * there...
1546 */
1547 return TRUE;
1548 }
1549 kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
1550 "page %d at 0x%llx\n",
1551 pmap, vstart, vend, phys_page, offset);
1552 return FALSE;
1553 }
1554 }
1c79356b 1555
2d21ac55 1556 return TRUE;
1557}
1558
2d21ac55 1559
1560/*
1561 * Create and return a physical map.
1562 *
1563 * If the size specified for the map
1564 * is zero, the map is an actual physical
1565 * map, and may be referenced by the
1566 * hardware.
1567 *
1568 * If the size specified is non-zero,
1569 * the map will be used in software only, and
1570 * is bounded by that size.
1571 */
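/*
 * Illustrative sketch (not part of the original file) of the interface
 * described above: passing size 0 asks for a real, hardware-usable pmap.
 * pmap_destroy() is the matching release path.
 */
#if 0	/* example only */
static void
example_create_task_pmap(boolean_t is_64bit)
{
	pmap_t p = pmap_create(0, is_64bit);

	if (p == PMAP_NULL)
		panic("example_create_task_pmap");
	/* ... hand off to a vm_map, use, then ... */
	pmap_destroy(p);
}
#endif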
1572pmap_t
1573pmap_create(
0c530ab8 1574 vm_map_size_t sz,
2d21ac55 1575 boolean_t is_64bit)
1c79356b 1576{
2d21ac55 1577 pmap_t p;
1578 int i;
1579 vm_offset_t va;
1580 vm_size_t size;
1581 pdpt_entry_t *pdpt;
1582 pml4_entry_t *pml4p;
0c530ab8 1583 pd_entry_t *pdp;
2d21ac55 1584 int template;
1585 spl_t s;
1586
1587 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
1588 (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0);
1589
0c530ab8 1590 size = (vm_size_t) sz;
1591
1592 /*
1593 * A software use-only map doesn't even need a map.
1594 */
1595
1596 if (size != 0) {
1597 return(PMAP_NULL);
1598 }
1599
91447636
A
1600 p = (pmap_t) zalloc(pmap_zone);
1601 if (PMAP_NULL == p)
2d21ac55 1602 panic("pmap_create zalloc");
6601e61a 1603
0c530ab8
A
1604 /* init counts now since we'll be bumping some */
1605 simple_lock_init(&p->lock, 0);
1c79356b 1606 p->stats.resident_count = 0;
2d21ac55 1607 p->stats.resident_max = 0;
1c79356b 1608 p->stats.wired_count = 0;
1c79356b 1609 p->ref_count = 1;
0c530ab8 1610 p->nx_enabled = 1;
0c530ab8
A
1611 p->pm_shared = FALSE;
1612
2d21ac55
A
1613 assert(!is_64bit || cpu_64bit);
1614	p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
1615
0c530ab8 1616 if (!cpu_64bit) {
2d21ac55
A
1617 /* legacy 32 bit setup */
1618 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
1619 * entry covers 1GB of addr space */
1620 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1621 panic("pmap_create kmem_alloc_wired");
1622 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1623 if ((vm_offset_t)NULL == p->pm_hold) {
1624 panic("pdpt zalloc");
1625 }
1626 pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1627 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
1628 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
1629 panic("pmap_create vm_object_allocate");
0c530ab8 1630
2d21ac55 1631 memset((char *)p->dirbase, 0, NBPTD);
0c530ab8 1632
2d21ac55
A
1633 va = (vm_offset_t)p->dirbase;
1634 p->pdirbase = kvtophys(va);
0c530ab8 1635
2d21ac55
A
1636 template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
1637 for (i = 0; i< NPGPTD; i++, pdpt++ ) {
1638 pmap_paddr_t pa;
1639 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1640 pmap_store_pte(pdpt, pa | template);
1641 }
0c530ab8 1642
2d21ac55
A
1643 /* map the high shared pde */
1644 s = splhigh();
1645 pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
1646 splx(s);
4452a7af 1647
0c530ab8 1648 } else {
2d21ac55 1649 /* 64 bit setup */
4452a7af 1650
2d21ac55
A
1651 /* alloc the pml4 page in kernel vm */
1652 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
1653 panic("pmap_create kmem_alloc_wired pml4");
4452a7af 1654
2d21ac55
A
1655 memset((char *)p->pm_hold, 0, PAGE_SIZE);
1656 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
0c530ab8 1657
2d21ac55
A
1658 vm_page_lock_queues();
1659 inuse_ptepages_count++;
1660 vm_page_unlock_queues();
0c530ab8 1661
2d21ac55 1662 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
0c530ab8 1663
2d21ac55
A
1664 if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
1665	panic("pmap_create pml4 obj");
0c530ab8 1666
2d21ac55
A
1667 if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
1668 panic("pmap_create pdpt obj");
0c530ab8 1669
2d21ac55
A
1670 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
1671 panic("pmap_create pte obj");
0c530ab8 1672
2d21ac55
A
1673 /* uber space points to uber mapped kernel */
1674 s = splhigh();
1675 pml4p = pmap64_pml4(p, 0ULL);
1676 pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
0c530ab8 1677
0c530ab8 1678
2d21ac55
A
1679 if (!is_64bit) {
1680 while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
1681 splx(s);
1682 pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
1683 s = splhigh();
1684 }
1685 pmap_store_pte(pdp, high_shared_pde);
1686 }
1687 splx(s);
0c530ab8 1688 }
1c79356b 1689
2d21ac55
A
1690	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
1691 (int) p, is_64bit, 0, 0, 0);
1692
1c79356b
A
1693 return(p);
1694}
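/*
 * A minimal usage sketch (illustrative only, not part of the original
 * source) of the pmap_create()/pmap_destroy() contract described above.
 * The helper name is hypothetical; the size argument must be zero since
 * this implementation returns PMAP_NULL for software-only maps.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_create_and_destroy(boolean_t is_64bit)
{
	pmap_t p = pmap_create((vm_map_size_t) 0, is_64bit);

	if (p == PMAP_NULL)
		panic("example_create_and_destroy: pmap_create failed");

	/* ... enter and remove mappings as needed ... */

	/* Drops the reference taken at creation; frees the pmap when the
	 * reference count reaches zero. */
	pmap_destroy(p);
}
#endif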
1695
2d21ac55
A
1696/*
1697 * The following routines implement the shared address optimization for 64-bit
1698 * users with a 4GB page zero.
1699 *
1700 * pmap_set_4GB_pagezero()
1701 * is called in the exec and fork paths to mirror the kernel's
1702 * mapping in the bottom 4G of the user's pmap. The task mapping changes
1703 * from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns
1704 * without doing anything if the -no_shared_cr3 boot-arg is set.
1705 *
1706 * pmap_clear_4GB_pagezero()
1707 * is called in the exec/exit paths to undo this mirror. The task mapping
1708 * reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's
1709 * CR3 by calling pmap_load_kernel_cr3().
1710 *
1711 * pmap_load_kernel_cr3()
1712 * loads cr3 with the kernel's page table. In addition to being called
1713 * by pmap_clear_4GB_pagezero(), it is used both prior to teardown and
1714 * when we go idle in the context of a shared map.
1715 *
1716 * Further notes on per-cpu data used:
1717 *
1718 * cpu_kernel_cr3 is the cr3 for the kernel's pmap.
1719 * This is loaded in a trampoline on entering the kernel
1720 * from a 32-bit user (or non-shared-cr3 64-bit user).
1721 * cpu_task_cr3 is the cr3 for the current thread.
1722 * This is loaded in a trampoline as we exit the kernel.
1723 * cpu_active_cr3 reflects the cr3 currently loaded.
1724 * However, the low order bit is set when the
1725 * processor is idle or interrupts are disabled
1726 * while the system pmap lock is held. It is used by
1727 * tlb shoot-down.
1728 * cpu_task_map indicates whether the task cr3 belongs to
1729 * a 32-bit, a 64-bit or a 64-bit shared map.
1730 * The latter allows the avoidance of the cr3 load
1731 * on kernel entry and exit.
1732 * cpu_tlb_invalid set TRUE when a tlb flush is requested.
1733 * If the cr3 is "inactive" (the cpu is idle or the
1734 *		system-wide pmap lock is held) this is not serviced by
1735 *		an IPI but at the time when the cr3 becomes "active".
1736 */
1737
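/*
 * A condensed sketch (illustrative only, not part of the original source)
 * of the call sequence described in the comment above. The caller shown
 * here is hypothetical; the real callers are the exec/fork and exec/exit
 * paths.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_shared_cr3_lifecycle(pmap_t user_pmap)
{
	/* exec/fork path: mirror the kernel's low 4GB into the user pmap
	 * and switch the task map to TASK_MAP_64BIT_SHARED (a no-op when
	 * the -no_shared_cr3 boot-arg is set). */
	pmap_set_4GB_pagezero(user_pmap);

	/* ... the task now runs on a shared cr3, avoiding the cr3 reload
	 * on kernel entry and exit ... */

	/* exec/exit path: undo the mirror and fall back to the kernel cr3. */
	pmap_clear_4GB_pagezero(user_pmap);
}
#endif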
0c530ab8
A
1738void
1739pmap_set_4GB_pagezero(pmap_t p)
1740{
0c530ab8
A
1741 pdpt_entry_t *user_pdptp;
1742 pdpt_entry_t *kern_pdptp;
1743
2d21ac55 1744 assert(p->pm_task_map != TASK_MAP_32BIT);
0c530ab8
A
1745
1746 /* Kernel-shared cr3 may be disabled by boot arg. */
1747 if (no_shared_cr3)
1748 return;
1749
1750 /*
1751 * Set the bottom 4 3rd-level pte's to be the kernel's.
1752 */
2d21ac55 1753 PMAP_LOCK(p);
0c530ab8 1754 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
2d21ac55 1755 PMAP_UNLOCK(p);
0c530ab8 1756 pmap_expand_pml4(p, 0x0);
2d21ac55 1757 PMAP_LOCK(p);
0c530ab8
A
1758 }
1759 kern_pdptp = kernel_pmap->pm_pdpt;
1760 pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
1761 pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
1762 pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
1763 pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
2d21ac55
A
1764 p->pm_task_map = TASK_MAP_64BIT_SHARED;
1765 PMAP_UNLOCK(p);
0c530ab8
A
1766}
1767
1768void
1769pmap_clear_4GB_pagezero(pmap_t p)
1770{
0c530ab8
A
1771 pdpt_entry_t *user_pdptp;
1772
2d21ac55 1773 if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
0c530ab8
A
1774 return;
1775
2d21ac55
A
1776 PMAP_LOCK(p);
1777
1778 p->pm_task_map = TASK_MAP_64BIT;
1779
1780 pmap_load_kernel_cr3();
1781
0c530ab8
A
1782 user_pdptp = pmap64_pdpt(p, 0x0);
1783 pmap_store_pte(user_pdptp+0, 0);
1784 pmap_store_pte(user_pdptp+1, 0);
1785 pmap_store_pte(user_pdptp+2, 0);
1786 pmap_store_pte(user_pdptp+3, 0);
1787
2d21ac55
A
1788 PMAP_UNLOCK(p);
1789}
0c530ab8 1790
2d21ac55
A
1791void
1792pmap_load_kernel_cr3(void)
1793{
1794 uint64_t kernel_cr3;
0c530ab8 1795
2d21ac55
A
1796 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
1797
1798 /*
1799 * Reload cr3 with the true kernel cr3.
1800 */
1801 kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
1802 set64_cr3(kernel_cr3);
1803 current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
1804 current_cpu_datap()->cpu_tlb_invalid = FALSE;
1805 __asm__ volatile("mfence");
0c530ab8
A
1806}
1807
1c79356b
A
1808/*
1809 * Retire the given physical map from service.
1810 * Should only be called if the map contains
1811 * no valid mappings.
1812 */
1813
1814void
1815pmap_destroy(
1816 register pmap_t p)
1817{
1c79356b 1818 register int c;
1c79356b
A
1819
1820 if (p == PMAP_NULL)
1821 return;
2d21ac55
A
1822
1823 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
1824 (int) p, 0, 0, 0, 0);
1825
1826 PMAP_LOCK(p);
1827
1c79356b 1828 c = --p->ref_count;
2d21ac55 1829
1c79356b 1830 if (c == 0) {
1c79356b
A
1831 /*
1832 * If some cpu is not using the physical pmap pointer that it
1833 * is supposed to be using (see set_dirbase), we might be using the
1834 * pmap that is being destroyed! Make sure we are
1835 * physically on the right pmap:
1836 */
55e303ae 1837 PMAP_UPDATE_TLBS(p,
2d21ac55
A
1838 0x0ULL,
1839 0xFFFFFFFFFFFFF000ULL);
1c79356b 1840 }
2d21ac55
A
1841
1842 PMAP_UNLOCK(p);
1c79356b
A
1843
1844 if (c != 0) {
2d21ac55
A
1845 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1846 (int) p, 1, 0, 0, 0);
1847 return; /* still in use */
1c79356b
A
1848 }
1849
1850 /*
1851 * Free the memory maps, then the
1852 * pmap structure.
1853 */
0c530ab8 1854 if (!cpu_64bit) {
2d21ac55
A
1855 vm_page_lock_queues();
1856 inuse_ptepages_count -= p->pm_obj->resident_page_count;
1857 vm_page_unlock_queues();
91447636 1858
2d21ac55
A
1859 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1860 zfree(pdpt_zone, (void *)p->pm_hold);
0c530ab8 1861
2d21ac55
A
1862 vm_object_deallocate(p->pm_obj);
1863 } else {
1864 /* 64 bit */
1865 int inuse_ptepages = 0;
0c530ab8 1866
2d21ac55
A
1867 /* free 64 bit mode structs */
1868 inuse_ptepages++;
1869 kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
1870
1871 inuse_ptepages += p->pm_obj_pml4->resident_page_count;
1872 vm_object_deallocate(p->pm_obj_pml4);
1873
1874 inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
1875 vm_object_deallocate(p->pm_obj_pdpt);
0c530ab8 1876
2d21ac55
A
1877 inuse_ptepages += p->pm_obj->resident_page_count;
1878 vm_object_deallocate(p->pm_obj);
1879
1880 vm_page_lock_queues();
1881 inuse_ptepages_count -= inuse_ptepages;
1c79356b 1882 vm_page_unlock_queues();
2d21ac55
A
1883 }
1884 zfree(pmap_zone, p);
1c79356b 1885
2d21ac55
A
1886 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1887 0, 0, 0, 0, 0);
0c530ab8 1888
1c79356b
A
1889}
1890
1891/*
1892 * Add a reference to the specified pmap.
1893 */
1894
1895void
1896pmap_reference(
1897 register pmap_t p)
1898{
1c79356b
A
1899
1900 if (p != PMAP_NULL) {
2d21ac55 1901 PMAP_LOCK(p);
1c79356b 1902 p->ref_count++;
2d21ac55 1903	PMAP_UNLOCK(p);
1c79356b
A
1904 }
1905}
1906
1907/*
1908 * Remove a range of hardware page-table entries.
1909 * The entries given are the first (inclusive)
1910 * and last (exclusive) entries for the VM pages.
1911 * The virtual address is the va for the first pte.
1912 *
1913 * The pmap must be locked.
1914 * If the pmap is not the kernel pmap, the range must lie
1915 * entirely within one pte-page. This is NOT checked.
1916 * Assumes that the pte-page exists.
1917 */
1918
2d21ac55 1919void
1c79356b
A
1920pmap_remove_range(
1921 pmap_t pmap,
0c530ab8 1922 vm_map_offset_t start_vaddr,
1c79356b
A
1923 pt_entry_t *spte,
1924 pt_entry_t *epte)
1925{
1926 register pt_entry_t *cpte;
2d21ac55
A
1927 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1928 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1929 pv_hashed_entry_t pvh_e;
1930 int pvh_cnt = 0;
0c530ab8 1931 int num_removed, num_unwired, num_found;
1c79356b 1932 int pai;
91447636 1933 pmap_paddr_t pa;
2d21ac55
A
1934 vm_map_offset_t vaddr;
1935 int pvhash_idx;
1936 uint32_t pv_cnt;
1c79356b 1937
1c79356b
A
1938 num_removed = 0;
1939 num_unwired = 0;
2d21ac55
A
1940 num_found = 0;
1941
1942 if (pmap != kernel_pmap &&
1943 pmap->pm_task_map == TASK_MAP_32BIT &&
1944 start_vaddr >= HIGH_MEM_BASE) {
1945 /*
1946 * The range is in the "high_shared_pde" which is shared
1947 * between the kernel and all 32-bit tasks. It holds
1948 * the 32-bit commpage but also the trampolines, GDT, etc...
1949 * so we can't let user tasks remove anything from it.
1950 */
1951 return;
1952 }
1c79356b 1953
0c530ab8
A
1954 /* invalidate the PTEs first to "freeze" them */
1955 for (cpte = spte, vaddr = start_vaddr;
1956 cpte < epte;
1957 cpte++, vaddr += PAGE_SIZE_64) {
1c79356b
A
1958
1959 pa = pte_to_pa(*cpte);
1960 if (pa == 0)
1961 continue;
0c530ab8 1962 num_found++;
1c79356b 1963
1c79356b
A
1964 if (iswired(*cpte))
1965 num_unwired++;
1966
2d21ac55 1967 pai = pa_index(pa);
1c79356b 1968
2d21ac55 1969 if (!managed_page(pai)) {
1c79356b
A
1970 /*
1971 * Outside range of managed physical memory.
1972 * Just remove the mappings.
1973 */
0c530ab8 1974 pmap_store_pte(cpte, 0);
1c79356b
A
1975 continue;
1976 }
1977
2d21ac55 1978 /* invalidate the PTE */
0c530ab8
A
1979 pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
1980 }
1c79356b 1981
2d21ac55
A
1982 if (num_found == 0) {
1983 /* nothing was changed: we're done */
1984 goto update_counts;
0c530ab8 1985 }
1c79356b 1986
0c530ab8 1987 /* propagate the invalidates to other CPUs */
91447636 1988
0c530ab8
A
1989 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1990
1991 for (cpte = spte, vaddr = start_vaddr;
1992 cpte < epte;
1993 cpte++, vaddr += PAGE_SIZE_64) {
1994
1995 pa = pte_to_pa(*cpte);
1996 if (pa == 0)
1997 continue;
1998
1999 pai = pa_index(pa);
2000
2001 LOCK_PVH(pai);
2002
2d21ac55
A
2003 pa = pte_to_pa(*cpte);
2004 if (pa == 0) {
2005 UNLOCK_PVH(pai);
2006 continue;
2007 }
2008
0c530ab8
A
2009 num_removed++;
2010
2011 /*
2012 * Get the modify and reference bits, then
2013 * nuke the entry in the page table
2014 */
2015 /* remember reference and change */
2016 pmap_phys_attributes[pai] |=
2d21ac55 2017 (char)(*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
0c530ab8
A
2018 /* completely invalidate the PTE */
2019 pmap_store_pte(cpte, 0);
1c79356b
A
2020
2021 /*
2022 * Remove the mapping from the pvlist for
2023 * this physical page.
2024 */
2025 {
2d21ac55
A
2026 pv_rooted_entry_t pv_h;
2027 pv_hashed_entry_t *pprevh;
2028 ppnum_t ppn = (ppnum_t)pai;
1c79356b
A
2029
2030 pv_h = pai_to_pvh(pai);
2d21ac55
A
2031 pvh_e = PV_HASHED_ENTRY_NULL;
2032 if (pv_h->pmap == PMAP_NULL)
2033 panic("pmap_remove_range: null pv_list!");
2034
2035 if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* rooted or not */
1c79356b 2036 /*
2d21ac55
A
2037 * Header is the pv_rooted_entry; we can't free that. If there is
2038 * a queued entry after this one, we remove it from the ppn queue
2039 * and from the hash chain, copy it into the rooted entry, and
2040 * then free the hashed entry instead.
1c79356b 2041 */
2d21ac55
A
2042
2043 pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
2044 if (pv_h != (pv_rooted_entry_t)pvh_e) { /* any queued after rooted? */
2045 CHK_NPVHASH();
2046 pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
2047 LOCK_PV_HASH(pvhash_idx);
2048 remque(&pvh_e->qlink);
2049 {
2050 pprevh = pvhash(pvhash_idx);
2051 if (PV_HASHED_ENTRY_NULL == *pprevh) {
2052 panic("pmap_remove_range empty hash removing rooted pv");
2053 }
1c79356b 2054 }
2d21ac55
A
2055 pmap_pvh_unlink(pvh_e);
2056 UNLOCK_PV_HASH(pvhash_idx);
2057 pv_h->pmap = pvh_e->pmap;
2058 pv_h->va = pvh_e->va; /* dispose of pvh_e */
2059 } else { /* none queued after rooted */
2060 pv_h->pmap = PMAP_NULL;
2061 pvh_e = PV_HASHED_ENTRY_NULL;
2062 } /* any queued after rooted */
2063
2064 } else { /* rooted or not */
2065 /* Not removing the rooted pv: find it on the hash chain, remove it
2066 * from the ppn queue and the hash chain, and free it. */
2067 CHK_NPVHASH();
2068 pvhash_idx = pvhashidx(pmap,vaddr);
2069 LOCK_PV_HASH(pvhash_idx);
2070 pprevh = pvhash(pvhash_idx);
2071 if (PV_HASHED_ENTRY_NULL == *pprevh) {
2072 panic("pmap_remove_range empty hash removing hashed pv");
1c79356b 2073 }
2d21ac55
A
2074 pvh_e = *pprevh;
2075 pmap_pv_hashlist_walks++;
2076 pv_cnt = 0;
2077 while (PV_HASHED_ENTRY_NULL != pvh_e) {
2078 pv_cnt++;
2079 if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn) break;
2080 pprevh = &pvh_e->nexth;
2081 pvh_e = pvh_e->nexth;
2082 }
2083 pmap_pv_hashlist_cnts += pv_cnt;
2084 if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
2085 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_remove_range pv not on hash");
2086 *pprevh = pvh_e->nexth;
2087 remque(&pvh_e->qlink);
2088 UNLOCK_PV_HASH(pvhash_idx);
2089
2090 } /* rooted or not */
2091
1c79356b 2092 UNLOCK_PVH(pai);
2d21ac55
A
2093
2094 if (pvh_e != PV_HASHED_ENTRY_NULL) {
2095 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
2096 pvh_eh = pvh_e;
2097
2098 if (pvh_et == PV_HASHED_ENTRY_NULL) {
2099 pvh_et = pvh_e;
2100 }
2101
2102 pvh_cnt++;
2103 }
2104
2105 } /* removing mappings for this phy page */
2106 } /* for loop */
2107
2108 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
2109 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1c79356b
A
2110 }
2111
2d21ac55 2112update_counts:
1c79356b
A
2113 /*
2114 * Update the counts
2115 */
2d21ac55
A
2116#if TESTING
2117 if (pmap->stats.resident_count < num_removed)
2118 panic("pmap_remove_range: resident_count");
2119#endif
1c79356b 2120 assert(pmap->stats.resident_count >= num_removed);
2d21ac55
A
2121 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
2122
2123#if TESTING
2124 if (pmap->stats.wired_count < num_unwired)
2125 panic("pmap_remove_range: wired_count");
2126#endif
1c79356b 2127 assert(pmap->stats.wired_count >= num_unwired);
2d21ac55
A
2128 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
2129
0c530ab8 2130 return;
1c79356b
A
2131}
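/*
 * A condensed sketch (illustrative only, not part of the original source)
 * of the two-pass structure pmap_remove_range() uses above: first clear
 * INTEL_PTE_VALID in every PTE and issue one TLB shoot-down for the whole
 * range, then walk the range again to harvest ref/mod bits and unlink pv
 * entries. The helper name is hypothetical.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_two_pass_remove(pmap_t pmap, vm_map_offset_t start,
			pt_entry_t *spte, pt_entry_t *epte)
{
	pt_entry_t	*cpte;
	vm_map_offset_t	vaddr;

	/* Pass 1: "freeze" the mappings by clearing the valid bit. */
	for (cpte = spte, vaddr = start; cpte < epte; cpte++, vaddr += PAGE_SIZE_64)
		if (pte_to_pa(*cpte))
			pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));

	/* One shoot-down for the whole range instead of one per page. */
	PMAP_UPDATE_TLBS(pmap, start, vaddr);

	/* Pass 2: with no CPU able to load new TLB entries for these pages,
	 * collect PHYS_MODIFIED/PHYS_REFERENCED, zero the PTEs and unlink
	 * the pv entries (see pmap_remove_range() above for the details). */
}
#endif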
2132
0b4e3aa0
A
2133/*
2134 * Remove phys addr if mapped in specified map
2135 *
2136 */
2137void
2138pmap_remove_some_phys(
91447636
A
2139 __unused pmap_t map,
2140 __unused ppnum_t pn)
0b4e3aa0
A
2141{
2142
2143/* Implement to support working set code */
2144
2145}
2146
1c79356b
A
2147/*
2148 * Remove the given range of addresses
2149 * from the specified map.
2150 *
2151 * It is assumed that the start and end are properly
2152 * rounded to the hardware page size.
2153 */
2154
55e303ae 2155
1c79356b
A
2156void
2157pmap_remove(
2158 pmap_t map,
55e303ae
A
2159 addr64_t s64,
2160 addr64_t e64)
1c79356b 2161{
2d21ac55
A
2162 pt_entry_t *pde;
2163 pt_entry_t *spte, *epte;
2164 addr64_t l64;
2165 addr64_t orig_s64;
2166 uint64_t deadline;
2167
2168 pmap_intr_assert();
1c79356b 2169
0c530ab8 2170 if (map == PMAP_NULL || s64 == e64)
1c79356b 2171 return;
2d21ac55
A
2172
2173 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
2174 (int) map,
2175 (int) (s64>>32), (int) s64,
2176 (int) (e64>>32), (int) e64);
1c79356b 2177
2d21ac55
A
2178 PMAP_LOCK(map);
2179
2180#if 0
2181 /*
2182 * Check that address range in the kernel does not overlap the stacks.
2183 * We initialize local static min/max variables once to avoid making
2184 * 2 function calls for every remove. Note also that these functions
2185 * both return 0 before kernel stacks have been initialized, and hence
2186 * the panic is not triggered in this case.
2187 */
2188 if (map == kernel_pmap) {
2189 static vm_offset_t kernel_stack_min = 0;
2190 static vm_offset_t kernel_stack_max = 0;
2191
2192 if (kernel_stack_min == 0) {
2193 kernel_stack_min = min_valid_stack_address();
2194 kernel_stack_max = max_valid_stack_address();
2195 }
2196 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
2197 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
2198 panic("pmap_remove() attempted in kernel stack");
2199 }
2200#else
2201
2202 /*
2203 * The values of kernel_stack_min and kernel_stack_max are no longer
2204 * relevant now that we allocate kernel stacks anywhere in the kernel map,
2205 * so the old code above no longer applies. If we wanted to check that
2206 * we weren't removing a mapping of a page in a kernel stack we'd have to
2207 * mark the PTE with an unused bit and check that here.
2208 */
2209
2210#endif
2211
2212 deadline = rdtsc64() + max_preemption_latency_tsc;
1c79356b 2213
0c530ab8
A
2214 orig_s64 = s64;
2215
2216 while (s64 < e64) {
2d21ac55 2217
0c530ab8
A
2218 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
2219 if (l64 > e64)
2220 l64 = e64;
2221 pde = pmap_pde(map, s64);
2d21ac55 2222
0c530ab8
A
2223 if (pde && (*pde & INTEL_PTE_VALID)) {
2224 spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
2225 spte = &spte[ptenum(s64)];
2226 epte = &spte[intel_btop(l64-s64)];
2d21ac55 2227
0c530ab8 2228 pmap_remove_range(map, s64, spte, epte);
1c79356b 2229 }
0c530ab8 2230 s64 = l64;
1c79356b 2231 pde++;
2d21ac55
A
2232
2233 if (s64 < e64 && rdtsc64() >= deadline) {
2234	PMAP_UNLOCK(map);
2235	PMAP_LOCK(map);
2236
2237 deadline = rdtsc64() + max_preemption_latency_tsc;
2238 }
2239
1c79356b 2240 }
91447636 2241
2d21ac55
A
2242 PMAP_UNLOCK(map);
2243
2244 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
2245 (int) map, 0, 0, 0, 0);
2246
1c79356b
A
2247}
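/*
 * A minimal sketch (illustrative only, not part of the original source)
 * of the bounded-latency pattern pmap_remove() uses above: when a long
 * walk exceeds max_preemption_latency_tsc, the pmap lock is dropped and
 * immediately retaken so that a waiting thread gets a preemption window.
 * The helper name and loop condition are placeholders.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_bounded_latency_walk(pmap_t map)
{
	uint64_t deadline = rdtsc64() + max_preemption_latency_tsc;

	PMAP_LOCK(map);
	while (/* more work remains */ 0) {
		/* ... process one pde-sized chunk ... */
		if (rdtsc64() >= deadline) {
			/* Briefly open a preemption window, then continue. */
			PMAP_UNLOCK(map);
			PMAP_LOCK(map);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}
	PMAP_UNLOCK(map);
}
#endif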
2248
2249/*
2250 * Routine: pmap_page_protect
2251 *
2252 * Function:
2253 * Lower the permission for all mappings to a given
2254 * page.
2255 */
2256void
2257pmap_page_protect(
55e303ae 2258 ppnum_t pn,
1c79356b
A
2259 vm_prot_t prot)
2260{
2d21ac55
A
2261 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
2262 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
2263 pv_hashed_entry_t nexth;
2264 int pvh_cnt = 0;
2265 pv_rooted_entry_t pv_h;
2266 pv_rooted_entry_t pv_e;
2267 pv_hashed_entry_t pvh_e;
2268 pt_entry_t *pte;
1c79356b
A
2269 int pai;
2270 register pmap_t pmap;
1c79356b 2271 boolean_t remove;
2d21ac55 2272 int pvhash_idx;
1c79356b 2273
2d21ac55 2274 pmap_intr_assert();
55e303ae 2275 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2276 if (pn == vm_page_guard_addr)
2277 return;
2278
2279 pai = ppn_to_pai(pn);
0c530ab8 2280
2d21ac55 2281 if (!managed_page(pai)) {
1c79356b
A
2282 /*
2283 * Not a managed page.
2284 */
2285 return;
2286 }
2287
2d21ac55
A
2288 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
2289 (int) pn, (int) prot, 0, 0, 0);
2290
1c79356b
A
2291 /*
2292 * Determine the new protection.
2293 */
2294 switch (prot) {
2295 case VM_PROT_READ:
2296 case VM_PROT_READ|VM_PROT_EXECUTE:
2297 remove = FALSE;
2298 break;
2299 case VM_PROT_ALL:
2300 return; /* nothing to do */
2301 default:
2302 remove = TRUE;
2303 break;
2304 }
0c530ab8 2305
2d21ac55 2306 pv_h = pai_to_pvh(pai);
1c79356b 2307
2d21ac55 2308 LOCK_PVH(pai);
1c79356b 2309
1c79356b
A
2310 /*
2311 * Walk down PV list, changing or removing all mappings.
1c79356b
A
2312 */
2313 if (pv_h->pmap != PMAP_NULL) {
2314
2d21ac55
A
2315 pv_e = pv_h;
2316 pvh_e = (pv_hashed_entry_t)pv_e; /* cheat */
4452a7af 2317
2d21ac55
A
2318 do {
2319 register vm_map_offset_t vaddr;
2320 pmap = pv_e->pmap;
0c530ab8 2321
2d21ac55
A
2322 vaddr = pv_e->va;
2323 pte = pmap_pte(pmap, vaddr);
2324
2325 if (0 == pte) {
2326 kprintf("pmap_page_protect pmap %p pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr);
2327 panic("pmap_page_protect");
2328 }
0c530ab8 2329
2d21ac55 2330 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink); /* if there is one */
4452a7af 2331
2d21ac55
A
2332 /*
2333 * Remove the mapping if new protection is NONE
2334 * or if write-protecting a kernel mapping.
2335 */
2336 if (remove || pmap == kernel_pmap) {
2337 /*
2338 * Remove the mapping, collecting any modify bits.
2339 */
2340 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
4452a7af 2341
2d21ac55 2342 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
4452a7af 2343
2d21ac55 2344 pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
4452a7af 2345
2d21ac55 2346 pmap_store_pte(pte, 0);
0c530ab8 2347
2d21ac55
A
2348#if TESTING
2349 if (pmap->stats.resident_count < 1)
2350 panic("pmap_page_protect: resident_count");
2351#endif
2352 assert(pmap->stats.resident_count >= 1);
2353 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
0c530ab8 2354
2d21ac55
A
2355 /*
2356 * Deal with the pv_rooted_entry.
2357 */
0c530ab8 2358
2d21ac55
A
2359 if (pv_e == pv_h) {
2360 /*
2361 * Fix up head later.
2362 */
2363 pv_h->pmap = PMAP_NULL;
2364 }
2365 else {
2366 /*
2367 * Delete this entry.
2368 */
2369 CHK_NPVHASH();
2370 pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
2371 LOCK_PV_HASH(pvhash_idx);
2372 remque(&pvh_e->qlink);
2373 pmap_pvh_unlink(pvh_e);
2374 UNLOCK_PV_HASH(pvhash_idx);
2375
2376 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
2377 pvh_eh = pvh_e;
2378
2379 if (pvh_et == PV_HASHED_ENTRY_NULL)
2380 pvh_et = pvh_e;
2381 pvh_cnt++;
2382 }
2383 } else {
2384 /*
2385 * Write-protect.
2386 */
2387 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE));
2388 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
2389 }
0c530ab8 2390
2d21ac55
A
2391 pvh_e = nexth;
2392 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
0c530ab8 2393
2d21ac55
A
2394 /*
2395 * If pv_head mapping was removed, fix it up.
2396 */
0c530ab8 2397
2d21ac55
A
2398 if (pv_h->pmap == PMAP_NULL) {
2399 pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
2400
2401 if (pvh_e != (pv_hashed_entry_t)pv_h) {
2402 CHK_NPVHASH();
2403 pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
2404 LOCK_PV_HASH(pvhash_idx);
2405 remque(&pvh_e->qlink);
2406 pmap_pvh_unlink(pvh_e);
2407 UNLOCK_PV_HASH(pvhash_idx);
2408 pv_h->pmap = pvh_e->pmap;
2409 pv_h->va = pvh_e->va;
2410 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
2411 pvh_eh = pvh_e;
2412
2413 if (pvh_et == PV_HASHED_ENTRY_NULL)
2414 pvh_et = pvh_e;
2415 pvh_cnt++;
1c79356b 2416 }
2d21ac55
A
2417 }
2418 }
2419 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
2420 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1c79356b 2421 }
2d21ac55
A
2422
2423 UNLOCK_PVH(pai);
2424
2425 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
2426 0, 0, 0, 0, 0);
2427
1c79356b
A
2428}
2429
2d21ac55 2430
91447636
A
2431/*
2432 * Routine:
2433 * pmap_disconnect
2434 *
2435 * Function:
2436 * Disconnect all mappings for this page and return reference and change status
2437 * in generic format.
2438 *
2439 */
2440unsigned int pmap_disconnect(
2441 ppnum_t pa)
2442{
2d21ac55 2443 pmap_page_protect(pa, 0); /* disconnect the page */
91447636
A
2444 return (pmap_get_refmod(pa)); /* return ref/chg status */
2445}
2446
1c79356b
A
2447/*
2448 * Set the physical protection on the
2449 * specified range of this map as requested.
2450 * Will not increase permissions.
2451 */
2452void
2453pmap_protect(
2454 pmap_t map,
0c530ab8
A
2455 vm_map_offset_t sva,
2456 vm_map_offset_t eva,
1c79356b
A
2457 vm_prot_t prot)
2458{
2459 register pt_entry_t *pde;
2460 register pt_entry_t *spte, *epte;
0c530ab8
A
2461 vm_map_offset_t lva;
2462 vm_map_offset_t orig_sva;
0c530ab8 2463 boolean_t set_NX;
2d21ac55
A
2464 int num_found = 0;
2465
2466 pmap_intr_assert();
1c79356b
A
2467
2468 if (map == PMAP_NULL)
2469 return;
2470
0c530ab8
A
2471 if (prot == VM_PROT_NONE) {
2472 pmap_remove(map, sva, eva);
1c79356b
A
2473 return;
2474 }
2475
2d21ac55
A
2476 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
2477 (int) map,
2478 (int) (sva>>32), (int) sva,
2479 (int) (eva>>32), (int) eva);
2480
0c530ab8
A
2481 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
2482 set_NX = FALSE;
2483 else
2484 set_NX = TRUE;
2485
2d21ac55 2486 PMAP_LOCK(map);
1c79356b 2487
0c530ab8
A
2488 orig_sva = sva;
2489 while (sva < eva) {
2490 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
2491 if (lva > eva)
2492 lva = eva;
2493 pde = pmap_pde(map, sva);
2494 if (pde && (*pde & INTEL_PTE_VALID)) {
2495 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
2496 spte = &spte[ptenum(sva)];
2497 epte = &spte[intel_btop(lva-sva)];
1c79356b
A
2498
2499 while (spte < epte) {
2d21ac55 2500
0c530ab8
A
2501 if (*spte & INTEL_PTE_VALID) {
2502
2503 if (prot & VM_PROT_WRITE)
2d21ac55 2504 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_WRITE));
0c530ab8 2505 else
2d21ac55 2506 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_WRITE));
0c530ab8
A
2507
2508 if (set_NX == TRUE)
2d21ac55 2509 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_NX));
0c530ab8 2510 else
2d21ac55 2511 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_NX));
0c530ab8
A
2512
2513 num_found++;
0c530ab8 2514 }
1c79356b
A
2515 spte++;
2516 }
2517 }
0c530ab8 2518 sva = lva;
1c79356b 2519 }
0c530ab8 2520 if (num_found)
2d21ac55
A
2521 PMAP_UPDATE_TLBS(map, orig_sva, eva);
2522
2523 PMAP_UNLOCK(map);
2524
2525 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
2526 0, 0, 0, 0, 0);
91447636 2527
1c79356b
A
2528}
2529
0c530ab8
A
2530/* Map a (possibly) autogenned block */
2531void
2532pmap_map_block(
2533 pmap_t pmap,
2534 addr64_t va,
2535 ppnum_t pa,
2536 uint32_t size,
2537 vm_prot_t prot,
2538 int attr,
2539 __unused unsigned int flags)
2540{
2d21ac55 2541 uint32_t page;
0c530ab8 2542
2d21ac55
A
2543 for (page = 0; page < size; page++) {
2544 pmap_enter(pmap, va, pa, prot, attr, TRUE);
2545 va += PAGE_SIZE;
2546 pa++;
2547 }
0c530ab8 2548}
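/*
 * An illustrative call (not part of the original source) of
 * pmap_map_block(), which simply enters `size` consecutive physical pages
 * at consecutive virtual pages via the pmap_enter() loop above. The
 * helper name and the 16-page count are made up for the example.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_map_window(pmap_t pmap, addr64_t va, ppnum_t first_page)
{
	/* 16 pages, read/write, default cache attributes (attr == 0). */
	pmap_map_block(pmap, va, first_page, 16,
		       VM_PROT_READ | VM_PROT_WRITE, 0, 0);
}
#endif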
1c79356b
A
2549
2550
2551/*
2552 * Insert the given physical page (p) at
2553 * the specified virtual address (v) in the
2554 * target physical map with the protection requested.
2555 *
2556 * If specified, the page will be wired down, meaning
2557 * that the related pte cannot be reclaimed.
2558 *
2559 * NB: This is the only routine which MAY NOT lazy-evaluate
2560 * or lose information. That is, this routine must actually
2561 * insert this page into the given map NOW.
2562 */
2563void
2564pmap_enter(
2565 register pmap_t pmap,
2d21ac55 2566 vm_map_offset_t vaddr,
55e303ae 2567 ppnum_t pn,
1c79356b 2568 vm_prot_t prot,
9bccf70c 2569 unsigned int flags,
1c79356b
A
2570 boolean_t wired)
2571{
2572 register pt_entry_t *pte;
2d21ac55 2573 register pv_rooted_entry_t pv_h;
91447636 2574 register int pai;
2d21ac55
A
2575 pv_hashed_entry_t pvh_e;
2576 pv_hashed_entry_t pvh_new;
2577 pv_hashed_entry_t *hashp;
1c79356b 2578 pt_entry_t template;
91447636 2579 pmap_paddr_t old_pa;
2d21ac55 2580 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
0c530ab8
A
2581 boolean_t need_tlbflush = FALSE;
2582 boolean_t set_NX;
2d21ac55
A
2583 char oattr;
2584 int pvhash_idx;
2585 uint32_t pv_cnt;
2586 boolean_t old_pa_locked;
1c79356b 2587
2d21ac55 2588 pmap_intr_assert();
55e303ae 2589 assert(pn != vm_page_fictitious_addr);
1c79356b 2590 if (pmap_debug)
0c530ab8 2591 printf("pmap(%qx, %x)\n", vaddr, pn);
1c79356b
A
2592 if (pmap == PMAP_NULL)
2593 return;
2d21ac55
A
2594 if (pn == vm_page_guard_addr)
2595 return;
2596
2597 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
2598 (int) pmap,
2599 (int) (vaddr>>32), (int) vaddr,
2600 (int) pn, prot);
1c79356b 2601
0c530ab8
A
2602 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
2603 set_NX = FALSE;
2604 else
2605 set_NX = TRUE;
2606
1c79356b
A
2607 /*
2608 * Must allocate a new pvlist entry while we're unlocked;
2609 * zalloc may cause pageout (which will lock the pmap system).
2610 * If we determine we need a pvlist entry, we will unlock
2611 * and allocate one. Then we will retry, throwing away
2612 * the allocated entry later (if we no longer need it).
2613 */
91447636 2614
2d21ac55
A
2615 pvh_new = PV_HASHED_ENTRY_NULL;
2616Retry:
2617 pvh_e = PV_HASHED_ENTRY_NULL;
2618
2619 PMAP_LOCK(pmap);
1c79356b
A
2620
2621 /*
2622 * Expand pmap to include this pte. Assume that
2623 * pmap is always expanded to include enough hardware
2624 * pages to map one VM page.
2625 */
2626
0c530ab8 2627 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
1c79356b
A
2628 /*
2629 * Must unlock to expand the pmap.
2630 */
2d21ac55 2631 PMAP_UNLOCK(pmap);
0c530ab8 2632 pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
2d21ac55
A
2633 PMAP_LOCK(pmap);
2634 }
2635
2636 old_pa = pte_to_pa(*pte);
2637 pai = pa_index(old_pa);
2638 old_pa_locked = FALSE;
1c79356b 2639
2d21ac55
A
2640 /*
2641 * if we have a previous managed page, lock the pv entry now. after
2642 * we lock it, check to see if someone beat us to the lock and if so
2643 * drop the lock
2644 */
2645
2646 if ((0 != old_pa) && managed_page(pai)) {
2647 LOCK_PVH(pai);
2648 old_pa_locked = TRUE;
2649 old_pa = pte_to_pa(*pte);
2650 if (0 == old_pa) {
2651 UNLOCK_PVH(pai); /* some other path beat us to it */
2652 old_pa_locked = FALSE;
2653 }
1c79356b 2654 }
2d21ac55
A
2655
2656
1c79356b 2657 /*
2d21ac55 2658 * Special case if the incoming physical page is already mapped
1c79356b
A
2659 * at this address.
2660 */
1c79356b 2661 if (old_pa == pa) {
2d21ac55 2662
1c79356b
A
2663 /*
2664 * May be changing its wired attribute or protection
2665 */
2d21ac55 2666
1c79356b 2667 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae 2668
0c530ab8 2669 if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
55e303ae
A
2670 if(!(flags & VM_MEM_GUARDED))
2671 template |= INTEL_PTE_PTA;
2672 template |= INTEL_PTE_NCACHE;
2673 }
2674
1c79356b
A
2675 if (pmap != kernel_pmap)
2676 template |= INTEL_PTE_USER;
2677 if (prot & VM_PROT_WRITE)
2678 template |= INTEL_PTE_WRITE;
0c530ab8
A
2679
2680 if (set_NX == TRUE)
2681 template |= INTEL_PTE_NX;
2682
1c79356b
A
2683 if (wired) {
2684 template |= INTEL_PTE_WIRED;
2685 if (!iswired(*pte))
2d21ac55 2686 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
1c79356b
A
2687 }
2688 else {
2689 if (iswired(*pte)) {
2690 assert(pmap->stats.wired_count >= 1);
2d21ac55 2691 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
1c79356b
A
2692 }
2693 }
2694
2d21ac55 2695 /* store modified PTE and preserve RC bits */
0c530ab8 2696 pmap_update_pte(pte, *pte, template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
2d21ac55
A
2697 if (old_pa_locked) {
2698 UNLOCK_PVH(pai);
2699 old_pa_locked = FALSE;
2700 }
0c530ab8 2701 need_tlbflush = TRUE;
1c79356b
A
2702 goto Done;
2703 }
2704
2705 /*
2706 * Outline of code from here:
2707 * 1) If va was mapped, update TLBs, remove the mapping
2708 * and remove old pvlist entry.
2709 * 2) Add pvlist entry for new mapping
2710 * 3) Enter new mapping.
2711 *
1c79356b
A
2712 * If the old physical page is not managed step 1) is skipped
2713 * (except for updating the TLBs), and the mapping is
2714 * overwritten at step 3). If the new physical page is not
2715 * managed, step 2) is skipped.
2716 */
2717
91447636 2718 if (old_pa != (pmap_paddr_t) 0) {
1c79356b 2719
1c79356b
A
2720 /*
2721 * Don't do anything to pages outside valid memory here.
2722 * Instead convince the code that enters a new mapping
2723 * to overwrite the old one.
2724 */
2725
2d21ac55
A
2726 /* invalidate the PTE */
2727 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
2728 /* propagate invalidate everywhere */
2729 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
2730 /* remember reference and change */
2731 oattr = (char)(*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
2732 /* completely invalidate the PTE */
2733 pmap_store_pte(pte, 0);
1c79356b 2734
2d21ac55 2735 if (managed_page(pai)) {
1c79356b 2736
2d21ac55
A
2737#if TESTING
2738 if (pmap->stats.resident_count < 1)
2739 panic("pmap_enter: resident_count");
2740#endif
1c79356b 2741 assert(pmap->stats.resident_count >= 1);
2d21ac55
A
2742 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
2743
1c79356b 2744 if (iswired(*pte)) {
2d21ac55
A
2745
2746#if TESTING
2747 if (pmap->stats.wired_count < 1)
2748 panic("pmap_enter: wired_count");
2749#endif
1c79356b 2750 assert(pmap->stats.wired_count >= 1);
2d21ac55 2751 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
1c79356b 2752 }
91447636 2753
0c530ab8 2754 pmap_phys_attributes[pai] |= oattr;
1c79356b
A
2755 /*
2756 * Remove the mapping from the pvlist for
2757 * this physical page.
2d21ac55
A
2758 * We'll end up with either a rooted pv or a
2759 * hashed pv
1c79356b
A
2760 */
2761 {
1c79356b
A
2762
2763 pv_h = pai_to_pvh(pai);
2d21ac55 2764
1c79356b
A
2765 if (pv_h->pmap == PMAP_NULL) {
2766 panic("pmap_enter: null pv_list!");
2767 }
0c530ab8
A
2768
2769 if (pv_h->va == vaddr && pv_h->pmap == pmap) {
1c79356b 2770 /*
2d21ac55
A
2771 * Header is the pv_rooted_entry.
2772 * If there is a next one, copy it to the
2773 * header and free the next one (we cannot
1c79356b
A
2774 * free the header)
2775 */
2d21ac55
A
2776 pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
2777 if (pvh_e != (pv_hashed_entry_t)pv_h) {
2778 pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
2779 LOCK_PV_HASH(pvhash_idx);
2780 remque(&pvh_e->qlink);
2781 pmap_pvh_unlink(pvh_e);
2782 UNLOCK_PV_HASH(pvhash_idx);
2783 pv_h->pmap = pvh_e->pmap;
2784 pv_h->va = pvh_e->va;
1c79356b 2785 }
2d21ac55
A
2786 else {
2787 pv_h->pmap = PMAP_NULL;
2788 pvh_e = PV_HASHED_ENTRY_NULL;
2789 }
1c79356b
A
2790 }
2791 else {
2d21ac55
A
2792 pv_hashed_entry_t *pprevh;
2793 ppnum_t old_ppn;
2794 /* wasn't the rooted pv - hash, find it, and unlink it */
2795 old_ppn = (ppnum_t)pa_index(old_pa);
2796 CHK_NPVHASH();
2797 pvhash_idx = pvhashidx(pmap,vaddr);
2798 LOCK_PV_HASH(pvhash_idx);
2799 pprevh = pvhash(pvhash_idx);
2800#if PV_DEBUG
2801 if (NULL==pprevh)panic("pmap enter 1");
2802#endif
2803 pvh_e = *pprevh;
2804 pmap_pv_hashlist_walks++;
2805 pv_cnt = 0;
2806 while (PV_HASHED_ENTRY_NULL != pvh_e) {
2807 pv_cnt++;
2808 if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == old_ppn) break;
2809 pprevh = &pvh_e->nexth;
2810 pvh_e = pvh_e->nexth;
2811 }
2812 pmap_pv_hashlist_cnts += pv_cnt;
2813 if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
2814 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_enter: pv not in hash list");
2815 if(NULL==pprevh)panic("pmap enter 2");
2816 *pprevh = pvh_e->nexth;
2817 remque(&pvh_e->qlink);
2818 UNLOCK_PV_HASH(pvhash_idx);
1c79356b
A
2819 }
2820 }
1c79356b
A
2821 }
2822 else {
2823
2824 /*
2d21ac55
A
2825 * old_pa is not managed.
2826 * Do removal part of accounting.
1c79356b 2827 */
0c530ab8 2828
1c79356b
A
2829 if (iswired(*pte)) {
2830 assert(pmap->stats.wired_count >= 1);
2d21ac55 2831 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
1c79356b
A
2832 }
2833 }
2834 }
2835
2d21ac55
A
2836 /*
2837 * if we had a previously managed page locked, unlock it now
2838 */
2839
2840 if (old_pa_locked) {
2841 UNLOCK_PVH(pai);
2842 old_pa_locked = FALSE;
2843 }
2844
2845 pai = pa_index(pa); /* now working with new incoming phys page */
2846 if (managed_page(pai)) {
1c79356b
A
2847
2848 /*
2849 * Step 2) Enter the mapping in the PV list for this
2850 * physical page.
2851 */
2d21ac55 2852 pv_h = pai_to_pvh(pai);
1c79356b 2853
1c79356b 2854 LOCK_PVH(pai);
1c79356b
A
2855
2856 if (pv_h->pmap == PMAP_NULL) {
2857 /*
2d21ac55 2858 * No mappings yet, use rooted pv
1c79356b 2859 */
0c530ab8 2860 pv_h->va = vaddr;
1c79356b 2861 pv_h->pmap = pmap;
2d21ac55 2862 queue_init(&pv_h->qlink);
1c79356b
A
2863 }
2864 else {
1c79356b 2865 /*
2d21ac55 2866 * Add new pv_hashed_entry after header.
1c79356b 2867 */
2d21ac55
A
2868 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
2869 pvh_e = pvh_new;
2870 pvh_new = PV_HASHED_ENTRY_NULL; /* show we used it */
2871 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
2872 PV_HASHED_ALLOC(pvh_e);
2873 if (PV_HASHED_ENTRY_NULL == pvh_e) {
2874 /* The pv free list is empty.
2875 * If we are on the kernel pmap we'll use one of the special private
2876 * kernel pv_e's; otherwise, we need to unlock everything, zalloc a pv_e,
2877 * and restart, bringing the pv_e in with us.
2878 */
2879 if (kernel_pmap == pmap) {
2880 PV_HASHED_KERN_ALLOC(pvh_e);
2881 } else {
2882 UNLOCK_PVH(pai);
2883 PMAP_UNLOCK(pmap);
2884 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
2885 goto Retry;
1c79356b 2886 }
2d21ac55 2887 }
1c79356b 2888 }
2d21ac55
A
2889
2890 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pvh_e exhaustion");
2891 pvh_e->va = vaddr;
2892 pvh_e->pmap = pmap;
2893 pvh_e->ppn = pn;
2894 CHK_NPVHASH();
2895 pvhash_idx = pvhashidx(pmap,vaddr);
2896 LOCK_PV_HASH(pvhash_idx);
2897 insque(&pvh_e->qlink, &pv_h->qlink);
2898 hashp = pvhash(pvhash_idx);
2899#if PV_DEBUG
2900 if(NULL==hashp)panic("pmap_enter 4");
2901#endif
2902 pvh_e->nexth = *hashp;
2903 *hashp = pvh_e;
2904 UNLOCK_PV_HASH(pvhash_idx);
2905
1c79356b
A
2906 /*
2907 * Remember that we used the pvlist entry.
2908 */
2d21ac55 2909 pvh_e = PV_HASHED_ENTRY_NULL;
1c79356b 2910 }
0c530ab8
A
2911
2912 /*
2913 * only count the mapping
2914 * for 'managed memory'
2915 */
2d21ac55
A
2916 OSAddAtomic(+1, (SInt32 *) &pmap->stats.resident_count);
2917 if (pmap->stats.resident_count > pmap->stats.resident_max) {
2918 pmap->stats.resident_max = pmap->stats.resident_count;
2919 }
1c79356b
A
2920 }
2921
2922 /*
0c530ab8 2923 * Step 3) Enter the mapping.
2d21ac55 2924 *
1c79356b
A
2925 * Build a template to speed up entering -
2926 * only the pfn changes.
2927 */
2928 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae 2929
2d21ac55 2930 if (flags & VM_MEM_NOT_CACHEABLE) {
55e303ae
A
2931 if(!(flags & VM_MEM_GUARDED))
2932 template |= INTEL_PTE_PTA;
2933 template |= INTEL_PTE_NCACHE;
2934 }
2935
1c79356b
A
2936 if (pmap != kernel_pmap)
2937 template |= INTEL_PTE_USER;
2938 if (prot & VM_PROT_WRITE)
2939 template |= INTEL_PTE_WRITE;
0c530ab8
A
2940
2941 if (set_NX == TRUE)
2942 template |= INTEL_PTE_NX;
2943
1c79356b
A
2944 if (wired) {
2945 template |= INTEL_PTE_WIRED;
2d21ac55 2946 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
1c79356b 2947 }
0c530ab8 2948 pmap_store_pte(pte, template);
91447636 2949
2d21ac55
A
2950 /* if this was a managed page we delayed unlocking the pv until here
2951 * to prevent pmap_page_protect et al from finding it until the pte
2952 * has been stored */
2953
2954 if (managed_page(pai)) {
2955 UNLOCK_PVH(pai);
2956 }
2957
1c79356b 2958Done:
0c530ab8
A
2959 if (need_tlbflush == TRUE)
2960 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
91447636 2961
2d21ac55
A
2962 if (pvh_e != PV_HASHED_ENTRY_NULL) {
2963 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1c79356b
A
2964 }
2965
2d21ac55
A
2966 if (pvh_new != PV_HASHED_ENTRY_NULL) {
2967 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
2968 }
2969
2970 PMAP_UNLOCK(pmap);
2971 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1c79356b
A
2972}
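/*
 * A minimal usage sketch (illustrative only, not part of the original
 * source) showing a typical enter/unwire/remove sequence against the
 * interfaces in this file. The helper name is hypothetical.
 */
#if 0	/* illustrative sketch -- not compiled */
static void
example_enter_and_remove(pmap_t pmap, vm_map_offset_t va, ppnum_t pn)
{
	/* Insert a wired, writable mapping; pmap_enter() expands the paging
	 * hierarchy itself if needed and may not lazy-evaluate. */
	pmap_enter(pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE, 0, TRUE);

	/* Later, drop the wiring but keep the translation... */
	pmap_change_wiring(pmap, va, FALSE);

	/* ...and finally tear the mapping down. */
	pmap_remove(pmap, (addr64_t) va, (addr64_t) (va + PAGE_SIZE));
}
#endif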
2973
2974/*
2975 * Routine: pmap_change_wiring
2976 * Function: Change the wiring attribute for a map/virtual-address
2977 * pair.
2978 * In/out conditions:
2979 * The mapping must already exist in the pmap.
2980 */
2981void
2982pmap_change_wiring(
2983 register pmap_t map,
0c530ab8 2984 vm_map_offset_t vaddr,
1c79356b
A
2985 boolean_t wired)
2986{
2987 register pt_entry_t *pte;
1c79356b
A
2988
2989 /*
2990 * We must grab the pmap system lock because we may
2991 * change a pte_page queue.
2992 */
2d21ac55 2993 PMAP_LOCK(map);
1c79356b 2994
0c530ab8 2995 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1c79356b
A
2996 panic("pmap_change_wiring: pte missing");
2997
2998 if (wired && !iswired(*pte)) {
2999 /*
3000 * wiring down mapping
3001 */
2d21ac55 3002 OSAddAtomic(+1, (SInt32 *) &map->stats.wired_count);
0c530ab8 3003 pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
1c79356b
A
3004 }
3005 else if (!wired && iswired(*pte)) {
3006 /*
3007 * unwiring mapping
3008 */
3009 assert(map->stats.wired_count >= 1);
2d21ac55 3010 OSAddAtomic(-1, (SInt32 *) &map->stats.wired_count);
0c530ab8 3011 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
1c79356b
A
3012 }
3013
2d21ac55 3014 PMAP_UNLOCK(map);
1c79356b
A
3015}
3016
91447636 3017ppnum_t
55e303ae
A
3018pmap_find_phys(pmap_t pmap, addr64_t va)
3019{
91447636 3020 pt_entry_t *ptp;
91447636
A
3021 ppnum_t ppn;
3022
0c530ab8
A
3023 mp_disable_preemption();
3024
3025 ptp = pmap_pte(pmap, va);
91447636
A
3026 if (PT_ENTRY_NULL == ptp) {
3027 ppn = 0;
3028 } else {
3029 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
3030 }
0c530ab8
A
3031 mp_enable_preemption();
3032
91447636 3033 return ppn;
55e303ae
A
3034}
3035
1c79356b
A
3036/*
3037 * Routine: pmap_extract
3038 * Function:
3039 * Extract the physical page address associated
3040 * with the given map/virtual_address pair.
91447636
A
3041 * Changed to a shim for backwards compatibility, but it will not
3042 * work for 64-bit systems. Some old drivers that we cannot
3043 * change still need this.
1c79356b
A
3044 */
3045
3046vm_offset_t
3047pmap_extract(
3048 register pmap_t pmap,
0c530ab8 3049 vm_map_offset_t vaddr)
1c79356b 3050{
0c530ab8
A
3051 ppnum_t ppn;
3052 vm_offset_t paddr;
91447636 3053
0c530ab8
A
3054 paddr = (vm_offset_t)0;
3055 ppn = pmap_find_phys(pmap, vaddr);
2d21ac55 3056
0c530ab8
A
3057 if (ppn) {
3058 paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK);
3059 }
3060 return (paddr);
1c79356b
A
3061}
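/*
 * An illustrative comparison (not part of the original source) of the
 * 64-bit-safe pmap_find_phys() against the legacy pmap_extract() shim
 * described above. The helper name is hypothetical.
 */
#if 0	/* illustrative sketch -- not compiled */
static addr64_t
example_va_to_pa(pmap_t pmap, addr64_t va)
{
	/* Preferred: pmap_find_phys() returns a page number, so it still
	 * works when the physical address does not fit in a 32-bit
	 * vm_offset_t. */
	ppnum_t ppn = pmap_find_phys(pmap, va);

	if (ppn == 0)
		return 0;	/* no translation */
	return ((addr64_t) i386_ptob(ppn)) | (va & INTEL_OFFMASK);

	/* Legacy (32-bit-only) equivalent: pmap_extract(pmap, va). */
}
#endif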
3062
1c79356b 3063void
0c530ab8
A
3064pmap_expand_pml4(
3065 pmap_t map,
3066 vm_map_offset_t vaddr)
1c79356b 3067{
1c79356b 3068 register vm_page_t m;
91447636 3069 register pmap_paddr_t pa;
0c530ab8 3070 uint64_t i;
1c79356b 3071 spl_t spl;
55e303ae 3072 ppnum_t pn;
0c530ab8 3073 pml4_entry_t *pml4p;
89b3af67 3074
0c530ab8
A
3075 if (kernel_pmap == map) panic("expand kernel pml4");
3076
3077 spl = splhigh();
2d21ac55
A
3078 pml4p = pmap64_pml4(map, vaddr);
3079 splx(spl);
3080 if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
1c79356b
A
3081
3082 /*
0c530ab8 3083 * Allocate a VM page for the pml4 page
1c79356b
A
3084 */
3085 while ((m = vm_page_grab()) == VM_PAGE_NULL)
3086 VM_PAGE_WAIT();
3087
3088 /*
91447636 3089 * put the page into the pmap's obj list so it
1c79356b
A
3090 * can be found later.
3091 */
55e303ae
A
3092 pn = m->phys_page;
3093 pa = i386_ptob(pn);
0c530ab8
A
3094 i = pml4idx(map, vaddr);
3095
2d21ac55
A
3096 /*
3097 * Zero the page.
3098 */
3099 pmap_zero_page(pn);
0c530ab8 3100
1c79356b
A
3101 vm_page_lock_queues();
3102 vm_page_wire(m);
0c530ab8 3103 inuse_ptepages_count++;
2d21ac55 3104 vm_page_unlock_queues();
1c79356b 3105
2d21ac55
A
3106	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
3107 vm_object_lock(map->pm_obj_pml4);
1c79356b 3108
2d21ac55 3109 PMAP_LOCK(map);
1c79356b
A
3110 /*
3111 * See if someone else expanded us first
3112 */
0c530ab8 3113 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
2d21ac55
A
3114 PMAP_UNLOCK(map);
3115 vm_object_unlock(map->pm_obj_pml4);
3116
1c79356b
A
3117 vm_page_lock_queues();
3118 vm_page_free(m);
3119 inuse_ptepages_count--;
3120 vm_page_unlock_queues();
2d21ac55 3121
1c79356b
A
3122 return;
3123 }
3124
2d21ac55
A
3125#if 0 /* DEBUG */
3126 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
3127 panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
3128 map, map->pm_obj_pml4, vaddr, i);
3129 }
3130#endif
3131 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
3132 vm_object_unlock(map->pm_obj_pml4);
3133
1c79356b
A
3134 /*
3135 * Set the page directory entry for this page table.
1c79356b 3136 */
0c530ab8 3137 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
c0fea474 3138
0c530ab8
A
3139 pmap_store_pte(pml4p, pa_to_pte(pa)
3140 | INTEL_PTE_VALID
3141 | INTEL_PTE_USER
3142 | INTEL_PTE_WRITE);
5d5c5d0d 3143
2d21ac55 3144 PMAP_UNLOCK(map);
89b3af67 3145
6601e61a 3146 return;
0c530ab8 3147
6601e61a 3148}
89b3af67 3149
6601e61a 3150void
0c530ab8
A
3151pmap_expand_pdpt(
3152 pmap_t map,
3153 vm_map_offset_t vaddr)
6601e61a 3154{
0c530ab8
A
3155 register vm_page_t m;
3156 register pmap_paddr_t pa;
3157 uint64_t i;
3158 spl_t spl;
3159 ppnum_t pn;
3160 pdpt_entry_t *pdptp;
89b3af67 3161
0c530ab8 3162 if (kernel_pmap == map) panic("expand kernel pdpt");
89b3af67 3163
0c530ab8 3164 spl = splhigh();
2d21ac55
A
3165 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
3166 splx(spl);
3167 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
3168 spl = splhigh();
3169 }
3170 splx(spl);
4452a7af 3171
0c530ab8
A
3172 /*
3173 * Allocate a VM page for the pdpt page
3174 */
3175 while ((m = vm_page_grab()) == VM_PAGE_NULL)
3176 VM_PAGE_WAIT();
4452a7af 3177
4452a7af 3178 /*
0c530ab8
A
3179 * put the page into the pmap's obj list so it
3180 * can be found later.
4452a7af 3181 */
0c530ab8
A
3182 pn = m->phys_page;
3183 pa = i386_ptob(pn);
3184 i = pdptidx(map, vaddr);
4452a7af 3185
2d21ac55
A
3186 /*
3187 * Zero the page.
3188 */
3189 pmap_zero_page(pn);
0c530ab8
A
3190
3191 vm_page_lock_queues();
3192 vm_page_wire(m);
0c530ab8 3193 inuse_ptepages_count++;
2d21ac55 3194 vm_page_unlock_queues();
0c530ab8 3195
2d21ac55
A
3196	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
3197 vm_object_lock(map->pm_obj_pdpt);
0c530ab8 3198
2d21ac55 3199 PMAP_LOCK(map);
0c530ab8
A
3200 /*
3201 * See if someone else expanded us first
3202 */
3203 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
2d21ac55
A
3204 PMAP_UNLOCK(map);
3205 vm_object_unlock(map->pm_obj_pdpt);
3206
0c530ab8
A
3207 vm_page_lock_queues();
3208 vm_page_free(m);
3209 inuse_ptepages_count--;
0c530ab8 3210 vm_page_unlock_queues();
2d21ac55 3211
0c530ab8
A
3212 return;
3213 }
3214
2d21ac55
A
3215#if 0 /* DEBUG */
3216 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
3217 panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
3218 map, map->pm_obj_pdpt, vaddr, i);
3219 }
3220#endif
3221 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
3222 vm_object_unlock(map->pm_obj_pdpt);
3223
0c530ab8
A
3224 /*
3225 * Set the page directory entry for this page table.
0c530ab8 3226 */
0c530ab8
A
3227 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
3228
3229 pmap_store_pte(pdptp, pa_to_pte(pa)
3230 | INTEL_PTE_VALID
3231 | INTEL_PTE_USER
3232 | INTEL_PTE_WRITE);
3233
2d21ac55 3234 PMAP_UNLOCK(map);
0c530ab8
A
3235
3236 return;
3237
3238}
3239
3240
3241
3242/*
3243 * Routine: pmap_expand
3244 *
3245 * Expands a pmap to be able to map the specified virtual address.
3246 *
3247 * Allocates new virtual memory for the P0 or P1 portion of the
3248 * pmap, then re-maps the physical pages that were in the old
3249 * pmap to be in the new pmap.
3250 *
3251 * Must be called with the pmap system and the pmap unlocked,
3252 * since these must be unlocked to use vm_allocate or vm_deallocate.
3253 * Thus it must be called in a loop that checks whether the map
3254 * has been expanded enough.
3255 * (We won't loop forever, since page tables aren't shrunk.)
3256 */
3257void
3258pmap_expand(
3259 pmap_t map,
3260 vm_map_offset_t vaddr)
3261{
3262 pt_entry_t *pdp;
3263 register vm_page_t m;
3264 register pmap_paddr_t pa;
3265 uint64_t i;
3266 spl_t spl;
3267 ppnum_t pn;
3268
3269 /*
3270 * if not the kernel map (while we are still in compat kernel mode)
3271 * and we are 64 bit, propagate expand upwards
3272 */
3273
3274 if (cpu_64bit && (map != kernel_pmap)) {
2d21ac55
A
3275 spl = splhigh();
3276 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
3277 splx(spl);
3278 pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
3279 spl = splhigh();
3280 }
3281 splx(spl);
0c530ab8
A
3282 }
3283
0c530ab8
A
3284 /*
3285 * Allocate a VM page for the pde entries.
3286 */
3287 while ((m = vm_page_grab()) == VM_PAGE_NULL)
3288 VM_PAGE_WAIT();
3289
3290 /*
3291 * put the page into the pmap's obj list so it
3292 * can be found later.
3293 */
3294 pn = m->phys_page;
3295 pa = i386_ptob(pn);
3296 i = pdeidx(map, vaddr);
3297
2d21ac55
A
3298 /*
3299 * Zero the page.
3300 */
3301 pmap_zero_page(pn);
0c530ab8
A
3302
3303 vm_page_lock_queues();
3304 vm_page_wire(m);
3305 inuse_ptepages_count++;
0c530ab8 3306 vm_page_unlock_queues();
0c530ab8 3307
2d21ac55
A
3308	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
3309 vm_object_lock(map->pm_obj);
0c530ab8 3310
2d21ac55 3311 PMAP_LOCK(map);
0c530ab8
A
3312 /*
3313 * See if someone else expanded us first
3314 */
2d21ac55 3315
0c530ab8 3316 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
2d21ac55
A
3317 PMAP_UNLOCK(map);
3318 vm_object_unlock(map->pm_obj);
0c530ab8
A
3319
3320 vm_page_lock_queues();
3321 vm_page_free(m);
3322 inuse_ptepages_count--;
0c530ab8 3323 vm_page_unlock_queues();
2d21ac55 3324
0c530ab8
A
3325 return;
3326 }
3327
2d21ac55
A
3328#if 0 /* DEBUG */
3329 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
3330 panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
3331 map, map->pm_obj, vaddr, i);
3332 }
3333#endif
3334 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
3335 vm_object_unlock(map->pm_obj);
0c530ab8
A
3336
3337 /*
2d21ac55 3338 * refetch while locked
0c530ab8
A
3339 */
3340
2d21ac55
A
3341 pdp = pmap_pde(map, vaddr);
3342
3343 /*
3344 * Set the page directory entry for this page table.
3345 */
0c530ab8
A
3346 pmap_store_pte(pdp, pa_to_pte(pa)
3347 | INTEL_PTE_VALID
3348 | INTEL_PTE_USER
3349 | INTEL_PTE_WRITE);
0c530ab8 3350
2d21ac55 3351 PMAP_UNLOCK(map);
0c530ab8
A
3352
3353 return;
3354}
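/*
 * A condensed sketch (illustrative only, not part of the original source)
 * of the retry loop that callers such as pmap_enter() use around
 * pmap_expand(), since expansion must happen with the pmap unlocked and
 * another thread may expand the map first. The helper name is
 * hypothetical.
 */
#if 0	/* illustrative sketch -- not compiled */
static pt_entry_t *
example_pte_for(pmap_t pmap, vm_map_offset_t vaddr)
{
	pt_entry_t *pte;

	PMAP_LOCK(pmap);
	while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
		/* Must unlock to expand; someone else may win the race,
		 * which is why this is a loop rather than a single call. */
		PMAP_UNLOCK(pmap);
		pmap_expand(pmap, vaddr);
		PMAP_LOCK(pmap);
	}
	/* The caller still holds PMAP_LOCK(pmap) here. */
	return pte;
}
#endif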
3355
3356
3357/*
3358 * pmap_sync_page_data_phys(ppnum_t pa)
3359 *
3360 * Invalidates all of the instruction cache on a physical page and
3361 * pushes any dirty data from the data cache for the same physical page
3362 * Not required in i386.
3363 */
3364void
3365pmap_sync_page_data_phys(__unused ppnum_t pa)
3366{
3367 return;
3368}
3369
3370/*
3371 * pmap_sync_page_attributes_phys(ppnum_t pa)
3372 *
3373 * Write back and invalidate all cachelines on a physical page.
3374 */
3375void
3376pmap_sync_page_attributes_phys(ppnum_t pa)
3377{
3378 cache_flush_page_phys(pa);
3379}
3380
2d21ac55
A
3381
3382
3383#ifdef CURRENTLY_UNUSED_AND_UNTESTED
3384
0c530ab8
A
3385int collect_ref;
3386int collect_unref;
3387
3388/*
3389 * Routine: pmap_collect
3390 * Function:
3391 * Garbage collects the physical map system for
3392 * pages which are no longer used.
3393 * Success need not be guaranteed -- that is, there
3394 * may well be pages which are not referenced, but
3395 * others may be collected.
3396 * Usage:
3397 * Called by the pageout daemon when pages are scarce.
3398 */
3399void
3400pmap_collect(
3401 pmap_t p)
3402{
3403 register pt_entry_t *pdp, *ptp;
3404 pt_entry_t *eptp;
3405 int wired;
0c530ab8
A
3406
3407 if (p == PMAP_NULL)
3408 return;
3409
3410 if (p == kernel_pmap)
3411 return;
3412
3413 /*
3414 * Garbage collect map.
3415 */
2d21ac55 3416 PMAP_LOCK(p);
0c530ab8
A
3417
3418 for (pdp = (pt_entry_t *)p->dirbase;
4452a7af
A
3419 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
3420 pdp++)
3421 {
3422 if (*pdp & INTEL_PTE_VALID) {
3423 if(*pdp & INTEL_PTE_REF) {
0c530ab8 3424 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
4452a7af
A
3425 collect_ref++;
3426 } else {
3427 collect_unref++;
3428 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
3429 eptp = ptp + NPTEPG;
3430
3431 /*
3432 * If the pte page has any wired mappings, we cannot
3433 * free it.
3434 */
3435 wired = 0;
3436 {
3437 register pt_entry_t *ptep;
3438 for (ptep = ptp; ptep < eptp; ptep++) {
3439 if (iswired(*ptep)) {
3440 wired = 1;
5d5c5d0d 3441 break;
1c79356b
A
3442 }
3443 }
3444 }
3445 if (!wired) {
3446 /*
3447 * Remove the virtual addresses mapped by this pte page.
3448 */
3449 pmap_remove_range(p,
91447636 3450 pdetova(pdp - (pt_entry_t *)p->dirbase),
1c79356b
A
3451 ptp,
3452 eptp);
3453
3454 /*
3455 * Invalidate the page directory pointer.
3456 */
0c530ab8 3457 pmap_store_pte(pdp, 0x0);
91447636 3458
2d21ac55 3459 PMAP_UNLOCK(p);
1c79356b
A
3460
3461 /*
3462 * And free the pte page itself.
3463 */
3464 {
3465 register vm_page_t m;
3466
91447636 3467 vm_object_lock(p->pm_obj);
2d21ac55 3468
91447636 3469 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1c79356b
A
3470 if (m == VM_PAGE_NULL)
3471 panic("pmap_collect: pte page not in object");
2d21ac55 3472
1c79356b
A
3473 vm_page_lock_queues();
3474 vm_page_free(m);
3475 inuse_ptepages_count--;
3476 vm_page_unlock_queues();
2d21ac55 3477
91447636 3478 vm_object_unlock(p->pm_obj);
1c79356b
A
3479 }
3480
2d21ac55 3481 PMAP_LOCK(p);
1c79356b 3482 }
91447636
A
3483 }
3484 }
1c79356b 3485 }
0c530ab8 3486
2d21ac55
A
3487 PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
3488 PMAP_UNLOCK(p);
1c79356b
A
3489 return;
3490
3491}
2d21ac55 3492#endif
1c79356b 3493
1c79356b 3494
1c79356b 3495void
2d21ac55 3496pmap_copy_page(ppnum_t src, ppnum_t dst)
1c79356b 3497{
2d21ac55
A
3498 bcopy_phys((addr64_t)i386_ptob(src),
3499 (addr64_t)i386_ptob(dst),
3500 PAGE_SIZE);
1c79356b 3501}
1c79356b 3502
1c79356b
A
3503
3504/*
3505 * Routine: pmap_pageable
3506 * Function:
3507 * Make the specified pages (by pmap, offset)
3508 * pageable (or not) as requested.
3509 *
3510 * A page which is not pageable may not take
3511 * a fault; therefore, its page table entry
3512 * must remain valid for the duration.
3513 *
3514 * This routine is merely advisory; pmap_enter
3515 * will specify that these pages are to be wired
3516 * down (or not) as appropriate.
3517 */
3518void
3519pmap_pageable(
91447636 3520 __unused pmap_t pmap,
0c530ab8
A
3521 __unused vm_map_offset_t start_addr,
3522 __unused vm_map_offset_t end_addr,
91447636 3523 __unused boolean_t pageable)
1c79356b
A
3524{
3525#ifdef lint
91447636 3526 pmap++; start_addr++; end_addr++; pageable++;
1c79356b
A
3527#endif /* lint */
3528}
3529
3530/*
3531 * Clear specified attribute bits.
3532 */
3533void
3534phys_attribute_clear(
2d21ac55 3535 ppnum_t pn,
1c79356b
A
3536 int bits)
3537{
2d21ac55
A
3538 pv_rooted_entry_t pv_h;
3539 register pv_hashed_entry_t pv_e;
1c79356b
A
3540 register pt_entry_t *pte;
3541 int pai;
3542 register pmap_t pmap;
1c79356b 3543
2d21ac55 3544 pmap_intr_assert();
91447636 3545 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
3546 if (pn == vm_page_guard_addr)
3547 return;
3548
3549 pai = ppn_to_pai(pn);
3550
3551 if (!managed_page(pai)) {
1c79356b
A
3552 /*
3553 * Not a managed page.
3554 */
3555 return;
3556 }
3557
2d21ac55
A
3558 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
3559 (int) pn, bits, 0, 0, 0);
1c79356b 3560
1c79356b
A
3561 pv_h = pai_to_pvh(pai);
3562
2d21ac55
A
3563 LOCK_PVH(pai);
3564
1c79356b
A
3565 /*
3566 * Walk down PV list, clearing all modify or reference bits.
3567 * We do not have to lock the pv_list because we have
3568 * the entire pmap system locked.
3569 */
3570 if (pv_h->pmap != PMAP_NULL) {
3571 /*
3572 * There are some mappings.
3573 */
1c79356b 3574
2d21ac55
A
3575 pv_e = (pv_hashed_entry_t)pv_h;
3576
3577 do {
1c79356b 3578 pmap = pv_e->pmap;
1c79356b
A
3579
3580 {
2d21ac55 3581 vm_map_offset_t va;
1c79356b
A
3582
3583 va = pv_e->va;
1c79356b 3584
2d21ac55
A
3585 /*
3586 * Clear modify and/or reference bits.
3587 */
91447636 3588
0c530ab8
A
3589 pte = pmap_pte(pmap, va);
3590 pmap_update_pte(pte, *pte, (*pte & ~bits));
c910b4d9
A
3591 /* Ensure all processors using this translation
3592 * invalidate this TLB entry. The invalidation *must* follow
3593 * the PTE update, to ensure that the TLB shadow of the
3594 * 'D' bit (in particular) is synchronized with the
3595 * updated PTE.
3596 */
3597 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 3598 }
91447636 3599
2d21ac55 3600 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1c79356b 3601
2d21ac55
A
3602 } while (pv_e != (pv_hashed_entry_t)pv_h);
3603 }
1c79356b
A
3604 pmap_phys_attributes[pai] &= ~bits;
3605
2d21ac55
A
3606 UNLOCK_PVH(pai);
3607
3608 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
3609 0, 0, 0, 0, 0);
3610
1c79356b
A
3611}
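/*
 * Minimal sketch of the ordering rule applied in the loop above
 * (assumption: "pmap" and "va" name a live mapping the caller holds
 * locked): the PTE is edited first, and only then are remote TLBs shot
 * down, so stale TLB copies of the modify/reference bits cannot survive
 * the update.
 */
#if 0
	pt_entry_t	*pte = pmap_pte(pmap, va);

	pmap_update_pte(pte, *pte, (*pte & ~PHYS_MODIFIED));	/* 1: update the PTE */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);		/* 2: then invalidate */
#endif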
3612
3613/*
3614 * Check specified attribute bits.
3615 */
2d21ac55 3616int
1c79356b 3617phys_attribute_test(
2d21ac55 3618 ppnum_t pn,
1c79356b
A
3619 int bits)
3620{
2d21ac55
A
3621 pv_rooted_entry_t pv_h;
3622 register pv_hashed_entry_t pv_e;
1c79356b
A
3623 register pt_entry_t *pte;
3624 int pai;
3625 register pmap_t pmap;
2d21ac55 3626 int attributes = 0;
1c79356b 3627
2d21ac55 3628 pmap_intr_assert();
91447636 3629 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
3630 if (pn == vm_page_guard_addr)
3631 return 0;
3632
3633 pai = ppn_to_pai(pn);
3634
3635 if (!managed_page(pai)) {
1c79356b
A
3636 /*
3637 * Not a managed page.
3638 */
2d21ac55 3639 return (0);
1c79356b
A
3640 }
3641
0c530ab8
A
3642 /*
3643 * Super-fast check: if the bits have already been collected,
3644 * there is no need to take any locks.
3645 * If they are not all set, we must recheck after taking
3646 * the lock, in case they were pulled in while
3647 * we were waiting for the lock.

3648 */
2d21ac55
A
3649 if ( (pmap_phys_attributes[pai] & bits) == bits)
3650 return (bits);
3651
0c530ab8
A
3652 pv_h = pai_to_pvh(pai);
3653
2d21ac55 3654 LOCK_PVH(pai);
1c79356b 3655
2d21ac55 3656 attributes = pmap_phys_attributes[pai] & bits;
1c79356b
A
3657
3658 /*
2d21ac55
A
3659 * Walk down PV list, checking the mappings until we
3660 * reach the end or we've found the attributes we've asked for
1c79356b
A
3661 * We do not have to lock the pv_list because we have
3662 * the entire pmap system locked.
3663 */
3664 if (pv_h->pmap != PMAP_NULL) {
3665 /*
3666 * There are some mappings.
3667 */
2d21ac55
A
3668 pv_e = (pv_hashed_entry_t)pv_h;
3669 if (attributes != bits) do {
1c79356b 3670
2d21ac55 3671 pmap = pv_e->pmap;
1c79356b
A
3672
3673 {
2d21ac55 3674 vm_map_offset_t va;
1c79356b
A
3675
3676 va = pv_e->va;
2d21ac55
A
3677 /*
3678 * first make sure any processor actively
3679 * using this pmap, flushes its TLB state
3680 */
3681 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 3682
1c79356b 3683 /*
2d21ac55 3684 * pick up modify and/or reference bits from this mapping
1c79356b 3685 */
1c79356b 3686
2d21ac55
A
3687 pte = pmap_pte(pmap, va);
3688 attributes |= *pte & bits;
3689
1c79356b 3690 }
2d21ac55
A
3691
3692 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
3693
3694 } while ((attributes != bits) && (pv_e != (pv_hashed_entry_t)pv_h));
1c79356b 3695 }
2d21ac55
A
3696
3697 UNLOCK_PVH(pai);
3698 return (attributes);
1c79356b
A
3699}
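/*
 * The "super fast check" above is the usual check / lock / re-check
 * idiom.  Reduced to its skeleton (a condensed restatement of the flow
 * in this function, shown only for clarity):
 */
#if 0
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return (bits);				/* already collected: lock-free exit */

	LOCK_PVH(pai);
	attributes = pmap_phys_attributes[pai] & bits;
	/* ... walk the PV list only while attributes != bits ... */
	UNLOCK_PVH(pai);
	return (attributes);
#endif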
3700
3701/*
3702 * Set specified attribute bits.
3703 */
3704void
3705phys_attribute_set(
2d21ac55 3706 ppnum_t pn,
1c79356b
A
3707 int bits)
3708{
2d21ac55 3709 int pai;
1c79356b 3710
2d21ac55 3711 pmap_intr_assert();
91447636 3712 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
3713 if (pn == vm_page_guard_addr)
3714 return;
3715
3716 pai = ppn_to_pai(pn);
3717
3718 if (!managed_page(pai)) {
1c79356b
A
3719 /*
3720 * Not a managed page.
3721 */
3722 return;
3723 }
3724
2d21ac55
A
3725 LOCK_PVH(pai);
3726
3727 pmap_phys_attributes[pai] |= bits;
3728
3729 UNLOCK_PVH(pai);
1c79356b
A
3730}
3731
3732/*
3733 * Set the modify bit on the specified physical page.
3734 */
3735
3736void pmap_set_modify(
55e303ae 3737 ppnum_t pn)
1c79356b 3738{
91447636 3739 phys_attribute_set(pn, PHYS_MODIFIED);
1c79356b
A
3740}
3741
3742/*
3743 * Clear the modify bits on the specified physical page.
3744 */
3745
3746void
3747pmap_clear_modify(
55e303ae 3748 ppnum_t pn)
1c79356b 3749{
91447636 3750 phys_attribute_clear(pn, PHYS_MODIFIED);
1c79356b
A
3751}
3752
3753/*
3754 * pmap_is_modified:
3755 *
3756 * Return whether or not the specified physical page is modified
3757 * by any physical maps.
3758 */
3759
3760boolean_t
3761pmap_is_modified(
55e303ae 3762 ppnum_t pn)
1c79356b 3763{
2d21ac55
A
3764 if (phys_attribute_test(pn, PHYS_MODIFIED))
3765 return TRUE;
3766
3767 return FALSE;
1c79356b
A
3768}
3769
3770/*
3771 * pmap_clear_reference:
3772 *
3773 * Clear the reference bit on the specified physical page.
3774 */
3775
3776void
3777pmap_clear_reference(
55e303ae 3778 ppnum_t pn)
1c79356b 3779{
91447636
A
3780 phys_attribute_clear(pn, PHYS_REFERENCED);
3781}
3782
3783void
3784pmap_set_reference(ppnum_t pn)
3785{
3786 phys_attribute_set(pn, PHYS_REFERENCED);
1c79356b
A
3787}
3788
3789/*
3790 * pmap_is_referenced:
3791 *
3792 * Return whether or not the specified physical page is referenced
3793 * by any physical maps.
3794 */
3795
3796boolean_t
3797pmap_is_referenced(
55e303ae 3798 ppnum_t pn)
1c79356b 3799{
2d21ac55
A
3800 if (phys_attribute_test(pn, PHYS_REFERENCED))
3801 return TRUE;
3802
3803 return FALSE;
91447636
A
3804}
3805
3806/*
3807 * pmap_get_refmod(phys)
3808 * returns the referenced and modified bits of the specified
3809 * physical page.
3810 */
3811unsigned int
3812pmap_get_refmod(ppnum_t pa)
3813{
2d21ac55
A
3814 int refmod;
3815 unsigned int retval = 0;
3816
3817 refmod = phys_attribute_test(pa, PHYS_MODIFIED | PHYS_REFERENCED);
3818
3819 if (refmod & PHYS_MODIFIED)
3820 retval |= VM_MEM_MODIFIED;
3821 if (refmod & PHYS_REFERENCED)
3822 retval |= VM_MEM_REFERENCED;
3823
3824 return (retval);
91447636
A
3825}
3826
3827/*
3828 * pmap_clear_refmod(phys, mask)
3829 * clears the referenced and modified bits as specified by the mask
3830 * of the specified physical page.
3831 */
3832void
3833pmap_clear_refmod(ppnum_t pa, unsigned int mask)
3834{
3835 unsigned int x86Mask;
3836
3837 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
3838 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
3839 phys_attribute_clear(pa, x86Mask);
1c79356b
A
3840}
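/*
 * Usage sketch (hypothetical caller; "pn" is assumed to be a managed
 * ppnum_t): sample the referenced/modified state of a page and then
 * clear both bits in a single call.
 */
#if 0
	unsigned int	refmod;

	refmod = pmap_get_refmod(pn);
	if (refmod & VM_MEM_MODIFIED) {
		/* page is dirty: a real caller would schedule cleaning here */
	}
	pmap_clear_refmod(pn, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
#endif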
3841
1c79356b 3842void
91447636
A
3843invalidate_icache(__unused vm_offset_t addr,
3844 __unused unsigned cnt,
3845 __unused int phys)
1c79356b
A
3846{
3847 return;
3848}
3849void
91447636
A
3850flush_dcache(__unused vm_offset_t addr,
3851 __unused unsigned count,
3852 __unused int phys)
1c79356b
A
3853{
3854 return;
3855}
3856
2d21ac55
A
3857#if CONFIG_DTRACE
3858/*
3859 * Constrain DTrace copyin/copyout actions
3860 */
3861extern kern_return_t dtrace_copyio_preflight(addr64_t);
3862extern kern_return_t dtrace_copyio_postflight(addr64_t);
3863
3864kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
3865{
3866 thread_t thread = current_thread();
3867
3868 if (current_map() == kernel_map)
3869 return KERN_FAILURE;
3870 else if (thread->machine.specFlags & CopyIOActive)
3871 return KERN_FAILURE;
3872 else
3873 return KERN_SUCCESS;
3874}
3875
3876kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
3877{
3878 return KERN_SUCCESS;
3879}
3880#endif /* CONFIG_DTRACE */
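/*
 * Sketch of the intended bracketing (an assumption about the DTrace
 * side, whose call sites live outside this file; "uaddr" is a
 * hypothetical user address): preflight gates the user access,
 * postflight is issued once the copy has completed.
 */
#if 0
	if (dtrace_copyio_preflight(uaddr) == KERN_SUCCESS) {
		/* ... perform the copyin/copyout against uaddr ... */
		(void) dtrace_copyio_postflight(uaddr);
	}
#endif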
3881
0c530ab8 3882#if MACH_KDB
6601e61a 3883
0c530ab8 3884/* show phys page mappings and attributes */
6601e61a 3885
0c530ab8 3886extern void db_show_page(pmap_paddr_t pa);
6601e61a 3887
2d21ac55 3888#if 0
6601e61a 3889void
0c530ab8 3890db_show_page(pmap_paddr_t pa)
6601e61a 3891{
0c530ab8
A
3892 pv_entry_t pv_h;
3893 int pai;
3894 char attr;
3895
3896 pai = pa_index(pa);
3897 pv_h = pai_to_pvh(pai);
1c79356b
A
3898
3899 attr = pmap_phys_attributes[pai];
2d21ac55 3900 printf("phys page %llx ", pa);
1c79356b
A
3901 if (attr & PHYS_MODIFIED)
3902 printf("modified, ");
3903 if (attr & PHYS_REFERENCED)
3904 printf("referenced, ");
3905 if (pv_h->pmap || pv_h->next)
3906 printf(" mapped at\n");
3907 else
3908 printf(" not mapped\n");
3909 for (; pv_h; pv_h = pv_h->next)
3910 if (pv_h->pmap)
2d21ac55 3911 printf("%llx in pmap %p\n", pv_h->va, pv_h->pmap);
1c79356b 3912}
2d21ac55 3913#endif
1c79356b
A
3914
3915#endif /* MACH_KDB */
3916
3917#if MACH_KDB
2d21ac55 3918#if 0
1c79356b
A
3919void db_kvtophys(vm_offset_t);
3920void db_show_vaddrs(pt_entry_t *);
3921
3922/*
3923 * print out the results of kvtophys(arg)
3924 */
3925void
3926db_kvtophys(
3927 vm_offset_t vaddr)
3928{
0c530ab8 3929 db_printf("0x%qx", kvtophys(vaddr));
1c79356b
A
3930}
3931
3932/*
3933 * Walk the page tables.
3934 */
3935void
3936db_show_vaddrs(
3937 pt_entry_t *dirbase)
3938{
3939 pt_entry_t *ptep, *pdep, tmp;
0c530ab8 3940 unsigned int x, y, pdecnt, ptecnt;
1c79356b
A
3941
3942 if (dirbase == 0) {
3943 dirbase = kernel_pmap->dirbase;
3944 }
3945 if (dirbase == 0) {
3946 db_printf("need a dirbase...\n");
3947 return;
3948 }
0c530ab8 3949 dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
1c79356b
A
3950
3951 db_printf("dirbase: 0x%x\n", dirbase);
3952
3953 pdecnt = ptecnt = 0;
3954 pdep = &dirbase[0];
91447636 3955 for (y = 0; y < NPDEPG; y++, pdep++) {
1c79356b
A
3956 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3957 continue;
3958 }
3959 pdecnt++;
2d21ac55 3960 ptep = (pt_entry_t *) ((unsigned long)(*pdep) & ~INTEL_OFFMASK);
1c79356b 3961 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
91447636 3962 for (x = 0; x < NPTEPG; x++, ptep++) {
1c79356b
A
3963 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3964 continue;
3965 }
3966 ptecnt++;
3967 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3968 x,
3969 *ptep,
3970 (y << 22) | (x << 12),
3971 *ptep & ~INTEL_OFFMASK);
3972 }
3973 }
3974
3975 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3976
3977}
2d21ac55 3978#endif
1c79356b
A
3979#endif /* MACH_KDB */
3980
3981#include <mach_vm_debug.h>
3982#if MACH_VM_DEBUG
3983#include <vm/vm_debug.h>
3984
3985int
3986pmap_list_resident_pages(
91447636
A
3987 __unused pmap_t pmap,
3988 __unused vm_offset_t *listp,
3989 __unused int space)
1c79356b
A
3990{
3991 return 0;
3992}
3993#endif /* MACH_VM_DEBUG */
3994
6601e61a 3995
1c79356b 3996
91447636
A
3997/* temporary workaround */
3998boolean_t
0c530ab8 3999coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
91447636 4000{
0c530ab8 4001#if 0
91447636 4002 pt_entry_t *ptep;
1c79356b 4003
91447636
A
4004 ptep = pmap_pte(map->pmap, va);
4005 if (0 == ptep)
4006 return FALSE;
4007 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
0c530ab8
A
4008#else
4009 return TRUE;
1c79356b 4010#endif
1c79356b
A
4011}
4012
1c79356b 4013
9bccf70c 4014boolean_t
91447636
A
4015phys_page_exists(
4016 ppnum_t pn)
9bccf70c 4017{
91447636
A
4018 assert(pn != vm_page_fictitious_addr);
4019
4020 if (!pmap_initialized)
4021 return (TRUE);
2d21ac55
A
4022
4023 if (pn == vm_page_guard_addr)
4024 return FALSE;
4025
4026 if (!managed_page(ppn_to_pai(pn)))
91447636
A
4027 return (FALSE);
4028
4029 return TRUE;
4030}
4031
4032void
2d21ac55 4033mapping_free_prime(void)
91447636
A
4034{
4035 int i;
2d21ac55
A
4036 pv_hashed_entry_t pvh_e;
4037 pv_hashed_entry_t pvh_eh;
4038 pv_hashed_entry_t pvh_et;
4039 int pv_cnt;
4040
4041 pv_cnt = 0;
4042 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
4043 for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
4044 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
4045
4046 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
4047 pvh_eh = pvh_e;
4048
4049 if (pvh_et == PV_HASHED_ENTRY_NULL)
4050 pvh_et = pvh_e;
4051 pv_cnt++;
4052 }
4053 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
91447636 4054
2d21ac55
A
4055 pv_cnt = 0;
4056 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
4057 for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
4058 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
4059
4060 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
4061 pvh_eh = pvh_e;
4062
4063 if (pvh_et == PV_HASHED_ENTRY_NULL)
4064 pvh_et = pvh_e;
4065 pv_cnt++;
91447636 4066 }
2d21ac55
A
4067 PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
4068
91447636
A
4069}
4070
4071void
2d21ac55 4072mapping_adjust(void)
91447636 4073{
2d21ac55
A
4074 pv_hashed_entry_t pvh_e;
4075 pv_hashed_entry_t pvh_eh;
4076 pv_hashed_entry_t pvh_et;
4077 int pv_cnt;
91447636 4078 int i;
91447636
A
4079
4080 if (mapping_adjust_call == NULL) {
4081 thread_call_setup(&mapping_adjust_call_data,
4082 (thread_call_func_t) mapping_adjust,
4083 (thread_call_param_t) NULL);
4084 mapping_adjust_call = &mapping_adjust_call_data;
4085 }
2d21ac55
A
4086
4087 pv_cnt = 0;
4088 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
4089 if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
4090 for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
4091 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
4092
4093 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
4094 pvh_eh = pvh_e;
4095
4096 if (pvh_et == PV_HASHED_ENTRY_NULL)
4097 pvh_et = pvh_e;
4098 pv_cnt++;
4099 }
4100 PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
4101 }
4102
4103 pv_cnt = 0;
4104 pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
4105 if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
4106 for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
4107 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
4108
4109 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
4110 pvh_eh = pvh_e;
4111
4112 if (pvh_et == PV_HASHED_ENTRY_NULL)
4113 pvh_et = pvh_e;
4114 pv_cnt++;
91447636 4115 }
2d21ac55 4116 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
91447636
A
4117 }
4118 mappingrecurse = 0;
4119}
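/*
 * Sketch of the trigger side (an assumption; the real check lives in
 * the PV allocation path, not in this function): an allocator that
 * finds a free list below its low-water mark sets mappingrecurse once
 * and queues this routine as a thread call, so the refill happens
 * outside the hot allocation path.
 */
#if 0
	if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK &&
	    !mappingrecurse) {
		mappingrecurse = 1;	/* the real code would set this atomically */
		thread_call_enter(mapping_adjust_call);
	}
#endif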
4120
4121void
0c530ab8 4122pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
91447636 4123{
2d21ac55
A
4124 int i;
4125 pt_entry_t *opte, *npte;
4126 pt_entry_t pte;
4127 spl_t s;
4128
4129 for (i = 0; i < cnt; i++) {
4130 s = splhigh();
4131 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
4132 if (0 == opte)
4133 panic("kernel_commpage");
4134 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
4135 pte &= ~INTEL_PTE_WRITE; // ensure read only
4136 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
4137 if (0 == npte)
4138 panic("user_commpage");
4139 pmap_store_pte(npte, pte);
4140 splx(s);
4141 kernel_commpage += INTEL_PGBYTES;
4142 user_commpage += INTEL_PGBYTES;
4143 }
91447636
A
4144}
4145
2d21ac55 4146
0c530ab8
A
4147#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
4148pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
4149
4150void
4151pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
4152{
2d21ac55
A
4153 int i;
4154 pt_entry_t *kptep;
0c530ab8 4155
2d21ac55 4156 PMAP_LOCK(kernel_pmap);
0c530ab8 4157
2d21ac55
A
4158 for (i = 0; i < cnt; i++) {
4159 kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
4160 if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID)))
4161 panic("pmap_commpage64_init pte");
4162 pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
4163 }
4164 PMAP_UNLOCK(kernel_pmap);
0c530ab8
A
4165}
4166
0c530ab8 4167
91447636 4168static cpu_pmap_t cpu_pmap_master;
91447636
A
4169
4170struct cpu_pmap *
4171pmap_cpu_alloc(boolean_t is_boot_cpu)
4172{
4173 int ret;
4174 int i;
4175 cpu_pmap_t *cp;
91447636 4176 vm_offset_t address;
0c530ab8 4177 vm_map_address_t mapaddr;
91447636 4178 vm_map_entry_t entry;
0c530ab8 4179 pt_entry_t *pte;
91447636
A
4180
4181 if (is_boot_cpu) {
4182 cp = &cpu_pmap_master;
91447636
A
4183 } else {
4184 /*
4185 * The per-cpu pmap data structure itself.
4186 */
4187 ret = kmem_alloc(kernel_map,
4188 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
4189 if (ret != KERN_SUCCESS) {
4190 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
4191 return NULL;
4192 }
4193 bzero((void *)cp, sizeof(cpu_pmap_t));
4194
4195 /*
0c530ab8 4196 * The temporary windows used for copy/zero - see loose_ends.c
91447636 4197 */
0c530ab8
A
4198 ret = vm_map_find_space(kernel_map,
4199 &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
91447636 4200 if (ret != KERN_SUCCESS) {
0c530ab8
A
4201 printf("pmap_cpu_alloc() "
4202 "vm_map_find_space ret=%d\n", ret);
91447636
A
4203 pmap_cpu_free(cp);
4204 return NULL;
4205 }
0c530ab8 4206 address = (vm_offset_t)mapaddr;
4452a7af 4207
0c530ab8 4208 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
2d21ac55
A
4209 spl_t s;
4210 s = splhigh();
0c530ab8
A
4211 while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
4212 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
4213 * (int *) pte = 0;
6601e61a 4214 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
0c530ab8 4215 cp->mapwindow[i].prv_CMAP = pte;
2d21ac55 4216 splx(s);
4452a7af 4217 }
0c530ab8 4218 vm_map_unlock(kernel_map);
4452a7af
A
4219 }
4220
0c530ab8
A
4221 cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
4222 cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
4223 cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
4452a7af 4224
6601e61a 4225 return cp;
4452a7af
A
4226}
4227
4228void
6601e61a 4229pmap_cpu_free(struct cpu_pmap *cp)
4452a7af 4230{
6601e61a 4231 if (cp != NULL && cp != &cpu_pmap_master) {
6601e61a 4232 kfree((void *) cp, sizeof(cpu_pmap_t));
4452a7af 4233 }
4452a7af 4234}
0c530ab8
A
4235
4236
4237mapwindow_t *
4238pmap_get_mapwindow(pt_entry_t pentry)
4239{
4240 mapwindow_t *mp;
4241 int i;
0c530ab8 4242
2d21ac55 4243 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
0c530ab8
A
4244
4245 /*
4246 * Note: 0th map reserved for pmap_pte()
4247 */
4248 for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
4249 mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
4250
4251 if (*mp->prv_CMAP == 0) {
2d21ac55
A
4252 pmap_store_pte(mp->prv_CMAP, pentry);
4253
4254 invlpg((uintptr_t)mp->prv_CADDR);
4255
4256 return (mp);
0c530ab8
A
4257 }
4258 }
2d21ac55
A
4259 panic("pmap_get_mapwindow: no windows available");
4260
4261 return NULL;
4262}
4263
4264
4265void
4266pmap_put_mapwindow(mapwindow_t *mp)
4267{
4268 pmap_store_pte(mp->prv_CMAP, 0);
0c530ab8
A
4269}
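/*
 * Usage sketch for the per-cpu windows (hypothetical caller; "pn" is an
 * assumed physical page number): build a PTE for the page, borrow a
 * window, touch the page through prv_CADDR, then release the window.
 * Preemption (or interrupts) must be disabled across the pairing, per
 * the assertion in pmap_get_mapwindow().
 */
#if 0
	mapwindow_t	*mp;

	mp = pmap_get_mapwindow((pt_entry_t)(i386_ptob(pn) |
					     INTEL_PTE_VALID |
					     INTEL_PTE_WRITE));
	bzero(mp->prv_CADDR, PAGE_SIZE);	/* window behaves as a normal kernel VA */
	pmap_put_mapwindow(mp);
#endif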
4270
4271
2d21ac55
A
4272/*
4273 * The Intel platform can nest at the PDE level, i.e. in NBPDE (2MB) units,
4274 * on an NBPDE boundary.
4275 */
4276uint64_t pmap_nesting_size_min = NBPDE;
4277uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE; /* no limit, really... */
4278
0c530ab8
A
4279/*
4280 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
4281 *
4282 * grand = the pmap that we will nest subord into
4283 * subord = the pmap that goes into the grand
4284 * vstart = start of the range in the grand pmap at which subord is inserted
4285 * nstart = start of the corresponding range in the nested (subord) pmap
4286 * size = size of the nested area (up to 16TB)
4287 *
4288 * Inserts a pmap into another. This is used to implement shared segments.
4289 *
4290 * On x86 this is very limited right now: the range must be exactly one segment.
4291 *
4292 * Note that we depend upon higher-level VM locks to ensure that things don't change while
4293 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
4294 * or performing two nests at once.
4295 */
4296
4297
4298kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {
4299
4300 vm_map_offset_t vaddr, nvaddr;
4301 pd_entry_t *pde,*npde;
2d21ac55
A
4302 unsigned int i;
4303 uint64_t num_pde;
0c530ab8
A
4304
4305 // do validity tests
2d21ac55
A
4306 if (size & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE;
4307 if(vstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE;
4308 if(nstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE;
0c530ab8 4309 if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */
2d21ac55 4310 if(size == 0) {
0c530ab8
A
4311 panic("pmap_nest: size is invalid - %016llX\n", size);
4312 }
0c530ab8 4313
2d21ac55
A
4314 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
4315 (int) grand, (int) subord,
4316 (int) (vstart>>32), (int) vstart, 0);
0c530ab8 4317
2d21ac55 4318 subord->pm_shared = TRUE;
0c530ab8 4319 nvaddr = (vm_map_offset_t)nstart;
0c530ab8
A
4320 num_pde = size >> PDESHIFT;
4321
2d21ac55
A
4322 PMAP_LOCK(subord);
4323 for (i = 0; i < num_pde; i++) {
0c530ab8 4324 npde = pmap_pde(subord, nvaddr);
2d21ac55
A
4325 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
4326 PMAP_UNLOCK(subord);
0c530ab8 4327 pmap_expand(subord, nvaddr); // pmap_expand handles races
2d21ac55
A
4328 PMAP_LOCK(subord);
4329 npde = pmap_pde(subord, nvaddr);
0c530ab8
A
4330 }
4331 nvaddr += NBPDE;
4332 }
4333
2d21ac55 4334 PMAP_UNLOCK(subord);
0c530ab8 4335
2d21ac55 4336 vaddr = (vm_map_offset_t)vstart;
0c530ab8 4337
2d21ac55 4338 PMAP_LOCK(grand);
0c530ab8 4339
2d21ac55 4340 for (i = 0;i < num_pde; i++) {
0c530ab8 4341 pd_entry_t tpde;
2d21ac55 4342
0c530ab8 4343 npde = pmap_pde(subord, nstart);
2d21ac55
A
4344 if (npde == 0)
4345 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
0c530ab8
A
4346 tpde = *npde;
4347 nstart += NBPDE;
4348 pde = pmap_pde(grand, vaddr);
2d21ac55
A
4349/* Legacy mode does not require expansion.
4350 * DRK: consider a debug mode test to verify that no PTEs are extant within
4351 * this range.
4352 */
4353 if ((0 == pde) && cpu_64bit) {
4354 PMAP_UNLOCK(grand);
4355 pmap_expand_pdpt(grand, vaddr);
4356 PMAP_LOCK(grand);
4357 pde = pmap_pde(grand, vaddr);
4358 }
4359
4360 if (pde == 0)
4361 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
0c530ab8
A
4362 vaddr += NBPDE;
4363 pmap_store_pte(pde, tpde);
4364 }
0c530ab8 4365
2d21ac55
A
4366 /* XXX FBDP: why do we need to flush here ? */
4367 PMAP_UPDATE_TLBS(grand, vstart, vstart + size - 1);
4368
4369 PMAP_UNLOCK(grand);
4370
4371 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
0c530ab8
A
4372
4373 return KERN_SUCCESS;
4374}
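/*
 * Usage sketch (all names and addresses are hypothetical): nest a
 * shared-region pmap into a task pmap.  The only constraints enforced
 * above are that vstart, nstart and size are NBPDE-aligned and that
 * size does not exceed 16TB.
 */
#if 0
	kern_return_t	kr;

	kr = pmap_nest(task_pmap,			/* grand */
		       shared_pmap,			/* subord */
		       (addr64_t)0x90000000ULL,		/* vstart, NBPDE-aligned */
		       (addr64_t)0x90000000ULL,		/* nstart, NBPDE-aligned */
		       (uint64_t)NBPDE * 16);		/* size, multiple of NBPDE */
	if (kr != KERN_SUCCESS)
		panic("example nest failed");
#endif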
4375
4376/*
4377 * kern_return_t pmap_unnest(grand, vaddr, size)
4378 *
4379 * grand = the pmap from which the nested range is removed
4380 * vaddr = start of range in pmap to be unnested
4381 * size = size of the range to be unnested
4382 *
4383 * Removes a pmap from another. This is used to implement shared segments.
4384 * On x86, the range must be NBPDE-aligned and a multiple of NBPDE in size.
4385 */
4386
2d21ac55 4387kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
0c530ab8 4388
0c530ab8
A
4389 pd_entry_t *pde;
4390 unsigned int i;
4391 unsigned int num_pde;
2d21ac55
A
4392 addr64_t vstart, vend;
4393
4394 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
4395 (int) grand,
4396 (int) (vaddr>>32), (int) vaddr, 0, 0);
0c530ab8 4397
2d21ac55
A
4398 if ((size & (pmap_nesting_size_min-1)) ||
4399 (vaddr & (pmap_nesting_size_min-1))) {
4400 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
4401 grand, vaddr, size);
4402 }
4403
4404 /* align everything to PDE boundaries */
4405 vstart = vaddr & ~(NBPDE-1);
4406 vend = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
4407 size = vend - vstart;
4408
4409 PMAP_LOCK(grand);
0c530ab8
A
4410
4411 // invalidate all pdes for segment at vaddr in pmap grand
4412
2d21ac55 4413 num_pde = size >> PDESHIFT;
0c530ab8 4414
2d21ac55 4415 vaddr = vstart;
0c530ab8
A
4416 for (i=0;i<num_pde;i++,pde++) {
4417 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
2d21ac55 4418 if (pde == 0) panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
0c530ab8
A
4419 pmap_store_pte(pde, (pd_entry_t)0);
4420 vaddr += NBPDE;
4421 }
2d21ac55 4422 PMAP_UPDATE_TLBS(grand, vstart, vend);
0c530ab8 4423
2d21ac55 4424 PMAP_UNLOCK(grand);
0c530ab8 4425
2d21ac55
A
4426 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
4427
4428 return KERN_SUCCESS;
0c530ab8
A
4429}
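/*
 * Matching usage sketch for the pmap_nest() example above (same
 * hypothetical task_pmap and addresses): tearing the nesting down.
 */
#if 0
	(void) pmap_unnest(task_pmap,
			   (addr64_t)0x90000000ULL,
			   (uint64_t)NBPDE * 16);
#endif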
4430
4431void
4432pmap_switch(pmap_t tpmap)
4433{
4434 spl_t s;
4435 int my_cpu;
4436
4437 s = splhigh(); /* Make sure interruptions are disabled */
4438 my_cpu = cpu_number();
4439
4440 set_dirbase(tpmap, my_cpu);
4441
4442 splx(s);
4443}
4444
4445
4446/*
4447 * disable no-execute capability on
4448 * the specified pmap
4449 */
4450void pmap_disable_NX(pmap_t pmap) {
4451
4452 pmap->nx_enabled = 0;
4453}
4454
4455void
4456pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
4457 vm_size_t *alloc_size, int *collectable, int *exhaustable)
4458{
4459 *count = inuse_ptepages_count;
4460 *cur_size = PAGE_SIZE * inuse_ptepages_count;
4461 *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
4462 *elem_size = PAGE_SIZE;
4463 *alloc_size = PAGE_SIZE;
4464
4465 *collectable = 1;
4466 *exhaustable = 0;
4467}
4468
4469vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
4470{
4471 enum high_fixed_addresses a;
4472 a = e + HIGH_CPU_END * cpu;
4473 return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
4474}
4475
4476vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
4477{
4478 return pmap_cpu_high_map_vaddr(cpu_number(), e);
4479}
4480
4481vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
4482{
4483 enum high_fixed_addresses a;
4484 vm_offset_t vaddr;
4485
4486 a = e + HIGH_CPU_END * cpu_number();
4487 vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
2d21ac55 4488 pmap_store_pte(pte_unique_base + a, pte);
0c530ab8
A
4489
4490 /* TLB flush for this page for this cpu */
4491 invlpg((uintptr_t)vaddr);
4492
4493 return vaddr;
4494}
4495
935ed37a
A
4496static inline void
4497pmap_cpuset_NMIPI(cpu_set cpu_mask) {
4498 unsigned int cpu, cpu_bit;
4499 uint64_t deadline;
4500
4501 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4502 if (cpu_mask & cpu_bit)
4503 cpu_NMI_interrupt(cpu);
4504 }
4505 deadline = mach_absolute_time() + (LockTimeOut >> 2);
4506 while (mach_absolute_time() < deadline)
4507 cpu_pause();
4508}
4509
0c530ab8
A
4510
4511/*
4512 * Called with pmap locked, we:
4513 * - scan through per-cpu data to see which other cpus need to flush
4514 * - send an IPI to each non-idle cpu to be flushed
4515 * - wait for all to signal back that they are inactive or we see that
4516 * they are in an interrupt handler or at a safe point
4517 * - flush the local tlb if it is active for this pmap
4518 * - return ... the caller will unlock the pmap
4519 */
4520void
4521pmap_flush_tlbs(pmap_t pmap)
4522{
4523 unsigned int cpu;
4524 unsigned int cpu_bit;
4525 cpu_set cpus_to_signal;
4526 unsigned int my_cpu = cpu_number();
4527 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
4528 boolean_t flush_self = FALSE;
4529 uint64_t deadline;
4530
2d21ac55
A
4531 assert((processor_avail_count < 2) ||
4532 (ml_get_interrupts_enabled() && get_preemption_level() != 0));
0c530ab8
A
4533
4534 /*
4535 * Scan other cpus for matching active or task CR3.
4536 * For idle cpus (with no active map) we mark them invalid but
4537 * don't signal -- they'll check as they go busy.
4538 * Note: for the kernel pmap we look for 64-bit shared address maps.
4539 */
4540 cpus_to_signal = 0;
4541 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4542 if (!cpu_datap(cpu)->cpu_running)
4543 continue;
2d21ac55
A
4544 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
4545 (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) ||
0c530ab8
A
4546 (pmap->pm_shared) ||
4547 ((pmap == kernel_pmap) &&
4548 (!CPU_CR3_IS_ACTIVE(cpu) ||
4549 cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
4550 if (cpu == my_cpu) {
4551 flush_self = TRUE;
4552 continue;
4553 }
4554 cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
4555 __asm__ volatile("mfence");
4556
4557 if (CPU_CR3_IS_ACTIVE(cpu)) {
4558 cpus_to_signal |= cpu_bit;
4559 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
4560 }
4561 }
4562 }
4563
2d21ac55
A
4564 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
4565 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8 4566
2d21ac55 4567 if (cpus_to_signal) {
935ed37a
A
4568 cpu_set cpus_to_respond = cpus_to_signal;
4569
0c530ab8
A
4570 deadline = mach_absolute_time() + LockTimeOut;
4571 /*
4572 * Wait for those other cpus to acknowledge
4573 */
935ed37a
A
4574 while (cpus_to_respond != 0) {
4575 if (mach_absolute_time() > deadline) {
593a1d5f
A
4576 if (!panic_active()) {
4577 pmap_tlb_flush_timeout = TRUE;
4578 pmap_cpuset_NMIPI(cpus_to_respond);
4579 }
935ed37a
A
4580 panic("pmap_flush_tlbs() timeout: "
4581 "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
4582 pmap, cpus_to_respond);
4583 }
4584
4585 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4586 if ((cpus_to_respond & cpu_bit) != 0) {
4587 if (!cpu_datap(cpu)->cpu_running ||
4588 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
4589 !CPU_CR3_IS_ACTIVE(cpu)) {
4590 cpus_to_respond &= ~cpu_bit;
4591 }
4592 cpu_pause();
2d21ac55 4593 }
935ed37a
A
4594 if (cpus_to_respond == 0)
4595 break;
0c530ab8 4596 }
0c530ab8 4597 }
0c530ab8
A
4598 }
4599
4600 /*
4601 * Flush local tlb if required.
4602 * We need this flush even if the pmap being changed
4603 * is the user map... in case we do a copyin/out
4604 * before returning to user mode.
4605 */
4606 if (flush_self)
4607 flush_tlb();
4608
2d21ac55
A
4609 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
4610 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8
A
4611}
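/*
 * Caller-side sketch of the contract described above ("pmap", "pte" and
 * "va" are assumed to name a mapping the caller already holds): the
 * pmap lock is held across both the PTE edit and the shoot-down, and is
 * dropped only afterwards.
 */
#if 0
	PMAP_LOCK(pmap);
	pmap_store_pte(pte, 0);				/* e.g. tear down one mapping */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);	/* funnels into pmap_flush_tlbs() */
	PMAP_UNLOCK(pmap);
#endif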
4612
4613void
4614process_pmap_updates(void)
4615{
2d21ac55
A
4616 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
4617
0c530ab8
A
4618 flush_tlb();
4619
4620 current_cpu_datap()->cpu_tlb_invalid = FALSE;
4621 __asm__ volatile("mfence");
4622}
4623
4624void
4625pmap_update_interrupt(void)
4626{
2d21ac55
A
4627 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
4628 0, 0, 0, 0, 0);
0c530ab8
A
4629
4630 process_pmap_updates();
4631
2d21ac55
A
4632 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
4633 0, 0, 0, 0, 0);
0c530ab8
A
4634}
4635
4636
4637unsigned int pmap_cache_attributes(ppnum_t pn) {
4638
2d21ac55 4639 if (!managed_page(ppn_to_pai(pn)))
0c530ab8
A
4640 return (VM_WIMG_IO);
4641
4642 return (VM_WIMG_COPYBACK);
4643}
4644
4645#ifdef PMAP_DEBUG
4646void
4647pmap_dump(pmap_t p)
4648{
4649 int i;
4650
4651 kprintf("pmap 0x%x\n",p);
4652
4653 kprintf(" pm_cr3 0x%llx\n",p->pm_cr3);
4654 kprintf(" pm_pml4 0x%x\n",p->pm_pml4);
4655 kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt);
4656
4657 kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4);
4658 for (i=0;i<8;i++)
4659 kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
4660}
4661
4662void pmap_dump_wrap(void)
4663{
4664 pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
4665}
4666
4667void
4668dump_4GB_pdpt(pmap_t p)
4669{
4670 int spl;
4671 pdpt_entry_t *user_pdptp;
4672 pdpt_entry_t *kern_pdptp;
4673 pdpt_entry_t *pml4p;
4674
4675 spl = splhigh();
4676 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
4677 splx(spl);
4678 pmap_expand_pml4(p, 0x0);
4679 spl = splhigh();
4680 }
4681 kern_pdptp = kernel_pmap->pm_pdpt;
4682 if (kern_pdptp == NULL)
4683 panic("kern_pdptp == NULL");
4684 kprintf("dump_4GB_pdpt(%p)\n"
4685 "kern_pdptp=%p (phys=0x%016llx)\n"
4686 "\t 0x%08x: 0x%016llx\n"
4687 "\t 0x%08x: 0x%016llx\n"
4688 "\t 0x%08x: 0x%016llx\n"
4689 "\t 0x%08x: 0x%016llx\n"
4690 "\t 0x%08x: 0x%016llx\n"
4691 "user_pdptp=%p (phys=0x%016llx)\n"
4692 "\t 0x%08x: 0x%016llx\n"
4693 "\t 0x%08x: 0x%016llx\n"
4694 "\t 0x%08x: 0x%016llx\n"
4695 "\t 0x%08x: 0x%016llx\n"
4696 "\t 0x%08x: 0x%016llx\n",
4697 p, kern_pdptp, kvtophys(kern_pdptp),
4698 kern_pdptp+0, *(kern_pdptp+0),
4699 kern_pdptp+1, *(kern_pdptp+1),
4700 kern_pdptp+2, *(kern_pdptp+2),
4701 kern_pdptp+3, *(kern_pdptp+3),
4702 kern_pdptp+4, *(kern_pdptp+4),
4703 user_pdptp, kvtophys(user_pdptp),
4704 user_pdptp+0, *(user_pdptp+0),
4705 user_pdptp+1, *(user_pdptp+1),
4706 user_pdptp+2, *(user_pdptp+2),
4707 user_pdptp+3, *(user_pdptp+3),
4708 user_pdptp+4, *(user_pdptp+4));
4709 kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4710 p->pm_cr3, p->pm_hold, p->pm_pml4);
4711 pml4p = (pdpt_entry_t *)p->pm_hold;
4712 if (pml4p == NULL)
4713 panic("user pml4p == NULL");
4714 kprintf("\t 0x%08x: 0x%016llx\n"
4715 "\t 0x%08x: 0x%016llx\n",
4716 pml4p+0, *(pml4p),
4717 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
4718 kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4719 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
4720 pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
4721 if (pml4p == NULL)
4722 panic("kern pml4p == NULL");
4723 kprintf("\t 0x%08x: 0x%016llx\n"
4724 "\t 0x%08x: 0x%016llx\n",
4725 pml4p+0, *(pml4p),
4726 pml4p+511, *(pml4p+511));
4727 splx(spl);
4728}
4729
4730void dump_4GB_pdpt_thread(thread_t tp)
4731{
4732 dump_4GB_pdpt(tp->map->pmap);
4733}
4734
4735
4736#endif