1c79356b 1/*
c910b4d9 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.c
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
63 *
64 * Physical Map management code for Intel i386, i486, and i860.
65 *
66 * Manages physical address maps.
67 *
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
74 *
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
80 * requested.
81 *
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
84 * this module may delay invalidate or reduced protection
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
89 */
90
1c79356b 91#include <string.h>
1c79356b 92#include <mach_kdb.h>
93#include <mach_ldebug.h>
94
2d21ac55 95#include <libkern/OSAtomic.h>
96
1c79356b 97#include <mach/machine/vm_types.h>
98
99#include <mach/boolean.h>
100#include <kern/thread.h>
101#include <kern/zalloc.h>
2d21ac55 102#include <kern/queue.h>
1c79356b 103
104#include <kern/lock.h>
91447636 105#include <kern/kalloc.h>
1c79356b 106#include <kern/spl.h>
107
108#include <vm/pmap.h>
109#include <vm/vm_map.h>
110#include <vm/vm_kern.h>
111#include <mach/vm_param.h>
112#include <mach/vm_prot.h>
113#include <vm/vm_object.h>
114#include <vm/vm_page.h>
115
116#include <mach/machine/vm_param.h>
117#include <machine/thread.h>
118
119#include <kern/misc_protos.h> /* prototyping */
120#include <i386/misc_protos.h>
121
122#include <i386/cpuid.h>
91447636 123#include <i386/cpu_data.h>
55e303ae 124#include <i386/cpu_number.h>
125#include <i386/machine_cpu.h>
0c530ab8 126#include <i386/seg.h>
2d21ac55 127#include <i386/serial_io.h>
0c530ab8 128#include <i386/cpu_capabilities.h>
2d21ac55 129#include <i386/machine_routines.h>
130#include <i386/proc_reg.h>
131#include <i386/tsc.h>
b0d623f7 132#include <i386/acpi.h>
133#include <i386/pmap_internal.h>
1c79356b 134
135#if MACH_KDB
136#include <ddb/db_command.h>
137#include <ddb/db_output.h>
138#include <ddb/db_sym.h>
139#include <ddb/db_print.h>
140#endif /* MACH_KDB */
141
91447636 142#include <vm/vm_protos.h>
143
144#include <i386/mp.h>
0c530ab8 145#include <i386/mp_desc.h>
b0d623f7 146#include <i386/i386_lowmem.h>
0c530ab8 147
0c530ab8 148
2d21ac55 149/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */
150#ifdef DEBUGINTERRUPTS
151#define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);}
152#else
153#define pmap_intr_assert()
154#endif
155
0c530ab8 156#ifdef IWANTTODEBUG
157#undef DEBUG
158#define DEBUG 1
159#define POSTCODE_DELAY 1
160#include <i386/postcode.h>
161#endif /* IWANTTODEBUG */
1c79356b 162
163/*
164 * Forward declarations for internal functions.
165 */
0c530ab8 166
b0d623f7 167void pmap_remove_range(
1c79356b 168 pmap_t pmap,
0c530ab8 169 vm_map_offset_t va,
1c79356b 170 pt_entry_t *spte,
171 pt_entry_t *epte);
172
91447636 173void phys_attribute_clear(
2d21ac55 174 ppnum_t phys,
1c79356b 175 int bits);
176
2d21ac55 177int phys_attribute_test(
178 ppnum_t phys,
1c79356b 179 int bits);
180
91447636 181void phys_attribute_set(
2d21ac55 182 ppnum_t phys,
1c79356b 183 int bits);
184
91447636 185void pmap_set_reference(
186 ppnum_t pn);
187
91447636 188boolean_t phys_page_exists(
189 ppnum_t pn);
1c79356b 190
2d21ac55 191
0c530ab8 192#ifdef PMAP_DEBUG
193void dump_pmap(pmap_t);
194void dump_4GB_pdpt(pmap_t p);
195void dump_4GB_pdpt_thread(thread_t tp);
196#endif
1c79356b 197
0c530ab8 198int nx_enabled = 1; /* enable no-execute protection */
4a3eedf9 199#ifdef CONFIG_EMBEDDED
200int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */
201#else
2d21ac55 202int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */
4a3eedf9 203#endif
2d21ac55 204int allow_stack_exec = 0; /* No apps may execute from the stack by default */
0c530ab8 205
b0d623f7 206boolean_t cpu_64bit = FALSE;
207boolean_t pmap_trace = FALSE;
1c79356b 208
2d21ac55 209/*
210 * when spinning through pmap_remove
211 * ensure that we don't spend too much
212 * time with preemption disabled.
213 * I'm setting the current threshold
214 * to 20us
215 */
216#define MAX_PREEMPTION_LATENCY_NS 20000
217
218uint64_t max_preemption_latency_tsc = 0;
219
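/*
 * Illustrative sketch (not from the original source) of how a long-running
 * loop can honor this threshold: compare elapsed TSC ticks against
 * max_preemption_latency_tsc and briefly drop/retake the pmap lock so a
 * preemption point occurs.  The loop condition is hypothetical.
 */
#if 0
	uint64_t deadline;

	deadline = rdtsc64() + max_preemption_latency_tsc;
	while (more_work_to_do) {
		/* ... remove one chunk of mappings ... */
		if (rdtsc64() > deadline) {
			PMAP_UNLOCK(map);		/* open a preemption window */
			PMAP_LOCK(map);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}
#endif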
55e303ae 220
2d21ac55 221pv_hashed_entry_t *pv_hash_table; /* hash lists */
222
223uint32_t npvhash = 0;
224
1c79356b 225
226/*
227 * pv_list entries are kept on a list that can only be accessed
228 * with the pmap system locked (at SPLVM, not in the cpus_active set).
2d21ac55 229 * The list is refilled from the pv_hashed_list_zone if it becomes empty.
1c79356b 230 */
2d21ac55 231pv_rooted_entry_t pv_free_list = PV_ROOTED_ENTRY_NULL; /* free list at SPLVM */
232pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
233pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
234decl_simple_lock_data(,pv_hashed_free_list_lock)
235decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
236decl_simple_lock_data(,pv_hash_table_lock)
237
91447636 238int pv_free_count = 0;
2d21ac55 239int pv_hashed_free_count = 0;
240int pv_kern_free_count = 0;
241int pv_hashed_kern_free_count = 0;
1c79356b 242
2d21ac55 243zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */
1c79356b 244
91447636 245static zone_t pdpt_zone;
91447636 246
1c79356b 247/*
248 * Each entry in the pv_head_table is locked by a bit in the
249 * pv_lock_table. The lock bits are accessed by the physical
250 * address of the page they lock.
251 */
252
253char *pv_lock_table; /* pointer to array of bits */
254#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
255
2d21ac55 256char *pv_hash_lock_table;
257#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
258
1c79356b 259/*
260 * First and last physical addresses that we maintain any information
261 * for. Initialized to zero so that pmap operations done before
262 * pmap_init won't touch any non-existent structures.
263 */
1c79356b 264boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
265
91447636 266static struct vm_object kptobj_object_store;
267static vm_object_t kptobj;
91447636 268
1c79356b 269/*
270 * Array of physical page attributes for managed pages.
271 * One byte per physical page.
272 */
273char *pmap_phys_attributes;
2d21ac55 274unsigned int last_managed_page = 0;
1c79356b 275
276/*
277 * Physical page attributes. Copy bits from PTE definition.
278 */
279#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
280#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
2d21ac55 281#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
1c79356b 282
283/*
284 * Amount of virtual memory mapped by one
285 * page-directory entry.
286 */
287#define PDE_MAPPED_SIZE (pdetova(1))
0c530ab8 288uint64_t pde_mapped_size;
1c79356b 289
1c79356b 290/*
291 * Locking and TLB invalidation
292 */
293
294/*
2d21ac55 295 * Locking Protocols: (changed 2/2007 JK)
1c79356b 296 *
297 * There are two structures in the pmap module that need locking:
298 * the pmaps themselves, and the per-page pv_lists (which are locked
299 * by locking the pv_lock_table entry that corresponds to the pv_head
300 * for the list in question.) Most routines want to lock a pmap and
301 * then do operations in it that require pv_list locking -- however
302 * pmap_remove_all and pmap_copy_on_write operate on a physical page
303 * basis and want to do the locking in the reverse order, i.e. lock
304 * a pv_list and then go through all the pmaps referenced by that list.
1c79356b 305 *
2d21ac55 306 * The system wide pmap lock has been removed. Now, paths take a lock
307 * on the pmap before changing its 'shape' and the reverse order lockers
308 * (coming in by phys ppn) take a lock on the corresponding pv and then
309 * retest to be sure nothing changed during the window before they locked
310 * and can then run up/down the pv lists holding the list lock. This also
311 * lets the pmap layer run (nearly completely) interrupt enabled, unlike
312 * previously.
1c79356b 313 */
1c79356b 314
1c79356b 315
2d21ac55 316/*
317 * PV locking
318 */
319
320#define LOCK_PVH(index) { \
321 mp_disable_preemption(); \
322 lock_pvh_pai(index); \
1c79356b 323}
324
2d21ac55 325#define UNLOCK_PVH(index) { \
326 unlock_pvh_pai(index); \
327 mp_enable_preemption(); \
1c79356b 328}
329
2d21ac55 330/*
331 * PV hash locking
332 */
1c79356b 333
2d21ac55 334#define LOCK_PV_HASH(hash) lock_hash_hash(hash)
335
336#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
1c79356b 337
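/*
 * Illustrative sketch (not part of the original source) of the reverse-order
 * ("by physical page") locking described in the Locking Protocols comment
 * above: take the per-page pv lock first, then re-test the rooted entry
 * before walking the pv list.  "pai" is an assumed managed page index.
 */
#if 0
	pv_rooted_entry_t pv_h;

	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * The pmap may have changed shape in the window before we
		 * took the lock; only after this re-test is it safe to run
		 * up/down the pv list under the list lock.
		 */
	}
	UNLOCK_PVH(pai);
#endif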
55e303ae 338#if USLOCK_DEBUG
339extern int max_lock_loops;
91447636 340#define LOOP_VAR \
341 unsigned int loop_count; \
2d21ac55 342 loop_count = disable_serial_output ? max_lock_loops \
91447636 343 : max_lock_loops*100
55e303ae 344#define LOOP_CHECK(msg, pmap) \
91447636 345 if (--loop_count == 0) { \
55e303ae 346 mp_disable_preemption(); \
0c530ab8 347 kprintf("%s: cpu %d pmap %x\n", \
348 msg, cpu_number(), pmap); \
55e303ae 349 Debugger("deadlock detection"); \
350 mp_enable_preemption(); \
91447636 351 loop_count = max_lock_loops; \
55e303ae 352 }
353#else /* USLOCK_DEBUG */
354#define LOOP_VAR
355#define LOOP_CHECK(msg, pmap)
356#endif /* USLOCK_DEBUG */
1c79356b 357
b0d623f7 358unsigned pmap_memory_region_count;
359unsigned pmap_memory_region_current;
1c79356b 360
91447636 361pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
1c79356b 362
363/*
364 * Other useful macros.
365 */
91447636 366#define current_pmap() (vm_map_pmap(current_thread()->map))
1c79356b 367
368struct pmap kernel_pmap_store;
369pmap_t kernel_pmap;
370
0c530ab8 371pd_entry_t high_shared_pde;
372pd_entry_t commpage64_pde;
91447636 373
1c79356b 374struct zone *pmap_zone; /* zone of pmap structures */
375
376int pmap_debug = 0; /* flag for debugging prints */
91447636 377
2d21ac55 378unsigned int inuse_ptepages_count = 0;
1c79356b 379
0c530ab8 380addr64_t kernel64_cr3;
381boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */
382
b0d623f7 383
1c79356b 384/*
385 * Pmap cache. Cache is threaded through ref_count field of pmap.
386 * Max will eventually be constant -- variable for experimentation.
387 */
388int pmap_cache_max = 32;
389int pmap_alloc_chunk = 8;
390pmap_t pmap_cache_list;
391int pmap_cache_count;
392decl_simple_lock_data(,pmap_cache_lock)
393
1c79356b 394extern char end;
395
91447636 396static int nkpt;
397
398pt_entry_t *DMAP1, *DMAP2;
399caddr_t DADDR1;
400caddr_t DADDR2;
0c530ab8 401/*
402 * for legacy, returns the address of the pde entry.
403 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
404 * then returns the mapped address of the pde entry in that page
405 */
406pd_entry_t *
407pmap_pde(pmap_t m, vm_map_offset_t v)
4452a7af 408{
0c530ab8
A
409 pd_entry_t *pde;
410 if (!cpu_64bit || (m == kernel_pmap)) {
411 pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
412 } else {
413 assert(m);
414 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
415 pde = pmap64_pde(m, v);
416 }
417 return pde;
4452a7af
A
418}
419
89b3af67 420
4452a7af 421/*
0c530ab8 422 * the single pml4 page per pmap is allocated at pmap create time and exists
423 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
424 * level of page table dynamic mapping).
425 * this returns the address of the requested pml4 entry in the top level page.
4452a7af 426 */
0c530ab8 427static inline
428pml4_entry_t *
429pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
430{
431 return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
432}
433
434/*
435 * maps in the pml4 page, if any, containing the pdpt entry requested
436 * and returns the address of the pdpt entry in that mapped page
437 */
438pdpt_entry_t *
439pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
440{
441 pml4_entry_t newpf;
442 pml4_entry_t *pml4;
443 int i;
444
445 assert(pmap);
446 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
447 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
448 return(0);
4452a7af 449 }
0c530ab8
A
450
451 pml4 = pmap64_pml4(pmap, vaddr);
452
453 if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
454
455 newpf = *pml4 & PG_FRAME;
456
457
458 for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
459 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
460 return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
461 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
462 }
463 }
464
465 current_cpu_datap()->cpu_pmap->pdpt_window_index++;
466 if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
467 current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
468 pmap_store_pte(
469 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
470 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
471 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
472 return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
473 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
474 }
475
2d21ac55 476 return (NULL);
4452a7af
A
477}
478
0c530ab8 479/*
480 * maps in the pdpt page, if any, containing the pde entry requested
481 * and returns the address of the pde entry in that mapped page
482 */
483pd_entry_t *
484pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
4452a7af 485{
0c530ab8
A
486 pdpt_entry_t newpf;
487 pdpt_entry_t *pdpt;
488 int i;
4452a7af 489
0c530ab8
A
490 assert(pmap);
491 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
492 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
493 return(0);
494 }
495
496 /* if (vaddr & (1ULL << 63)) panic("neg addr");*/
497 pdpt = pmap64_pdpt(pmap, vaddr);
498
499 if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
500
501 newpf = *pdpt & PG_FRAME;
502
503 for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
504 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
505 return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
506 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
507 }
4452a7af 508 }
0c530ab8
A
509
510 current_cpu_datap()->cpu_pmap->pde_window_index++;
511 if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
512 current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
513 pmap_store_pte(
514 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
515 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
516 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
517 return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
518 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
21362eb3 519 }
4452a7af 520
2d21ac55 521 return (NULL);
0c530ab8
A
522}
523
2d21ac55 524/*
525 * Because the page tables (top 3 levels) are mapped into per cpu windows,
526 * callers must either disable interrupts or disable preemption before calling
527 * one of the pte mapping routines (e.g. pmap_pte()) as the returned vaddr
528 * is in one of those mapped windows and that cannot be allowed to change until
529 * the caller is done using the returned pte pointer. When done, the caller
530 * restores interrupts or preemption to its previous state after which point the
531 * vaddr for the returned pte can no longer be used
532 */
0c530ab8
A
533
534
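/*
 * Illustrative sketch (not part of the original source) of the calling
 * convention described above, following the splhigh()/splx() pattern this
 * file itself uses (e.g. in pmap_map_bd).  "some_pmap" and "some_vaddr"
 * are hypothetical.
 */
#if 0
	pt_entry_t	*pte;
	spl_t		spl;

	spl = splhigh();			/* block interrupts: the mapping window stays valid */
	pte = pmap_pte(some_pmap, some_vaddr);
	if (pte != PT_ENTRY_NULL) {
		/* ... examine or update *pte while still at splhigh ... */
	}
	splx(spl);				/* window contents may now be recycled */
#endif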
535/*
536 * return address of mapped pte for vaddr va in pmap pmap.
537 * must be called with pre-emption or interrupts disabled
538 * if targeted pmap is not the kernel pmap
539 * since we may be passing back a virtual address that is
540 * associated with this cpu... pre-emption or interrupts
541 * must remain disabled until the caller is done using
542 * the pointer that was passed back.
543 *
544 * maps the pde page, if any, containing the pte in and returns
545 * the address of the pte in that mapped page
546 */
547pt_entry_t *
548pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
549{
550 pd_entry_t *pde;
551 pd_entry_t newpf;
552 int i;
553
554 assert(pmap);
555 pde = pmap_pde(pmap,vaddr);
556
557 if (pde && ((*pde & INTEL_PTE_VALID))) {
b0d623f7 558 if (*pde & INTEL_PTE_PS)
559 return pde;
2d21ac55 560 if (pmap == kernel_pmap)
561 return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
562#if TESTING
563 if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
564 panic("pmap_pte: unsafe call");
565#endif
0c530ab8 566 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
567
568 newpf = *pde & PG_FRAME;
569
570 for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
571 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
572 return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
573 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
574 }
575 }
576
577 current_cpu_datap()->cpu_pmap->pte_window_index++;
578 if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
579 current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
580 pmap_store_pte(
581 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
582 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
583 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
584 return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
585 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
6601e61a 586 }
0c530ab8 587
2d21ac55 588 return(NULL);
1c79356b 589}
2d21ac55 590
1c79356b 591
592/*
593 * Map memory at initialization. The physical addresses being
594 * mapped are not managed and are never unmapped.
595 *
596 * For now, VM is already on, we only need to map the
597 * specified memory.
598 */
599vm_offset_t
600pmap_map(
0c530ab8 601 vm_offset_t virt,
602 vm_map_offset_t start_addr,
603 vm_map_offset_t end_addr,
604 vm_prot_t prot,
605 unsigned int flags)
1c79356b 606{
0c530ab8 607 int ps;
1c79356b 608
609 ps = PAGE_SIZE;
91447636 610 while (start_addr < end_addr) {
0c530ab8 611 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
612 (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
1c79356b 613 virt += ps;
91447636 614 start_addr += ps;
1c79356b 615 }
616 return(virt);
617}
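/*
 * Illustrative usage sketch (not part of the original source): map a
 * physically contiguous range into kernel virtual space at init time.
 * virt_base, phys_start and phys_end are hypothetical values.
 */
#if 0
	vm_offset_t	vaddr = virt_base;

	vaddr = pmap_map(vaddr,
			 (vm_map_offset_t)phys_start,
			 (vm_map_offset_t)phys_end,
			 VM_PROT_READ | VM_PROT_WRITE,
			 VM_WIMG_USE_DEFAULT);
	/* pmap_map returns the next unused virtual address */
#endif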
618
619/*
620 * Back-door routine for mapping kernel VM at initialization.
621 * Useful for mapping memory outside the range
622 * Sets no-cache, A, D.
1c79356b 623 * Otherwise like pmap_map.
624 */
625vm_offset_t
626pmap_map_bd(
0c530ab8
A
627 vm_offset_t virt,
628 vm_map_offset_t start_addr,
629 vm_map_offset_t end_addr,
630 vm_prot_t prot,
631 unsigned int flags)
1c79356b 632{
0c530ab8 633 pt_entry_t template;
b0d623f7 634 pt_entry_t *pte;
2d21ac55 635 spl_t spl;
1c79356b 636
91447636 637 template = pa_to_pte(start_addr)
1c79356b
A
638 | INTEL_PTE_REF
639 | INTEL_PTE_MOD
640 | INTEL_PTE_WIRED
641 | INTEL_PTE_VALID;
0c530ab8
A
642
643 if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
644 template |= INTEL_PTE_NCACHE;
645 if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
646 template |= INTEL_PTE_PTA;
647 }
648
1c79356b
A
649 if (prot & VM_PROT_WRITE)
650 template |= INTEL_PTE_WRITE;
651
b0d623f7 652
91447636 653 while (start_addr < end_addr) {
2d21ac55 654 spl = splhigh();
0c530ab8 655 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
91447636 656 if (pte == PT_ENTRY_NULL) {
1c79356b 657 panic("pmap_map_bd: Invalid kernel address\n");
91447636 658 }
0c530ab8 659 pmap_store_pte(pte, template);
2d21ac55 660 splx(spl);
1c79356b
A
661 pte_increment_pa(template);
662 virt += PAGE_SIZE;
91447636 663 start_addr += PAGE_SIZE;
b0d623f7
A
664 }
665
1c79356b 666
55e303ae 667 flush_tlb();
1c79356b
A
668 return(virt);
669}
670
b0d623f7
A
671extern char *first_avail;
672extern vm_offset_t virtual_avail, virtual_end;
673extern pmap_paddr_t avail_start, avail_end;
1c79356b 674
2d21ac55
A
675void
676pmap_cpu_init(void)
677{
678 /*
679 * Here early in the life of a processor (from cpu_mode_init()).
680 * If we're not in 64-bit mode, enable the global TLB feature.
681 * Note: regardless of mode we continue to set the global attribute
682 * bit in ptes for all (32-bit) global pages such as the commpage.
683 */
684 if (!cpu_64bit) {
685 set_cr4(get_cr4() | CR4_PGE);
686 }
687
688 /*
689 * Initialize the per-cpu, TLB-related fields.
690 */
691 current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
692 current_cpu_datap()->cpu_tlb_invalid = FALSE;
693}
0c530ab8
A
694
695vm_offset_t
696pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
697{
698 vm_offset_t ve = pmap_index_to_virt(e);
699 pt_entry_t *ptep;
700 pmap_paddr_t pa;
701 int i;
2d21ac55 702 spl_t s;
0c530ab8
A
703
704 assert(0 == (va & PAGE_MASK)); /* expecting page aligned */
2d21ac55 705 s = splhigh();
0c530ab8
A
706 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);
707
708 for (i=0; i< sz; i++) {
709 pa = (pmap_paddr_t) kvtophys(va);
710 pmap_store_pte(ptep, (pa & PG_FRAME)
711 | INTEL_PTE_VALID
712 | INTEL_PTE_GLOBAL
713 | INTEL_PTE_RW
714 | INTEL_PTE_REF
715 | INTEL_PTE_MOD);
716 va+= PAGE_SIZE;
717 ptep++;
718 }
2d21ac55 719 splx(s);
0c530ab8
A
720 return ve;
721}
722
723vm_offset_t
724pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
725{
726 enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
727 return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
728}
729
730void pmap_init_high_shared(void);
731
732extern vm_offset_t gdtptr, idtptr;
733
734extern uint32_t low_intstack;
735
736extern struct fake_descriptor ldt_desc_pattern;
737extern struct fake_descriptor tss_desc_pattern;
738
739extern char hi_remap_text, hi_remap_etext;
740extern char t_zero_div;
741
742pt_entry_t *pte_unique_base;
743
744void
745pmap_init_high_shared(void)
746{
747
748 vm_offset_t haddr;
2d21ac55 749 spl_t s;
0c530ab8
A
750#if MACH_KDB
751 struct i386_tss *ttss;
752#endif
753
b0d623f7
A
754 cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index;
755
0c530ab8
A
756 kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
757 HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
2d21ac55 758 s = splhigh();
0c530ab8 759 pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
2d21ac55 760 splx(s);
0c530ab8
A
761
762 if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
763 HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
764 panic("tramps too large");
765 haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
766 (vm_offset_t) &hi_remap_text, 3);
767 kprintf("tramp: 0x%x, ",haddr);
0c530ab8
A
768 /* map gdt up high and update ptr for reload */
769 haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
770 (vm_offset_t) master_gdt, 1);
b0d623f7 771 cdi->cdi_gdt.ptr = (void *)haddr;
0c530ab8
A
772 kprintf("GDT: 0x%x, ",haddr);
773 /* map ldt up high */
774 haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
775 (vm_offset_t) master_ldt,
776 HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
b0d623f7 777 cdi->cdi_ldt = (struct fake_descriptor *)haddr;
0c530ab8
A
778 kprintf("LDT: 0x%x, ",haddr);
779 /* put new ldt addr into gdt */
b0d623f7
A
780 struct fake_descriptor temp_fake_desc;
781 temp_fake_desc = ldt_desc_pattern;
782 temp_fake_desc.offset = (vm_offset_t) haddr;
783 fix_desc(&temp_fake_desc, 1);
784
785 *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc;
786 *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc;
0c530ab8
A
787
788 /* map idt up high */
789 haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
790 (vm_offset_t) master_idt, 1);
b0d623f7 791 cdi->cdi_idt.ptr = (void *)haddr;
0c530ab8
A
792 kprintf("IDT: 0x%x, ", haddr);
793 /* remap ktss up high and put new high addr into gdt */
794 haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
795 (vm_offset_t) &master_ktss, 1);
b0d623f7
A
796
797 temp_fake_desc = tss_desc_pattern;
798 temp_fake_desc.offset = (vm_offset_t) haddr;
799 fix_desc(&temp_fake_desc, 1);
800 *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc;
0c530ab8
A
801 kprintf("KTSS: 0x%x, ",haddr);
802#if MACH_KDB
803 /* remap dbtss up high and put new high addr into gdt */
804 haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
805 (vm_offset_t) &master_dbtss, 1);
b0d623f7
A
806 temp_fake_desc = tss_desc_pattern;
807 temp_fake_desc.offset = (vm_offset_t) haddr;
808 fix_desc(&temp_fake_desc, 1);
809 *(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc;
0c530ab8
A
810 ttss = (struct i386_tss *)haddr;
811 kprintf("DBTSS: 0x%x, ",haddr);
812#endif /* MACH_KDB */
813
814 /* remap dftss up high and put new high addr into gdt */
815 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
816 (vm_offset_t) &master_dftss, 1);
b0d623f7
A
817 temp_fake_desc = tss_desc_pattern;
818 temp_fake_desc.offset = (vm_offset_t) haddr;
819 fix_desc(&temp_fake_desc, 1);
820 *(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc;
0c530ab8
A
821 kprintf("DFTSS: 0x%x\n",haddr);
822
823 /* remap mctss up high and put new high addr into gdt */
824 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
825 (vm_offset_t) &master_mctss, 1);
b0d623f7
A
826 temp_fake_desc = tss_desc_pattern;
827 temp_fake_desc.offset = (vm_offset_t) haddr;
828 fix_desc(&temp_fake_desc, 1);
829 *(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc;
0c530ab8
A
830 kprintf("MCTSS: 0x%x\n",haddr);
831
b0d623f7 832 cpu_desc_load(&cpu_data_master);
0c530ab8
A
833}
834
835
1c79356b
A
836/*
837 * Bootstrap the system enough to run with virtual memory.
838 * Map the kernel's code and data, and allocate the system page table.
839 * Called with mapping OFF. Page_size must already be set.
1c79356b
A
840 */
841
842void
843pmap_bootstrap(
0c530ab8
A
844 __unused vm_offset_t load_start,
845 boolean_t IA32e)
1c79356b 846{
91447636
A
847 vm_offset_t va;
848 pt_entry_t *pte;
849 int i;
0c530ab8 850 pdpt_entry_t *pdpt;
2d21ac55 851 spl_t s;
1c79356b 852
91447636
A
853 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
854 * known to VM */
1c79356b
A
855 /*
856 * The kernel's pmap is statically allocated so we don't
857 * have to use pmap_create, which is unlikely to work
858 * correctly at this part of the boot sequence.
859 */
860
0c530ab8 861
1c79356b 862 kernel_pmap = &kernel_pmap_store;
91447636 863 kernel_pmap->ref_count = 1;
0c530ab8 864 kernel_pmap->nx_enabled = FALSE;
2d21ac55 865 kernel_pmap->pm_task_map = TASK_MAP_32BIT;
91447636
A
866 kernel_pmap->pm_obj = (vm_object_t) NULL;
867 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
0c530ab8
A
868 kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
869 pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
870 kernel_pmap->pm_pdpt = pdpt;
871 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);
1c79356b 872
b0d623f7 873
91447636
A
874 va = (vm_offset_t)kernel_pmap->dirbase;
875 /* setup self referential mapping(s) */
0c530ab8 876 for (i = 0; i< NPGPTD; i++, pdpt++) {
91447636 877 pmap_paddr_t pa;
b0d623f7 878 pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
0c530ab8
A
879 pmap_store_pte(
880 (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
91447636 881 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
0c530ab8
A
882 INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
883 pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
91447636 884 }
1c79356b 885
0c530ab8
A
886 cpu_64bit = IA32e;
887
888 lo_kernel_cr3 = kernel_pmap->pm_cr3;
889 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
890
891 /* save the value we stuff into created pmaps to share the gdts etc */
892 high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
893 /* make sure G bit is on for high shared pde entry */
894 high_shared_pde |= INTEL_PTE_GLOBAL;
2d21ac55 895 s = splhigh();
0c530ab8 896 pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
2d21ac55 897 splx(s);
0c530ab8 898
91447636 899 nkpt = NKPT;
b0d623f7 900 OSAddAtomic(NKPT, &inuse_ptepages_count);
1c79356b 901
91447636
A
902 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
903 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
1c79356b
A
904
905 /*
91447636
A
906 * Reserve some special page table entries/VA space for temporary
907 * mapping of pages.
1c79356b 908 */
91447636 909#define SYSMAP(c, p, v, n) \
0c530ab8 910 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
91447636
A
911
912 va = virtual_avail;
0c530ab8 913 pte = vtopte(va);
6601e61a 914
0c530ab8
A
915 for (i=0; i<PMAP_NWINDOWS; i++) {
916 SYSMAP(caddr_t,
917 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
918 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
919 1);
920 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
921 }
1c79356b 922
91447636
A
923 /* DMAP user for debugger */
924 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
925 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
1c79356b 926
91447636 927 virtual_avail = va;
1c79356b 928
593a1d5f 929 if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
2d21ac55
A
930 if (0 != ((npvhash+1) & npvhash)) {
931 kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
932 npvhash = NPVHASH;
933 }
934 } else {
935 npvhash = NPVHASH;
936 }
937 printf("npvhash=%d\n",npvhash);
938
91447636 939 simple_lock_init(&kernel_pmap->lock, 0);
2d21ac55
A
940 simple_lock_init(&pv_hashed_free_list_lock, 0);
941 simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
942 simple_lock_init(&pv_hash_table_lock,0);
1c79356b 943
2d21ac55 944 pmap_init_high_shared();
0c530ab8
A
945
946 pde_mapped_size = PDE_MAPPED_SIZE;
947
948 if (cpu_64bit) {
b0d623f7 949 pdpt_entry_t *ppdpt = IdlePDPT;
0c530ab8
A
950 pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
951 pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
952 int istate = ml_set_interrupts_enabled(FALSE);
953
954 /*
955 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
956 * with page bits set for the correct IA-32e operation and so that
957 * the legacy-mode IdlePDPT is retained for slave processor start-up.
958 * This is necessary due to the incompatible use of page bits between
959 * 64-bit and legacy modes.
960 */
961 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
962 kernel_pmap->pm_pml4 = IdlePML4;
963 kernel_pmap->pm_pdpt = (pd_entry_t *)
964 ((unsigned int)IdlePDPT64 | KERNBASE );
965#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
966 pmap_store_pte(kernel_pmap->pm_pml4,
967 (uint32_t)IdlePDPT64 | PAGE_BITS);
968 pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
969 pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
970 pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
971 pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);
972
973 /*
974 * The kernel is also mapped in the uber-space at the 4GB starting
975 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
976 */
977 pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));
978
979 kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
0c530ab8 980
2d21ac55 981 /* Re-initialize descriptors and prepare to switch modes */
b0d623f7 982 cpu_desc_init64(&cpu_data_master);
2d21ac55
A
983 current_cpu_datap()->cpu_is64bit = TRUE;
984 current_cpu_datap()->cpu_active_cr3 = kernel64_cr3;
0c530ab8
A
985
986 pde_mapped_size = 512*4096 ;
987
988 ml_set_interrupts_enabled(istate);
0c530ab8 989 }
2d21ac55 990
b0d623f7 991 /* Sets 64-bit mode if required. */
2d21ac55 992 cpu_mode_init(&cpu_data_master);
b0d623f7
A
993 /* Update in-kernel CPUID information if we're now in 64-bit mode */
994 if (IA32e)
995 cpuid_set_info();
2d21ac55 996
0c530ab8 997 kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;
1c79356b 998
91447636
A
999 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
1000 VADDR(KPTDI,0), virtual_end);
6601e61a 1001 printf("PAE enabled\n");
0c530ab8
A
1002 if (cpu_64bit){
1003 printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
1004
1005 kprintf("Available physical space from 0x%llx to 0x%llx\n",
6601e61a 1006 avail_start, avail_end);
0c530ab8
A
1007
1008 /*
1009 * By default for 64-bit users loaded at 4GB, share kernel mapping.
1010 * But this may be overridden by the -no_shared_cr3 boot-arg.
1011 */
593a1d5f 1012 if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
0c530ab8 1013 kprintf("Shared kernel address space disabled\n");
2d21ac55
A
1014 }
1015
1016#ifdef PMAP_TRACES
593a1d5f 1017 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
2d21ac55
A
1018 kprintf("Kernel traces for pmap operations enabled\n");
1019 }
1020#endif /* PMAP_TRACES */
1c79356b
A
1021}
1022
1023void
1024pmap_virtual_space(
1025 vm_offset_t *startp,
1026 vm_offset_t *endp)
1027{
1028 *startp = virtual_avail;
1029 *endp = virtual_end;
1030}
1031
1032/*
1033 * Initialize the pmap module.
1034 * Called by vm_init, to initialize any structures that the pmap
1035 * system needs to map virtual memory.
1036 */
1037void
1038pmap_init(void)
1039{
1040 register long npages;
1041 vm_offset_t addr;
1042 register vm_size_t s;
0c530ab8 1043 vm_map_offset_t vaddr;
2d21ac55 1044 ppnum_t ppn;
1c79356b
A
1045
1046 /*
1047 * Allocate memory for the pv_head_table and its lock bits,
1048 * the modify bit array, and the pte_page table.
1049 */
1050
2d21ac55
A
1051 /*
1052 * zero bias all these arrays now instead of off avail_start
1053 * so we cover all memory
1054 */
1055
b0d623f7 1056 npages = (long)i386_btop(avail_end);
2d21ac55
A
1057 s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
1058 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
1059 + pv_lock_table_size(npages)
1060 + pv_hash_lock_table_size((npvhash+1))
1c79356b
A
1061 + npages);
1062
1063 s = round_page(s);
b0d623f7
A
1064 if (kernel_memory_allocate(kernel_map, &addr, s, 0,
1065 KMA_KOBJECT | KMA_PERMANENT)
1066 != KERN_SUCCESS)
1c79356b
A
1067 panic("pmap_init");
1068
1069 memset((char *)addr, 0, s);
1070
2d21ac55
A
1071#if PV_DEBUG
1072 if (0 == npvhash) panic("npvhash not initialized");
1073#endif
1074
1c79356b
A
1075 /*
1076 * Allocate the structures first to preserve word-alignment.
1077 */
2d21ac55 1078 pv_head_table = (pv_rooted_entry_t) addr;
1c79356b
A
1079 addr = (vm_offset_t) (pv_head_table + npages);
1080
2d21ac55
A
1081 pv_hash_table = (pv_hashed_entry_t *)addr;
1082 addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));
1083
1c79356b
A
1084 pv_lock_table = (char *) addr;
1085 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1086
2d21ac55
A
1087 pv_hash_lock_table = (char *) addr;
1088 addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));
1089
1c79356b 1090 pmap_phys_attributes = (char *) addr;
2d21ac55
A
1091 {
1092 unsigned int i;
1093 unsigned int pn;
1094 ppnum_t last_pn;
1095 pmap_memory_region_t *pmptr = pmap_memory_regions;
1096
b0d623f7 1097 last_pn = (ppnum_t)i386_btop(avail_end);
2d21ac55
A
1098
1099 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
1100 if (pmptr->type == kEfiConventionalMemory) {
b0d623f7 1101
2d21ac55
A
1102 for (pn = pmptr->base; pn <= pmptr->end; pn++) {
1103 if (pn < last_pn) {
1104 pmap_phys_attributes[pn] |= PHYS_MANAGED;
1105
1106 if (pn > last_managed_page)
1107 last_managed_page = pn;
1108 }
1109 }
1110 }
1111 }
1112 }
1c79356b
A
1113
1114 /*
1115 * Create the zone of physical maps,
1116 * and of the physical-to-virtual entries.
1117 */
1118 s = (vm_size_t) sizeof(struct pmap);
1119 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
2d21ac55
A
1120 s = (vm_size_t) sizeof(struct pv_hashed_entry);
1121 pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
91447636
A
1122 s = 63;
1123 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
55e303ae 1124
91447636 1125 kptobj = &kptobj_object_store;
2d21ac55 1126 _vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj);
91447636 1127 kernel_pmap->pm_obj = kptobj;
91447636
A
1128
1129 /* create pv entries for kernel pages mapped by low level
1130 startup code. these have to exist so we can pmap_remove()
1131 e.g. kext pages from the middle of our addr space */
1132
0c530ab8 1133 vaddr = (vm_map_offset_t)0;
91447636 1134 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
2d21ac55 1135 pv_rooted_entry_t pv_e;
91447636
A
1136
1137 pv_e = pai_to_pvh(ppn);
1138 pv_e->va = vaddr;
1139 vaddr += PAGE_SIZE;
1140 pv_e->pmap = kernel_pmap;
2d21ac55 1141 queue_init(&pv_e->qlink);
91447636
A
1142 }
1143
1c79356b
A
1144 pmap_initialized = TRUE;
1145
1146 /*
2d21ac55 1147 * Initialize pmap cache.
1c79356b
A
1148 */
1149 pmap_cache_list = PMAP_NULL;
1150 pmap_cache_count = 0;
91447636 1151 simple_lock_init(&pmap_cache_lock, 0);
2d21ac55
A
1152
1153 max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
1154
1c79356b
A
1155}
1156
1c79356b 1157
2d21ac55 1158#define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )
1c79356b 1159
2d21ac55 1160/*
1161 * this function is only used for debugging from the vm layer
1162 */
1c79356b
A
1163boolean_t
1164pmap_verify_free(
55e303ae 1165 ppnum_t pn)
1c79356b 1166{
2d21ac55 1167 pv_rooted_entry_t pv_h;
1c79356b 1168 int pai;
1c79356b
A
1169 boolean_t result;
1170
55e303ae 1171 assert(pn != vm_page_fictitious_addr);
2d21ac55 1172
1c79356b
A
1173 if (!pmap_initialized)
1174 return(TRUE);
1175
2d21ac55
A
1176 if (pn == vm_page_guard_addr)
1177 return TRUE;
1c79356b 1178
2d21ac55
A
1179 pai = ppn_to_pai(pn);
1180 if (!managed_page(pai))
1181 return(FALSE);
1182 pv_h = pai_to_pvh(pn);
1183 result = (pv_h->pmap == PMAP_NULL);
1184 return(result);
1185}
1c79356b 1186
2d21ac55
A
1187boolean_t
1188pmap_is_empty(
1189 pmap_t pmap,
b0d623f7
A
1190 vm_map_offset_t va_start,
1191 vm_map_offset_t va_end)
2d21ac55
A
1192{
1193 vm_map_offset_t offset;
1194 ppnum_t phys_page;
1c79356b 1195
2d21ac55
A
1196 if (pmap == PMAP_NULL) {
1197 return TRUE;
1198 }
b0d623f7
A
1199
1200 /*
1201 * Check the resident page count
1202 * - if it's zero, the pmap is completely empty.
1203 * This short-circuit test prevents a virtual address scan which is
1204 * painfully slow for 64-bit spaces.
1205 * This assumes the count is correct
1206 * .. the debug kernel ought to be checking perhaps by page table walk.
1207 */
1208 if (pmap->stats.resident_count == 0)
1209 return TRUE;
1210
1211 for (offset = va_start;
1212 offset < va_end;
2d21ac55
A
1213 offset += PAGE_SIZE_64) {
1214 phys_page = pmap_find_phys(pmap, offset);
1215 if (phys_page) {
1216 if (pmap != kernel_pmap &&
1217 pmap->pm_task_map == TASK_MAP_32BIT &&
1218 offset >= HIGH_MEM_BASE) {
1219 /*
1220 * The "high_shared_pde" is used to share
1221 * the entire top-most 2MB of address space
1222 * between the kernel and all 32-bit tasks.
1223 * So none of this can be removed from 32-bit
1224 * tasks.
1225 * Let's pretend there's nothing up
1226 * there...
1227 */
1228 return TRUE;
1229 }
1230 kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
1231 "page %d at 0x%llx\n",
b0d623f7 1232 pmap, va_start, va_end, phys_page, offset);
2d21ac55
A
1233 return FALSE;
1234 }
1235 }
1c79356b 1236
2d21ac55 1237 return TRUE;
1c79356b
A
1238}
1239
2d21ac55 1240
1c79356b
A
1241/*
1242 * Create and return a physical map.
1243 *
1244 * If the size specified for the map
1245 * is zero, the map is an actual physical
1246 * map, and may be referenced by the
1247 * hardware.
1248 *
1249 * If the size specified is non-zero,
1250 * the map will be used in software only, and
1251 * is bounded by that size.
1252 */
1253pmap_t
1254pmap_create(
0c530ab8 1255 vm_map_size_t sz,
2d21ac55 1256 boolean_t is_64bit)
1c79356b 1257{
2d21ac55 1258 pmap_t p;
0c530ab8
A
1259 int i;
1260 vm_offset_t va;
1261 vm_size_t size;
1262 pdpt_entry_t *pdpt;
1263 pml4_entry_t *pml4p;
0c530ab8 1264 pd_entry_t *pdp;
2d21ac55 1265 int template;
0c530ab8
A
1266 spl_t s;
1267
2d21ac55
A
1268 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
1269 (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0);
1270
0c530ab8 1271 size = (vm_size_t) sz;
1c79356b
A
1272
1273 /*
1274 * A software use-only map doesn't even need a map.
1275 */
1276
1277 if (size != 0) {
1278 return(PMAP_NULL);
1279 }
1280
91447636
A
1281 p = (pmap_t) zalloc(pmap_zone);
1282 if (PMAP_NULL == p)
2d21ac55 1283 panic("pmap_create zalloc");
6601e61a 1284
0c530ab8
A
1285 /* init counts now since we'll be bumping some */
1286 simple_lock_init(&p->lock, 0);
1c79356b 1287 p->stats.resident_count = 0;
2d21ac55 1288 p->stats.resident_max = 0;
1c79356b 1289 p->stats.wired_count = 0;
1c79356b 1290 p->ref_count = 1;
0c530ab8 1291 p->nx_enabled = 1;
0c530ab8
A
1292 p->pm_shared = FALSE;
1293
2d21ac55
A
1294 assert(!is_64bit || cpu_64bit);
1295 p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
1296
0c530ab8 1297 if (!cpu_64bit) {
2d21ac55
A
1298 /* legacy 32 bit setup */
1299 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
1300 * entry covers 1GB of addr space */
b0d623f7
A
1301 if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1302 panic("pmap_create kmem_alloc_kobject");
2d21ac55
A
1303 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1304 if ((vm_offset_t)NULL == p->pm_hold) {
1305 panic("pdpt zalloc");
1306 }
1307 pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1308 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
1309 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
1310 panic("pmap_create vm_object_allocate");
0c530ab8 1311
2d21ac55 1312 memset((char *)p->dirbase, 0, NBPTD);
0c530ab8 1313
2d21ac55
A
1314 va = (vm_offset_t)p->dirbase;
1315 p->pdirbase = kvtophys(va);
0c530ab8 1316
b7266188 1317 template = INTEL_PTE_VALID;
2d21ac55
A
1318 for (i = 0; i< NPGPTD; i++, pdpt++ ) {
1319 pmap_paddr_t pa;
b0d623f7 1320 pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
2d21ac55
A
1321 pmap_store_pte(pdpt, pa | template);
1322 }
0c530ab8 1323
2d21ac55
A
1324 /* map the high shared pde */
1325 s = splhigh();
1326 pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
1327 splx(s);
4452a7af 1328
0c530ab8 1329 } else {
2d21ac55 1330 /* 64 bit setup */
4452a7af 1331
2d21ac55 1332 /* alloc the pml4 page in kernel vm */
b0d623f7
A
1333 if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
1334 panic("pmap_create kmem_alloc_kobject pml4");
4452a7af 1335
2d21ac55
A
1336 memset((char *)p->pm_hold, 0, PAGE_SIZE);
1337 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
0c530ab8 1338
b0d623f7 1339 OSAddAtomic(1, &inuse_ptepages_count);
0c530ab8 1340
2d21ac55 1341 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
0c530ab8 1342
2d21ac55
A
1343 if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
1344 panic("pmap_create pdpt obj");
0c530ab8 1345
2d21ac55
A
1346 if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
1347 panic("pmap_create pdpt obj");
0c530ab8 1348
2d21ac55
A
1349 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
1350 panic("pmap_create pte obj");
0c530ab8 1351
2d21ac55
A
1352 /* uber space points to uber mapped kernel */
1353 s = splhigh();
1354 pml4p = pmap64_pml4(p, 0ULL);
b7266188 1355 pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);
0c530ab8 1356
0c530ab8 1357
2d21ac55
A
1358 if (!is_64bit) {
1359 while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
1360 splx(s);
1361 pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
1362 s = splhigh();
1363 }
1364 pmap_store_pte(pdp, high_shared_pde);
1365 }
1366 splx(s);
0c530ab8 1367 }
1c79356b 1368
2d21ac55 1369 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
1370 (int) p, is_64bit, 0, 0, 0);
1371
1c79356b 1372 return(p);
1373}
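/*
 * Illustrative usage sketch (not part of the original source): a hardware
 * pmap is created with size 0; a non-zero size would make this a
 * software-only map and, as coded above, simply returns PMAP_NULL.
 */
#if 0
	pmap_t new_pmap;

	new_pmap = pmap_create(0, FALSE);	/* 32-bit task address space */
	assert(new_pmap != PMAP_NULL);
	/* ... use the map ... */
	pmap_destroy(new_pmap);			/* drops the reference taken at create */
#endif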
1374
2d21ac55 1375/*
1376 * The following routines implement the shared address optimization for 64-bit
1377 * users with a 4GB page zero.
1378 *
1379 * pmap_set_4GB_pagezero()
1380 * is called in the exec and fork paths to mirror the kernel's
1381 * mapping in the bottom 4G of the user's pmap. The task mapping changes
1382 * from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns
1383 * without doing anything if the -no_shared_cr3 boot-arg is set.
1384 *
1385 * pmap_clear_4GB_pagezero()
1386 * is called in the exec/exit paths to undo this mirror. The task mapping
1387 * reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's
1388 * CR3 by calling pmap_load_kernel_cr3().
1389 *
1390 * pmap_load_kernel_cr3()
1391 * loads cr3 with the kernel's page table. In addition to being called
1392 * by pmap_clear_4GB_pagezero(), it is used both prior to teardown and
1393 * when we go idle in the context of a shared map.
1394 *
1395 * Further notes on per-cpu data used:
1396 *
1397 * cpu_kernel_cr3 is the cr3 for the kernel's pmap.
1398 * This is loaded in a trampoline on entering the kernel
1399 * from a 32-bit user (or non-shared-cr3 64-bit user).
1400 * cpu_task_cr3 is the cr3 for the current thread.
1401 * This is loaded in a trampoline as we exit the kernel.
1402 * cpu_active_cr3 reflects the cr3 currently loaded.
1403 * However, the low order bit is set when the
1404 * processor is idle or interrupts are disabled
1405 * while the system pmap lock is held. It is used by
1406 * tlb shoot-down.
1407 * cpu_task_map indicates whether the task cr3 belongs to
1408 * a 32-bit, a 64-bit or a 64-bit shared map.
1409 * The latter allows the avoidance of the cr3 load
1410 * on kernel entry and exit.
1411 * cpu_tlb_invalid set TRUE when a tlb flush is requested.
1412 * If the cr3 is "inactive" (the cpu is idle or the
1413 * system-wide pmap lock is held) this not serviced by
1414 * an IPI but at time when the cr3 becomes "active".
1415 */
1416
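/*
 * Illustrative sketch (not part of the original source) of the call
 * sequence described above, for a 64-bit pmap p with a 4GB page-zero:
 */
#if 0
	/* exec/fork path: mirror the kernel's low 4GB into the user pmap */
	pmap_set_4GB_pagezero(p);	/* p->pm_task_map becomes TASK_MAP_64BIT_SHARED */

	/* exec/exit path: undo the mirror and fall back to the kernel cr3 */
	pmap_clear_4GB_pagezero(p);	/* p->pm_task_map reverts to TASK_MAP_64BIT */
#endif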
0c530ab8 1417void
1418pmap_set_4GB_pagezero(pmap_t p)
1419{
0c530ab8
A
1420 pdpt_entry_t *user_pdptp;
1421 pdpt_entry_t *kern_pdptp;
1422
2d21ac55 1423 assert(p->pm_task_map != TASK_MAP_32BIT);
0c530ab8
A
1424
1425 /* Kernel-shared cr3 may be disabled by boot arg. */
1426 if (no_shared_cr3)
1427 return;
1428
1429 /*
1430 * Set the bottom 4 3rd-level pte's to be the kernel's.
1431 */
2d21ac55 1432 PMAP_LOCK(p);
0c530ab8 1433 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
2d21ac55 1434 PMAP_UNLOCK(p);
0c530ab8 1435 pmap_expand_pml4(p, 0x0);
2d21ac55 1436 PMAP_LOCK(p);
0c530ab8
A
1437 }
1438 kern_pdptp = kernel_pmap->pm_pdpt;
1439 pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
1440 pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
1441 pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
1442 pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
2d21ac55
A
1443 p->pm_task_map = TASK_MAP_64BIT_SHARED;
1444 PMAP_UNLOCK(p);
0c530ab8
A
1445}
1446
1447void
1448pmap_clear_4GB_pagezero(pmap_t p)
1449{
0c530ab8
A
1450 pdpt_entry_t *user_pdptp;
1451
2d21ac55 1452 if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
0c530ab8
A
1453 return;
1454
2d21ac55
A
1455 PMAP_LOCK(p);
1456
1457 p->pm_task_map = TASK_MAP_64BIT;
1458
1459 pmap_load_kernel_cr3();
1460
0c530ab8
A
1461 user_pdptp = pmap64_pdpt(p, 0x0);
1462 pmap_store_pte(user_pdptp+0, 0);
1463 pmap_store_pte(user_pdptp+1, 0);
1464 pmap_store_pte(user_pdptp+2, 0);
1465 pmap_store_pte(user_pdptp+3, 0);
1466
2d21ac55
A
1467 PMAP_UNLOCK(p);
1468}
0c530ab8 1469
2d21ac55
A
1470void
1471pmap_load_kernel_cr3(void)
1472{
1473 uint64_t kernel_cr3;
0c530ab8 1474
2d21ac55
A
1475 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
1476
1477 /*
1478 * Reload cr3 with the true kernel cr3.
1479 */
1480 kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
1481 set64_cr3(kernel_cr3);
1482 current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
1483 current_cpu_datap()->cpu_tlb_invalid = FALSE;
1484 __asm__ volatile("mfence");
0c530ab8
A
1485}
1486
1c79356b
A
1487/*
1488 * Retire the given physical map from service.
1489 * Should only be called if the map contains
1490 * no valid mappings.
1491 */
1492
1493void
1494pmap_destroy(
1495 register pmap_t p)
1496{
1c79356b 1497 register int c;
1c79356b
A
1498
1499 if (p == PMAP_NULL)
1500 return;
2d21ac55
A
1501
1502 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
1503 (int) p, 0, 0, 0, 0);
1504
1505 PMAP_LOCK(p);
1506
1c79356b 1507 c = --p->ref_count;
2d21ac55 1508
1c79356b 1509 if (c == 0) {
1c79356b
A
1510 /*
1511 * If some cpu is not using the physical pmap pointer that it
1512 * is supposed to be (see set_dirbase), we might be using the
1513 * pmap that is being destroyed! Make sure we are
1514 * physically on the right pmap:
1515 */
55e303ae 1516 PMAP_UPDATE_TLBS(p,
2d21ac55
A
1517 0x0ULL,
1518 0xFFFFFFFFFFFFF000ULL);
1c79356b 1519 }
2d21ac55
A
1520
1521 PMAP_UNLOCK(p);
1c79356b
A
1522
1523 if (c != 0) {
2d21ac55
A
1524 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1525 (int) p, 1, 0, 0, 0);
1526 return; /* still in use */
1c79356b
A
1527 }
1528
1529 /*
1530 * Free the memory maps, then the
1531 * pmap structure.
1532 */
0c530ab8 1533 if (!cpu_64bit) {
b0d623f7 1534 OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count);
91447636 1535
2d21ac55
A
1536 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1537 zfree(pdpt_zone, (void *)p->pm_hold);
0c530ab8 1538
2d21ac55
A
1539 vm_object_deallocate(p->pm_obj);
1540 } else {
1541 /* 64 bit */
1542 int inuse_ptepages = 0;
0c530ab8 1543
2d21ac55
A
1544 /* free 64 bit mode structs */
1545 inuse_ptepages++;
1546 kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
1547
1548 inuse_ptepages += p->pm_obj_pml4->resident_page_count;
1549 vm_object_deallocate(p->pm_obj_pml4);
1550
1551 inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
1552 vm_object_deallocate(p->pm_obj_pdpt);
0c530ab8 1553
2d21ac55
A
1554 inuse_ptepages += p->pm_obj->resident_page_count;
1555 vm_object_deallocate(p->pm_obj);
1556
b0d623f7 1557 OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count);
2d21ac55
A
1558 }
1559 zfree(pmap_zone, p);
1c79356b 1560
2d21ac55
A
1561 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1562 0, 0, 0, 0, 0);
0c530ab8 1563
1c79356b
A
1564}
1565
1566/*
1567 * Add a reference to the specified pmap.
1568 */
1569
1570void
1571pmap_reference(
1572 register pmap_t p)
1573{
1c79356b
A
1574
1575 if (p != PMAP_NULL) {
2d21ac55 1576 PMAP_LOCK(p);
1c79356b 1577 p->ref_count++;
2d21ac55 1578 PMAP_UNLOCK(p);
1c79356b
A
1579 }
1580}
1581
1c79356b 1582
0b4e3aa0
A
1583/*
1584 * Remove phys addr if mapped in specified map
1585 *
1586 */
1587void
1588pmap_remove_some_phys(
91447636
A
1589 __unused pmap_t map,
1590 __unused ppnum_t pn)
0b4e3aa0
A
1591{
1592
1593/* Implement to support working set code */
1594
1595}
1596
91447636 1597/*
1598 * Routine:
1599 * pmap_disconnect
1600 *
1601 * Function:
1602 * Disconnect all mappings for this page and return reference and change status
1603 * in generic format.
1604 *
1605 */
1606unsigned int pmap_disconnect(
1607 ppnum_t pa)
1608{
2d21ac55 1609 pmap_page_protect(pa, 0); /* disconnect the page */
91447636 1610 return (pmap_get_refmod(pa)); /* return ref/chg status */
1611}
1612
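/*
 * Illustrative usage sketch (not part of the original source): the return
 * value is in the generic VM_MEM_* ref/mod format from pmap_get_refmod().
 * "pn" is an assumed managed physical page number.
 */
#if 0
	unsigned int refmod;

	refmod = pmap_disconnect(pn);
	if (refmod & VM_MEM_MODIFIED) {
		/* page was dirtied while mapped */
	}
	if (refmod & VM_MEM_REFERENCED) {
		/* page was referenced while mapped */
	}
#endif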
1c79356b
A
1613/*
1614 * Set the physical protection on the
1615 * specified range of this map as requested.
1616 * Will not increase permissions.
1617 */
1618void
1619pmap_protect(
1620 pmap_t map,
0c530ab8
A
1621 vm_map_offset_t sva,
1622 vm_map_offset_t eva,
1c79356b
A
1623 vm_prot_t prot)
1624{
1625 register pt_entry_t *pde;
1626 register pt_entry_t *spte, *epte;
0c530ab8
A
1627 vm_map_offset_t lva;
1628 vm_map_offset_t orig_sva;
0c530ab8 1629 boolean_t set_NX;
2d21ac55
A
1630 int num_found = 0;
1631
1632 pmap_intr_assert();
1c79356b
A
1633
1634 if (map == PMAP_NULL)
1635 return;
1636
0c530ab8
A
1637 if (prot == VM_PROT_NONE) {
1638 pmap_remove(map, sva, eva);
1c79356b
A
1639 return;
1640 }
1641
2d21ac55
A
1642 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
1643 (int) map,
1644 (int) (sva>>32), (int) sva,
1645 (int) (eva>>32), (int) eva);
1646
0c530ab8
A
1647 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
1648 set_NX = FALSE;
1649 else
1650 set_NX = TRUE;
1651
2d21ac55 1652 PMAP_LOCK(map);
1c79356b 1653
0c530ab8
A
1654 orig_sva = sva;
1655 while (sva < eva) {
1656 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
1657 if (lva > eva)
1658 lva = eva;
1659 pde = pmap_pde(map, sva);
1660 if (pde && (*pde & INTEL_PTE_VALID)) {
1661 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
1662 spte = &spte[ptenum(sva)];
1663 epte = &spte[intel_btop(lva-sva)];
1c79356b
A
1664
1665 while (spte < epte) {
2d21ac55 1666
0c530ab8
A
1667 if (*spte & INTEL_PTE_VALID) {
1668
1669 if (prot & VM_PROT_WRITE)
2d21ac55 1670 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_WRITE));
0c530ab8 1671 else
2d21ac55 1672 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_WRITE));
0c530ab8
A
1673
1674 if (set_NX == TRUE)
2d21ac55 1675 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_NX));
0c530ab8 1676 else
2d21ac55 1677 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_NX));
0c530ab8
A
1678
1679 num_found++;
0c530ab8 1680 }
1c79356b
A
1681 spte++;
1682 }
1683 }
0c530ab8 1684 sva = lva;
1c79356b 1685 }
0c530ab8 1686 if (num_found)
2d21ac55
A
1687 PMAP_UPDATE_TLBS(map, orig_sva, eva);
1688
1689 PMAP_UNLOCK(map);
1690
1691 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
1692 0, 0, 0, 0, 0);
91447636 1693
1c79356b
A
1694}
1695
0c530ab8
A
1696/* Map a (possibly) autogenned block */
1697void
1698pmap_map_block(
1699 pmap_t pmap,
1700 addr64_t va,
1701 ppnum_t pa,
1702 uint32_t size,
1703 vm_prot_t prot,
1704 int attr,
1705 __unused unsigned int flags)
1706{
2d21ac55 1707 uint32_t page;
0c530ab8 1708
2d21ac55
A
1709 for (page = 0; page < size; page++) {
1710 pmap_enter(pmap, va, pa, prot, attr, TRUE);
1711 va += PAGE_SIZE;
1712 pa++;
1713 }
0c530ab8 1714}
1c79356b
A
1715
1716
1c79356b
A
1717/*
1718 * Routine: pmap_change_wiring
1719 * Function: Change the wiring attribute for a map/virtual-address
1720 * pair.
1721 * In/out conditions:
1722 * The mapping must already exist in the pmap.
1723 */
1724void
1725pmap_change_wiring(
1726 register pmap_t map,
0c530ab8 1727 vm_map_offset_t vaddr,
1c79356b
A
1728 boolean_t wired)
1729{
1730 register pt_entry_t *pte;
1c79356b
A
1731
1732 /*
1733 * We must grab the pmap system lock because we may
1734 * change a pte_page queue.
1735 */
2d21ac55 1736 PMAP_LOCK(map);
1c79356b 1737
0c530ab8 1738 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1c79356b
A
1739 panic("pmap_change_wiring: pte missing");
1740
1741 if (wired && !iswired(*pte)) {
1742 /*
1743 * wiring down mapping
1744 */
b0d623f7 1745 OSAddAtomic(+1, &map->stats.wired_count);
0c530ab8 1746 pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
1c79356b
A
1747 }
1748 else if (!wired && iswired(*pte)) {
1749 /*
1750 * unwiring mapping
1751 */
1752 assert(map->stats.wired_count >= 1);
b0d623f7 1753 OSAddAtomic(-1, &map->stats.wired_count);
0c530ab8 1754 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
1c79356b
A
1755 }
1756
2d21ac55 1757 PMAP_UNLOCK(map);
1c79356b
A
1758}
1759
55e303ae 1760
1c79356b
A
1761/*
1762 * Routine: pmap_extract
1763 * Function:
1764 * Extract the physical page address associated
1765 * with the given map/virtual_address pair.
91447636
A
1766 *	Changed to a shim for backwards compatibility; it will not
1767 *	work on 64-bit systems. Some old drivers that we cannot
1768 *	change still need this.
1c79356b
A
1769 */
1770
1771vm_offset_t
1772pmap_extract(
1773 register pmap_t pmap,
0c530ab8 1774 vm_map_offset_t vaddr)
1c79356b 1775{
0c530ab8
A
1776 ppnum_t ppn;
1777 vm_offset_t paddr;
91447636 1778
0c530ab8
A
1779 paddr = (vm_offset_t)0;
1780 ppn = pmap_find_phys(pmap, vaddr);
2d21ac55 1781
0c530ab8 1782 if (ppn) {
b0d623f7 1783 paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK);
0c530ab8
A
1784 }
1785 return (paddr);
1c79356b
A
1786}
1787
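/*
 *	Routine:	pmap_expand_pml4
 *
 *	Allocates, zeroes and wires a fresh page for the PML4 slot covering
 *	vaddr, inserts it into the pmap's pm_obj_pml4 object and installs
 *	the PML4 entry (valid | user | write).  Backs out if another thread
 *	expanded the pmap first.  Not legal on the kernel pmap.
 */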
1c79356b 1788void
0c530ab8
A
1789pmap_expand_pml4(
1790 pmap_t map,
1791 vm_map_offset_t vaddr)
1c79356b 1792{
1c79356b 1793 register vm_page_t m;
91447636 1794 register pmap_paddr_t pa;
0c530ab8 1795 uint64_t i;
1c79356b 1796 spl_t spl;
55e303ae 1797 ppnum_t pn;
0c530ab8 1798 pml4_entry_t *pml4p;
89b3af67 1799
0c530ab8
A
1800 if (kernel_pmap == map) panic("expand kernel pml4");
1801
1802 spl = splhigh();
2d21ac55
A
1803 pml4p = pmap64_pml4(map, vaddr);
1804 splx(spl);
1805 if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
1c79356b
A
1806
1807 /*
0c530ab8 1808 * Allocate a VM page for the pml4 page
1c79356b
A
1809 */
1810 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1811 VM_PAGE_WAIT();
1812
1813 /*
91447636 1814 * put the page into the pmap's obj list so it
1c79356b
A
1815 * can be found later.
1816 */
55e303ae
A
1817 pn = m->phys_page;
1818 pa = i386_ptob(pn);
0c530ab8
A
1819 i = pml4idx(map, vaddr);
1820
2d21ac55
A
1821 /*
1822 * Zero the page.
1823 */
1824 pmap_zero_page(pn);
0c530ab8 1825
b0d623f7 1826 vm_page_lockspin_queues();
1c79356b 1827 vm_page_wire(m);
2d21ac55 1828 vm_page_unlock_queues();
1c79356b 1829
b0d623f7
A
1830 OSAddAtomic(1, &inuse_ptepages_count);
1831
2d21ac55
A
1832	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1833 vm_object_lock(map->pm_obj_pml4);
1c79356b 1834
2d21ac55 1835 PMAP_LOCK(map);
1c79356b
A
1836 /*
1837 * See if someone else expanded us first
1838 */
0c530ab8 1839 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
2d21ac55
A
1840 PMAP_UNLOCK(map);
1841 vm_object_unlock(map->pm_obj_pml4);
1842
b0d623f7 1843 VM_PAGE_FREE(m);
2d21ac55 1844
b0d623f7 1845 OSAddAtomic(-1, &inuse_ptepages_count);
1c79356b
A
1846 return;
1847 }
1848
2d21ac55
A
1849#if 0 /* DEBUG */
1850 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
1851 panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1852 map, map->pm_obj_pml4, vaddr, i);
1853 }
1854#endif
1855 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
1856 vm_object_unlock(map->pm_obj_pml4);
1857
1c79356b
A
1858 /*
1859 * Set the page directory entry for this page table.
1c79356b 1860 */
0c530ab8 1861 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
c0fea474 1862
0c530ab8
A
1863 pmap_store_pte(pml4p, pa_to_pte(pa)
1864 | INTEL_PTE_VALID
1865 | INTEL_PTE_USER
1866 | INTEL_PTE_WRITE);
5d5c5d0d 1867
2d21ac55 1868 PMAP_UNLOCK(map);
89b3af67 1869
6601e61a 1870 return;
0c530ab8 1871
6601e61a 1872}
89b3af67 1873
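/*
 *	Routine:	pmap_expand_pdpt
 *
 *	Allocates, zeroes and wires a fresh page-directory page for the PDPT
 *	slot covering vaddr, inserting it into pm_obj_pdpt and installing the
 *	PDPT entry (valid | user | write).  Expands the PML4 level first if
 *	necessary.  Backs out if another thread expanded the pmap first.
 *	Not legal on the kernel pmap.
 */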
6601e61a 1874void
0c530ab8
A
1875pmap_expand_pdpt(
1876 pmap_t map,
1877 vm_map_offset_t vaddr)
6601e61a 1878{
0c530ab8
A
1879 register vm_page_t m;
1880 register pmap_paddr_t pa;
1881 uint64_t i;
1882 spl_t spl;
1883 ppnum_t pn;
1884 pdpt_entry_t *pdptp;
89b3af67 1885
0c530ab8 1886 if (kernel_pmap == map) panic("expand kernel pdpt");
89b3af67 1887
0c530ab8 1888 spl = splhigh();
2d21ac55
A
1889 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
1890 splx(spl);
1891 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
1892 spl = splhigh();
1893 }
1894 splx(spl);
4452a7af 1895
0c530ab8
A
1896 /*
1897 * Allocate a VM page for the pdpt page
1898 */
1899 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1900 VM_PAGE_WAIT();
4452a7af 1901
4452a7af 1902 /*
0c530ab8
A
1903 * put the page into the pmap's obj list so it
1904 * can be found later.
4452a7af 1905 */
0c530ab8
A
1906 pn = m->phys_page;
1907 pa = i386_ptob(pn);
1908 i = pdptidx(map, vaddr);
4452a7af 1909
2d21ac55
A
1910 /*
1911 * Zero the page.
1912 */
1913 pmap_zero_page(pn);
0c530ab8 1914
b0d623f7 1915 vm_page_lockspin_queues();
0c530ab8 1916 vm_page_wire(m);
2d21ac55 1917 vm_page_unlock_queues();
0c530ab8 1918
b0d623f7
A
1919 OSAddAtomic(1, &inuse_ptepages_count);
1920
2d21ac55
A
1921	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1922 vm_object_lock(map->pm_obj_pdpt);
0c530ab8 1923
2d21ac55 1924 PMAP_LOCK(map);
0c530ab8
A
1925 /*
1926 * See if someone else expanded us first
1927 */
1928 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
2d21ac55
A
1929 PMAP_UNLOCK(map);
1930 vm_object_unlock(map->pm_obj_pdpt);
1931
b0d623f7 1932 VM_PAGE_FREE(m);
2d21ac55 1933
b0d623f7 1934 OSAddAtomic(-1, &inuse_ptepages_count);
0c530ab8
A
1935 return;
1936 }
1937
2d21ac55
A
1938#if 0 /* DEBUG */
1939 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
1940 panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1941 map, map->pm_obj_pdpt, vaddr, i);
1942 }
1943#endif
1944 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
1945 vm_object_unlock(map->pm_obj_pdpt);
1946
0c530ab8
A
1947 /*
1948 * Set the page directory entry for this page table.
0c530ab8 1949 */
0c530ab8
A
1950 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
1951
1952 pmap_store_pte(pdptp, pa_to_pte(pa)
1953 | INTEL_PTE_VALID
1954 | INTEL_PTE_USER
1955 | INTEL_PTE_WRITE);
1956
2d21ac55 1957 PMAP_UNLOCK(map);
0c530ab8
A
1958
1959 return;
1960
1961}
1962
1963
1964
1965/*
1966 * Routine: pmap_expand
1967 *
1968 * Expands a pmap to be able to map the specified virtual address.
1969 *
1970 * Allocates new virtual memory for the P0 or P1 portion of the
1971 * pmap, then re-maps the physical pages that were in the old
1972 * pmap to be in the new pmap.
1973 *
1974 * Must be called with the pmap system and the pmap unlocked,
1975 * since these must be unlocked to use vm_allocate or vm_deallocate.
1976 * Thus it must be called in a loop that checks whether the map
1977 * has been expanded enough.
1978 * (We won't loop forever, since page tables aren't shrunk.)
1979 */
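/*
 * A typical caller pattern -- compare the window setup in pmap_cpu_alloc()
 * below, which does exactly this against the kernel pmap:
 *
 *	while ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
 *		pmap_expand(map, vaddr);
 */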
1980void
1981pmap_expand(
1982 pmap_t map,
1983 vm_map_offset_t vaddr)
1984{
1985 pt_entry_t *pdp;
1986 register vm_page_t m;
1987 register pmap_paddr_t pa;
1988 uint64_t i;
1989 spl_t spl;
1990 ppnum_t pn;
1991
1992 /*
1993 * If this is not the kernel map and the cpu is 64-bit (while we are
1994 * still in compat kernel mode), propagate the expansion upward.
1995 */
1996
1997 if (cpu_64bit && (map != kernel_pmap)) {
2d21ac55
A
1998 spl = splhigh();
1999 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
2000 splx(spl);
2001 pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
2002 spl = splhigh();
2003 }
2004 splx(spl);
0c530ab8
A
2005 }
2006
0c530ab8
A
2007 /*
2008 * Allocate a VM page for the pde entries.
2009 */
2010 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2011 VM_PAGE_WAIT();
2012
2013 /*
2014 * put the page into the pmap's obj list so it
2015 * can be found later.
2016 */
2017 pn = m->phys_page;
2018 pa = i386_ptob(pn);
2019 i = pdeidx(map, vaddr);
2020
2d21ac55
A
2021 /*
2022 * Zero the page.
2023 */
2024 pmap_zero_page(pn);
0c530ab8 2025
b0d623f7 2026 vm_page_lockspin_queues();
0c530ab8 2027 vm_page_wire(m);
0c530ab8 2028 vm_page_unlock_queues();
0c530ab8 2029
b0d623f7
A
2030 OSAddAtomic(1, &inuse_ptepages_count);
2031
2d21ac55
A
2032	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
2033 vm_object_lock(map->pm_obj);
0c530ab8 2034
2d21ac55 2035 PMAP_LOCK(map);
0c530ab8
A
2036 /*
2037 * See if someone else expanded us first
2038 */
2d21ac55 2039
0c530ab8 2040 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
2d21ac55
A
2041 PMAP_UNLOCK(map);
2042 vm_object_unlock(map->pm_obj);
0c530ab8 2043
b0d623f7 2044 VM_PAGE_FREE(m);
2d21ac55 2045
b0d623f7 2046 OSAddAtomic(-1, &inuse_ptepages_count);
0c530ab8
A
2047 return;
2048 }
2049
2d21ac55
A
2050#if 0 /* DEBUG */
2051 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
2052 panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
2053 map, map->pm_obj, vaddr, i);
2054 }
2055#endif
2056 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2057 vm_object_unlock(map->pm_obj);
0c530ab8
A
2058
2059 /*
2d21ac55 2060 * refetch while locked
0c530ab8
A
2061 */
2062
2d21ac55
A
2063 pdp = pmap_pde(map, vaddr);
2064
2065 /*
2066 * Set the page directory entry for this page table.
2067 */
0c530ab8
A
2068 pmap_store_pte(pdp, pa_to_pte(pa)
2069 | INTEL_PTE_VALID
2070 | INTEL_PTE_USER
2071 | INTEL_PTE_WRITE);
0c530ab8 2072
2d21ac55 2073 PMAP_UNLOCK(map);
0c530ab8
A
2074
2075 return;
2076}
2077
2078
2079/*
2080 * pmap_sync_page_data_phys(ppnum_t pa)
2081 *
2082 * Invalidates all of the instruction cache on a physical page and
2083 * pushes any dirty data from the data cache for the same physical page.
2084 * Not required on i386.
2085 */
2086void
2087pmap_sync_page_data_phys(__unused ppnum_t pa)
2088{
2089 return;
2090}
2091
2092/*
2093 * pmap_sync_page_attributes_phys(ppnum_t pa)
2094 *
2095 * Write back and invalidate all cachelines on a physical page.
2096 */
2097void
2098pmap_sync_page_attributes_phys(ppnum_t pa)
2099{
2100 cache_flush_page_phys(pa);
2101}
2102
2d21ac55
A
2103
2104
2105#ifdef CURRENTLY_UNUSED_AND_UNTESTED
2106
0c530ab8
A
2107int collect_ref;
2108int collect_unref;
2109
2110/*
2111 * Routine: pmap_collect
2112 * Function:
2113 * Garbage collects the physical map system for
2114 * pages which are no longer used.
2115 * Success need not be guaranteed -- that is, there
2116 * may well be pages which are not referenced, but
2117 * others may be collected.
2118 * Usage:
2119 * Called by the pageout daemon when pages are scarce.
2120 */
2121void
2122pmap_collect(
2123 pmap_t p)
2124{
2125 register pt_entry_t *pdp, *ptp;
2126 pt_entry_t *eptp;
2127 int wired;
0c530ab8
A
2128
2129 if (p == PMAP_NULL)
2130 return;
2131
2132 if (p == kernel_pmap)
2133 return;
2134
2135 /*
2136 * Garbage collect map.
2137 */
2d21ac55 2138 PMAP_LOCK(p);
0c530ab8
A
2139
2140 for (pdp = (pt_entry_t *)p->dirbase;
4452a7af
A
2141 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2142 pdp++)
2143 {
2144 if (*pdp & INTEL_PTE_VALID) {
2145 if(*pdp & INTEL_PTE_REF) {
0c530ab8 2146 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
4452a7af
A
2147 collect_ref++;
2148 } else {
2149 collect_unref++;
2150 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2151 eptp = ptp + NPTEPG;
2152
2153 /*
2154 * If the pte page has any wired mappings, we cannot
2155 * free it.
2156 */
2157 wired = 0;
2158 {
2159 register pt_entry_t *ptep;
2160 for (ptep = ptp; ptep < eptp; ptep++) {
2161 if (iswired(*ptep)) {
2162 wired = 1;
5d5c5d0d 2163 break;
1c79356b
A
2164 }
2165 }
2166 }
2167 if (!wired) {
2168 /*
2169 * Remove the virtual addresses mapped by this pte page.
2170 */
2171 pmap_remove_range(p,
91447636 2172 pdetova(pdp - (pt_entry_t *)p->dirbase),
1c79356b
A
2173 ptp,
2174 eptp);
2175
2176 /*
2177 * Invalidate the page directory pointer.
2178 */
0c530ab8 2179 pmap_store_pte(pdp, 0x0);
91447636 2180
2d21ac55 2181 PMAP_UNLOCK(p);
1c79356b
A
2182
2183 /*
2184 * And free the pte page itself.
2185 */
2186 {
2187 register vm_page_t m;
2188
91447636 2189 vm_object_lock(p->pm_obj);
2d21ac55 2190
91447636 2191 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1c79356b
A
2192 if (m == VM_PAGE_NULL)
2193 panic("pmap_collect: pte page not in object");
2d21ac55 2194
b0d623f7
A
2195 VM_PAGE_FREE(m);
2196
2197 OSAddAtomic(-1, &inuse_ptepages_count);
2d21ac55 2198
91447636 2199 vm_object_unlock(p->pm_obj);
1c79356b
A
2200 }
2201
2d21ac55 2202 PMAP_LOCK(p);
1c79356b 2203 }
91447636
A
2204 }
2205 }
1c79356b 2206 }
0c530ab8 2207
2d21ac55
A
2208 PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
2209 PMAP_UNLOCK(p);
1c79356b
A
2210 return;
2211
2212}
2d21ac55 2213#endif
1c79356b 2214
1c79356b 2215
1c79356b 2216void
2d21ac55 2217pmap_copy_page(ppnum_t src, ppnum_t dst)
1c79356b 2218{
2d21ac55
A
2219 bcopy_phys((addr64_t)i386_ptob(src),
2220 (addr64_t)i386_ptob(dst),
2221 PAGE_SIZE);
1c79356b 2222}
1c79356b 2223
1c79356b
A
2224
2225/*
2226 * Routine: pmap_pageable
2227 * Function:
2228 * Make the specified pages (by pmap, offset)
2229 * pageable (or not) as requested.
2230 *
2231 * A page which is not pageable may not take
2232 * a fault; therefore, its page table entry
2233 * must remain valid for the duration.
2234 *
2235 * This routine is merely advisory; pmap_enter
2236 * will specify that these pages are to be wired
2237 * down (or not) as appropriate.
2238 */
2239void
2240pmap_pageable(
91447636 2241 __unused pmap_t pmap,
0c530ab8
A
2242 __unused vm_map_offset_t start_addr,
2243 __unused vm_map_offset_t end_addr,
91447636 2244 __unused boolean_t pageable)
1c79356b
A
2245{
2246#ifdef lint
91447636 2247 pmap++; start_addr++; end_addr++; pageable++;
1c79356b
A
2248#endif /* lint */
2249}
2250
2251/*
2252 * Clear specified attribute bits.
2253 */
2254void
2255phys_attribute_clear(
2d21ac55 2256 ppnum_t pn,
1c79356b
A
2257 int bits)
2258{
2d21ac55
A
2259 pv_rooted_entry_t pv_h;
2260 register pv_hashed_entry_t pv_e;
1c79356b
A
2261 register pt_entry_t *pte;
2262 int pai;
2263 register pmap_t pmap;
1c79356b 2264
2d21ac55 2265 pmap_intr_assert();
91447636 2266 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2267 if (pn == vm_page_guard_addr)
2268 return;
2269
2270 pai = ppn_to_pai(pn);
2271
2272 if (!managed_page(pai)) {
1c79356b
A
2273 /*
2274 * Not a managed page.
2275 */
2276 return;
2277 }
2278
b0d623f7 2279
2d21ac55
A
2280 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
2281 (int) pn, bits, 0, 0, 0);
1c79356b 2282
1c79356b
A
2283 pv_h = pai_to_pvh(pai);
2284
2d21ac55
A
2285 LOCK_PVH(pai);
2286
1c79356b
A
2287 /*
2288 * Walk down PV list, clearing all modify or reference bits.
2289 * We do not have to lock the pv_list because we have
2290 * the entire pmap system locked.
2291 */
2292 if (pv_h->pmap != PMAP_NULL) {
2293 /*
2294 * There are some mappings.
2295 */
1c79356b 2296
2d21ac55
A
2297 pv_e = (pv_hashed_entry_t)pv_h;
2298
2299 do {
1c79356b 2300 pmap = pv_e->pmap;
1c79356b
A
2301
2302 {
2d21ac55 2303 vm_map_offset_t va;
1c79356b
A
2304
2305 va = pv_e->va;
1c79356b 2306
2d21ac55
A
2307 /*
2308 * Clear modify and/or reference bits.
2309 */
91447636 2310
0c530ab8
A
2311 pte = pmap_pte(pmap, va);
2312 pmap_update_pte(pte, *pte, (*pte & ~bits));
c910b4d9
A
2313 /* Ensure all processors using this translation
2314 * invalidate this TLB entry. The invalidation *must* follow
2315 * the PTE update, to ensure that the TLB shadow of the
2316 * 'D' bit (in particular) is synchronized with the
2317 * updated PTE.
2318 */
2319 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 2320 }
91447636 2321
2d21ac55 2322 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1c79356b 2323
2d21ac55
A
2324 } while (pv_e != (pv_hashed_entry_t)pv_h);
2325 }
1c79356b
A
2326 pmap_phys_attributes[pai] &= ~bits;
2327
2d21ac55
A
2328 UNLOCK_PVH(pai);
2329
2330 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
2331 0, 0, 0, 0, 0);
2332
1c79356b
A
2333}
2334
2335/*
2336 * Check specified attribute bits.
2337 */
2d21ac55 2338int
1c79356b 2339phys_attribute_test(
2d21ac55 2340 ppnum_t pn,
1c79356b
A
2341 int bits)
2342{
2d21ac55
A
2343 pv_rooted_entry_t pv_h;
2344 register pv_hashed_entry_t pv_e;
1c79356b
A
2345 register pt_entry_t *pte;
2346 int pai;
2347 register pmap_t pmap;
2d21ac55 2348 int attributes = 0;
1c79356b 2349
2d21ac55 2350 pmap_intr_assert();
91447636 2351 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2352 if (pn == vm_page_guard_addr)
2353 return 0;
2354
2355 pai = ppn_to_pai(pn);
2356
2357 if (!managed_page(pai)) {
1c79356b
A
2358 /*
2359 * Not a managed page.
2360 */
2d21ac55 2361 return (0);
1c79356b
A
2362 }
2363
0c530ab8
A
2364 /*
2365 * Fast check: if the bits have already been collected,
2366 * there is no need to take any locks.
2367 * If they are not set, we need to recheck after taking
2368 * the lock, in case they were pulled in while we were
2369 * waiting for it.
2370 */
2d21ac55
A
2371 if ( (pmap_phys_attributes[pai] & bits) == bits)
2372 return (bits);
2373
0c530ab8
A
2374 pv_h = pai_to_pvh(pai);
2375
2d21ac55 2376 LOCK_PVH(pai);
1c79356b 2377
2d21ac55 2378 attributes = pmap_phys_attributes[pai] & bits;
1c79356b 2379
b0d623f7 2380
1c79356b 2381 /*
2d21ac55
A
2382 * Walk down PV list, checking the mappings until we
2383 * reach the end or we've found the attributes we've asked for
1c79356b
A
2384 * We do not have to lock the pv_list because we have
2385 * the entire pmap system locked.
2386 */
2387 if (pv_h->pmap != PMAP_NULL) {
2388 /*
2389 * There are some mappings.
2390 */
2d21ac55
A
2391 pv_e = (pv_hashed_entry_t)pv_h;
2392 if (attributes != bits) do {
1c79356b 2393
2d21ac55 2394 pmap = pv_e->pmap;
1c79356b
A
2395
2396 {
2d21ac55 2397 vm_map_offset_t va;
1c79356b
A
2398
2399 va = pv_e->va;
2d21ac55
A
2400 /*
2401 * first make sure any processor actively
2402 * using this pmap, flushes its TLB state
2403 */
2404 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 2405
1c79356b 2406 /*
2d21ac55 2407 * pick up modify and/or reference bits from this mapping
1c79356b 2408 */
2d21ac55 2409 pte = pmap_pte(pmap, va);
b0d623f7 2410 attributes |= (int)(*pte & bits);
2d21ac55 2411
1c79356b 2412 }
2d21ac55
A
2413
2414 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2415
2416 } while ((attributes != bits) && (pv_e != (pv_hashed_entry_t)pv_h));
1c79356b 2417 }
2d21ac55
A
2418
2419 UNLOCK_PVH(pai);
2420 return (attributes);
1c79356b
A
2421}
2422
2423/*
2424 * Set specified attribute bits.
2425 */
2426void
2427phys_attribute_set(
2d21ac55 2428 ppnum_t pn,
1c79356b
A
2429 int bits)
2430{
2d21ac55 2431 int pai;
1c79356b 2432
2d21ac55 2433 pmap_intr_assert();
91447636 2434 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2435 if (pn == vm_page_guard_addr)
2436 return;
2437
2438 pai = ppn_to_pai(pn);
2439
2440 if (!managed_page(pai)) {
1c79356b
A
2441 /*
2442 * Not a managed page.
2443 */
2444 return;
2445 }
2446
2d21ac55
A
2447 LOCK_PVH(pai);
2448
2449 pmap_phys_attributes[pai] |= bits;
2450
2451 UNLOCK_PVH(pai);
1c79356b
A
2452}
2453
2454/*
2455 * Set the modify bit on the specified physical page.
2456 */
2457
2458void pmap_set_modify(
55e303ae 2459 ppnum_t pn)
1c79356b 2460{
91447636 2461 phys_attribute_set(pn, PHYS_MODIFIED);
1c79356b
A
2462}
2463
2464/*
2465 * Clear the modify bits on the specified physical page.
2466 */
2467
2468void
2469pmap_clear_modify(
55e303ae 2470 ppnum_t pn)
1c79356b 2471{
91447636 2472 phys_attribute_clear(pn, PHYS_MODIFIED);
1c79356b
A
2473}
2474
2475/*
2476 * pmap_is_modified:
2477 *
2478 * Return whether or not the specified physical page is modified
2479 * by any physical maps.
2480 */
2481
2482boolean_t
2483pmap_is_modified(
55e303ae 2484 ppnum_t pn)
1c79356b 2485{
2d21ac55
A
2486 if (phys_attribute_test(pn, PHYS_MODIFIED))
2487 return TRUE;
2488
2489 return FALSE;
1c79356b
A
2490}
2491
2492/*
2493 * pmap_clear_reference:
2494 *
2495 * Clear the reference bit on the specified physical page.
2496 */
2497
2498void
2499pmap_clear_reference(
55e303ae 2500 ppnum_t pn)
1c79356b 2501{
91447636
A
2502 phys_attribute_clear(pn, PHYS_REFERENCED);
2503}
2504
2505void
2506pmap_set_reference(ppnum_t pn)
2507{
2508 phys_attribute_set(pn, PHYS_REFERENCED);
1c79356b
A
2509}
2510
2511/*
2512 * pmap_is_referenced:
2513 *
2514 * Return whether or not the specified physical page is referenced
2515 * by any physical maps.
2516 */
2517
2518boolean_t
2519pmap_is_referenced(
55e303ae 2520 ppnum_t pn)
1c79356b 2521{
2d21ac55
A
2522 if (phys_attribute_test(pn, PHYS_REFERENCED))
2523 return TRUE;
2524
2525 return FALSE;
91447636
A
2526}
2527
2528/*
2529 * pmap_get_refmod(phys)
2530 * returns the referenced and modified bits of the specified
2531 * physical page.
2532 */
2533unsigned int
2534pmap_get_refmod(ppnum_t pa)
2535{
2d21ac55
A
2536 int refmod;
2537 unsigned int retval = 0;
2538
2539 refmod = phys_attribute_test(pa, PHYS_MODIFIED | PHYS_REFERENCED);
2540
2541 if (refmod & PHYS_MODIFIED)
2542 retval |= VM_MEM_MODIFIED;
2543 if (refmod & PHYS_REFERENCED)
2544 retval |= VM_MEM_REFERENCED;
2545
2546 return (retval);
91447636
A
2547}
2548
2549/*
2550 * pmap_clear_refmod(phys, mask)
2551 * clears the referenced and modified bits as specified by the mask
2552 * of the specified physical page.
2553 */
2554void
2555pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2556{
2557 unsigned int x86Mask;
2558
2559 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2560 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2561 phys_attribute_clear(pa, x86Mask);
1c79356b
A
2562}
2563
1c79356b 2564void
91447636
A
2565invalidate_icache(__unused vm_offset_t addr,
2566 __unused unsigned cnt,
2567 __unused int phys)
1c79356b
A
2568{
2569 return;
2570}
2571void
91447636
A
2572flush_dcache(__unused vm_offset_t addr,
2573 __unused unsigned count,
2574 __unused int phys)
1c79356b
A
2575{
2576 return;
2577}
2578
2d21ac55
A
2579#if CONFIG_DTRACE
2580/*
2581 * Constrain DTrace copyin/copyout actions
2582 */
2583extern kern_return_t dtrace_copyio_preflight(addr64_t);
2584extern kern_return_t dtrace_copyio_postflight(addr64_t);
2585
2586kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
2587{
2588 thread_t thread = current_thread();
2589
2590 if (current_map() == kernel_map)
2591 return KERN_FAILURE;
2592 else if (thread->machine.specFlags & CopyIOActive)
2593 return KERN_FAILURE;
2594 else
2595 return KERN_SUCCESS;
2596}
2597
2598kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
2599{
2600 return KERN_SUCCESS;
2601}
2602#endif /* CONFIG_DTRACE */
2603
0c530ab8 2604#if MACH_KDB
6601e61a 2605
0c530ab8 2606/* show phys page mappings and attributes */
6601e61a 2607
0c530ab8 2608extern void db_show_page(pmap_paddr_t pa);
6601e61a 2609
2d21ac55 2610#if 0
6601e61a 2611void
0c530ab8 2612db_show_page(pmap_paddr_t pa)
6601e61a 2613{
0c530ab8
A
2614 pv_entry_t pv_h;
2615 int pai;
2616 char attr;
2617
2618 pai = pa_index(pa);
2619 pv_h = pai_to_pvh(pai);
1c79356b
A
2620
2621 attr = pmap_phys_attributes[pai];
2d21ac55 2622 printf("phys page %llx ", pa);
1c79356b
A
2623 if (attr & PHYS_MODIFIED)
2624 printf("modified, ");
2625 if (attr & PHYS_REFERENCED)
2626 printf("referenced, ");
2627 if (pv_h->pmap || pv_h->next)
2628 printf(" mapped at\n");
2629 else
2630 printf(" not mapped\n");
2631 for (; pv_h; pv_h = pv_h->next)
2632 if (pv_h->pmap)
2d21ac55 2633 printf("%llx in pmap %p\n", pv_h->va, pv_h->pmap);
1c79356b 2634}
2d21ac55 2635#endif
1c79356b
A
2636
2637#endif /* MACH_KDB */
2638
2639#if MACH_KDB
2d21ac55 2640#if 0
1c79356b
A
2641void db_kvtophys(vm_offset_t);
2642void db_show_vaddrs(pt_entry_t *);
2643
2644/*
2645 * print out the results of kvtophys(arg)
2646 */
2647void
2648db_kvtophys(
2649 vm_offset_t vaddr)
2650{
0c530ab8 2651 db_printf("0x%qx", kvtophys(vaddr));
1c79356b
A
2652}
2653
2654/*
2655 * Walk the page tables.
2656 */
2657void
2658db_show_vaddrs(
2659 pt_entry_t *dirbase)
2660{
2661 pt_entry_t *ptep, *pdep, tmp;
0c530ab8 2662 unsigned int x, y, pdecnt, ptecnt;
1c79356b
A
2663
2664 if (dirbase == 0) {
2665 dirbase = kernel_pmap->dirbase;
2666 }
2667 if (dirbase == 0) {
2668 db_printf("need a dirbase...\n");
2669 return;
2670 }
0c530ab8 2671 dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
1c79356b
A
2672
2673 db_printf("dirbase: 0x%x\n", dirbase);
2674
2675 pdecnt = ptecnt = 0;
2676 pdep = &dirbase[0];
91447636 2677 for (y = 0; y < NPDEPG; y++, pdep++) {
1c79356b
A
2678 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
2679 continue;
2680 }
2681 pdecnt++;
2d21ac55 2682 ptep = (pt_entry_t *) ((unsigned long)(*pdep) & ~INTEL_OFFMASK);
1c79356b 2683 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
91447636 2684 for (x = 0; x < NPTEPG; x++, ptep++) {
1c79356b
A
2685 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
2686 continue;
2687 }
2688 ptecnt++;
2689 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
2690 x,
2691 *ptep,
2692 (y << 22) | (x << 12),
2693 *ptep & ~INTEL_OFFMASK);
2694 }
2695 }
2696
2697 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
2698
2699}
2d21ac55 2700#endif
1c79356b
A
2701#endif /* MACH_KDB */
2702
2703#include <mach_vm_debug.h>
2704#if MACH_VM_DEBUG
2705#include <vm/vm_debug.h>
2706
2707int
2708pmap_list_resident_pages(
91447636
A
2709 __unused pmap_t pmap,
2710 __unused vm_offset_t *listp,
2711 __unused int space)
1c79356b
A
2712{
2713 return 0;
2714}
2715#endif /* MACH_VM_DEBUG */
2716
6601e61a 2717
1c79356b 2718
91447636
A
2719/* temporary workaround */
2720boolean_t
0c530ab8 2721coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
91447636 2722{
0c530ab8 2723#if 0
91447636 2724 pt_entry_t *ptep;
1c79356b 2725
91447636
A
2726 ptep = pmap_pte(map->pmap, va);
2727 if (0 == ptep)
2728 return FALSE;
2729 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
0c530ab8
A
2730#else
2731 return TRUE;
1c79356b 2732#endif
1c79356b
A
2733}
2734
1c79356b 2735
9bccf70c 2736boolean_t
91447636
A
2737phys_page_exists(
2738 ppnum_t pn)
9bccf70c 2739{
91447636
A
2740 assert(pn != vm_page_fictitious_addr);
2741
2742 if (!pmap_initialized)
2743 return (TRUE);
2d21ac55
A
2744
2745 if (pn == vm_page_guard_addr)
2746 return FALSE;
2747
2748 if (!managed_page(ppn_to_pai(pn)))
91447636
A
2749 return (FALSE);
2750
2751 return TRUE;
2752}
2753
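/*
 *	pmap_commpage32_init: alias 'cnt' pages of the kernel commpage into
 *	the user-visible commpage range by copying each kernel pte, marking
 *	it user + global and stripping write permission.
 */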
91447636 2754void
0c530ab8 2755pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
91447636 2756{
2d21ac55
A
2757 int i;
2758 pt_entry_t *opte, *npte;
2759 pt_entry_t pte;
2760 spl_t s;
2761
2762 for (i = 0; i < cnt; i++) {
2763 s = splhigh();
2764 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
2765 if (0 == opte)
2766 panic("kernel_commpage");
2767 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
2768 pte &= ~INTEL_PTE_WRITE; // ensure read only
2769 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
2770 if (0 == npte)
2771 panic("user_commpage");
2772 pmap_store_pte(npte, pte);
2773 splx(s);
2774 kernel_commpage += INTEL_PGBYTES;
2775 user_commpage += INTEL_PGBYTES;
2776 }
91447636
A
2777}
2778
2d21ac55 2779
0c530ab8
A
2780#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
2781pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
2782
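/*
 *	pmap_commpage64_init: capture the kernel commpage ptes -- write
 *	permission stripped, user access added -- into pmap_commpage64_ptes[].
 */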
2783void
2784pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
2785{
2d21ac55
A
2786 int i;
2787 pt_entry_t *kptep;
0c530ab8 2788
2d21ac55 2789 PMAP_LOCK(kernel_pmap);
0c530ab8 2790
2d21ac55
A
2791 for (i = 0; i < cnt; i++) {
2792 kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
2793 if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID)))
2794 panic("pmap_commpage64_init pte");
2795 pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
2796 }
2797 PMAP_UNLOCK(kernel_pmap);
0c530ab8
A
2798}
2799
0c530ab8 2800
91447636 2801static cpu_pmap_t cpu_pmap_master;
91447636
A
2802
2803struct cpu_pmap *
2804pmap_cpu_alloc(boolean_t is_boot_cpu)
2805{
2806 int ret;
2807 int i;
2808 cpu_pmap_t *cp;
91447636 2809 vm_offset_t address;
0c530ab8 2810 vm_map_address_t mapaddr;
91447636 2811 vm_map_entry_t entry;
0c530ab8 2812 pt_entry_t *pte;
91447636
A
2813
2814 if (is_boot_cpu) {
2815 cp = &cpu_pmap_master;
91447636
A
2816 } else {
2817 /*
2818 * The per-cpu pmap data structure itself.
2819 */
2820 ret = kmem_alloc(kernel_map,
2821 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
2822 if (ret != KERN_SUCCESS) {
2823 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
2824 return NULL;
2825 }
2826 bzero((void *)cp, sizeof(cpu_pmap_t));
2827
2828 /*
0c530ab8 2829 * The temporary windows used for copy/zero - see loose_ends.c
91447636 2830 */
0c530ab8
A
2831 ret = vm_map_find_space(kernel_map,
2832 &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
91447636 2833 if (ret != KERN_SUCCESS) {
0c530ab8
A
2834 printf("pmap_cpu_alloc() "
2835 "vm_map_find_space ret=%d\n", ret);
91447636
A
2836 pmap_cpu_free(cp);
2837 return NULL;
2838 }
0c530ab8 2839 address = (vm_offset_t)mapaddr;
4452a7af 2840
0c530ab8 2841 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
2d21ac55
A
2842 spl_t s;
2843 s = splhigh();
0c530ab8
A
2844 while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
2845 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
2846 * (int *) pte = 0;
6601e61a 2847 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
0c530ab8 2848 cp->mapwindow[i].prv_CMAP = pte;
2d21ac55 2849 splx(s);
4452a7af 2850 }
0c530ab8 2851 vm_map_unlock(kernel_map);
4452a7af
A
2852 }
2853
0c530ab8
A
2854 cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
2855 cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
2856 cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
4452a7af 2857
6601e61a 2858 return cp;
4452a7af
A
2859}
2860
2861void
6601e61a 2862pmap_cpu_free(struct cpu_pmap *cp)
4452a7af 2863{
6601e61a 2864 if (cp != NULL && cp != &cpu_pmap_master) {
6601e61a 2865 kfree((void *) cp, sizeof(cpu_pmap_t));
4452a7af 2866 }
4452a7af 2867}
0c530ab8
A
2868
2869
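/*
 *	pmap_get_mapwindow: hand out one of this cpu's copy/zero windows
 *	(set up in pmap_cpu_alloc() above).  The first free window is
 *	claimed by storing 'pentry' into its CMAP pte; the window's TLB
 *	entry is invalidated locally and the window returned.  Callers must
 *	have interrupts or preemption disabled (see the assert).
 *	pmap_put_mapwindow() releases a window by clearing its pte.
 */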
2870mapwindow_t *
2871pmap_get_mapwindow(pt_entry_t pentry)
2872{
2873 mapwindow_t *mp;
2874 int i;
0c530ab8 2875
2d21ac55 2876 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
0c530ab8
A
2877
2878 /*
2879 * Note: 0th map reserved for pmap_pte()
2880 */
2881 for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
2882 mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
2883
2884 if (*mp->prv_CMAP == 0) {
2d21ac55
A
2885 pmap_store_pte(mp->prv_CMAP, pentry);
2886
2887 invlpg((uintptr_t)mp->prv_CADDR);
2888
2889 return (mp);
0c530ab8
A
2890 }
2891 }
2d21ac55
A
2892 panic("pmap_get_mapwindow: no windows available");
2893
2894 return NULL;
2895}
2896
2897
2898void
2899pmap_put_mapwindow(mapwindow_t *mp)
2900{
2901 pmap_store_pte(mp->prv_CMAP, 0);
0c530ab8
A
2902}
2903
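/*
 *	pmap_switch: switch this cpu to the address space of tpmap by
 *	loading its directory base, with interrupts blocked across the
 *	switch.
 */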
0c530ab8
A
2904void
2905pmap_switch(pmap_t tpmap)
2906{
2907 spl_t s;
0c530ab8
A
2908
2909 s = splhigh(); /* Make sure interruptions are disabled */
0c530ab8 2910
b0d623f7 2911 set_dirbase(tpmap, current_thread());
0c530ab8
A
2912
2913 splx(s);
2914}
2915
2916
2917/*
2918 * disable no-execute capability on
2919 * the specified pmap
2920 */
2921void pmap_disable_NX(pmap_t pmap) {
2922
2923 pmap->nx_enabled = 0;
2924}
2925
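/*
 *	pt_fake_zone_info: report page-table page consumption (count,
 *	current and maximum size, element size) in zone-statistics form,
 *	since these pages are not allocated from a real zone.
 */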
2926void
2927pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
2928 vm_size_t *alloc_size, int *collectable, int *exhaustable)
2929{
2930 *count = inuse_ptepages_count;
2931 *cur_size = PAGE_SIZE * inuse_ptepages_count;
2932 *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
2933 *elem_size = PAGE_SIZE;
2934 *alloc_size = PAGE_SIZE;
2935
2936 *collectable = 1;
2937 *exhaustable = 0;
2938}
2939
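/*
 *	Per-cpu high fixed-address mappings: each cpu owns HIGH_CPU_END
 *	slots in the high fixed range.  pmap_cpu_high_map_vaddr() and
 *	pmap_high_map_vaddr() return the virtual address of a slot;
 *	pmap_high_map() installs 'pte' in this cpu's slot for type 'e',
 *	invalidates that page's TLB entry locally and returns the address.
 */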
2940vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
2941{
2942 enum high_fixed_addresses a;
2943 a = e + HIGH_CPU_END * cpu;
2944 return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
2945}
2946
2947vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
2948{
2949 return pmap_cpu_high_map_vaddr(cpu_number(), e);
2950}
2951
2952vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
2953{
2954 enum high_fixed_addresses a;
2955 vm_offset_t vaddr;
2956
2957 a = e + HIGH_CPU_END * cpu_number();
2958 vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
2d21ac55 2959 pmap_store_pte(pte_unique_base + a, pte);
0c530ab8
A
2960
2961 /* TLB flush for this page for this cpu */
2962 invlpg((uintptr_t)vaddr);
2963
2964 return vaddr;
2965}
2966
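/*
 *	pmap_cpuset_NMIPI: NMI every cpu named in cpu_mask and then spin
 *	for up to LockTimeOut; used by pmap_flush_tlbs() when cpus fail to
 *	respond to a TLB-flush IPI.
 */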
935ed37a
A
2967static inline void
2968pmap_cpuset_NMIPI(cpu_set cpu_mask) {
2969 unsigned int cpu, cpu_bit;
2970 uint64_t deadline;
2971
2972 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
2973 if (cpu_mask & cpu_bit)
2974 cpu_NMI_interrupt(cpu);
2975 }
b0d623f7 2976 deadline = mach_absolute_time() + (LockTimeOut);
935ed37a
A
2977 while (mach_absolute_time() < deadline)
2978 cpu_pause();
2979}
2980
0c530ab8
A
2981/*
2982 * Called with pmap locked, we:
2983 * - scan through per-cpu data to see which other cpus need to flush
2984 * - send an IPI to each non-idle cpu to be flushed
2985 * - wait for all to signal back that they are inactive or we see that
2986 * they are in an interrupt handler or at a safe point
2987 * - flush the local tlb if it is active for this pmap
2988 * - return ... the caller will unlock the pmap
2989 */
2990void
2991pmap_flush_tlbs(pmap_t pmap)
2992{
2993 unsigned int cpu;
2994 unsigned int cpu_bit;
2995 cpu_set cpus_to_signal;
2996 unsigned int my_cpu = cpu_number();
2997 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
2998 boolean_t flush_self = FALSE;
2999 uint64_t deadline;
3000
2d21ac55
A
3001 assert((processor_avail_count < 2) ||
3002 (ml_get_interrupts_enabled() && get_preemption_level() != 0));
0c530ab8
A
3003
3004 /*
3005 * Scan other cpus for matching active or task CR3.
3006 * For idle cpus (with no active map) we mark them invalid but
3007 * don't signal -- they'll check as they go busy.
3008 * Note: for the kernel pmap we look for 64-bit shared address maps.
3009 */
3010 cpus_to_signal = 0;
3011 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
3012 if (!cpu_datap(cpu)->cpu_running)
3013 continue;
2d21ac55
A
3014 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
3015 (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) ||
0c530ab8
A
3016 (pmap->pm_shared) ||
3017 ((pmap == kernel_pmap) &&
3018 (!CPU_CR3_IS_ACTIVE(cpu) ||
3019 cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
3020 if (cpu == my_cpu) {
3021 flush_self = TRUE;
3022 continue;
3023 }
3024 cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
3025 __asm__ volatile("mfence");
3026
3027 if (CPU_CR3_IS_ACTIVE(cpu)) {
3028 cpus_to_signal |= cpu_bit;
3029 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
3030 }
3031 }
3032 }
3033
2d21ac55
A
3034 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
3035 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8 3036
2d21ac55 3037 if (cpus_to_signal) {
935ed37a
A
3038 cpu_set cpus_to_respond = cpus_to_signal;
3039
0c530ab8
A
3040 deadline = mach_absolute_time() + LockTimeOut;
3041 /*
3042 * Wait for those other cpus to acknowledge
3043 */
935ed37a
A
3044 while (cpus_to_respond != 0) {
3045 if (mach_absolute_time() > deadline) {
b0d623f7
A
3046 if (mp_recent_debugger_activity())
3047 continue;
593a1d5f
A
3048 if (!panic_active()) {
3049 pmap_tlb_flush_timeout = TRUE;
3050 pmap_cpuset_NMIPI(cpus_to_respond);
3051 }
935ed37a
A
3052 panic("pmap_flush_tlbs() timeout: "
3053 "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
3054 pmap, cpus_to_respond);
3055 }
3056
3057 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
3058 if ((cpus_to_respond & cpu_bit) != 0) {
3059 if (!cpu_datap(cpu)->cpu_running ||
3060 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
3061 !CPU_CR3_IS_ACTIVE(cpu)) {
3062 cpus_to_respond &= ~cpu_bit;
3063 }
3064 cpu_pause();
2d21ac55 3065 }
935ed37a
A
3066 if (cpus_to_respond == 0)
3067 break;
0c530ab8 3068 }
0c530ab8 3069 }
0c530ab8 3070 }
0c530ab8
A
3071 /*
3072 * Flush local tlb if required.
3073 * We need this flush even if the pmap being changed
3074 * is the user map... in case we do a copyin/out
3075 * before returning to user mode.
3076 */
3077 if (flush_self)
3078 flush_tlb();
3079
b0d623f7
A
3080 if ((pmap == kernel_pmap) && (flush_self != TRUE)) {
3081 panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
3082 }
3083
2d21ac55
A
3084 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
3085 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8
A
3086}
3087
3088void
3089process_pmap_updates(void)
3090{
2d21ac55
A
3091 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
3092
0c530ab8
A
3093 flush_tlb();
3094
3095 current_cpu_datap()->cpu_tlb_invalid = FALSE;
3096 __asm__ volatile("mfence");
3097}
3098
3099void
3100pmap_update_interrupt(void)
3101{
2d21ac55
A
3102 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
3103 0, 0, 0, 0, 0);
0c530ab8
A
3104
3105 process_pmap_updates();
3106
2d21ac55
A
3107 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
3108 0, 0, 0, 0, 0);
0c530ab8
A
3109}
3110
3111
3112unsigned int pmap_cache_attributes(ppnum_t pn) {
3113
2d21ac55 3114 if (!managed_page(ppn_to_pai(pn)))
0c530ab8
A
3115 return (VM_WIMG_IO);
3116
3117 return (VM_WIMG_COPYBACK);
3118}
3119
3120#ifdef PMAP_DEBUG
3121void
3122pmap_dump(pmap_t p)
3123{
3124 int i;
3125
3126 kprintf("pmap 0x%x\n",p);
3127
3128 kprintf(" pm_cr3 0x%llx\n",p->pm_cr3);
3129 kprintf(" pm_pml4 0x%x\n",p->pm_pml4);
3130 kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt);
3131
3132 kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4);
3133 for (i=0;i<8;i++)
3134 kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
3135}
3136
3137void pmap_dump_wrap(void)
3138{
3139 pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
3140}
3141
3142void
3143dump_4GB_pdpt(pmap_t p)
3144{
3145 int spl;
3146 pdpt_entry_t *user_pdptp;
3147 pdpt_entry_t *kern_pdptp;
3148 pdpt_entry_t *pml4p;
3149
3150 spl = splhigh();
3151 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
3152 splx(spl);
3153 pmap_expand_pml4(p, 0x0);
3154 spl = splhigh();
3155 }
3156 kern_pdptp = kernel_pmap->pm_pdpt;
3157 if (kern_pdptp == NULL)
3158 panic("kern_pdptp == NULL");
3159 kprintf("dump_4GB_pdpt(%p)\n"
3160 "kern_pdptp=%p (phys=0x%016llx)\n"
3161 "\t 0x%08x: 0x%016llx\n"
3162 "\t 0x%08x: 0x%016llx\n"
3163 "\t 0x%08x: 0x%016llx\n"
3164 "\t 0x%08x: 0x%016llx\n"
3165 "\t 0x%08x: 0x%016llx\n"
3166 "user_pdptp=%p (phys=0x%016llx)\n"
3167 "\t 0x%08x: 0x%016llx\n"
3168 "\t 0x%08x: 0x%016llx\n"
3169 "\t 0x%08x: 0x%016llx\n"
3170 "\t 0x%08x: 0x%016llx\n"
3171 "\t 0x%08x: 0x%016llx\n",
3172 p, kern_pdptp, kvtophys(kern_pdptp),
3173 kern_pdptp+0, *(kern_pdptp+0),
3174 kern_pdptp+1, *(kern_pdptp+1),
3175 kern_pdptp+2, *(kern_pdptp+2),
3176 kern_pdptp+3, *(kern_pdptp+3),
3177 kern_pdptp+4, *(kern_pdptp+4),
3178 user_pdptp, kvtophys(user_pdptp),
3179 user_pdptp+0, *(user_pdptp+0),
3180 user_pdptp+1, *(user_pdptp+1),
3181 user_pdptp+2, *(user_pdptp+2),
3182 user_pdptp+3, *(user_pdptp+3),
3183 user_pdptp+4, *(user_pdptp+4));
3184 kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
3185 p->pm_cr3, p->pm_hold, p->pm_pml4);
3186 pml4p = (pdpt_entry_t *)p->pm_hold;
3187 if (pml4p == NULL)
3188 panic("user pml4p == NULL");
3189 kprintf("\t 0x%08x: 0x%016llx\n"
3190 "\t 0x%08x: 0x%016llx\n",
3191 pml4p+0, *(pml4p),
3192 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
3193 kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
3194 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
3195 pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
3196 if (pml4p == NULL)
3197 panic("kern pml4p == NULL");
3198 kprintf("\t 0x%08x: 0x%016llx\n"
3199 "\t 0x%08x: 0x%016llx\n",
3200 pml4p+0, *(pml4p),
3201 pml4p+511, *(pml4p+511));
3202 splx(spl);
3203}
3204
3205void dump_4GB_pdpt_thread(thread_t tp)
3206{
3207 dump_4GB_pdpt(tp->map->pmap);
3208}
3209
3210
3211#endif
b0d623f7 3212