/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <string.h>
#include <mach_kdb.h>
#include <mach_ldebug.h>

#include <libkern/OSAtomic.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/queue.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/seg.h>
#include <i386/serial_io.h>
#include <i386/cpu_capabilities.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/tsc.h>
#include <i386/acpi.h>
#include <i386/pmap_internal.h>

#if	MACH_KDB
#include <ddb/db_command.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_print.h>
#endif	/* MACH_KDB */

#include <vm/vm_protos.h>

#include <i386/mp.h>
#include <i386/mp_desc.h>
#include <i386/i386_lowmem.h>


/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
#ifdef DEBUGINTERRUPTS
#define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);}
#else
#define pmap_intr_assert()
#endif

#ifdef IWANTTODEBUG
#undef	DEBUG
#define DEBUG 1
#define POSTCODE_DELAY 1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */

/*
 * Forward declarations for internal functions.
 */

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		phys_attribute_clear(
			ppnum_t		phys,
			int		bits);

int		phys_attribute_test(
			ppnum_t		phys,
			int		bits);

void		phys_attribute_set(
			ppnum_t		phys,
			int		bits);

void		pmap_set_reference(
			ppnum_t		pn);

boolean_t	phys_page_exists(
			ppnum_t		pn);


#ifdef PMAP_DEBUG
void	dump_pmap(pmap_t);
void	dump_4GB_pdpt(pmap_t p);
void	dump_4GB_pdpt_thread(thread_t tp);
#endif

int		nx_enabled = 1;			/* enable no-execute protection */
#ifdef CONFIG_EMBEDDED
int		allow_data_exec  = 0;		/* no exec from data, embedded is hardcore like that */
#else
int		allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
#endif
int		allow_stack_exec = 0;		/* No apps may execute from the stack by default */

boolean_t	cpu_64bit  = FALSE;
boolean_t	pmap_trace = FALSE;

/*
 * When spinning through pmap_remove, ensure that we don't spend
 * too much time with preemption disabled.
 * The current threshold is 20us.
 */
#define MAX_PREEMPTION_LATENCY_NS 20000

uint64_t max_preemption_latency_tsc = 0;


pv_hashed_entry_t     *pv_hash_table;	/* hash lists */

uint32_t npvhash = 0;


/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_hashed_list_zone if it becomes empty.
 */
pv_rooted_entry_t	pv_free_list = PV_ROOTED_ENTRY_NULL;	/* free list at SPLVM */
pv_hashed_entry_t	pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t	pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(,pv_hashed_free_list_lock)
decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
decl_simple_lock_data(,pv_hash_table_lock)

int			pv_free_count = 0;
int			pv_hashed_free_count = 0;
int			pv_kern_free_count = 0;
int			pv_hashed_kern_free_count = 0;

zone_t			pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */

static zone_t		pdpt_zone;

/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */

char	*pv_lock_table;		/* pointer to array of bits */
#define pv_lock_table_size(n)		(((n)+BYTE_SIZE-1)/BYTE_SIZE)

char	*pv_hash_lock_table;
#define pv_hash_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

static struct vm_object kptobj_object_store;
static vm_object_t	kptobj;

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char		*pmap_phys_attributes;
unsigned int	last_managed_page = 0;

extern ppnum_t	lowest_lo;
extern ppnum_t	lowest_hi;
extern ppnum_t	highest_hi;

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
#define	PDE_MAPPED_SIZE		(pdetova(1))
uint64_t pde_mapped_size;
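/*
 * Illustrative note (not in the original source): with PAE paging, as used
 * here, pdetova(1) == 1ULL << PDESHIFT, so one page-directory entry spans
 * 512 PTEs * 4KB = 2MB of virtual space; pde_mapped_size is re-set to
 * 512*4096 for the 64-bit case in pmap_bootstrap() below.
 */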

/*
 *	Locking and TLB invalidation
 */

/*
 *	Locking Protocols: (changed 2/2007 JK)
 *
 *	There are two structures in the pmap module that need locking:
 *	the pmaps themselves, and the per-page pv_lists (which are locked
 *	by locking the pv_lock_table entry that corresponds to the pv_head
 *	for the list in question.)  Most routines want to lock a pmap and
 *	then do operations in it that require pv_list locking -- however
 *	pmap_remove_all and pmap_copy_on_write operate on a physical page
 *	basis and want to do the locking in the reverse order, i.e. lock
 *	a pv_list and then go through all the pmaps referenced by that list.
 *
 *	The system wide pmap lock has been removed. Now, paths take a lock
 *	on the pmap before changing its 'shape' and the reverse order lockers
 *	(coming in by phys ppn) take a lock on the corresponding pv and then
 *	retest to be sure nothing changed during the window before they locked
 *	and can then run up/down the pv lists holding the list lock. This also
 *	lets the pmap layer run (nearly completely) interrupt enabled, unlike
 *	previously.
 */


/*
 * PV locking
 */

#define LOCK_PVH(index)	{		\
	mp_disable_preemption();	\
	lock_pvh_pai(index);		\
}

#define UNLOCK_PVH(index) {		\
	unlock_pvh_pai(index);		\
	mp_enable_preemption();		\
}

/*
 * PV hash locking
 */

#define LOCK_PV_HASH(hash)	lock_hash_hash(hash)

#define UNLOCK_PV_HASH(hash)	unlock_hash_hash(hash)
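/*
 * Illustrative sketch (not part of the original source): a physical-page
 * based ("reverse order") walker takes the pv lock first and only then
 * follows the pmaps named on the pv list, per the locking protocol above.
 * Roughly:
 *
 *	LOCK_PVH(pai);		// pv entry for this physical page
 *	... walk the pv list, locking/retesting each referenced pmap ...
 *	UNLOCK_PVH(pai);
 *
 * whereas virtual-address based paths lock the pmap itself before touching
 * any pv entries.
 */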

#if	USLOCK_DEBUG
extern int	max_lock_loops;
#define LOOP_VAR							\
	unsigned int	loop_count;					\
	loop_count = disable_serial_output ? max_lock_loops		\
					   : max_lock_loops*100
#define LOOP_CHECK(msg, pmap)						\
	if (--loop_count == 0) {					\
		mp_disable_preemption();				\
		kprintf("%s: cpu %d pmap %x\n",				\
			msg, cpu_number(), pmap);			\
		Debugger("deadlock detection");				\
		mp_enable_preemption();					\
		loop_count = max_lock_loops;				\
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */

unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

pd_entry_t	high_shared_pde;
pd_entry_t	commpage64_pde;

struct zone	*pmap_zone;		/* zone of pmap structures */

int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;

addr64_t	kernel64_cr3;
boolean_t	no_shared_cr3 = FALSE;	/* -no_shared_cr3 boot arg */


/*
 *	Pmap cache.  Cache is threaded through ref_count field of pmap.
 *	Max will eventually be constant -- variable for experimentation.
 */
int		pmap_cache_max = 32;
int		pmap_alloc_chunk = 8;
pmap_t		pmap_cache_list;
int		pmap_cache_count;
decl_simple_lock_data(,pmap_cache_lock)

extern char	end;

static int	nkpt;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;
/*
 * for legacy, returns the address of the pde entry.
 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
 * then returns the mapped address of the pde entry in that page
 */
pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t *pde;

	if (!cpu_64bit || (m == kernel_pmap)) {
		pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
	} else {
		assert(m);
		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
		pde = pmap64_pde(m, v);
	}
	return pde;
}


/*
 * The single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap. We allocate this page in kernel vm (to save
 * us one level of page table dynamic mapping).
 * This returns the address of the requested pml4 entry in the top level page.
 */
static inline
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
}

/*
 * maps in the pml4 page, if any, containing the pdpt entry requested
 * and returns the address of the pdpt entry in that mapped page
 */
pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;
	int		i;

	assert(pmap);
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return(0);
	}

	pml4 = pmap64_pml4(pmap, vaddr);

	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {

		newpf = *pml4 & PG_FRAME;


		for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pdpt_window_index++;
		if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
		return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
	}

	return (NULL);
}

/*
 * maps in the pdpt page, if any, containing the pde entry requested
 * and returns the address of the pde entry in that mapped page
 */
pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;
	int		i;

	assert(pmap);
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return(0);
	}

	/* if (vaddr & (1ULL << 63)) panic("neg addr");*/
	pdpt = pmap64_pdpt(pmap, vaddr);

	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {

		newpf = *pdpt & PG_FRAME;

		for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pde_window_index++;
		if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
		return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
	}

	return (NULL);
}

/*
 * Because the page tables (top 3 levels) are mapped into per-cpu windows,
 * callers must either disable interrupts or disable preemption before calling
 * one of the pte mapping routines (e.g. pmap_pte()), as the returned vaddr
 * is in one of those mapped windows and that cannot be allowed to change until
 * the caller is done using the returned pte pointer. When done, the caller
 * restores interrupts or preemption to its previous state, after which point
 * the vaddr for the returned pte can no longer be used.
 */


/*
 * return address of mapped pte for vaddr va in pmap pmap.
 * must be called with preemption or interrupts disabled
 * if targeted pmap is not the kernel pmap
 * since we may be passing back a virtual address that is
 * associated with this cpu... preemption or interrupts
 * must remain disabled until the caller is done using
 * the pointer that was passed back.
 *
 * maps the pde page, if any, containing the pte in and returns
 * the address of the pte in that mapped page
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;
	int		i;

	assert(pmap);
	pde = pmap_pde(pmap,vaddr);

	if (pde && ((*pde & INTEL_PTE_VALID))) {
		if (*pde & INTEL_PTE_PS)
			return pde;
		if (pmap == kernel_pmap)
			return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
#if TESTING
		if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
			panic("pmap_pte: unsafe call");
#endif
		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

		newpf = *pde & PG_FRAME;

		for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
			}
		}

		current_cpu_datap()->cpu_pmap->pte_window_index++;
		if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
		return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
			((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
	}

	return(NULL);
}
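/*
 * Illustrative sketch (not part of the original source): because the pointer
 * returned by pmap_pte() for a non-kernel pmap refers to a per-cpu mapping
 * window, a caller must keep preemption (or interrupts) disabled for as long
 * as it dereferences that pointer, e.g.:
 *
 *	mp_disable_preemption();
 *	ptep = pmap_pte(map, vaddr);
 *	if (ptep != PT_ENTRY_NULL)
 *		pte = *ptep;		// safe: the window cannot be recycled
 *	mp_enable_preemption();		// ptep must not be used after this
 */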


/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	int		ps;

	ps = PAGE_SIZE;
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
			   (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
		virt += ps;
		start_addr += ps;
	}
	return(virt);
}

/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range.
 *	Sets no-cache, A, D.
 *	Otherwise like pmap_map.
 */
vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;
	spl_t		spl;

	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
			template |= INTEL_PTE_PTA;
	}

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;


	while (start_addr < end_addr) {
		spl = splhigh();
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		splx(spl);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}


	flush_tlb();
	return(virt);
}

extern	char		*first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;

void
pmap_cpu_init(void)
{
	/*
	 * Here early in the life of a processor (from cpu_mode_init()).
	 */

	/*
	 * Initialize the per-cpu, TLB-related fields.
	 */
	current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
	current_cpu_datap()->cpu_tlb_invalid = FALSE;
}

vm_offset_t
pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
{
	vm_offset_t	ve = pmap_index_to_virt(e);
	pt_entry_t	*ptep;
	pmap_paddr_t	pa;
	int		i;
	spl_t		s;

	assert(0 == (va & PAGE_MASK));  /* expecting page aligned */
	s = splhigh();
	ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);

	for (i=0; i< sz; i++) {
		pa = (pmap_paddr_t) kvtophys(va);
		pmap_store_pte(ptep, (pa & PG_FRAME)
				| INTEL_PTE_VALID
				| INTEL_PTE_GLOBAL
				| INTEL_PTE_RW
				| INTEL_PTE_REF
				| INTEL_PTE_MOD);
		va += PAGE_SIZE;
		ptep++;
	}
	splx(s);
	return ve;
}

vm_offset_t
pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
{
	enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
	return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
}

void pmap_init_high_shared(void);

extern vm_offset_t gdtptr, idtptr;

extern uint32_t low_intstack;

extern struct fake_descriptor ldt_desc_pattern;
extern struct fake_descriptor tss_desc_pattern;

extern char hi_remap_text, hi_remap_etext;
extern char t_zero_div;

pt_entry_t *pte_unique_base;

void
pmap_init_high_shared(void)
{

	vm_offset_t	haddr;
	spl_t		s;
#if MACH_KDB
	struct i386_tss *ttss;
#endif

	cpu_desc_index_t *cdi = &cpu_data_master.cpu_desc_index;

	kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
		HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
	s = splhigh();
	pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
	splx(s);

	if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
				HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
		panic("tramps too large");
	haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
					(vm_offset_t) &hi_remap_text, 3);
	kprintf("tramp: 0x%x, ",haddr);
	/* map gdt up high and update ptr for reload */
	haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
					(vm_offset_t) master_gdt, 1);
	cdi->cdi_gdt.ptr = (void *)haddr;
	kprintf("GDT: 0x%x, ",haddr);
	/* map ldt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
					(vm_offset_t) master_ldt,
					HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
	cdi->cdi_ldt = (struct fake_descriptor *)haddr;
	kprintf("LDT: 0x%x, ",haddr);
	/* put new ldt addr into gdt */
	struct fake_descriptor temp_fake_desc;
	temp_fake_desc = ldt_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);

	*(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc;
	*(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc;

	/* map idt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
					(vm_offset_t) master_idt, 1);
	cdi->cdi_idt.ptr = (void *)haddr;
	kprintf("IDT: 0x%x, ", haddr);
	/* remap ktss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
					(vm_offset_t) &master_ktss, 1);

	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc;
	kprintf("KTSS: 0x%x, ",haddr);
#if MACH_KDB
	/* remap dbtss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
					(vm_offset_t) &master_dbtss, 1);
	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc;
	ttss = (struct i386_tss *)haddr;
	kprintf("DBTSS: 0x%x, ",haddr);
#endif	/* MACH_KDB */

	/* remap dftss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_dftss, 1);
	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc;
	kprintf("DFTSS: 0x%x\n",haddr);

	/* remap mctss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_mctss, 1);
	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc;
	kprintf("MCTSS: 0x%x\n",haddr);

	cpu_desc_load(&cpu_data_master);
}


/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 */

void
pmap_bootstrap(
	__unused vm_offset_t	load_start,
	boolean_t		IA32e)
{
	vm_offset_t	va;
	pt_entry_t	*pte;
	int		i;
	pdpt_entry_t	*pdpt;
	spl_t		s;

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */
	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */


	kernel_pmap = &kernel_pmap_store;
	kernel_pmap->ref_count = 1;
	kernel_pmap->nx_enabled = FALSE;
	kernel_pmap->pm_task_map = TASK_MAP_32BIT;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
	kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
	pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
	kernel_pmap->pm_pdpt = pdpt;
	kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);


	va = (vm_offset_t)kernel_pmap->dirbase;
	/* setup self referential mapping(s) */
	for (i = 0; i< NPGPTD; i++, pdpt++) {
		pmap_paddr_t pa;
		pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
		pmap_store_pte(
			(pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
			(pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
				INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
		pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
	}

	cpu_64bit = IA32e;

	lo_kernel_cr3 = kernel_pmap->pm_cr3;
	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;

	/* save the value we stuff into created pmaps to share the gdts etc */
	high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
	/* make sure G bit is on for high shared pde entry */
	high_shared_pde |= INTEL_PTE_GLOBAL;
	s = splhigh();
	pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
	splx(s);

	nkpt = NKPT;
	OSAddAtomic(NKPT, &inuse_ptepages_count);

	virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)

	va = virtual_avail;
	pte = vtopte(va);

	for (i=0; i<PMAP_NWINDOWS; i++) {
		SYSMAP(caddr_t,
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
		       1);
		*current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
	}

	/* DMAP user for debugger */
	SYSMAP(caddr_t, DMAP1, DADDR1, 1);
	SYSMAP(caddr_t, DMAP2, DADDR2, 1);  /* XXX temporary - can remove */

	virtual_avail = va;

	if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
		if (0 != ((npvhash+1) & npvhash)) {
			kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
			npvhash = NPVHASH;
		}
	} else {
		npvhash = NPVHASH;
	}
	printf("npvhash=%d\n",npvhash);

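	/*
	 * Illustrative note (not in the original source): npvhash must be of
	 * the form (2^N)-1 because it is used directly as a bit mask when
	 * indexing pv_hash_table, roughly: bucket = hash_of(pmap, va) & npvhash;
	 * any other value would leave some hash buckets unreachable.
	 */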
	simple_lock_init(&kernel_pmap->lock, 0);
	simple_lock_init(&pv_hashed_free_list_lock, 0);
	simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
	simple_lock_init(&pv_hash_table_lock,0);

	pmap_init_high_shared();

	pde_mapped_size = PDE_MAPPED_SIZE;

	if (cpu_64bit) {
		pdpt_entry_t *ppdpt = IdlePDPT;
		pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
		pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
		int istate = ml_set_interrupts_enabled(FALSE);

		/*
		 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
		 * with page bits set for the correct IA-32e operation and so that
		 * the legacy-mode IdlePDPT is retained for slave processor start-up.
		 * This is necessary due to the incompatible use of page bits between
		 * 64-bit and legacy modes.
		 */
		kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
		kernel_pmap->pm_pml4 = IdlePML4;
		kernel_pmap->pm_pdpt = (pd_entry_t *)
					((unsigned int)IdlePDPT64 | KERNBASE );
#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
		pmap_store_pte(kernel_pmap->pm_pml4,
			       (uint32_t)IdlePDPT64 | PAGE_BITS);
		pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
		pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
		pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
		pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);

		/*
		 * The kernel is also mapped in the uber-space at the 4GB starting
		 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
		 */
		pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));

		kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;

		/* Re-initialize descriptors and prepare to switch modes */
		cpu_desc_init64(&cpu_data_master);
		current_cpu_datap()->cpu_is64bit = TRUE;
		current_cpu_datap()->cpu_active_cr3 = kernel64_cr3;

		pde_mapped_size = 512*4096;

		ml_set_interrupts_enabled(istate);
	}

	/* Sets 64-bit mode if required. */
	cpu_mode_init(&cpu_data_master);
	/* Update in-kernel CPUID information if we're now in 64-bit mode */
	if (IA32e)
		cpuid_set_info();

	kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;

	kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
			VADDR(KPTDI,0), virtual_end);
	printf("PAE enabled\n");
	if (cpu_64bit) {
		printf("64 bit mode enabled\n");
		kprintf("64 bit mode enabled\n");
	}

	kprintf("Available physical space from 0x%llx to 0x%llx\n",
			avail_start, avail_end);

	/*
	 * By default for 64-bit users loaded at 4GB, share kernel mapping.
	 * But this may be overridden by the -no_shared_cr3 boot-arg.
	 */
	if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
		kprintf("Shared kernel address space disabled\n");
	}

#ifdef	PMAP_TRACES
	if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif	/* PMAP_TRACES */
}

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	long		npages;
	vm_map_offset_t	vaddr;
	vm_offset_t	addr;
	vm_size_t	s, vsize;
	ppnum_t		ppn;

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/*
	 * zero bias all these arrays now instead of off avail_start
	 * so we cover all memory
	 */

	npages = (long)i386_btop(avail_end);
	s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
			 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
			 + pv_lock_table_size(npages)
			 + pv_hash_lock_table_size((npvhash+1))
			 + npages);

	s = round_page(s);
	if (kernel_memory_allocate(kernel_map, &addr, s, 0,
				   KMA_KOBJECT | KMA_PERMANENT)
	    != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);

	vaddr = addr;
	vsize = s;

#if PV_DEBUG
	if (0 == npvhash) panic("npvhash not initialized");
#endif

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_rooted_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_hash_table = (pv_hashed_entry_t *)addr;
	addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pv_hash_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));

	pmap_phys_attributes = (char *) addr;
	{
		unsigned int i;
		unsigned int pn;
		ppnum_t last_pn;
		pmap_memory_region_t *pmptr = pmap_memory_regions;

		last_pn = (ppnum_t)i386_btop(avail_end);

		for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
			if (pmptr->type == kEfiConventionalMemory) {

				for (pn = pmptr->base; pn <= pmptr->end; pn++) {
					if (pn < last_pn) {
						pmap_phys_attributes[pn] |= PHYS_MANAGED;

						if (pn > last_managed_page)
							last_managed_page = pn;

						if (pn < lowest_lo)
							pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
						else if (pn >= lowest_hi && pn <= highest_hi)
							pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
					}
				}
			}
		}
	}
	while (vsize) {
		ppn = pmap_find_phys(kernel_pmap, vaddr);

		pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;

		vaddr += PAGE_SIZE;
		vsize -= PAGE_SIZE;
	}

	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	zone_change(pmap_zone, Z_NOENCRYPT, TRUE);

	s = (vm_size_t) sizeof(struct pv_hashed_entry);
	pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
	zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);

	s = 63;
	pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
	zone_change(pdpt_zone, Z_NOENCRYPT, TRUE);

	kptobj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj);
	kernel_pmap->pm_obj = kptobj;

	/* create pv entries for kernel pages mapped by low level
	   startup code.  these have to exist so we can pmap_remove()
	   e.g. kext pages from the middle of our addr space */

	vaddr = (vm_map_offset_t)0;
	for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
		pv_rooted_entry_t pv_e;

		pv_e = pai_to_pvh(ppn);
		pv_e->va = vaddr;
		vaddr += PAGE_SIZE;
		pv_e->pmap = kernel_pmap;
		queue_init(&pv_e->qlink);
	}

	pmap_initialized = TRUE;

	/*
	 *	Initialize pmap cache.
	 */
	pmap_cache_list = PMAP_NULL;
	pmap_cache_count = 0;
	simple_lock_init(&pmap_cache_lock, 0);

	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);

}


#define managed_page(x)	( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )

/*
 * this function is only used for debugging from the vm layer
 */
boolean_t
pmap_verify_free(
		 ppnum_t pn)
{
	pv_rooted_entry_t	pv_h;
	int		pai;
	boolean_t	result;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return(TRUE);

	if (pn == vm_page_guard_addr)
		return TRUE;

	pai = ppn_to_pai(pn);
	if (!managed_page(pai))
		return(FALSE);
	pv_h = pai_to_pvh(pn);
	result = (pv_h->pmap == PMAP_NULL);
	return(result);
}

boolean_t
pmap_is_empty(
	pmap_t		pmap,
	vm_map_offset_t	va_start,
	vm_map_offset_t	va_end)
{
	vm_map_offset_t	offset;
	ppnum_t		phys_page;

	if (pmap == PMAP_NULL) {
		return TRUE;
	}

	/*
	 * Check the resident page count
	 * - if it's zero, the pmap is completely empty.
	 * This short-circuit test prevents a virtual address scan which is
	 * painfully slow for 64-bit spaces.
	 * This assumes the count is correct
	 * .. the debug kernel ought to be checking perhaps by page table walk.
	 */
	if (pmap->stats.resident_count == 0)
		return TRUE;

	for (offset = va_start;
	     offset < va_end;
	     offset += PAGE_SIZE_64) {
		phys_page = pmap_find_phys(pmap, offset);
		if (phys_page) {
			if (pmap != kernel_pmap &&
			    pmap->pm_task_map == TASK_MAP_32BIT &&
			    offset >= HIGH_MEM_BASE) {
				/*
				 * The "high_shared_pde" is used to share
				 * the entire top-most 2MB of address space
				 * between the kernel and all 32-bit tasks.
				 * So none of this can be removed from 32-bit
				 * tasks.
				 * Let's pretend there's nothing up
				 * there...
				 */
				return TRUE;
			}
			kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				pmap, va_start, va_end, phys_page, offset);
			return FALSE;
		}
	}

	return TRUE;
}


/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	vm_map_size_t	sz,
	boolean_t	is_64bit)
{
	pmap_t		p;
	int		i;
	vm_offset_t	va;
	vm_size_t	size;
	pdpt_entry_t	*pdpt;
	pml4_entry_t	*pml4p;
	pd_entry_t	*pdp;
	int		template;
	spl_t		s;

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
		   (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0);

	size = (vm_size_t) sz;

	/*
	 *	A software use-only map doesn't even need a map.
	 */

	if (size != 0) {
		return(PMAP_NULL);
	}

	p = (pmap_t) zalloc(pmap_zone);
	if (PMAP_NULL == p)
		panic("pmap_create zalloc");

	/* init counts now since we'll be bumping some */
	simple_lock_init(&p->lock, 0);
	p->stats.resident_count = 0;
	p->stats.resident_max = 0;
	p->stats.wired_count = 0;
	p->ref_count = 1;
	p->nx_enabled = 1;
	p->pm_shared = FALSE;

	assert(!is_64bit || cpu_64bit);
	p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;

	if (!cpu_64bit) {
		/* legacy 32 bit setup */
		/* in the legacy case the pdpt layer is hardwired to 4 entries and each
		 * entry covers 1GB of addr space */
		if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
			panic("pmap_create kmem_alloc_kobject");
		p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
		if ((vm_offset_t)NULL == p->pm_hold) {
			panic("pdpt zalloc");
		}
		pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
			panic("pmap_create vm_object_allocate");

		memset((char *)p->dirbase, 0, NBPTD);

		va = (vm_offset_t)p->dirbase;
		p->pdirbase = kvtophys(va);

		template = INTEL_PTE_VALID;
		for (i = 0; i< NPGPTD; i++, pdpt++ ) {
			pmap_paddr_t pa;
			pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
			pmap_store_pte(pdpt, pa | template);
		}

		/* map the high shared pde */
		s = splhigh();
		pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
		splx(s);

	} else {
		/* 64 bit setup  */

		/* alloc the pml4 page in kernel vm */
		if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
			panic("pmap_create kmem_alloc_kobject pml4");

		memset((char *)p->pm_hold, 0, PAGE_SIZE);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);

		OSAddAtomic(1, &inuse_ptepages_count);

		/* allocate the vm_objs to hold the pdpt, pde and pte pages */

		if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
			panic("pmap_create pte obj");

		/* uber space points to uber mapped kernel */
		s = splhigh();
		pml4p = pmap64_pml4(p, 0ULL);
		pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);


		if (!is_64bit) {
			while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
				splx(s);
				pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
				s = splhigh();
			}
			pmap_store_pte(pdp, high_shared_pde);
		}
		splx(s);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
		   (int) p, is_64bit, 0, 0, 0);

	return(p);
}

/*
 * The following routines implement the shared address optimization for 64-bit
 * users with a 4GB page zero.
 *
 * pmap_set_4GB_pagezero()
 *	is called in the exec and fork paths to mirror the kernel's
 *	mapping in the bottom 4G of the user's pmap. The task mapping changes
 *	from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns
 *	without doing anything if the -no_shared_cr3 boot-arg is set.
 *
 * pmap_clear_4GB_pagezero()
 *	is called in the exec/exit paths to undo this mirror. The task mapping
 *	reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's
 *	CR3 by calling pmap_load_kernel_cr3().
 *
 * pmap_load_kernel_cr3()
 *	loads cr3 with the kernel's page table. In addition to being called
 *	by pmap_clear_4GB_pagezero(), it is used both prior to teardown and
 *	when we go idle in the context of a shared map.
 *
 * Further notes on per-cpu data used:
 *
 *	cpu_kernel_cr3	is the cr3 for the kernel's pmap.
 *			This is loaded in a trampoline on entering the kernel
 *			from a 32-bit user (or non-shared-cr3 64-bit user).
 *	cpu_task_cr3	is the cr3 for the current thread.
 *			This is loaded in a trampoline as we exit the kernel.
 *	cpu_active_cr3	reflects the cr3 currently loaded.
 *			However, the low order bit is set when the
 *			processor is idle or interrupts are disabled
 *			while the system pmap lock is held. It is used by
 *			tlb shoot-down.
 *	cpu_task_map	indicates whether the task cr3 belongs to
 *			a 32-bit, a 64-bit or a 64-bit shared map.
 *			The latter allows the avoidance of the cr3 load
 *			on kernel entry and exit.
 *	cpu_tlb_invalid	set TRUE when a tlb flush is requested.
 *			If the cr3 is "inactive" (the cpu is idle or the
 *			system-wide pmap lock is held) this is not serviced by
 *			an IPI but at the time when the cr3 becomes "active".
 */

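/*
 * Illustrative sketch (not part of the original source): the expected call
 * sequence for the shared-cr3 optimization described above is roughly:
 *
 *	pmap_set_4GB_pagezero(p);	// exec/fork of a 4GB-pagezero 64-bit task
 *	...				// task runs as TASK_MAP_64BIT_SHARED
 *	pmap_clear_4GB_pagezero(p);	// exec of a new image, or exit
 *
 * The first call returns early when -no_shared_cr3 is set; the second returns
 * early unless the task is actually in the shared state.
 */
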
void
pmap_set_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;

	assert(p->pm_task_map != TASK_MAP_32BIT);

	/* Kernel-shared cr3 may be disabled by boot arg. */
	if (no_shared_cr3)
		return;

	/*
	 * Set the bottom 4 3rd-level pte's to be the kernel's.
	 */
	PMAP_LOCK(p);
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(p);
		pmap_expand_pml4(p, 0x0);
		PMAP_LOCK(p);
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
	pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
	pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
	pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
	p->pm_task_map = TASK_MAP_64BIT_SHARED;
	PMAP_UNLOCK(p);
}

void
pmap_clear_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;

	if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
		return;

	PMAP_LOCK(p);

	p->pm_task_map = TASK_MAP_64BIT;

	pmap_load_kernel_cr3();

	user_pdptp = pmap64_pdpt(p, 0x0);
	pmap_store_pte(user_pdptp+0, 0);
	pmap_store_pte(user_pdptp+1, 0);
	pmap_store_pte(user_pdptp+2, 0);
	pmap_store_pte(user_pdptp+3, 0);

	PMAP_UNLOCK(p);
}

void
pmap_load_kernel_cr3(void)
{
	uint64_t kernel_cr3;

	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	/*
	 * Reload cr3 with the true kernel cr3.
	 */
	kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
	set64_cr3(kernel_cr3);
	current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */

void
pmap_destroy(
	register pmap_t	p)
{
	register int	c;

	if (p == PMAP_NULL)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
		   (int) p, 0, 0, 0, 0);

	PMAP_LOCK(p);

	c = --p->ref_count;

	if (c == 0) {
		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		PMAP_UPDATE_TLBS(p,
				 0x0ULL,
				 0xFFFFFFFFFFFFF000ULL);
	}

	PMAP_UNLOCK(p);

	if (c != 0) {
		PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
			   (int) p, 1, 0, 0, 0);
		return;	/* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	if (!cpu_64bit) {
		OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count);

		kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
		zfree(pdpt_zone, (void *)p->pm_hold);

		vm_object_deallocate(p->pm_obj);
	} else {
		/* 64 bit */
		int inuse_ptepages = 0;

		/* free 64 bit mode structs */
		inuse_ptepages++;
		kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);

		inuse_ptepages += p->pm_obj_pml4->resident_page_count;
		vm_object_deallocate(p->pm_obj_pml4);

		inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
		vm_object_deallocate(p->pm_obj_pdpt);

		inuse_ptepages += p->pm_obj->resident_page_count;
		vm_object_deallocate(p->pm_obj);

		OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count);
	}
	zfree(pmap_zone, p);

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);

}

/*
 *	Add a reference to the specified pmap.
 */

void
pmap_reference(
	register pmap_t	p)
{

	if (p != PMAP_NULL) {
		PMAP_LOCK(p);
		p->ref_count++;
		PMAP_UNLOCK(p);
	}
}


/*
 *	Remove phys addr if mapped in specified map
 *
 */
void
pmap_remove_some_phys(
	__unused pmap_t		map,
	__unused ppnum_t	pn)
{

/* Implement to support working set code */

}

/*
 *	Routine:
 *		pmap_disconnect
 *
 *	Function:
 *		Disconnect all mappings for this page and return reference and change status
 *		in generic format.
 *
 */
unsigned int pmap_disconnect(
	ppnum_t pa)
{
	pmap_page_protect(pa, 0);	/* disconnect the page */
	return (pmap_get_refmod(pa));	/* return ref/chg status */
}

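/*
 * Illustrative sketch (not part of the original source): a typical caller in
 * the VM layer uses the generic-format bits returned above, e.g.:
 *
 *	unsigned int refmod = pmap_disconnect(pn);
 *	if (refmod & VM_MEM_MODIFIED)
 *		... mark the page dirty ...
 *	if (refmod & VM_MEM_REFERENCED)
 *		... note the reference ...
 *
 * (VM_MEM_MODIFIED / VM_MEM_REFERENCED being the bits reported by
 * pmap_get_refmod().)
 */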
1c79356b
A
1625/*
1626 * Set the physical protection on the
1627 * specified range of this map as requested.
1628 * Will not increase permissions.
1629 */
1630void
1631pmap_protect(
1632 pmap_t map,
0c530ab8
A
1633 vm_map_offset_t sva,
1634 vm_map_offset_t eva,
1c79356b
A
1635 vm_prot_t prot)
1636{
1637 register pt_entry_t *pde;
1638 register pt_entry_t *spte, *epte;
0c530ab8
A
1639 vm_map_offset_t lva;
1640 vm_map_offset_t orig_sva;
0c530ab8 1641 boolean_t set_NX;
2d21ac55
A
1642 int num_found = 0;
1643
1644 pmap_intr_assert();
1c79356b
A
1645
1646 if (map == PMAP_NULL)
1647 return;
1648
0c530ab8
A
1649 if (prot == VM_PROT_NONE) {
1650 pmap_remove(map, sva, eva);
1c79356b
A
1651 return;
1652 }
1653
2d21ac55
A
1654 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
1655 (int) map,
1656 (int) (sva>>32), (int) sva,
1657 (int) (eva>>32), (int) eva);
1658
0c530ab8
A
1659 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
1660 set_NX = FALSE;
1661 else
1662 set_NX = TRUE;
1663
2d21ac55 1664 PMAP_LOCK(map);
1c79356b 1665
0c530ab8
A
1666 orig_sva = sva;
1667 while (sva < eva) {
1668 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
1669 if (lva > eva)
1670 lva = eva;
1671 pde = pmap_pde(map, sva);
1672 if (pde && (*pde & INTEL_PTE_VALID)) {
1673 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
1674 spte = &spte[ptenum(sva)];
1675 epte = &spte[intel_btop(lva-sva)];
1c79356b
A
1676
1677 while (spte < epte) {
2d21ac55 1678
0c530ab8
A
1679 if (*spte & INTEL_PTE_VALID) {
1680
1681 if (prot & VM_PROT_WRITE)
2d21ac55 1682 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_WRITE));
0c530ab8 1683 else
2d21ac55 1684 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_WRITE));
0c530ab8
A
1685
1686 if (set_NX == TRUE)
2d21ac55 1687 pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_NX));
0c530ab8 1688 else
2d21ac55 1689 pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_NX));
0c530ab8
A
1690
1691 num_found++;
0c530ab8 1692 }
1c79356b
A
1693 spte++;
1694 }
1695 }
0c530ab8 1696 sva = lva;
1c79356b 1697 }
0c530ab8 1698 if (num_found)
2d21ac55
A
1699 PMAP_UPDATE_TLBS(map, orig_sva, eva);
1700
1701 PMAP_UNLOCK(map);
1702
1703 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
1704 0, 0, 0, 0, 0);
91447636 1705
1c79356b
A
1706}
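/*
 * A minimal usage sketch for the routine above, assuming a hypothetical
 * helper name: requesting VM_PROT_READ clears INTEL_PTE_WRITE on every valid
 * PTE in the range, and sets NX unless VM_PROT_EXECUTE is included or the
 * NX policy is disabled.  VM_PROT_NONE is not a protection downgrade; as the
 * code above shows, it removes the mappings entirely.
 */
#if 0	/* illustrative sketch only */
static void
pmap_make_range_readonly(pmap_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	pmap_protect(map, start, end, VM_PROT_READ);
}
#endif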
1707
0c530ab8
A
1708/* Map a (possibly) autogenned block */
1709void
1710pmap_map_block(
1711 pmap_t pmap,
1712 addr64_t va,
1713 ppnum_t pa,
1714 uint32_t size,
1715 vm_prot_t prot,
1716 int attr,
1717 __unused unsigned int flags)
1718{
2d21ac55 1719 uint32_t page;
0c530ab8 1720
2d21ac55
A
1721 for (page = 0; page < size; page++) {
1722 pmap_enter(pmap, va, pa, prot, attr, TRUE);
1723 va += PAGE_SIZE;
1724 pa++;
1725 }
0c530ab8 1726}
1c79356b
A
1727
1728
1c79356b
A
1729/*
1730 * Routine: pmap_change_wiring
1731 * Function: Change the wiring attribute for a map/virtual-address
1732 * pair.
1733 * In/out conditions:
1734 * The mapping must already exist in the pmap.
1735 */
1736void
1737pmap_change_wiring(
1738 register pmap_t map,
0c530ab8 1739 vm_map_offset_t vaddr,
1c79356b
A
1740 boolean_t wired)
1741{
1742 register pt_entry_t *pte;
1c79356b
A
1743
1744 /*
1745 * We must grab the pmap system lock because we may
1746 * change a pte_page queue.
1747 */
2d21ac55 1748 PMAP_LOCK(map);
1c79356b 1749
0c530ab8 1750 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1c79356b
A
1751 panic("pmap_change_wiring: pte missing");
1752
1753 if (wired && !iswired(*pte)) {
1754 /*
1755 * wiring down mapping
1756 */
b0d623f7 1757 OSAddAtomic(+1, &map->stats.wired_count);
0c530ab8 1758 pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
1c79356b
A
1759 }
1760 else if (!wired && iswired(*pte)) {
1761 /*
1762 * unwiring mapping
1763 */
1764 assert(map->stats.wired_count >= 1);
b0d623f7 1765 OSAddAtomic(-1, &map->stats.wired_count);
0c530ab8 1766 pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
1c79356b
A
1767 }
1768
2d21ac55 1769 PMAP_UNLOCK(map);
1c79356b
A
1770}
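/*
 * A minimal sketch of the wiring contract above, assuming a hypothetical
 * helper and a virtual address whose mapping is already known to exist
 * (pmap_change_wiring() panics otherwise, per the In/out condition).
 */
#if 0	/* illustrative sketch only */
static void
pmap_wire_then_unwire(pmap_t map, vm_map_offset_t vaddr)
{
	pmap_change_wiring(map, vaddr, TRUE);	/* sets INTEL_PTE_WIRED, bumps stats.wired_count */
	pmap_change_wiring(map, vaddr, FALSE);	/* clears INTEL_PTE_WIRED, drops stats.wired_count */
}
#endif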
1771
55e303ae 1772
1c79356b
A
1773/*
1774 * Routine: pmap_extract
1775 * Function:
1776 * Extract the physical page address associated
1777 * with the given map/virtual_address pair.
91447636
A
 1778 * Changed to a shim for backwards compatibility, but it will not
 1779 * work on 64-bit systems. Some old drivers that we cannot
 1780 * change still need this.
1c79356b
A
1781 */
1782
1783vm_offset_t
1784pmap_extract(
1785 register pmap_t pmap,
0c530ab8 1786 vm_map_offset_t vaddr)
1c79356b 1787{
0c530ab8
A
1788 ppnum_t ppn;
1789 vm_offset_t paddr;
91447636 1790
0c530ab8
A
1791 paddr = (vm_offset_t)0;
1792 ppn = pmap_find_phys(pmap, vaddr);
2d21ac55 1793
0c530ab8 1794 if (ppn) {
b0d623f7 1795 paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK);
0c530ab8
A
1796 }
1797 return (paddr);
1c79356b
A
1798}
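/*
 * Since the shim above truncates the result to vm_offset_t, a 64-bit-safe
 * caller can go through pmap_find_phys() and widen the address itself.
 * A minimal sketch, with a hypothetical helper name:
 */
#if 0	/* illustrative sketch only */
static addr64_t
pmap_extract64_sketch(pmap_t pmap, vm_map_offset_t vaddr)
{
	ppnum_t	ppn;

	ppn = pmap_find_phys(pmap, vaddr);
	if (ppn == 0)
		return ((addr64_t) 0);
	/* page frame plus the offset within the page */
	return (((addr64_t) i386_ptob(ppn)) | ((addr64_t) vaddr & INTEL_OFFMASK));
}
#endif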
1799
1c79356b 1800void
0c530ab8
A
1801pmap_expand_pml4(
1802 pmap_t map,
1803 vm_map_offset_t vaddr)
1c79356b 1804{
1c79356b 1805 register vm_page_t m;
91447636 1806 register pmap_paddr_t pa;
0c530ab8 1807 uint64_t i;
1c79356b 1808 spl_t spl;
55e303ae 1809 ppnum_t pn;
0c530ab8 1810 pml4_entry_t *pml4p;
89b3af67 1811
0c530ab8
A
1812 if (kernel_pmap == map) panic("expand kernel pml4");
1813
1814 spl = splhigh();
2d21ac55
A
1815 pml4p = pmap64_pml4(map, vaddr);
1816 splx(spl);
1817 if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
1c79356b
A
1818
1819 /*
0c530ab8 1820 * Allocate a VM page for the pml4 page
1c79356b
A
1821 */
1822 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1823 VM_PAGE_WAIT();
1824
1825 /*
91447636 1826 * put the page into the pmap's obj list so it
1c79356b
A
1827 * can be found later.
1828 */
55e303ae
A
1829 pn = m->phys_page;
1830 pa = i386_ptob(pn);
0c530ab8
A
1831 i = pml4idx(map, vaddr);
1832
2d21ac55
A
1833 /*
1834 * Zero the page.
1835 */
1836 pmap_zero_page(pn);
0c530ab8 1837
b0d623f7 1838 vm_page_lockspin_queues();
1c79356b 1839 vm_page_wire(m);
2d21ac55 1840 vm_page_unlock_queues();
1c79356b 1841
b0d623f7
A
1842 OSAddAtomic(1, &inuse_ptepages_count);
1843
2d21ac55
A
 1844	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1845 vm_object_lock(map->pm_obj_pml4);
1c79356b 1846
2d21ac55 1847 PMAP_LOCK(map);
1c79356b
A
1848 /*
1849 * See if someone else expanded us first
1850 */
0c530ab8 1851 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
2d21ac55
A
1852 PMAP_UNLOCK(map);
1853 vm_object_unlock(map->pm_obj_pml4);
1854
b0d623f7 1855 VM_PAGE_FREE(m);
2d21ac55 1856
b0d623f7 1857 OSAddAtomic(-1, &inuse_ptepages_count);
1c79356b
A
1858 return;
1859 }
0b4c1975 1860 pmap_set_noencrypt(pn);
1c79356b 1861
2d21ac55
A
1862#if 0 /* DEBUG */
1863 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
1864 panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1865 map, map->pm_obj_pml4, vaddr, i);
1866 }
1867#endif
1868 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
1869 vm_object_unlock(map->pm_obj_pml4);
1870
1c79356b
A
1871 /*
 1872	 *	Set the PML4 entry for this new PDPT page.
1c79356b 1873 */
0c530ab8 1874 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
c0fea474 1875
0c530ab8
A
1876 pmap_store_pte(pml4p, pa_to_pte(pa)
1877 | INTEL_PTE_VALID
1878 | INTEL_PTE_USER
1879 | INTEL_PTE_WRITE);
5d5c5d0d 1880
2d21ac55 1881 PMAP_UNLOCK(map);
89b3af67 1882
6601e61a 1883 return;
0c530ab8 1884
6601e61a 1885}
89b3af67 1886
6601e61a 1887void
0c530ab8
A
1888pmap_expand_pdpt(
1889 pmap_t map,
1890 vm_map_offset_t vaddr)
6601e61a 1891{
0c530ab8
A
1892 register vm_page_t m;
1893 register pmap_paddr_t pa;
1894 uint64_t i;
1895 spl_t spl;
1896 ppnum_t pn;
1897 pdpt_entry_t *pdptp;
89b3af67 1898
0c530ab8 1899 if (kernel_pmap == map) panic("expand kernel pdpt");
89b3af67 1900
0c530ab8 1901 spl = splhigh();
2d21ac55
A
1902 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
1903 splx(spl);
1904 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
1905 spl = splhigh();
1906 }
1907 splx(spl);
4452a7af 1908
0c530ab8
A
1909 /*
1910 * Allocate a VM page for the pdpt page
1911 */
1912 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1913 VM_PAGE_WAIT();
4452a7af 1914
4452a7af 1915 /*
0c530ab8
A
1916 * put the page into the pmap's obj list so it
1917 * can be found later.
4452a7af 1918 */
0c530ab8
A
1919 pn = m->phys_page;
1920 pa = i386_ptob(pn);
1921 i = pdptidx(map, vaddr);
4452a7af 1922
2d21ac55
A
1923 /*
1924 * Zero the page.
1925 */
1926 pmap_zero_page(pn);
0c530ab8 1927
b0d623f7 1928 vm_page_lockspin_queues();
0c530ab8 1929 vm_page_wire(m);
2d21ac55 1930 vm_page_unlock_queues();
0c530ab8 1931
b0d623f7
A
1932 OSAddAtomic(1, &inuse_ptepages_count);
1933
2d21ac55
A
 1934	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1935 vm_object_lock(map->pm_obj_pdpt);
0c530ab8 1936
2d21ac55 1937 PMAP_LOCK(map);
0c530ab8
A
1938 /*
1939 * See if someone else expanded us first
1940 */
1941 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
2d21ac55
A
1942 PMAP_UNLOCK(map);
1943 vm_object_unlock(map->pm_obj_pdpt);
1944
b0d623f7 1945 VM_PAGE_FREE(m);
2d21ac55 1946
b0d623f7 1947 OSAddAtomic(-1, &inuse_ptepages_count);
0c530ab8
A
1948 return;
1949 }
0b4c1975 1950 pmap_set_noencrypt(pn);
0c530ab8 1951
2d21ac55
A
1952#if 0 /* DEBUG */
1953 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
1954 panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1955 map, map->pm_obj_pdpt, vaddr, i);
1956 }
1957#endif
1958 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
1959 vm_object_unlock(map->pm_obj_pdpt);
1960
0c530ab8
A
1961 /*
 1962	 *	Set the PDPT entry for this new page-directory page.
0c530ab8 1963 */
0c530ab8
A
1964 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
1965
1966 pmap_store_pte(pdptp, pa_to_pte(pa)
1967 | INTEL_PTE_VALID
1968 | INTEL_PTE_USER
1969 | INTEL_PTE_WRITE);
1970
2d21ac55 1971 PMAP_UNLOCK(map);
0c530ab8
A
1972
1973 return;
1974
1975}
1976
1977
1978
1979/*
1980 * Routine: pmap_expand
1981 *
1982 * Expands a pmap to be able to map the specified virtual address.
1983 *
1984 * Allocates new virtual memory for the P0 or P1 portion of the
1985 * pmap, then re-maps the physical pages that were in the old
1986 * pmap to be in the new pmap.
1987 *
1988 * Must be called with the pmap system and the pmap unlocked,
1989 * since these must be unlocked to use vm_allocate or vm_deallocate.
1990 * Thus it must be called in a loop that checks whether the map
1991 * has been expanded enough.
1992 * (We won't loop forever, since page tables aren't shrunk.)
1993 */
1994void
1995pmap_expand(
1996 pmap_t map,
1997 vm_map_offset_t vaddr)
1998{
1999 pt_entry_t *pdp;
2000 register vm_page_t m;
2001 register pmap_paddr_t pa;
2002 uint64_t i;
2003 spl_t spl;
2004 ppnum_t pn;
2005
2006 /*
 2007	 * If this is not the kernel map (while we are still in compat kernel mode)
 2008	 * and we are running 64-bit, propagate the expansion upwards.
2009 */
2010
2011 if (cpu_64bit && (map != kernel_pmap)) {
2d21ac55
A
2012 spl = splhigh();
2013 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
2014 splx(spl);
2015 pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
2016 spl = splhigh();
2017 }
2018 splx(spl);
0c530ab8
A
2019 }
2020
0c530ab8
A
2021 /*
2022 * Allocate a VM page for the pde entries.
2023 */
2024 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2025 VM_PAGE_WAIT();
2026
2027 /*
2028 * put the page into the pmap's obj list so it
2029 * can be found later.
2030 */
2031 pn = m->phys_page;
2032 pa = i386_ptob(pn);
2033 i = pdeidx(map, vaddr);
2034
2d21ac55
A
2035 /*
2036 * Zero the page.
2037 */
2038 pmap_zero_page(pn);
0c530ab8 2039
b0d623f7 2040 vm_page_lockspin_queues();
0c530ab8 2041 vm_page_wire(m);
0c530ab8 2042 vm_page_unlock_queues();
0c530ab8 2043
b0d623f7
A
2044 OSAddAtomic(1, &inuse_ptepages_count);
2045
2d21ac55
A
 2046	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
2047 vm_object_lock(map->pm_obj);
0c530ab8 2048
2d21ac55 2049 PMAP_LOCK(map);
0c530ab8
A
2050 /*
2051 * See if someone else expanded us first
2052 */
2d21ac55 2053
0c530ab8 2054 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
2d21ac55
A
2055 PMAP_UNLOCK(map);
2056 vm_object_unlock(map->pm_obj);
0c530ab8 2057
b0d623f7 2058 VM_PAGE_FREE(m);
2d21ac55 2059
b0d623f7 2060 OSAddAtomic(-1, &inuse_ptepages_count);
0c530ab8
A
2061 return;
2062 }
0b4c1975 2063 pmap_set_noencrypt(pn);
0c530ab8 2064
2d21ac55
A
2065#if 0 /* DEBUG */
2066 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
2067 panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
2068 map, map->pm_obj, vaddr, i);
2069 }
2070#endif
2071 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2072 vm_object_unlock(map->pm_obj);
0c530ab8
A
2073
2074 /*
2d21ac55 2075 * refetch while locked
0c530ab8
A
2076 */
2077
2d21ac55
A
2078 pdp = pmap_pde(map, vaddr);
2079
2080 /*
2081 * Set the page directory entry for this page table.
2082 */
0c530ab8
A
2083 pmap_store_pte(pdp, pa_to_pte(pa)
2084 | INTEL_PTE_VALID
2085 | INTEL_PTE_USER
2086 | INTEL_PTE_WRITE);
0c530ab8 2087
2d21ac55 2088 PMAP_UNLOCK(map);
0c530ab8
A
2089
2090 return;
2091}
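/*
 * A minimal caller-side sketch of the contract documented above:
 * pmap_expand() is called with the pmap unlocked, inside a loop that
 * re-checks whether the page table for the address now exists.  The
 * helper name is hypothetical and the block is compiled out.
 */
#if 0	/* illustrative sketch only */
static void
pmap_expand_until_present(pmap_t map, vm_map_offset_t vaddr)
{
	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL)
		pmap_expand(map, vaddr);	/* may race; the loop re-checks */
}
#endif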
2092
2093
2094/*
2095 * pmap_sync_page_data_phys(ppnum_t pa)
2096 *
2097 * Invalidates all of the instruction cache on a physical page and
2098 * pushes any dirty data from the data cache for the same physical page
2099 * Not required in i386.
2100 */
2101void
2102pmap_sync_page_data_phys(__unused ppnum_t pa)
2103{
2104 return;
2105}
2106
2107/*
2108 * pmap_sync_page_attributes_phys(ppnum_t pa)
2109 *
2110 * Write back and invalidate all cachelines on a physical page.
2111 */
2112void
2113pmap_sync_page_attributes_phys(ppnum_t pa)
2114{
2115 cache_flush_page_phys(pa);
2116}
2117
2d21ac55
A
2118
2119
2120#ifdef CURRENTLY_UNUSED_AND_UNTESTED
2121
0c530ab8
A
2122int collect_ref;
2123int collect_unref;
2124
2125/*
2126 * Routine: pmap_collect
2127 * Function:
2128 * Garbage collects the physical map system for
2129 * pages which are no longer used.
2130 * Success need not be guaranteed -- that is, there
2131 * may well be pages which are not referenced, but
2132 * others may be collected.
2133 * Usage:
2134 * Called by the pageout daemon when pages are scarce.
2135 */
2136void
2137pmap_collect(
2138 pmap_t p)
2139{
2140 register pt_entry_t *pdp, *ptp;
2141 pt_entry_t *eptp;
2142 int wired;
0c530ab8
A
2143
2144 if (p == PMAP_NULL)
2145 return;
2146
2147 if (p == kernel_pmap)
2148 return;
2149
2150 /*
2151 * Garbage collect map.
2152 */
2d21ac55 2153 PMAP_LOCK(p);
0c530ab8
A
2154
2155 for (pdp = (pt_entry_t *)p->dirbase;
4452a7af
A
2156 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2157 pdp++)
2158 {
2159 if (*pdp & INTEL_PTE_VALID) {
2160 if(*pdp & INTEL_PTE_REF) {
0c530ab8 2161 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
4452a7af
A
2162 collect_ref++;
2163 } else {
2164 collect_unref++;
2165 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2166 eptp = ptp + NPTEPG;
2167
2168 /*
2169 * If the pte page has any wired mappings, we cannot
2170 * free it.
2171 */
2172 wired = 0;
2173 {
2174 register pt_entry_t *ptep;
2175 for (ptep = ptp; ptep < eptp; ptep++) {
2176 if (iswired(*ptep)) {
2177 wired = 1;
5d5c5d0d 2178 break;
1c79356b
A
2179 }
2180 }
2181 }
2182 if (!wired) {
2183 /*
2184 * Remove the virtual addresses mapped by this pte page.
2185 */
2186 pmap_remove_range(p,
91447636 2187 pdetova(pdp - (pt_entry_t *)p->dirbase),
1c79356b
A
2188 ptp,
2189 eptp);
2190
2191 /*
2192 * Invalidate the page directory pointer.
2193 */
0c530ab8 2194 pmap_store_pte(pdp, 0x0);
91447636 2195
2d21ac55 2196 PMAP_UNLOCK(p);
1c79356b
A
2197
2198 /*
2199 * And free the pte page itself.
2200 */
2201 {
2202 register vm_page_t m;
2203
91447636 2204 vm_object_lock(p->pm_obj);
2d21ac55 2205
91447636 2206 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1c79356b
A
2207 if (m == VM_PAGE_NULL)
2208 panic("pmap_collect: pte page not in object");
2d21ac55 2209
b0d623f7
A
2210 VM_PAGE_FREE(m);
2211
2212 OSAddAtomic(-1, &inuse_ptepages_count);
2d21ac55 2213
91447636 2214 vm_object_unlock(p->pm_obj);
1c79356b
A
2215 }
2216
2d21ac55 2217 PMAP_LOCK(p);
1c79356b 2218 }
91447636
A
2219 }
2220 }
1c79356b 2221 }
0c530ab8 2222
2d21ac55
A
2223 PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
2224 PMAP_UNLOCK(p);
1c79356b
A
2225 return;
2226
2227}
2d21ac55 2228#endif
1c79356b 2229
1c79356b 2230
1c79356b 2231void
2d21ac55 2232pmap_copy_page(ppnum_t src, ppnum_t dst)
1c79356b 2233{
2d21ac55
A
2234 bcopy_phys((addr64_t)i386_ptob(src),
2235 (addr64_t)i386_ptob(dst),
2236 PAGE_SIZE);
1c79356b 2237}
1c79356b 2238
1c79356b
A
2239
2240/*
2241 * Routine: pmap_pageable
2242 * Function:
2243 * Make the specified pages (by pmap, offset)
2244 * pageable (or not) as requested.
2245 *
2246 * A page which is not pageable may not take
2247 * a fault; therefore, its page table entry
2248 * must remain valid for the duration.
2249 *
2250 * This routine is merely advisory; pmap_enter
2251 * will specify that these pages are to be wired
2252 * down (or not) as appropriate.
2253 */
2254void
2255pmap_pageable(
91447636 2256 __unused pmap_t pmap,
0c530ab8
A
2257 __unused vm_map_offset_t start_addr,
2258 __unused vm_map_offset_t end_addr,
91447636 2259 __unused boolean_t pageable)
1c79356b
A
2260{
2261#ifdef lint
91447636 2262 pmap++; start_addr++; end_addr++; pageable++;
1c79356b
A
2263#endif /* lint */
2264}
2265
2266/*
2267 * Clear specified attribute bits.
2268 */
2269void
2270phys_attribute_clear(
2d21ac55 2271 ppnum_t pn,
1c79356b
A
2272 int bits)
2273{
2d21ac55
A
2274 pv_rooted_entry_t pv_h;
2275 register pv_hashed_entry_t pv_e;
1c79356b
A
2276 register pt_entry_t *pte;
2277 int pai;
2278 register pmap_t pmap;
1c79356b 2279
2d21ac55 2280 pmap_intr_assert();
91447636 2281 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2282 if (pn == vm_page_guard_addr)
2283 return;
2284
2285 pai = ppn_to_pai(pn);
2286
2287 if (!managed_page(pai)) {
1c79356b
A
2288 /*
2289 * Not a managed page.
2290 */
2291 return;
2292 }
2293
b0d623f7 2294
2d21ac55
A
2295 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
2296 (int) pn, bits, 0, 0, 0);
1c79356b 2297
1c79356b
A
2298 pv_h = pai_to_pvh(pai);
2299
2d21ac55
A
2300 LOCK_PVH(pai);
2301
1c79356b
A
2302 /*
2303 * Walk down PV list, clearing all modify or reference bits.
2304 * We do not have to lock the pv_list because we have
2305 * the entire pmap system locked.
2306 */
2307 if (pv_h->pmap != PMAP_NULL) {
2308 /*
2309 * There are some mappings.
2310 */
1c79356b 2311
2d21ac55
A
2312 pv_e = (pv_hashed_entry_t)pv_h;
2313
2314 do {
1c79356b 2315 pmap = pv_e->pmap;
1c79356b
A
2316
2317 {
2d21ac55 2318 vm_map_offset_t va;
1c79356b
A
2319
2320 va = pv_e->va;
1c79356b 2321
2d21ac55
A
2322 /*
2323 * Clear modify and/or reference bits.
2324 */
91447636 2325
0c530ab8
A
2326 pte = pmap_pte(pmap, va);
2327 pmap_update_pte(pte, *pte, (*pte & ~bits));
c910b4d9
A
2328 /* Ensure all processors using this translation
2329 * invalidate this TLB entry. The invalidation *must* follow
2330 * the PTE update, to ensure that the TLB shadow of the
2331 * 'D' bit (in particular) is synchronized with the
2332 * updated PTE.
2333 */
2334 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 2335 }
91447636 2336
2d21ac55 2337 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1c79356b 2338
2d21ac55
A
2339 } while (pv_e != (pv_hashed_entry_t)pv_h);
2340 }
1c79356b
A
2341 pmap_phys_attributes[pai] &= ~bits;
2342
2d21ac55
A
2343 UNLOCK_PVH(pai);
2344
2345 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
2346 0, 0, 0, 0, 0);
2347
1c79356b
A
2348}
2349
2350/*
2351 * Check specified attribute bits.
2352 */
2d21ac55 2353int
1c79356b 2354phys_attribute_test(
2d21ac55 2355 ppnum_t pn,
1c79356b
A
2356 int bits)
2357{
2d21ac55
A
2358 pv_rooted_entry_t pv_h;
2359 register pv_hashed_entry_t pv_e;
1c79356b
A
2360 register pt_entry_t *pte;
2361 int pai;
2362 register pmap_t pmap;
2d21ac55 2363 int attributes = 0;
1c79356b 2364
2d21ac55 2365 pmap_intr_assert();
91447636 2366 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2367 if (pn == vm_page_guard_addr)
2368 return 0;
2369
2370 pai = ppn_to_pai(pn);
2371
2372 if (!managed_page(pai)) {
1c79356b
A
2373 /*
2374 * Not a managed page.
2375 */
2d21ac55 2376 return (0);
1c79356b
A
2377 }
2378
0c530ab8
A
2379 /*
2380 * super fast check... if bits already collected
2381 * no need to take any locks...
2382 * if not set, we need to recheck after taking
2383 * the lock in case they got pulled in while
2384 * we were waiting for the lock
2385 */
2d21ac55
A
2386 if ( (pmap_phys_attributes[pai] & bits) == bits)
2387 return (bits);
2388
0c530ab8
A
2389 pv_h = pai_to_pvh(pai);
2390
2d21ac55 2391 LOCK_PVH(pai);
1c79356b 2392
2d21ac55 2393 attributes = pmap_phys_attributes[pai] & bits;
1c79356b 2394
b0d623f7 2395
1c79356b 2396 /*
2d21ac55
A
2397 * Walk down PV list, checking the mappings until we
2398 * reach the end or we've found the attributes we've asked for
1c79356b
A
2399 * We do not have to lock the pv_list because we have
2400 * the entire pmap system locked.
2401 */
2402 if (pv_h->pmap != PMAP_NULL) {
2403 /*
2404 * There are some mappings.
2405 */
2d21ac55
A
2406 pv_e = (pv_hashed_entry_t)pv_h;
2407 if (attributes != bits) do {
1c79356b 2408
2d21ac55 2409 pmap = pv_e->pmap;
1c79356b
A
2410
2411 {
2d21ac55 2412 vm_map_offset_t va;
1c79356b
A
2413
2414 va = pv_e->va;
2d21ac55
A
2415 /*
2416 * first make sure any processor actively
2417 * using this pmap, flushes its TLB state
2418 */
2419 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b 2420
1c79356b 2421 /*
2d21ac55 2422 * pick up modify and/or reference bits from this mapping
1c79356b 2423 */
2d21ac55 2424 pte = pmap_pte(pmap, va);
b0d623f7 2425 attributes |= (int)(*pte & bits);
2d21ac55 2426
1c79356b 2427 }
2d21ac55
A
2428
2429 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
2430
2431 } while ((attributes != bits) && (pv_e != (pv_hashed_entry_t)pv_h));
1c79356b 2432 }
2d21ac55
A
2433
2434 UNLOCK_PVH(pai);
2435 return (attributes);
1c79356b
A
2436}
2437
2438/*
2439 * Set specified attribute bits.
2440 */
2441void
2442phys_attribute_set(
2d21ac55 2443 ppnum_t pn,
1c79356b
A
2444 int bits)
2445{
2d21ac55 2446 int pai;
1c79356b 2447
2d21ac55 2448 pmap_intr_assert();
91447636 2449 assert(pn != vm_page_fictitious_addr);
2d21ac55
A
2450 if (pn == vm_page_guard_addr)
2451 return;
2452
2453 pai = ppn_to_pai(pn);
2454
2455 if (!managed_page(pai)) {
1c79356b
A
2456 /*
2457 * Not a managed page.
2458 */
2459 return;
2460 }
2461
2d21ac55
A
2462 LOCK_PVH(pai);
2463
2464 pmap_phys_attributes[pai] |= bits;
2465
2466 UNLOCK_PVH(pai);
1c79356b
A
2467}
2468
2469/*
2470 * Set the modify bit on the specified physical page.
2471 */
2472
2473void pmap_set_modify(
55e303ae 2474 ppnum_t pn)
1c79356b 2475{
91447636 2476 phys_attribute_set(pn, PHYS_MODIFIED);
1c79356b
A
2477}
2478
2479/*
2480 * Clear the modify bits on the specified physical page.
2481 */
2482
2483void
2484pmap_clear_modify(
55e303ae 2485 ppnum_t pn)
1c79356b 2486{
91447636 2487 phys_attribute_clear(pn, PHYS_MODIFIED);
1c79356b
A
2488}
2489
2490/*
2491 * pmap_is_modified:
2492 *
2493 * Return whether or not the specified physical page is modified
2494 * by any physical maps.
2495 */
2496
2497boolean_t
2498pmap_is_modified(
55e303ae 2499 ppnum_t pn)
1c79356b 2500{
2d21ac55
A
2501 if (phys_attribute_test(pn, PHYS_MODIFIED))
2502 return TRUE;
2503
2504 return FALSE;
1c79356b
A
2505}
2506
2507/*
2508 * pmap_clear_reference:
2509 *
2510 * Clear the reference bit on the specified physical page.
2511 */
2512
2513void
2514pmap_clear_reference(
55e303ae 2515 ppnum_t pn)
1c79356b 2516{
91447636
A
2517 phys_attribute_clear(pn, PHYS_REFERENCED);
2518}
2519
2520void
2521pmap_set_reference(ppnum_t pn)
2522{
2523 phys_attribute_set(pn, PHYS_REFERENCED);
1c79356b
A
2524}
2525
2526/*
2527 * pmap_is_referenced:
2528 *
2529 * Return whether or not the specified physical page is referenced
2530 * by any physical maps.
2531 */
2532
2533boolean_t
2534pmap_is_referenced(
55e303ae 2535 ppnum_t pn)
1c79356b 2536{
2d21ac55
A
2537 if (phys_attribute_test(pn, PHYS_REFERENCED))
2538 return TRUE;
2539
2540 return FALSE;
91447636
A
2541}
2542
2543/*
2544 * pmap_get_refmod(phys)
2545 * returns the referenced and modified bits of the specified
2546 * physical page.
2547 */
2548unsigned int
2549pmap_get_refmod(ppnum_t pa)
2550{
2d21ac55
A
2551 int refmod;
2552 unsigned int retval = 0;
2553
2554 refmod = phys_attribute_test(pa, PHYS_MODIFIED | PHYS_REFERENCED);
2555
2556 if (refmod & PHYS_MODIFIED)
2557 retval |= VM_MEM_MODIFIED;
2558 if (refmod & PHYS_REFERENCED)
2559 retval |= VM_MEM_REFERENCED;
2560
2561 return (retval);
91447636
A
2562}
2563
2564/*
2565 * pmap_clear_refmod(phys, mask)
2566 * clears the referenced and modified bits as specified by the mask
2567 * of the specified physical page.
2568 */
2569void
2570pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2571{
2572 unsigned int x86Mask;
2573
2574 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2575 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2576 phys_attribute_clear(pa, x86Mask);
1c79356b
A
2577}
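/*
 * A minimal sketch of the round trip between the generic VM_MEM_* encoding
 * and the x86 PHYS_* bits: query with pmap_get_refmod(), then clear only the
 * modified bit, which pmap_clear_refmod() translates back to PHYS_MODIFIED
 * above.  The helper name is hypothetical.
 */
#if 0	/* illustrative sketch only */
static void
pmap_clean_modified_sketch(ppnum_t pn)
{
	if (pmap_get_refmod(pn) & VM_MEM_MODIFIED)
		pmap_clear_refmod(pn, VM_MEM_MODIFIED);
}
#endif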
2578
1c79356b 2579void
91447636
A
2580invalidate_icache(__unused vm_offset_t addr,
2581 __unused unsigned cnt,
2582 __unused int phys)
1c79356b
A
2583{
2584 return;
2585}
2586void
91447636
A
2587flush_dcache(__unused vm_offset_t addr,
2588 __unused unsigned count,
2589 __unused int phys)
1c79356b
A
2590{
2591 return;
2592}
2593
2d21ac55
A
2594#if CONFIG_DTRACE
2595/*
2596 * Constrain DTrace copyin/copyout actions
2597 */
2598extern kern_return_t dtrace_copyio_preflight(addr64_t);
2599extern kern_return_t dtrace_copyio_postflight(addr64_t);
2600
2601kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
2602{
2603 thread_t thread = current_thread();
2604
2605 if (current_map() == kernel_map)
2606 return KERN_FAILURE;
2607 else if (thread->machine.specFlags & CopyIOActive)
2608 return KERN_FAILURE;
2609 else
2610 return KERN_SUCCESS;
2611}
2612
2613kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
2614{
2615 return KERN_SUCCESS;
2616}
2617#endif /* CONFIG_DTRACE */
2618
0c530ab8 2619#if MACH_KDB
6601e61a 2620
0c530ab8 2621/* show phys page mappings and attributes */
6601e61a 2622
0c530ab8 2623extern void db_show_page(pmap_paddr_t pa);
6601e61a 2624
2d21ac55 2625#if 0
6601e61a 2626void
0c530ab8 2627db_show_page(pmap_paddr_t pa)
6601e61a 2628{
0c530ab8
A
2629 pv_entry_t pv_h;
2630 int pai;
2631 char attr;
2632
2633 pai = pa_index(pa);
2634 pv_h = pai_to_pvh(pai);
1c79356b
A
2635
2636 attr = pmap_phys_attributes[pai];
2d21ac55 2637 printf("phys page %llx ", pa);
1c79356b
A
2638 if (attr & PHYS_MODIFIED)
2639 printf("modified, ");
2640 if (attr & PHYS_REFERENCED)
2641 printf("referenced, ");
2642 if (pv_h->pmap || pv_h->next)
2643 printf(" mapped at\n");
2644 else
2645 printf(" not mapped\n");
2646 for (; pv_h; pv_h = pv_h->next)
2647 if (pv_h->pmap)
2d21ac55 2648 printf("%llx in pmap %p\n", pv_h->va, pv_h->pmap);
1c79356b 2649}
2d21ac55 2650#endif
1c79356b
A
2651
2652#endif /* MACH_KDB */
2653
2654#if MACH_KDB
2d21ac55 2655#if 0
1c79356b
A
2656void db_kvtophys(vm_offset_t);
2657void db_show_vaddrs(pt_entry_t *);
2658
2659/*
2660 * print out the results of kvtophys(arg)
2661 */
2662void
2663db_kvtophys(
2664 vm_offset_t vaddr)
2665{
0c530ab8 2666 db_printf("0x%qx", kvtophys(vaddr));
1c79356b
A
2667}
2668
2669/*
 2670 * Walk the page tables.
2671 */
2672void
2673db_show_vaddrs(
2674 pt_entry_t *dirbase)
2675{
2676 pt_entry_t *ptep, *pdep, tmp;
0c530ab8 2677 unsigned int x, y, pdecnt, ptecnt;
1c79356b
A
2678
2679 if (dirbase == 0) {
2680 dirbase = kernel_pmap->dirbase;
2681 }
2682 if (dirbase == 0) {
2683 db_printf("need a dirbase...\n");
2684 return;
2685 }
0c530ab8 2686 dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
1c79356b
A
2687
2688 db_printf("dirbase: 0x%x\n", dirbase);
2689
2690 pdecnt = ptecnt = 0;
2691 pdep = &dirbase[0];
91447636 2692 for (y = 0; y < NPDEPG; y++, pdep++) {
1c79356b
A
2693 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
2694 continue;
2695 }
2696 pdecnt++;
2d21ac55 2697 ptep = (pt_entry_t *) ((unsigned long)(*pdep) & ~INTEL_OFFMASK);
1c79356b 2698 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
91447636 2699 for (x = 0; x < NPTEPG; x++, ptep++) {
1c79356b
A
2700 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
2701 continue;
2702 }
2703 ptecnt++;
2704 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
2705 x,
2706 *ptep,
2707 (y << 22) | (x << 12),
2708 *ptep & ~INTEL_OFFMASK);
2709 }
2710 }
2711
2712 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
2713
2714}
2d21ac55 2715#endif
1c79356b
A
2716#endif /* MACH_KDB */
2717
2718#include <mach_vm_debug.h>
2719#if MACH_VM_DEBUG
2720#include <vm/vm_debug.h>
2721
2722int
2723pmap_list_resident_pages(
91447636
A
2724 __unused pmap_t pmap,
2725 __unused vm_offset_t *listp,
2726 __unused int space)
1c79356b
A
2727{
2728 return 0;
2729}
2730#endif /* MACH_VM_DEBUG */
2731
6601e61a 2732
1c79356b 2733
91447636
A
2734/* temporary workaround */
2735boolean_t
0c530ab8 2736coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
91447636 2737{
0c530ab8 2738#if 0
91447636 2739 pt_entry_t *ptep;
1c79356b 2740
91447636
A
2741 ptep = pmap_pte(map->pmap, va);
2742 if (0 == ptep)
2743 return FALSE;
2744 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
0c530ab8
A
2745#else
2746 return TRUE;
1c79356b 2747#endif
1c79356b
A
2748}
2749
1c79356b 2750
9bccf70c 2751boolean_t
91447636
A
2752phys_page_exists(
2753 ppnum_t pn)
9bccf70c 2754{
91447636
A
2755 assert(pn != vm_page_fictitious_addr);
2756
2757 if (!pmap_initialized)
2758 return (TRUE);
2d21ac55
A
2759
2760 if (pn == vm_page_guard_addr)
2761 return FALSE;
2762
2763 if (!managed_page(ppn_to_pai(pn)))
91447636
A
2764 return (FALSE);
2765
2766 return TRUE;
2767}
2768
91447636 2769void
0c530ab8 2770pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
91447636 2771{
2d21ac55
A
2772 int i;
2773 pt_entry_t *opte, *npte;
2774 pt_entry_t pte;
2775 spl_t s;
2776
2777 for (i = 0; i < cnt; i++) {
2778 s = splhigh();
2779 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
2780 if (0 == opte)
2781 panic("kernel_commpage");
2782 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
2783 pte &= ~INTEL_PTE_WRITE; // ensure read only
2784 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
2785 if (0 == npte)
2786 panic("user_commpage");
2787 pmap_store_pte(npte, pte);
2788 splx(s);
2789 kernel_commpage += INTEL_PGBYTES;
2790 user_commpage += INTEL_PGBYTES;
2791 }
91447636
A
2792}
2793
2d21ac55 2794
0c530ab8
A
2795#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
2796pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
2797
2798void
2799pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
2800{
2d21ac55
A
2801 int i;
2802 pt_entry_t *kptep;
0c530ab8 2803
2d21ac55 2804 PMAP_LOCK(kernel_pmap);
0c530ab8 2805
2d21ac55
A
2806 for (i = 0; i < cnt; i++) {
2807 kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
2808 if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID)))
2809 panic("pmap_commpage64_init pte");
2810 pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
2811 }
2812 PMAP_UNLOCK(kernel_pmap);
0c530ab8
A
2813}
2814
0c530ab8 2815
91447636 2816static cpu_pmap_t cpu_pmap_master;
91447636
A
2817
2818struct cpu_pmap *
2819pmap_cpu_alloc(boolean_t is_boot_cpu)
2820{
2821 int ret;
2822 int i;
2823 cpu_pmap_t *cp;
91447636 2824 vm_offset_t address;
0c530ab8 2825 vm_map_address_t mapaddr;
91447636 2826 vm_map_entry_t entry;
0c530ab8 2827 pt_entry_t *pte;
91447636
A
2828
2829 if (is_boot_cpu) {
2830 cp = &cpu_pmap_master;
91447636
A
2831 } else {
2832 /*
2833 * The per-cpu pmap data structure itself.
2834 */
2835 ret = kmem_alloc(kernel_map,
2836 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
2837 if (ret != KERN_SUCCESS) {
2838 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
2839 return NULL;
2840 }
2841 bzero((void *)cp, sizeof(cpu_pmap_t));
2842
2843 /*
0c530ab8 2844 * The temporary windows used for copy/zero - see loose_ends.c
91447636 2845 */
0c530ab8
A
2846 ret = vm_map_find_space(kernel_map,
2847 &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
91447636 2848 if (ret != KERN_SUCCESS) {
0c530ab8
A
2849 printf("pmap_cpu_alloc() "
2850 "vm_map_find_space ret=%d\n", ret);
91447636
A
2851 pmap_cpu_free(cp);
2852 return NULL;
2853 }
0c530ab8 2854 address = (vm_offset_t)mapaddr;
4452a7af 2855
0c530ab8 2856 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
2d21ac55
A
2857 spl_t s;
2858 s = splhigh();
0c530ab8
A
2859 while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
2860 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
2861 * (int *) pte = 0;
6601e61a 2862 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
0c530ab8 2863 cp->mapwindow[i].prv_CMAP = pte;
2d21ac55 2864 splx(s);
4452a7af 2865 }
0c530ab8 2866 vm_map_unlock(kernel_map);
4452a7af
A
2867 }
2868
0c530ab8
A
2869 cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
2870 cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
2871 cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
4452a7af 2872
6601e61a 2873 return cp;
4452a7af
A
2874}
2875
2876void
6601e61a 2877pmap_cpu_free(struct cpu_pmap *cp)
4452a7af 2878{
6601e61a 2879 if (cp != NULL && cp != &cpu_pmap_master) {
6601e61a 2880 kfree((void *) cp, sizeof(cpu_pmap_t));
4452a7af 2881 }
4452a7af 2882}
0c530ab8
A
2883
2884
2885mapwindow_t *
2886pmap_get_mapwindow(pt_entry_t pentry)
2887{
2888 mapwindow_t *mp;
2889 int i;
0c530ab8 2890
2d21ac55 2891 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
0c530ab8
A
2892
2893 /*
2894 * Note: 0th map reserved for pmap_pte()
2895 */
2896 for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
2897 mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
2898
2899 if (*mp->prv_CMAP == 0) {
2d21ac55
A
2900 pmap_store_pte(mp->prv_CMAP, pentry);
2901
2902 invlpg((uintptr_t)mp->prv_CADDR);
2903
2904 return (mp);
0c530ab8
A
2905 }
2906 }
2d21ac55
A
2907 panic("pmap_get_mapwindow: no windows available");
2908
2909 return NULL;
2910}
2911
2912
2913void
2914pmap_put_mapwindow(mapwindow_t *mp)
2915{
2916 pmap_store_pte(mp->prv_CMAP, 0);
0c530ab8
A
2917}
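/*
 * A minimal sketch of the per-cpu window protocol above: build a PTE for the
 * physical page, borrow a window, touch the page through prv_CADDR, then
 * release the window.  Interrupts must be disabled for the duration (see the
 * assertion in pmap_get_mapwindow()).  The helper name is hypothetical and
 * the PTE attribute bits are assumptions; the real copy/zero paths in
 * loose_ends.c may use different ones.
 */
#if 0	/* illustrative sketch only */
static void
pmap_zero_via_window_sketch(ppnum_t pn)
{
	mapwindow_t	*mp;
	spl_t		s;

	s = splhigh();
	mp = pmap_get_mapwindow(pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_WRITE);
	bzero(mp->prv_CADDR, PAGE_SIZE);	/* access the page via the window */
	pmap_put_mapwindow(mp);			/* clears prv_CMAP, freeing the window */
	splx(s);
}
#endif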
2918
0c530ab8
A
2919void
2920pmap_switch(pmap_t tpmap)
2921{
2922 spl_t s;
0c530ab8
A
2923
2924 s = splhigh(); /* Make sure interruptions are disabled */
0c530ab8 2925
b0d623f7 2926 set_dirbase(tpmap, current_thread());
0c530ab8
A
2927
2928 splx(s);
2929}
2930
2931
2932/*
2933 * disable no-execute capability on
2934 * the specified pmap
2935 */
2936void pmap_disable_NX(pmap_t pmap) {
2937
2938 pmap->nx_enabled = 0;
2939}
2940
2941void
2942pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
2943 vm_size_t *alloc_size, int *collectable, int *exhaustable)
2944{
2945 *count = inuse_ptepages_count;
2946 *cur_size = PAGE_SIZE * inuse_ptepages_count;
2947 *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
2948 *elem_size = PAGE_SIZE;
2949 *alloc_size = PAGE_SIZE;
2950
2951 *collectable = 1;
2952 *exhaustable = 0;
2953}
2954
2955vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
2956{
2957 enum high_fixed_addresses a;
2958 a = e + HIGH_CPU_END * cpu;
2959 return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
2960}
2961
2962vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
2963{
2964 return pmap_cpu_high_map_vaddr(cpu_number(), e);
2965}
2966
2967vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
2968{
2969 enum high_fixed_addresses a;
2970 vm_offset_t vaddr;
2971
2972 a = e + HIGH_CPU_END * cpu_number();
2973 vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
2d21ac55 2974 pmap_store_pte(pte_unique_base + a, pte);
0c530ab8
A
2975
2976 /* TLB flush for this page for this cpu */
2977 invlpg((uintptr_t)vaddr);
2978
2979 return vaddr;
2980}
2981
935ed37a
A
2982static inline void
2983pmap_cpuset_NMIPI(cpu_set cpu_mask) {
2984 unsigned int cpu, cpu_bit;
2985 uint64_t deadline;
2986
2987 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
2988 if (cpu_mask & cpu_bit)
2989 cpu_NMI_interrupt(cpu);
2990 }
b0d623f7 2991 deadline = mach_absolute_time() + (LockTimeOut);
935ed37a
A
2992 while (mach_absolute_time() < deadline)
2993 cpu_pause();
2994}
2995
0c530ab8
A
2996/*
2997 * Called with pmap locked, we:
2998 * - scan through per-cpu data to see which other cpus need to flush
2999 * - send an IPI to each non-idle cpu to be flushed
3000 * - wait for all to signal back that they are inactive or we see that
3001 * they are in an interrupt handler or at a safe point
 3002 *  - flush the local tlb if it is active for this pmap
3003 * - return ... the caller will unlock the pmap
3004 */
3005void
3006pmap_flush_tlbs(pmap_t pmap)
3007{
3008 unsigned int cpu;
3009 unsigned int cpu_bit;
3010 cpu_set cpus_to_signal;
3011 unsigned int my_cpu = cpu_number();
3012 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
3013 boolean_t flush_self = FALSE;
3014 uint64_t deadline;
3015
2d21ac55
A
3016 assert((processor_avail_count < 2) ||
3017 (ml_get_interrupts_enabled() && get_preemption_level() != 0));
0c530ab8
A
3018
3019 /*
3020 * Scan other cpus for matching active or task CR3.
3021 * For idle cpus (with no active map) we mark them invalid but
3022 * don't signal -- they'll check as they go busy.
3023 * Note: for the kernel pmap we look for 64-bit shared address maps.
3024 */
3025 cpus_to_signal = 0;
3026 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
3027 if (!cpu_datap(cpu)->cpu_running)
3028 continue;
2d21ac55
A
3029 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
3030 (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) ||
0c530ab8
A
3031 (pmap->pm_shared) ||
3032 ((pmap == kernel_pmap) &&
3033 (!CPU_CR3_IS_ACTIVE(cpu) ||
3034 cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
3035 if (cpu == my_cpu) {
3036 flush_self = TRUE;
3037 continue;
3038 }
3039 cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
3040 __asm__ volatile("mfence");
3041
3042 if (CPU_CR3_IS_ACTIVE(cpu)) {
3043 cpus_to_signal |= cpu_bit;
3044 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
3045 }
3046 }
3047 }
3048
2d21ac55
A
3049 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
3050 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8 3051
2d21ac55 3052 if (cpus_to_signal) {
935ed37a
A
3053 cpu_set cpus_to_respond = cpus_to_signal;
3054
0c530ab8
A
3055 deadline = mach_absolute_time() + LockTimeOut;
3056 /*
3057 * Wait for those other cpus to acknowledge
3058 */
935ed37a
A
3059 while (cpus_to_respond != 0) {
3060 if (mach_absolute_time() > deadline) {
b0d623f7
A
3061 if (mp_recent_debugger_activity())
3062 continue;
593a1d5f
A
3063 if (!panic_active()) {
3064 pmap_tlb_flush_timeout = TRUE;
3065 pmap_cpuset_NMIPI(cpus_to_respond);
3066 }
935ed37a
A
3067 panic("pmap_flush_tlbs() timeout: "
3068 "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
3069 pmap, cpus_to_respond);
3070 }
3071
3072 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
3073 if ((cpus_to_respond & cpu_bit) != 0) {
3074 if (!cpu_datap(cpu)->cpu_running ||
3075 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
3076 !CPU_CR3_IS_ACTIVE(cpu)) {
3077 cpus_to_respond &= ~cpu_bit;
3078 }
3079 cpu_pause();
2d21ac55 3080 }
935ed37a
A
3081 if (cpus_to_respond == 0)
3082 break;
0c530ab8 3083 }
0c530ab8 3084 }
0c530ab8 3085 }
0c530ab8
A
3086 /*
3087 * Flush local tlb if required.
3088 * We need this flush even if the pmap being changed
3089 * is the user map... in case we do a copyin/out
3090 * before returning to user mode.
3091 */
3092 if (flush_self)
3093 flush_tlb();
3094
b0d623f7
A
3095 if ((pmap == kernel_pmap) && (flush_self != TRUE)) {
3096 panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
3097 }
3098
2d21ac55
A
3099 PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
3100 (int) pmap, cpus_to_signal, flush_self, 0, 0);
0c530ab8
A
3101}
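/*
 * The calling convention documented above, in miniature: the caller holds the
 * pmap lock, updates the PTE first, then triggers the shootdown, and unlocks
 * afterwards.  The helper name is hypothetical and the block is compiled out.
 */
#if 0	/* illustrative sketch only */
static void
pmap_clear_write_one_sketch(pmap_t pmap, vm_map_offset_t va)
{
	pt_entry_t	*pte;

	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);
	if (pte != PT_ENTRY_NULL && (*pte & INTEL_PTE_VALID)) {
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE));
		pmap_flush_tlbs(pmap);		/* called with the pmap locked */
	}
	PMAP_UNLOCK(pmap);
}
#endif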
3102
3103void
3104process_pmap_updates(void)
3105{
2d21ac55
A
3106 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
3107
0c530ab8
A
3108 flush_tlb();
3109
3110 current_cpu_datap()->cpu_tlb_invalid = FALSE;
3111 __asm__ volatile("mfence");
3112}
3113
3114void
3115pmap_update_interrupt(void)
3116{
2d21ac55
A
3117 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
3118 0, 0, 0, 0, 0);
0c530ab8
A
3119
3120 process_pmap_updates();
3121
2d21ac55
A
3122 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
3123 0, 0, 0, 0, 0);
0c530ab8
A
3124}
3125
3126
3127unsigned int pmap_cache_attributes(ppnum_t pn) {
3128
2d21ac55 3129 if (!managed_page(ppn_to_pai(pn)))
0c530ab8
A
3130 return (VM_WIMG_IO);
3131
3132 return (VM_WIMG_COPYBACK);
3133}
3134
3135#ifdef PMAP_DEBUG
3136void
3137pmap_dump(pmap_t p)
3138{
3139 int i;
3140
 3141	kprintf("pmap %p\n", p);
3142
3143 kprintf(" pm_cr3 0x%llx\n",p->pm_cr3);
3144 kprintf(" pm_pml4 0x%x\n",p->pm_pml4);
3145 kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt);
3146
3147 kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4);
3148 for (i=0;i<8;i++)
3149 kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
3150}
3151
3152void pmap_dump_wrap(void)
3153{
3154 pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
3155}
3156
3157void
3158dump_4GB_pdpt(pmap_t p)
3159{
3160 int spl;
3161 pdpt_entry_t *user_pdptp;
3162 pdpt_entry_t *kern_pdptp;
3163 pdpt_entry_t *pml4p;
3164
3165 spl = splhigh();
3166 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
3167 splx(spl);
3168 pmap_expand_pml4(p, 0x0);
3169 spl = splhigh();
3170 }
3171 kern_pdptp = kernel_pmap->pm_pdpt;
3172 if (kern_pdptp == NULL)
3173 panic("kern_pdptp == NULL");
3174 kprintf("dump_4GB_pdpt(%p)\n"
3175 "kern_pdptp=%p (phys=0x%016llx)\n"
3176 "\t 0x%08x: 0x%016llx\n"
3177 "\t 0x%08x: 0x%016llx\n"
3178 "\t 0x%08x: 0x%016llx\n"
3179 "\t 0x%08x: 0x%016llx\n"
3180 "\t 0x%08x: 0x%016llx\n"
3181 "user_pdptp=%p (phys=0x%016llx)\n"
3182 "\t 0x%08x: 0x%016llx\n"
3183 "\t 0x%08x: 0x%016llx\n"
3184 "\t 0x%08x: 0x%016llx\n"
3185 "\t 0x%08x: 0x%016llx\n"
3186 "\t 0x%08x: 0x%016llx\n",
3187 p, kern_pdptp, kvtophys(kern_pdptp),
3188 kern_pdptp+0, *(kern_pdptp+0),
3189 kern_pdptp+1, *(kern_pdptp+1),
3190 kern_pdptp+2, *(kern_pdptp+2),
3191 kern_pdptp+3, *(kern_pdptp+3),
3192 kern_pdptp+4, *(kern_pdptp+4),
3193 user_pdptp, kvtophys(user_pdptp),
3194 user_pdptp+0, *(user_pdptp+0),
3195 user_pdptp+1, *(user_pdptp+1),
3196 user_pdptp+2, *(user_pdptp+2),
3197 user_pdptp+3, *(user_pdptp+3),
3198 user_pdptp+4, *(user_pdptp+4));
3199 kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
3200 p->pm_cr3, p->pm_hold, p->pm_pml4);
3201 pml4p = (pdpt_entry_t *)p->pm_hold;
3202 if (pml4p == NULL)
3203 panic("user pml4p == NULL");
3204 kprintf("\t 0x%08x: 0x%016llx\n"
3205 "\t 0x%08x: 0x%016llx\n",
3206 pml4p+0, *(pml4p),
3207 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
3208 kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
3209 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
3210 pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
3211 if (pml4p == NULL)
3212 panic("kern pml4p == NULL");
3213 kprintf("\t 0x%08x: 0x%016llx\n"
3214 "\t 0x%08x: 0x%016llx\n",
3215 pml4p+0, *(pml4p),
3216 pml4p+511, *(pml4p+511));
3217 splx(spl);
3218}
3219
3220void dump_4GB_pdpt_thread(thread_t tp)
3221{
3222 dump_4GB_pdpt(tp->map->pmap);
3223}
3224
3225
3226#endif
b0d623f7 3227