1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52
53 /*
54 * File: pmap.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 * (These guys wrote the Vax version)
57 *
58 * Physical Map management code for Intel i386, i486, and i860.
59 *
60 * Manages physical address maps.
61 *
62 * In addition to hardware address maps, this
63 * module is called upon to provide software-use-only
64 * maps which may or may not be stored in the same
65 * form as hardware maps. These pseudo-maps are
66 * used to store intermediate results from copy
67 * operations to and from address spaces.
68 *
69 * Since the information managed by this module is
70 * also stored by the logical address mapping module,
71 * this module may throw away valid virtual-to-physical
72 * mappings at almost any time. However, invalidations
73 * of virtual-to-physical mappings must be done as
74 * requested.
75 *
76 * In order to cope with hardware architectures which
77 * make virtual-to-physical map invalidates expensive,
78 * this module may delay invalidate or reduce-protection
79 * operations until such time as they are actually
80 * necessary. This module is given full information as
81 * to which processors are currently using which maps,
82 * and to when physical maps must be made correct.
83 */
84
85 #include <string.h>
86 #include <norma_vm.h>
87 #include <mach_kdb.h>
88 #include <mach_ldebug.h>
89
90 #include <mach/machine/vm_types.h>
91
92 #include <mach/boolean.h>
93 #include <kern/thread.h>
94 #include <kern/zalloc.h>
95
96 #include <kern/lock.h>
97 #include <kern/kalloc.h>
98 #include <kern/spl.h>
99
100 #include <vm/pmap.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_kern.h>
103 #include <mach/vm_param.h>
104 #include <mach/vm_prot.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_page.h>
107
108 #include <mach/machine/vm_param.h>
109 #include <machine/thread.h>
110
111 #include <kern/misc_protos.h> /* prototyping */
112 #include <i386/misc_protos.h>
113
114 #include <i386/cpuid.h>
115 #include <i386/cpu_data.h>
116 #include <i386/cpu_number.h>
117 #include <i386/machine_cpu.h>
118 #include <i386/mp_slave_boot.h>
119 #include <i386/seg.h>
120 #include <i386/cpu_capabilities.h>
121
122 #if MACH_KDB
123 #include <ddb/db_command.h>
124 #include <ddb/db_output.h>
125 #include <ddb/db_sym.h>
126 #include <ddb/db_print.h>
127 #endif /* MACH_KDB */
128
129 #include <kern/xpr.h>
130
131 #include <vm/vm_protos.h>
132
133 #include <i386/mp.h>
134 #include <i386/mp_desc.h>
135
136 #include <sys/kdebug.h>
137
138 #ifdef IWANTTODEBUG
139 #undef DEBUG
140 #define DEBUG 1
141 #define POSTCODE_DELAY 1
142 #include <i386/postcode.h>
143 #endif /* IWANTTODEBUG */
144
145 /*
146 * Forward declarations for internal functions.
147 */
148 void pmap_expand_pml4(
149 pmap_t map,
150 vm_map_offset_t v);
151
152 void pmap_expand_pdpt(
153 pmap_t map,
154 vm_map_offset_t v);
155
156 void pmap_expand(
157 pmap_t map,
158 vm_map_offset_t v);
159
160 static void pmap_remove_range(
161 pmap_t pmap,
162 vm_map_offset_t va,
163 pt_entry_t *spte,
164 pt_entry_t *epte);
165
166 void phys_attribute_clear(
167 ppnum_t phys,
168 int bits);
169
170 boolean_t phys_attribute_test(
171 ppnum_t phys,
172 int bits);
173
174 void phys_attribute_set(
175 ppnum_t phys,
176 int bits);
177
178 void pmap_set_reference(
179 ppnum_t pn);
180
181 void pmap_movepage(
182 unsigned long from,
183 unsigned long to,
184 vm_size_t size);
185
186 boolean_t phys_page_exists(
187 ppnum_t pn);
188
189 #ifdef PMAP_DEBUG
190 void dump_pmap(pmap_t);
191 void dump_4GB_pdpt(pmap_t p);
192 void dump_4GB_pdpt_thread(thread_t tp);
193 #endif
194
195 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
196
197 int nx_enabled = 1; /* enable no-execute protection */
198
199 int cpu_64bit = 0;
200
201
202 /*
203 * Private data structures.
204 */
205
206 /*
207 * For each vm_page_t, there is a list of all currently
208 * valid virtual mappings of that page. An entry is
209 * a pv_entry_t; the list is the pv_table.
210 */
211
212 typedef struct pv_entry {
213 struct pv_entry *next; /* next pv_entry */
214 pmap_t pmap; /* pmap where mapping lies */
215 vm_map_offset_t va; /* virtual address for mapping */
216 } *pv_entry_t;
217
218 #define PV_ENTRY_NULL ((pv_entry_t) 0)
219
220 pv_entry_t pv_head_table; /* array of entries, one per page */
221
222 /*
223 * pv_list entries are kept on a list that can only be accessed
224 * with the pmap system locked (at SPLVM, not in the cpus_active set).
225 * The list is refilled from the pv_list_zone if it becomes empty.
226 */
227 pv_entry_t pv_free_list; /* free list at SPLVM */
228 decl_simple_lock_data(,pv_free_list_lock)
229 int pv_free_count = 0;
230 #define PV_LOW_WATER_MARK 5000
231 #define PV_ALLOC_CHUNK 2000
232 thread_call_t mapping_adjust_call;
233 static thread_call_data_t mapping_adjust_call_data;
234 int mappingrecurse = 0;
235
236 #define PV_ALLOC(pv_e) { \
237 simple_lock(&pv_free_list_lock); \
238 if ((pv_e = pv_free_list) != 0) { \
239 pv_free_list = pv_e->next; \
240 pv_free_count--; \
241 if (pv_free_count < PV_LOW_WATER_MARK) \
242 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
243 thread_call_enter(mapping_adjust_call); \
244 } \
245 simple_unlock(&pv_free_list_lock); \
246 }
247
248 #define PV_FREE(pv_e) { \
249 simple_lock(&pv_free_list_lock); \
250 pv_e->next = pv_free_list; \
251 pv_free_list = pv_e; \
252 pv_free_count++; \
253 simple_unlock(&pv_free_list_lock); \
254 }
255
256 zone_t pv_list_zone; /* zone of pv_entry structures */
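
/*
 * Illustrative usage sketch (added, not in the original source): callers
 * such as pmap_enter() try the interrupt-safe free list first via
 * PV_ALLOC() and fall back to the pv_list zone only when the list is
 * empty; PV_ALLOC() itself schedules mapping_adjust_call when the free
 * count drops below PV_LOW_WATER_MARK.
 */
#if 0	/* example only -- pv_e is a local in the hypothetical caller */
	pv_entry_t	pv_e;

	PV_ALLOC(pv_e);				/* fast path: SPLVM free list */
	if (pv_e == PV_ENTRY_NULL)
		pv_e = (pv_entry_t) zalloc(pv_list_zone);	/* slow path */

	/* ... link pv_e into the page's pv list ... */

	PV_FREE(pv_e);				/* return the entry when unmapping */
#endif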
257
258 static zone_t pdpt_zone;
259
260 /*
261 * Each entry in the pv_head_table is locked by a bit in the
262 * pv_lock_table. The lock bits are accessed by the physical
263 * address of the page they lock.
264 */
265
266 char *pv_lock_table; /* pointer to array of bits */
267 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
268
269 /*
270 * First and last physical addresses that we maintain any information
271 * for. Initialized to zero so that pmap operations done before
272 * pmap_init won't touch any non-existent structures.
273 */
274 pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
275 pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
276 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
277
278 static struct vm_object kptobj_object_store;
279 static vm_object_t kptobj;
280
281 /*
282 * Index into pv_head table, its lock bits, and the modify/reference
283 * bits starting at vm_first_phys.
284 */
285
286 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
287
288 #define pai_to_pvh(pai) (&pv_head_table[pai])
289 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
290 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
291
292 /*
293 * Array of physical page attributes for managed pages.
294 * One byte per physical page.
295 */
296 char *pmap_phys_attributes;
297
298 /*
299 * Physical page attributes. Copy bits from PTE definition.
300 */
301 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
302 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
303 #define PHYS_NCACHE INTEL_PTE_NCACHE
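
/*
 * Worked example (added, not in the original source): a managed physical
 * page is converted to the common index used by all three per-page
 * structures -- the pv_head_table entry, its lock bit and its attribute
 * byte.  "pn" stands for an arbitrary managed page number.
 */
#if 0	/* example only */
	pmap_paddr_t	pa  = (pmap_paddr_t) i386_ptob(pn);
	int		pai = pa_index(pa);		/* page index relative to vm_first_phys */
	pv_entry_t	pv_h = pai_to_pvh(pai);		/* head of the page's mapping list */

	lock_pvh_pai(pai);				/* per-page lock bit in pv_lock_table */
	pmap_phys_attributes[pai] |= PHYS_REFERENCED;	/* record a reference */
	unlock_pvh_pai(pai);
#endif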
304
305 /*
306 * Amount of virtual memory mapped by one
307 * page-directory entry.
308 */
309 #define PDE_MAPPED_SIZE (pdetova(1))
310 uint64_t pde_mapped_size;
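
/*
 * Worked example (added note): with the PAE page tables used here a
 * page-directory entry covers NPTEPG (512) page-table entries of
 * PAGE_SIZE (4096) bytes, i.e. 512 * 4096 = 2MB.  pmap_bootstrap()
 * assigns the same value explicitly (512*4096) for the 64-bit case.
 */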
311
312 /*
313 * Locking and TLB invalidation
314 */
315
316 /*
317 * Locking Protocols:
318 *
319 * There are two structures in the pmap module that need locking:
320 * the pmaps themselves, and the per-page pv_lists (which are locked
321 * by locking the pv_lock_table entry that corresponds to the pv_head
322 * for the list in question.) Most routines want to lock a pmap and
323 * then do operations in it that require pv_list locking -- however
324 * pmap_remove_all and pmap_copy_on_write operate on a physical page
325 * basis and want to do the locking in the reverse order, i.e. lock
326 * a pv_list and then go through all the pmaps referenced by that list.
327 * To protect against deadlock between these two cases, the pmap_lock
328 * is used. There are three different locking protocols as a result:
329 *
330 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
331 * the pmap.
332 *
333 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
334 * lock on the pmap_lock (shared read), then lock the pmap
335 * and finally the pv_lists as needed [i.e. pmap lock before
336 * pv_list lock.]
337 *
338 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
339 * Get a write lock on the pmap_lock (exclusive write); this
340 * also guarantees exclusive access to the pv_lists. Lock the
341 * pmaps as needed.
342 *
343 * At no time may any routine hold more than one pmap lock or more than
344 * one pv_list lock. Because interrupt level routines can allocate
345 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
346 * kernel_pmap can only be held at splhigh.
347 */
348
349 /*
350 * We raise the interrupt level to splvm, to block interprocessor
351 * interrupts during pmap operations. We mark the cpu's cr3 inactive
352 * while interrupts are blocked.
353 */
354 #define SPLVM(spl) { \
355 spl = splhigh(); \
356 CPU_CR3_MARK_INACTIVE(); \
357 }
358
359 #define SPLX(spl) { \
360 if (current_cpu_datap()->cpu_tlb_invalid) \
361 process_pmap_updates(); \
362 CPU_CR3_MARK_ACTIVE(); \
363 splx(spl); \
364 }
365
366 /*
367 * Lock on pmap system
368 */
369 lock_t pmap_system_lock;
370
371 #define PMAP_READ_LOCK(pmap, spl) { \
372 SPLVM(spl); \
373 lock_read(&pmap_system_lock); \
374 simple_lock(&(pmap)->lock); \
375 }
376
377 #define PMAP_WRITE_LOCK(spl) { \
378 SPLVM(spl); \
379 lock_write(&pmap_system_lock); \
380 }
381
382 #define PMAP_READ_UNLOCK(pmap, spl) { \
383 simple_unlock(&(pmap)->lock); \
384 lock_read_done(&pmap_system_lock); \
385 SPLX(spl); \
386 }
387
388 #define PMAP_WRITE_UNLOCK(spl) { \
389 lock_write_done(&pmap_system_lock); \
390 SPLX(spl); \
391 }
392
393 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
394 simple_lock(&(pmap)->lock); \
395 lock_write_to_read(&pmap_system_lock); \
396 }
397
398 #define LOCK_PVH(index) lock_pvh_pai(index)
399
400 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
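
/*
 * Illustrative sketch (added, not in the original source) of locking
 * protocol 2 described above, as followed by the pmap-based operations
 * in this file.  "pmap" and "pa" stand for the map and managed physical
 * address being operated on.
 */
#if 0	/* example only */
	spl_t	spl;
	int	pai;

	PMAP_READ_LOCK(pmap, spl);	/* SPLVM + pmap_system_lock (read) + pmap lock */
	pai = pa_index(pa);
	LOCK_PVH(pai);			/* pv_list lock is always taken last */

	/* ... modify the pte and its pv_entry ... */

	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);	/* SPLX also runs any deferred TLB update */
#endif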
401
402 #if USLOCK_DEBUG
403 extern int max_lock_loops;
404 extern int disableSerialOuput;
405 #define LOOP_VAR \
406 unsigned int loop_count; \
407 loop_count = disableSerialOuput ? max_lock_loops \
408 : max_lock_loops*100
409 #define LOOP_CHECK(msg, pmap) \
410 if (--loop_count == 0) { \
411 mp_disable_preemption(); \
412 kprintf("%s: cpu %d pmap %x\n", \
413 msg, cpu_number(), pmap); \
414 Debugger("deadlock detection"); \
415 mp_enable_preemption(); \
416 loop_count = max_lock_loops; \
417 }
418 #else /* USLOCK_DEBUG */
419 #define LOOP_VAR
420 #define LOOP_CHECK(msg, pmap)
421 #endif /* USLOCK_DEBUG */
422
423
424 static void pmap_flush_tlbs(pmap_t pmap);
425
426 #define PMAP_UPDATE_TLBS(pmap, s, e) \
427 pmap_flush_tlbs(pmap)
428
429
430 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
431
432
433 pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
434
435 /*
436 * Other useful macros.
437 */
438 #define current_pmap() (vm_map_pmap(current_thread()->map))
439
440 struct pmap kernel_pmap_store;
441 pmap_t kernel_pmap;
442
443 pd_entry_t high_shared_pde;
444 pd_entry_t commpage64_pde;
445
446 struct zone *pmap_zone; /* zone of pmap structures */
447
448 int pmap_debug = 0; /* flag for debugging prints */
449
450 unsigned int inuse_ptepages_count = 0; /* debugging */
451
452 addr64_t kernel64_cr3;
453 boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */
454
455 /*
456 * Pmap cache. Cache is threaded through ref_count field of pmap.
457 * Max will eventually be constant -- variable for experimentation.
458 */
459 int pmap_cache_max = 32;
460 int pmap_alloc_chunk = 8;
461 pmap_t pmap_cache_list;
462 int pmap_cache_count;
463 decl_simple_lock_data(,pmap_cache_lock)
464
465 extern char end;
466
467 static int nkpt;
468 extern uint32_t lowGlo;
469 extern void *version;
470
471 pt_entry_t *DMAP1, *DMAP2;
472 caddr_t DADDR1;
473 caddr_t DADDR2;
474
475 #if DEBUG_ALIAS
476 #define PMAP_ALIAS_MAX 32
477 struct pmap_alias {
478 vm_offset_t rpc;
479 pmap_t pmap;
480 vm_map_offset_t va;
481 int cookie;
482 #define PMAP_ALIAS_COOKIE 0xdeadbeef
483 } pmap_aliasbuf[PMAP_ALIAS_MAX];
484 int pmap_alias_index = 0;
485 extern vm_offset_t get_rpc();
486
487 #endif /* DEBUG_ALIAS */
488
489 /*
490 * for legacy, returns the address of the pde entry.
491 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
492 * then returns the mapped address of the pde entry in that page
493 */
494 pd_entry_t *
495 pmap_pde(pmap_t m, vm_map_offset_t v)
496 {
497 pd_entry_t *pde;
498 if (!cpu_64bit || (m == kernel_pmap)) {
499 pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
500 } else {
501 assert(m);
502 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
503 pde = pmap64_pde(m, v);
504 }
505 return pde;
506 }
507
508
509 /*
510 * the single pml4 page per pmap is allocated at pmap create time and exists
511 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
512 * level of page table dynamic mapping).
513 * this returns the address of the requested pml4 entry in the top level page.
514 */
515 static inline
516 pml4_entry_t *
517 pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
518 {
519 return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
520 }
521
522 /*
523 * maps in the pml4 page, if any, containing the pdpt entry requested
524 * and returns the address of the pdpt entry in that mapped page
525 */
526 pdpt_entry_t *
527 pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
528 {
529 pml4_entry_t newpf;
530 pml4_entry_t *pml4;
531 int i;
532
533 assert(pmap);
534 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
535 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
536 return(0);
537 }
538
539 pml4 = pmap64_pml4(pmap, vaddr);
540
541 if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
542
543 newpf = *pml4 & PG_FRAME;
544
545
546 for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
547 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
548 return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
549 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
550 }
551 }
552
553 current_cpu_datap()->cpu_pmap->pdpt_window_index++;
554 if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
555 current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
556 pmap_store_pte(
557 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
558 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
559 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
560 return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
561 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
562 }
563
564 return (0);
565 }
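
/*
 * Added note: the lookup pattern above is shared by pmap64_pde() and
 * pmap_pte() below -- each level first scans its per-cpu mapping windows
 * for one whose CMAP already points at the wanted page frame and, if
 * none matches, recycles the next window round-robin, stores a fresh
 * mapping and invlpg()s the window's CADDR before returning a pointer
 * into it.
 */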
566
567 /*
568 * maps in the pdpt page, if any, containing the pde entry requested
569 * and returns the address of the pde entry in that mapped page
570 */
571 pd_entry_t *
572 pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
573 {
574 pdpt_entry_t newpf;
575 pdpt_entry_t *pdpt;
576 int i;
577
578 assert(pmap);
579 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
580 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
581 return(0);
582 }
583
584 /* if (vaddr & (1ULL << 63)) panic("neg addr");*/
585 pdpt = pmap64_pdpt(pmap, vaddr);
586
587 if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
588
589 newpf = *pdpt & PG_FRAME;
590
591 for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
592 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
593 return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
594 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
595 }
596 }
597
598 current_cpu_datap()->cpu_pmap->pde_window_index++;
599 if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
600 current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
601 pmap_store_pte(
602 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
603 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
604 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
605 return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
606 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
607 }
608
609 return (0);
610 }
611
612
613
614 /*
615 * return address of mapped pte for vaddr va in pmap pmap.
616 * must be called with pre-emption or interrupts disabled
617 * if targeted pmap is not the kernel pmap
618 * since we may be passing back a virtual address that is
619 * associated with this cpu... pre-emption or interrupts
620 * must remain disabled until the caller is done using
621 * the pointer that was passed back.
622 *
623 * maps the pde page, if any, containing the pte in and returns
624 * the address of the pte in that mapped page
625 */
626 pt_entry_t *
627 pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
628 {
629 pd_entry_t *pde;
630 pd_entry_t newpf;
631 int i;
632
633 assert(pmap);
634 pde = pmap_pde(pmap,vaddr);
635
636 if (pde && ((*pde & INTEL_PTE_VALID))) {
637 if (pmap == kernel_pmap) {
638 return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
639 }
640
641 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
642
643 newpf = *pde & PG_FRAME;
644
645 for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
646 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
647 return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
648 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
649 }
650 }
651
652 current_cpu_datap()->cpu_pmap->pte_window_index++;
653 if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
654 current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
655 pmap_store_pte(
656 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
657 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
658 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
659 return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
660 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
661 }
662
663 return(0);
664 }
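
/*
 * Illustrative sketch (added, not in the original source) of the
 * discipline described above: for a non-kernel pmap the returned pointer
 * refers to a per-cpu mapping window, so preemption (or interrupts) must
 * stay disabled until the caller is done with it.  "user_pmap" and
 * "vaddr" are hypothetical.
 */
#if 0	/* example only */
	pt_entry_t	*ptep;
	pt_entry_t	entry = 0;

	mp_disable_preemption();
	ptep = pmap_pte(user_pmap, vaddr);	/* may map a per-cpu window */
	if (ptep != PT_ENTRY_NULL)
		entry = *ptep;			/* consume while still pinned to this cpu */
	mp_enable_preemption();
#endif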
665
666
667 /*
668 * Map memory at initialization. The physical addresses being
669 * mapped are not managed and are never unmapped.
670 *
671 * For now, VM is already on, we only need to map the
672 * specified memory.
673 */
674 vm_offset_t
675 pmap_map(
676 vm_offset_t virt,
677 vm_map_offset_t start_addr,
678 vm_map_offset_t end_addr,
679 vm_prot_t prot,
680 unsigned int flags)
681 {
682 int ps;
683
684 ps = PAGE_SIZE;
685 while (start_addr < end_addr) {
686 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
687 (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
688 virt += ps;
689 start_addr += ps;
690 }
691 return(virt);
692 }
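
/*
 * Illustrative sketch (added, not in the original source): a boot-time
 * caller wires down a physical range and keeps allocating virtual space
 * from the value handed back.  The physical addresses are hypothetical;
 * virtual_avail is the extern declared just below.
 */
#if 0	/* example only */
	vm_offset_t	va = virtual_avail;

	va = pmap_map(va, (vm_map_offset_t) 0x1000, (vm_map_offset_t) 0x3000,
			VM_PROT_READ | VM_PROT_WRITE, 0);
	/* va has advanced past the two pages just entered */
#endif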
693
694 /*
695 * Back-door routine for mapping kernel VM at initialization.
696 * Useful for mapping memory outside the range
697 * [vm_first_phys, vm_last_phys) (i.e., devices).
698 * Sets no-cache, A, D.
699 * Otherwise like pmap_map.
700 */
701 vm_offset_t
702 pmap_map_bd(
703 vm_offset_t virt,
704 vm_map_offset_t start_addr,
705 vm_map_offset_t end_addr,
706 vm_prot_t prot,
707 unsigned int flags)
708 {
709 pt_entry_t template;
710 pt_entry_t *pte;
711
712 template = pa_to_pte(start_addr)
713 | INTEL_PTE_REF
714 | INTEL_PTE_MOD
715 | INTEL_PTE_WIRED
716 | INTEL_PTE_VALID;
717
718 if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
719 template |= INTEL_PTE_NCACHE;
720 if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
721 template |= INTEL_PTE_PTA;
722 }
723
724 if (prot & VM_PROT_WRITE)
725 template |= INTEL_PTE_WRITE;
726
727 while (start_addr < end_addr) {
728 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
729 if (pte == PT_ENTRY_NULL) {
730 panic("pmap_map_bd: Invalid kernel address\n");
731 }
732 pmap_store_pte(pte, template);
733 pte_increment_pa(template);
734 virt += PAGE_SIZE;
735 start_addr += PAGE_SIZE;
736 }
737
738 flush_tlb();
739 return(virt);
740 }
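
/*
 * Illustrative sketch (added, not in the original source): mapping a
 * hypothetical device register page uncached through the back door.
 * "dev_va" and "dev_pa" are assumptions, not values from this file.
 */
#if 0	/* example only */
	(void) pmap_map_bd(dev_va, dev_pa, dev_pa + PAGE_SIZE,
			VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
#endif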
741
742 extern char *first_avail;
743 extern vm_offset_t virtual_avail, virtual_end;
744 extern pmap_paddr_t avail_start, avail_end;
745 extern vm_offset_t etext;
746 extern void *sectHIBB;
747 extern int sectSizeHIB;
748
749
750 vm_offset_t
751 pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
752 {
753 vm_offset_t ve = pmap_index_to_virt(e);
754 pt_entry_t *ptep;
755 pmap_paddr_t pa;
756 int i;
757
758 assert(0 == (va & PAGE_MASK)); /* expecting page aligned */
759 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);
760
761 for (i=0; i< sz; i++) {
762 pa = (pmap_paddr_t) kvtophys(va);
763 pmap_store_pte(ptep, (pa & PG_FRAME)
764 | INTEL_PTE_VALID
765 | INTEL_PTE_GLOBAL
766 | INTEL_PTE_RW
767 | INTEL_PTE_REF
768 | INTEL_PTE_MOD);
769 va+= PAGE_SIZE;
770 ptep++;
771 }
772 return ve;
773 }
774
775 vm_offset_t
776 pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
777 {
778 enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
779 return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
780 }
781
782 void pmap_init_high_shared(void);
783
784 extern vm_offset_t gdtptr, idtptr;
785
786 extern uint32_t low_intstack;
787
788 extern struct fake_descriptor ldt_desc_pattern;
789 extern struct fake_descriptor tss_desc_pattern;
790
791 extern char hi_remap_text, hi_remap_etext;
792 extern char t_zero_div;
793
794 pt_entry_t *pte_unique_base;
795
796 void
797 pmap_init_high_shared(void)
798 {
799
800 vm_offset_t haddr;
801 struct __gdt_desc_struct gdt_desc = {0,0,0};
802 struct __idt_desc_struct idt_desc = {0,0,0};
803 #if MACH_KDB
804 struct i386_tss *ttss;
805 #endif
806
807 kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
808 HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
809 pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
810
811 if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
812 HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
813 panic("tramps too large");
814 haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
815 (vm_offset_t) &hi_remap_text, 3);
816 kprintf("tramp: 0x%x, ",haddr);
817 printf("hi mem tramps at 0x%x\n",haddr);
818 /* map gdt up high and update ptr for reload */
819 haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
820 (vm_offset_t) master_gdt, 1);
821 __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
822 gdt_desc.address = haddr;
823 kprintf("GDT: 0x%x, ",haddr);
824 /* map ldt up high */
825 haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
826 (vm_offset_t) master_ldt,
827 HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
828 kprintf("LDT: 0x%x, ",haddr);
829 /* put new ldt addr into gdt */
830 master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern;
831 master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr;
832 fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1);
833 master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern;
834 master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr;
835 fix_desc(&master_gdt[sel_idx(USER_LDT)], 1);
836
837 /* map idt up high */
838 haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
839 (vm_offset_t) master_idt, 1);
840 __asm__ __volatile__("sidt %0" : "=m" (idt_desc));
841 idt_desc.address = haddr;
842 kprintf("IDT: 0x%x, ", haddr);
843 /* remap ktss up high and put new high addr into gdt */
844 haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
845 (vm_offset_t) &master_ktss, 1);
846 master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern;
847 master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr;
848 fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1);
849 kprintf("KTSS: 0x%x, ",haddr);
850 #if MACH_KDB
851 /* remap dbtss up high and put new high addr into gdt */
852 haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
853 (vm_offset_t) &master_dbtss, 1);
854 master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
855 master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr;
856 fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1);
857 ttss = (struct i386_tss *)haddr;
858 kprintf("DBTSS: 0x%x, ",haddr);
859 #endif /* MACH_KDB */
860
861 /* remap dftss up high and put new high addr into gdt */
862 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
863 (vm_offset_t) &master_dftss, 1);
864 master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern;
865 master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr;
866 fix_desc(&master_gdt[sel_idx(DF_TSS)], 1);
867 kprintf("DFTSS: 0x%x\n",haddr);
868
869 /* remap mctss up high and put new high addr into gdt */
870 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
871 (vm_offset_t) &master_mctss, 1);
872 master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern;
873 master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr;
874 fix_desc(&master_gdt[sel_idx(MC_TSS)], 1);
875 kprintf("MCTSS: 0x%x\n",haddr);
876
877 __asm__ __volatile__("lgdt %0": "=m" (gdt_desc));
878 __asm__ __volatile__("lidt %0": "=m" (idt_desc));
879 kprintf("gdt/idt reloaded, ");
880 set_tr(KERNEL_TSS);
881 kprintf("tr reset to KERNEL_TSS\n");
882 }
883
884
885 /*
886 * Bootstrap the system enough to run with virtual memory.
887 * Map the kernel's code and data, and allocate the system page table.
888 * Called with mapping OFF. Page_size must already be set.
889 *
890 * Parameters:
891 * load_start: PA where kernel was loaded
892 * avail_start PA of first available physical page -
893 * after kernel page tables
894 * avail_end PA of last available physical page
895 * virtual_avail VA of first available page -
896 * after kernel page tables
897 * virtual_end VA of last available page -
898 * end of kernel address space
899 *
900 * &start_text start of kernel text
901 * &etext end of kernel text
902 */
903
904 void
905 pmap_bootstrap(
906 __unused vm_offset_t load_start,
907 boolean_t IA32e)
908 {
909 vm_offset_t va;
910 pt_entry_t *pte;
911 int i;
912 int wpkernel, boot_arg;
913 pdpt_entry_t *pdpt;
914
915 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
916 * known to VM */
917 /*
918 * The kernel's pmap is statically allocated so we don't
919 * have to use pmap_create, which is unlikely to work
920 * correctly at this part of the boot sequence.
921 */
922
923
924 kernel_pmap = &kernel_pmap_store;
925 kernel_pmap->ref_count = 1;
926 kernel_pmap->nx_enabled = FALSE;
927 kernel_pmap->pm_64bit = 0;
928 kernel_pmap->pm_obj = (vm_object_t) NULL;
929 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
930 kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
931 pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
932 kernel_pmap->pm_pdpt = pdpt;
933 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);
934
935 va = (vm_offset_t)kernel_pmap->dirbase;
936 /* setup self referential mapping(s) */
937 for (i = 0; i< NPGPTD; i++, pdpt++) {
938 pmap_paddr_t pa;
939 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
940 pmap_store_pte(
941 (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
942 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
943 INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
944 pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
945 }
946
947 cpu_64bit = IA32e;
948
949 lo_kernel_cr3 = kernel_pmap->pm_cr3;
950 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
951
952 /* save the value we stuff into created pmaps to share the gdts etc */
953 high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
954 /* make sure G bit is on for high shared pde entry */
955 high_shared_pde |= INTEL_PTE_GLOBAL;
956 pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
957
958 nkpt = NKPT;
959 inuse_ptepages_count += NKPT;
960
961 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
962 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
963
964 /*
965 * Reserve some special page table entries/VA space for temporary
966 * mapping of pages.
967 */
968 #define SYSMAP(c, p, v, n) \
969 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
970
971 va = virtual_avail;
972 pte = vtopte(va);
973
974 for (i=0; i<PMAP_NWINDOWS; i++) {
975 SYSMAP(caddr_t,
976 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
977 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
978 1);
979 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
980 }
981
982 /* DMAP user for debugger */
983 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
984 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
985
986
987 lock_init(&pmap_system_lock,
988 FALSE, /* NOT a sleep lock */
989 0, 0);
990
991 virtual_avail = va;
992
993 wpkernel = 1;
994 if (PE_parse_boot_arg("wpkernel", &boot_arg)) {
995 if (boot_arg == 0)
996 wpkernel = 0;
997 }
998
999 /* Remap kernel text readonly unless the "wpkernel" boot-arg is present
1000 * and set to 0.
1001 */
1002 if (wpkernel)
1003 {
1004 vm_offset_t myva;
1005 pt_entry_t *ptep;
1006
1007 for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
1008 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
1009 continue;
1010 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
1011 if (ptep)
1012 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
1013 }
1014 }
1015
1016 /* no matter what, kernel page zero is not accessible */
1017 pte = pmap_pte(kernel_pmap, 0);
1018 pmap_store_pte(pte, INTEL_PTE_INVALID);
1019
1020 /* map lowmem global page into fixed addr 0x2000 */
1021 if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte");
1022
1023 pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW);
1024 flush_tlb();
1025
1026 simple_lock_init(&kernel_pmap->lock, 0);
1027 simple_lock_init(&pv_free_list_lock, 0);
1028
1029 pmap_init_high_shared();
1030
1031 pde_mapped_size = PDE_MAPPED_SIZE;
1032
1033 if (cpu_64bit) {
1034 pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT;
1035 pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
1036 pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
1037 int istate = ml_set_interrupts_enabled(FALSE);
1038
1039 /*
1040 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
1041 * with page bits set for the correct IA-32e operation and so that
1042 * the legacy-mode IdlePDPT is retained for slave processor start-up.
1043 * This is necessary due to the incompatible use of page bits between
1044 * 64-bit and legacy modes.
1045 */
1046 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
1047 kernel_pmap->pm_pml4 = IdlePML4;
1048 kernel_pmap->pm_pdpt = (pd_entry_t *)
1049 ((unsigned int)IdlePDPT64 | KERNBASE );
1050 #define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
1051 pmap_store_pte(kernel_pmap->pm_pml4,
1052 (uint32_t)IdlePDPT64 | PAGE_BITS);
1053 pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
1054 pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
1055 pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
1056 pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);
1057
1058 /*
1059 * The kernel is also mapped in the uber-space at the 4GB starting
1060 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
1061 */
1062 pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));
1063
1064 kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
1065 cpu_IA32e_enable(current_cpu_datap());
1066 current_cpu_datap()->cpu_is64bit = TRUE;
1067 /* welcome to a 64 bit world */
1068
1069 /* Re-initialize and load descriptors */
1070 cpu_desc_init64(&cpu_data_master, TRUE);
1071 cpu_desc_load64(&cpu_data_master);
1072 fast_syscall_init64();
1073
1074 pde_mapped_size = 512*4096 ;
1075
1076 ml_set_interrupts_enabled(istate);
1077
1078 }
1079 kernel_pmap->pm_hold = kernel_pmap->pm_pml4;
1080
1081 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
1082 VADDR(KPTDI,0), virtual_end);
1083 printf("PAE enabled\n");
1084 if (cpu_64bit){
1085 printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
1086
1087 kprintf("Available physical space from 0x%llx to 0x%llx\n",
1088 avail_start, avail_end);
1089
1090 /*
1091 * By default for 64-bit users loaded at 4GB, share kernel mapping.
1092 * But this may be overridden by the -no_shared_cr3 boot-arg.
1093 */
1094 if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) {
1095 kprintf("Shared kernel address space disabled\n");
1096 }
1097 }
1098
1099 void
1100 pmap_virtual_space(
1101 vm_offset_t *startp,
1102 vm_offset_t *endp)
1103 {
1104 *startp = virtual_avail;
1105 *endp = virtual_end;
1106 }
1107
1108 /*
1109 * Initialize the pmap module.
1110 * Called by vm_init, to initialize any structures that the pmap
1111 * system needs to map virtual memory.
1112 */
1113 void
1114 pmap_init(void)
1115 {
1116 register long npages;
1117 vm_offset_t addr;
1118 register vm_size_t s;
1119 vm_map_offset_t vaddr;
1120 ppnum_t ppn;
1121
1122 /*
1123 * Allocate memory for the pv_head_table and its lock bits,
1124 * the modify bit array, and the pte_page table.
1125 */
1126
1127 /* zero bias all these arrays now instead of off avail_start
1128 so we cover all memory */
1129 npages = i386_btop(avail_end);
1130 s = (vm_size_t) (sizeof(struct pv_entry) * npages
1131 + pv_lock_table_size(npages)
1132 + npages);
1133
1134 s = round_page(s);
1135 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
1136 panic("pmap_init");
1137
1138 memset((char *)addr, 0, s);
1139
1140 /*
1141 * Allocate the structures first to preserve word-alignment.
1142 */
1143 pv_head_table = (pv_entry_t) addr;
1144 addr = (vm_offset_t) (pv_head_table + npages);
1145
1146 pv_lock_table = (char *) addr;
1147 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1148
1149 pmap_phys_attributes = (char *) addr;
1150
1151 /*
1152 * Create the zone of physical maps,
1153 * and of the physical-to-virtual entries.
1154 */
1155 s = (vm_size_t) sizeof(struct pmap);
1156 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
1157 s = (vm_size_t) sizeof(struct pv_entry);
1158 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
1159 s = 63;
1160 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
1161
1162 /*
1163 * Only now, when all of the data structures are allocated,
1164 * can we set vm_first_phys and vm_last_phys. If we set them
1165 * too soon, the kmem_alloc_wired above will try to use these
1166 * data structures and blow up.
1167 */
1168
1169 /* zero bias this now so we cover all memory */
1170 vm_first_phys = 0;
1171 vm_last_phys = avail_end;
1172
1173 kptobj = &kptobj_object_store;
1174 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
1175 kernel_pmap->pm_obj = kptobj;
1176
1177 /* create pv entries for kernel pages mapped by low level
1178 startup code. these have to exist so we can pmap_remove()
1179 e.g. kext pages from the middle of our addr space */
1180
1181 vaddr = (vm_map_offset_t)0;
1182 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
1183 pv_entry_t pv_e;
1184
1185 pv_e = pai_to_pvh(ppn);
1186 pv_e->va = vaddr;
1187 vaddr += PAGE_SIZE;
1188 kernel_pmap->stats.resident_count++;
1189 pv_e->pmap = kernel_pmap;
1190 pv_e->next = PV_ENTRY_NULL;
1191 }
1192
1193 pmap_initialized = TRUE;
1194
1195 /*
1196 * Initialize pmap cache.
1197 */
1198 pmap_cache_list = PMAP_NULL;
1199 pmap_cache_count = 0;
1200 simple_lock_init(&pmap_cache_lock, 0);
1201 }
1202
1203 void
1204 x86_lowmem_free(void)
1205 {
1206 /* free lowmem pages back to the vm system. we had to defer doing this
1207 until the vm system was fully up.
1208 the actual pages that are released are determined by which
1209 pages the memory sizing code puts into the region table */
1210
1211 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base),
1212 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
1213 }
1214
1215
1216 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1217
1218 boolean_t
1219 pmap_verify_free(
1220 ppnum_t pn)
1221 {
1222 pmap_paddr_t phys;
1223 pv_entry_t pv_h;
1224 int pai;
1225 spl_t spl;
1226 boolean_t result;
1227
1228 assert(pn != vm_page_fictitious_addr);
1229 phys = (pmap_paddr_t)i386_ptob(pn);
1230 if (!pmap_initialized)
1231 return(TRUE);
1232
1233 if (!pmap_valid_page(pn))
1234 return(FALSE);
1235
1236 PMAP_WRITE_LOCK(spl);
1237
1238 pai = pa_index(phys);
1239 pv_h = pai_to_pvh(pai);
1240
1241 result = (pv_h->pmap == PMAP_NULL);
1242 PMAP_WRITE_UNLOCK(spl);
1243
1244 return(result);
1245 }
1246
1247 /*
1248 * Create and return a physical map.
1249 *
1250 * If the size specified for the map
1251 * is zero, the map is an actual physical
1252 * map, and may be referenced by the
1253 * hardware.
1254 *
1255 * If the size specified is non-zero,
1256 * the map will be used in software only, and
1257 * is bounded by that size.
1258 */
1259 pmap_t
1260 pmap_create(
1261 vm_map_size_t sz,
1262 boolean_t is_64bit)
1263 {
1264 register pmap_t p;
1265 int i;
1266 vm_offset_t va;
1267 vm_size_t size;
1268 pdpt_entry_t *pdpt;
1269 pml4_entry_t *pml4p;
1270 vm_page_t m;
1271 int template;
1272 pd_entry_t *pdp;
1273 spl_t s;
1274
1275 size = (vm_size_t) sz;
1276
1277 /*
1278 * A software use-only map doesn't even need a map.
1279 */
1280
1281 if (size != 0) {
1282 return(PMAP_NULL);
1283 }
1284
1285 p = (pmap_t) zalloc(pmap_zone);
1286 if (PMAP_NULL == p)
1287 panic("pmap_create zalloc");
1288
1289 /* init counts now since we'll be bumping some */
1290 simple_lock_init(&p->lock, 0);
1291 p->stats.resident_count = 0;
1292 p->stats.wired_count = 0;
1293 p->ref_count = 1;
1294 p->nx_enabled = 1;
1295 p->pm_64bit = is_64bit;
1296 p->pm_kernel_cr3 = FALSE;
1297
1298 if (!cpu_64bit) {
1299 /* legacy 32 bit setup */
1300 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
1301 * entry covers 1GB of addr space */
1302 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1303 panic("pmap_create kmem_alloc_wired");
1304 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1305 if ((vm_offset_t)NULL == p->pm_hold) {
1306 panic("pdpt zalloc");
1307 }
1308 pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1309 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
1310 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
1311 panic("pmap_create vm_object_allocate");
1312
1313 memset((char *)p->dirbase, 0, NBPTD);
1314
1315 va = (vm_offset_t)p->dirbase;
1316 p->pdirbase = kvtophys(va);
1317
1318 template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
1319 for (i = 0; i< NPGPTD; i++, pdpt++) {
1320 pmap_paddr_t pa;
1321 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1322 pmap_store_pte(pdpt, pa | template);
1323 }
1324
1325 /* map the high shared pde */
1326 pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
1327
1328 } else {
1329
1330 /* 64 bit setup */
1331
1332 /* alloc the pml4 page in kernel vm */
1333 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
1334 panic("pmap_create kmem_alloc_wired pml4");
1335
1336 memset((char *)p->pm_hold, 0, PAGE_SIZE);
1337 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
1338
1339 inuse_ptepages_count++;
1340 p->stats.resident_count++;
1341 p->stats.wired_count++;
1342
1343 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
1344
1345 if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
1346 panic("pmap_create pdpt obj");
1347
1348 if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
1349 panic("pmap_create pdpt obj");
1350
1351 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
1352 panic("pmap_create pte obj");
1353
1354 /* uber space points to uber mapped kernel */
1355 s = splhigh();
1356 pml4p = pmap64_pml4(p, 0ULL);
1357 pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
1358 if (!is_64bit) {
1359 while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
1360 splx(s);
1361 pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
1362 s = splhigh();
1363 }
1364 pmap_store_pte(pdp, high_shared_pde);
1365 }
1366
1367 splx(s);
1368 }
1369
1370 return(p);
1371 }
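
/*
 * Illustrative sketch (added, not in the original source): the life
 * cycle of a hardware pmap -- a size of zero requests a real, hardware-
 * referenced map, and destruction only frees it once the reference
 * count returns to zero.
 */
#if 0	/* example only */
	pmap_t	p;

	p = pmap_create((vm_map_size_t) 0, FALSE);	/* 32-bit hardware map */
	pmap_reference(p);				/* ref_count 1 -> 2 */
	pmap_destroy(p);				/* ref_count 2 -> 1, still alive */
	pmap_destroy(p);				/* last reference, map freed */
#endif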
1372
1373 void
1374 pmap_set_4GB_pagezero(pmap_t p)
1375 {
1376 int spl;
1377 pdpt_entry_t *user_pdptp;
1378 pdpt_entry_t *kern_pdptp;
1379
1380 assert(p->pm_64bit);
1381
1382 /* Kernel-shared cr3 may be disabled by boot arg. */
1383 if (no_shared_cr3)
1384 return;
1385
1386 /*
1387 * Set the bottom 4 3rd-level pte's to be the kernel's.
1388 */
1389 spl = splhigh();
1390 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
1391 splx(spl);
1392 pmap_expand_pml4(p, 0x0);
1393 spl = splhigh();
1394 }
1395 kern_pdptp = kernel_pmap->pm_pdpt;
1396 pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
1397 pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
1398 pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
1399 pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
1400
1401 p->pm_kernel_cr3 = TRUE;
1402
1403 splx(spl);
1404
1405 }
1406
1407 void
1408 pmap_load_kernel_cr3(void)
1409 {
1410 uint32_t kernel_cr3;
1411
1412 assert(!ml_get_interrupts_enabled());
1413
1414 /*
1415 * Reload cr3 with the true kernel cr3.
1416 * Note: kernel's pml4 resides below 4GB physical.
1417 */
1418 kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
1419 set_cr3(kernel_cr3);
1420 current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
1421 current_cpu_datap()->cpu_task_map = TASK_MAP_32BIT;
1422 current_cpu_datap()->cpu_tlb_invalid = FALSE;
1423 __asm__ volatile("mfence");
1424 }
1425
1426 void
1427 pmap_clear_4GB_pagezero(pmap_t p)
1428 {
1429 int spl;
1430 pdpt_entry_t *user_pdptp;
1431 uint32_t cr3;
1432
1433 if (!p->pm_kernel_cr3)
1434 return;
1435
1436 spl = splhigh();
1437 user_pdptp = pmap64_pdpt(p, 0x0);
1438 pmap_store_pte(user_pdptp+0, 0);
1439 pmap_store_pte(user_pdptp+1, 0);
1440 pmap_store_pte(user_pdptp+2, 0);
1441 pmap_store_pte(user_pdptp+3, 0);
1442
1443 p->pm_kernel_cr3 = FALSE;
1444
1445 pmap_load_kernel_cr3();
1446
1447 splx(spl);
1448 }
1449
1450 /*
1451 * Retire the given physical map from service.
1452 * Should only be called if the map contains
1453 * no valid mappings.
1454 */
1455
1456 void
1457 pmap_destroy(
1458 register pmap_t p)
1459 {
1460 register int c;
1461 spl_t s;
1462 #if 0
1463 register pt_entry_t *pdep;
1464 register vm_page_t m;
1465 #endif
1466
1467 if (p == PMAP_NULL)
1468 return;
1469 SPLVM(s);
1470 simple_lock(&p->lock);
1471 c = --p->ref_count;
1472 if (c == 0) {
1473 /*
1474 * If some cpu is not using the physical pmap pointer that it
1475 * is supposed to be (see set_dirbase), we might be using the
1476 * pmap that is being destroyed! Make sure we are
1477 * physically on the right pmap:
1478 */
1479 PMAP_UPDATE_TLBS(p,
1480 VM_MIN_ADDRESS,
1481 VM_MAX_KERNEL_ADDRESS);
1482
1483 }
1484 simple_unlock(&p->lock);
1485 SPLX(s);
1486
1487 if (c != 0) {
1488 return; /* still in use */
1489 }
1490
1491 /*
1492 * Free the memory maps, then the
1493 * pmap structure.
1494 */
1495
1496 if (!cpu_64bit) {
1497 #if 0
1498 pdep = (pt_entry_t *)p->dirbase;
1499
1500 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
1501 int ind;
1502
1503 if (*pdep & INTEL_PTE_VALID) {
1504 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1505
1506 vm_object_lock(p->pm_obj);
1507 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1508 if (m == VM_PAGE_NULL) {
1509 panic("pmap_destroy: pte page not in object");
1510 }
1511 vm_page_lock_queues();
1512 vm_page_free(m);
1513 inuse_ptepages_count--;
1514
1515 vm_object_unlock(p->pm_obj);
1516 vm_page_unlock_queues();
1517
1518 /*
1519 * Clear pdes, this might be headed for the cache.
1520 */
1521 pmap_store_pte(pdep, 0);
1522 pdep++;
1523 }
1524 else {
1525 pmap_store_pte(pdep, 0);
1526 pdep++;
1527 }
1528
1529 }
1530 #else
1531 inuse_ptepages_count -= p->pm_obj->resident_page_count;
1532 #endif
1533 vm_object_deallocate(p->pm_obj);
1534 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1535 zfree(pdpt_zone, (void *)p->pm_hold);
1536 } else {
1537
1538 /* 64 bit */
1539
1540 pmap_unmap_sharedpage(p);
1541
1542 /* free 64 bit mode structs */
1543 inuse_ptepages_count--;
1544 kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
1545
1546 inuse_ptepages_count -= p->pm_obj_pml4->resident_page_count;
1547 vm_object_deallocate(p->pm_obj_pml4);
1548
1549 inuse_ptepages_count -= p->pm_obj_pdpt->resident_page_count;
1550 vm_object_deallocate(p->pm_obj_pdpt);
1551
1552 inuse_ptepages_count -= p->pm_obj->resident_page_count;
1553 vm_object_deallocate(p->pm_obj);
1554
1555 }
1556
1557 zfree(pmap_zone, p);
1558 }
1559
1560 /*
1561 * Add a reference to the specified pmap.
1562 */
1563
1564 void
1565 pmap_reference(
1566 register pmap_t p)
1567 {
1568 spl_t s;
1569
1570 if (p != PMAP_NULL) {
1571 SPLVM(s);
1572 simple_lock(&p->lock);
1573 p->ref_count++;
1574 simple_unlock(&p->lock);
1575 SPLX(s);
1576 }
1577 }
1578
1579 /*
1580 * Remove a range of hardware page-table entries.
1581 * The entries given are the first (inclusive)
1582 * and last (exclusive) entries for the VM pages.
1583 * The virtual address is the va for the first pte.
1584 *
1585 * The pmap must be locked.
1586 * If the pmap is not the kernel pmap, the range must lie
1587 * entirely within one pte-page. This is NOT checked.
1588 * Assumes that the pte-page exists.
1589 */
1590
1591 static void
1592 pmap_remove_range(
1593 pmap_t pmap,
1594 vm_map_offset_t vaddr,
1595 pt_entry_t *spte,
1596 pt_entry_t *epte)
1597 {
1598 register pt_entry_t *cpte;
1599 int num_removed, num_unwired;
1600 int pai;
1601 pmap_paddr_t pa;
1602
1603 num_removed = 0;
1604 num_unwired = 0;
1605
1606 for (cpte = spte; cpte < epte;
1607 cpte++, vaddr += PAGE_SIZE) {
1608
1609 pa = pte_to_pa(*cpte);
1610 if (pa == 0)
1611 continue;
1612
1613 if (iswired(*cpte))
1614 num_unwired++;
1615
1616 if (!valid_page(i386_btop(pa))) {
1617
1618 /*
1619 * Outside range of managed physical memory.
1620 * Just remove the mappings.
1621 */
1622 register pt_entry_t *lpte = cpte;
1623
1624 pmap_store_pte(lpte, 0);
1625 continue;
1626 }
1627 num_removed++;
1628
1629 pai = pa_index(pa);
1630 LOCK_PVH(pai);
1631
1632 /*
1633 * Get the modify and reference bits.
1634 */
1635 {
1636 register pt_entry_t *lpte;
1637
1638 lpte = cpte;
1639 pmap_phys_attributes[pai] |=
1640 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1641 pmap_store_pte(lpte, 0);
1642
1643 }
1644
1645 /*
1646 * Remove the mapping from the pvlist for
1647 * this physical page.
1648 */
1649 {
1650 register pv_entry_t pv_h, prev, cur;
1651
1652 pv_h = pai_to_pvh(pai);
1653 if (pv_h->pmap == PMAP_NULL) {
1654 panic("pmap_remove: null pv_list!");
1655 }
1656 if (pv_h->va == vaddr && pv_h->pmap == pmap) {
1657 /*
1658 * Header is the pv_entry. Copy the next one
1659 * to header and free the next one (we cannot
1660 * free the header)
1661 */
1662 cur = pv_h->next;
1663 if (cur != PV_ENTRY_NULL) {
1664 *pv_h = *cur;
1665 PV_FREE(cur);
1666 }
1667 else {
1668 pv_h->pmap = PMAP_NULL;
1669 }
1670 }
1671 else {
1672 cur = pv_h;
1673 do {
1674 prev = cur;
1675 if ((cur = prev->next) == PV_ENTRY_NULL) {
1676 panic("pmap-remove: mapping not in pv_list!");
1677 }
1678 } while (cur->va != vaddr || cur->pmap != pmap);
1679 prev->next = cur->next;
1680 PV_FREE(cur);
1681 }
1682 UNLOCK_PVH(pai);
1683 }
1684 }
1685
1686 /*
1687 * Update the counts
1688 */
1689 assert(pmap->stats.resident_count >= num_removed);
1690 pmap->stats.resident_count -= num_removed;
1691 assert(pmap->stats.wired_count >= num_unwired);
1692 pmap->stats.wired_count -= num_unwired;
1693 }
1694
1695 /*
1696 * Remove phys addr if mapped in specified map
1697 *
1698 */
1699 void
1700 pmap_remove_some_phys(
1701 __unused pmap_t map,
1702 __unused ppnum_t pn)
1703 {
1704
1705 /* Implement to support working set code */
1706
1707 }
1708
1709 /*
1710 * Remove the given range of addresses
1711 * from the specified map.
1712 *
1713 * It is assumed that the start and end are properly
1714 * rounded to the hardware page size.
1715 */
1716
1717
1718 void
1719 pmap_remove(
1720 pmap_t map,
1721 addr64_t s64,
1722 addr64_t e64)
1723 {
1724 spl_t spl;
1725 register pt_entry_t *pde;
1726 register pt_entry_t *spte, *epte;
1727 addr64_t l64;
1728 addr64_t orig_s64;
1729
1730 if (map == PMAP_NULL || s64 == e64)
1731 return;
1732
1733 PMAP_READ_LOCK(map, spl);
1734
1735 orig_s64 = s64;
1736
1737 while (s64 < e64) {
1738 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
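/* e.g. with pde_mapped_size = 2MB, s64 = 0x3ff000 gives l64 = 0x400000,
 * the end of the pde-mapped chunk containing s64 (added note) */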
1739 if (l64 > e64)
1740 l64 = e64;
1741 pde = pmap_pde(map, s64);
1742 if (pde && (*pde & INTEL_PTE_VALID)) {
1743 spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
1744 spte = &spte[ptenum(s64)];
1745 epte = &spte[intel_btop(l64-s64)];
1746 pmap_remove_range(map, s64, spte, epte);
1747 }
1748 s64 = l64;
1749 pde++;
1750 }
1751 PMAP_UPDATE_TLBS(map, orig_s64, e64);
1752
1753 PMAP_READ_UNLOCK(map, spl);
1754 }
1755
1756 /*
1757 * Routine: pmap_page_protect
1758 *
1759 * Function:
1760 * Lower the permission for all mappings to a given
1761 * page.
1762 */
1763 void
1764 pmap_page_protect(
1765 ppnum_t pn,
1766 vm_prot_t prot)
1767 {
1768 pv_entry_t pv_h, prev;
1769 register pv_entry_t pv_e;
1770 register pt_entry_t *pte;
1771 int pai;
1772 register pmap_t pmap;
1773 spl_t spl;
1774 boolean_t remove;
1775 pmap_paddr_t phys;
1776
1777 assert(pn != vm_page_fictitious_addr);
1778
1779 if (!valid_page(pn)) {
1780 /*
1781 * Not a managed page.
1782 */
1783 return;
1784 }
1785
1786 /*
1787 * Determine the new protection.
1788 */
1789 switch (prot) {
1790 case VM_PROT_READ:
1791 case VM_PROT_READ|VM_PROT_EXECUTE:
1792 remove = FALSE;
1793 break;
1794 case VM_PROT_ALL:
1795 return; /* nothing to do */
1796 default:
1797 remove = TRUE;
1798 break;
1799 }
1800 phys = (pmap_paddr_t)i386_ptob(pn);
1801 pai = pa_index(phys);
1802 pv_h = pai_to_pvh(pai);
1803
1804
1805 /*
1806 * Lock the pmap system first, since we will be changing
1807 * several pmaps.
1808 */
1809 PMAP_WRITE_LOCK(spl);
1810
1811 /*
1812 * Walk down PV list, changing or removing all mappings.
1813 * We do not have to lock the pv_list because we have
1814 * the entire pmap system locked.
1815 */
1816 if (pv_h->pmap != PMAP_NULL) {
1817
1818 prev = pv_e = pv_h;
1819
1820 do {
1821 register vm_map_offset_t vaddr;
1822
1823 pmap = pv_e->pmap;
1824 /*
1825 * Lock the pmap to block pmap_extract and similar routines.
1826 */
1827 simple_lock(&pmap->lock);
1828
1829 vaddr = pv_e->va;
1830 pte = pmap_pte(pmap, vaddr);
1831 if(0 == pte) {
1832 kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr);
1833 panic("pmap_page_protect");
1834 }
1835 /*
1836 * Consistency checks.
1837 */
1838 /* assert(*pte & INTEL_PTE_VALID); XXX */
1839 /* assert(pte_to_phys(*pte) == phys); */
1840
1841
1842 /*
1843 * Remove the mapping if new protection is NONE
1844 * or if write-protecting a kernel mapping.
1845 */
1846 if (remove || pmap == kernel_pmap) {
1847 /*
1848 * Remove the mapping, collecting any modify bits.
1849 */
1850 pmap_store_pte(pte, *pte & ~INTEL_PTE_VALID);
1851
1852 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1853
1854 pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1855
1856 pmap_store_pte(pte, 0);
1857
1858
1859 //XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1);
1860 pmap->stats.resident_count--;
1861
1862 /*
1863 * Remove the pv_entry.
1864 */
1865 if (pv_e == pv_h) {
1866 /*
1867 * Fix up head later.
1868 */
1869 pv_h->pmap = PMAP_NULL;
1870 }
1871 else {
1872 /*
1873 * Delete this entry.
1874 */
1875 prev->next = pv_e->next;
1876 PV_FREE(pv_e);
1877 }
1878 } else {
1879 /*
1880 * Write-protect.
1881 */
1882 pmap_store_pte(pte, *pte & ~INTEL_PTE_WRITE);
1883
1884 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1885 /*
1886 * Advance prev.
1887 */
1888 prev = pv_e;
1889 }
1890
1891 simple_unlock(&pmap->lock);
1892
1893 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1894
1895 /*
1896 * If pv_head mapping was removed, fix it up.
1897 */
1898 if (pv_h->pmap == PMAP_NULL) {
1899 pv_e = pv_h->next;
1900
1901 if (pv_e != PV_ENTRY_NULL) {
1902 *pv_h = *pv_e;
1903 PV_FREE(pv_e);
1904 }
1905 }
1906 }
1907 PMAP_WRITE_UNLOCK(spl);
1908 }
1909
1910 /*
1911 * Routine:
1912 * pmap_disconnect
1913 *
1914 * Function:
1915 * Disconnect all mappings for this page and return reference and change status
1916 * in generic format.
1917 *
1918 */
1919 unsigned int pmap_disconnect(
1920 ppnum_t pa)
1921 {
1922 pmap_page_protect(pa, 0); /* disconnect the page */
1923 return (pmap_get_refmod(pa)); /* return ref/chg status */
1924 }
1925
1926 /*
1927 * Set the physical protection on the
1928 * specified range of this map as requested.
1929 * Will not increase permissions.
1930 */
1931 void
1932 pmap_protect(
1933 pmap_t map,
1934 vm_map_offset_t sva,
1935 vm_map_offset_t eva,
1936 vm_prot_t prot)
1937 {
1938 register pt_entry_t *pde;
1939 register pt_entry_t *spte, *epte;
1940 vm_map_offset_t lva;
1941 vm_map_offset_t orig_sva;
1942 spl_t spl;
1943 boolean_t set_NX;
1944
1945 if (map == PMAP_NULL)
1946 return;
1947
1948 if (prot == VM_PROT_NONE) {
1949 pmap_remove(map, sva, eva);
1950 return;
1951 }
1952
1953 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
1954 set_NX = FALSE;
1955 else
1956 set_NX = TRUE;
1957
1958 SPLVM(spl);
1959 simple_lock(&map->lock);
1960
1961 orig_sva = sva;
1962 while (sva < eva) {
1963 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
1964 if (lva > eva)
1965 lva = eva;
1966 pde = pmap_pde(map, sva);
1967 if (pde && (*pde & INTEL_PTE_VALID)) {
1968 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
1969 spte = &spte[ptenum(sva)];
1970 epte = &spte[intel_btop(lva-sva)];
1971
1972 while (spte < epte) {
1973 if (*spte & INTEL_PTE_VALID) {
1974
1975 if (prot & VM_PROT_WRITE)
1976 pmap_store_pte(spte, *spte | INTEL_PTE_WRITE);
1977 else
1978 pmap_store_pte(spte, *spte & ~INTEL_PTE_WRITE);
1979
1980 if (set_NX == TRUE)
1981 pmap_store_pte(spte, *spte | INTEL_PTE_NX);
1982 else
1983 pmap_store_pte(spte, *spte & ~INTEL_PTE_NX);
1984
1985 }
1986 spte++;
1987 }
1988 }
1989 sva = lva;
1990 pde++;
1991 }
1992 PMAP_UPDATE_TLBS(map, orig_sva, eva);
1993
1994 simple_unlock(&map->lock);
1995 SPLX(spl);
1996 }
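/*
 * Usage sketch (illustrative; map, start and end are assumed caller
 * values): downgrade an existing range to read-only, or remove it
 * entirely by asking for VM_PROT_NONE, which is routed to pmap_remove().
 *
 *	pmap_protect(map, start, end, VM_PROT_READ);	// clear write; set NX if enabled
 *	pmap_protect(map, start, end, VM_PROT_NONE);	// same as pmap_remove(map, start, end)
 */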
1997
1998 /* Map a (possibly) autogenned block */
1999 void
2000 pmap_map_block(
2001 pmap_t pmap,
2002 addr64_t va,
2003 ppnum_t pa,
2004 uint32_t size,
2005 vm_prot_t prot,
2006 int attr,
2007 __unused unsigned int flags)
2008 {
2009 uint32_t page;
2010
2011 for (page = 0; page < size; page++) {
2012 pmap_enter(pmap, va, pa, prot, attr, TRUE);
2013 va += PAGE_SIZE;
2014 pa++;
2015 }
2016 }
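/*
 * Usage sketch (illustrative; pmap, va, first_pn and npages are assumed
 * caller values): map npages physically contiguous pages starting at
 * page number first_pn, read/write with default caching.  Each page is
 * entered wired, since pmap_map_block() passes TRUE to pmap_enter().
 *
 *	pmap_map_block(pmap, va, first_pn, npages,
 *		       VM_PROT_READ | VM_PROT_WRITE,
 *		       VM_WIMG_USE_DEFAULT, 0);
 */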
2017
2018
2019 /*
2020 * Insert the given physical page (p) at
2021 * the specified virtual address (v) in the
2022 * target physical map with the protection requested.
2023 *
2024 * If specified, the page will be wired down, meaning
2025 * that the related pte cannot be reclaimed.
2026 *
2027 * NB: This is the only routine which MAY NOT lazy-evaluate
2028 * or lose information. That is, this routine must actually
2029 * insert this page into the given map NOW.
2030 */
2031 void
2032 pmap_enter(
2033 register pmap_t pmap,
2034 vm_map_offset_t vaddr,
2035 ppnum_t pn,
2036 vm_prot_t prot,
2037 unsigned int flags,
2038 boolean_t wired)
2039 {
2040 register pt_entry_t *pte;
2041 register pv_entry_t pv_h;
2042 register int pai;
2043 pv_entry_t pv_e;
2044 pt_entry_t template;
2045 spl_t spl;
2046 pmap_paddr_t old_pa;
2047 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
2048 boolean_t need_tlbflush = FALSE;
2049 boolean_t set_NX;
2050
2051 XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n",
2052 current_thread(),
2053 current_thread(),
2054 pmap, vaddr, pn);
2055
2056 assert(pn != vm_page_fictitious_addr);
2057 if (pmap_debug)
2058 printf("pmap(%qx, %x)\n", vaddr, pn);
2059 if (pmap == PMAP_NULL)
2060 return;
2061
2062 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
2063 set_NX = FALSE;
2064 else
2065 set_NX = TRUE;
2066
2067 /*
2068 * Must allocate a new pvlist entry while we're unlocked;
2069 * zalloc may cause pageout (which will lock the pmap system).
2070 * If we determine we need a pvlist entry, we will unlock
2071 * and allocate one. Then we will retry, throwing away

2072 * the allocated entry later (if we no longer need it).
2073 */
2074 pv_e = PV_ENTRY_NULL;
2075
2076 PMAP_READ_LOCK(pmap, spl);
2077
2078 /*
2079 * Expand pmap to include this pte. Assume that
2080 * pmap is always expanded to include enough hardware
2081 * pages to map one VM page.
2082 */
2083
2084 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
2085 /*
2086 * Must unlock to expand the pmap.
2087 */
2088 PMAP_READ_UNLOCK(pmap, spl);
2089
2090 pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
2091
2092 PMAP_READ_LOCK(pmap, spl);
2093 }
2094 /*
2095 * Special case if the physical page is already mapped
2096 * at this address.
2097 */
2098 old_pa = pte_to_pa(*pte);
2099 if (old_pa == pa) {
2100 /*
2101 * May be changing its wired attribute or protection
2102 */
2103
2104 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2105
2106 if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
2107 if(!(flags & VM_MEM_GUARDED))
2108 template |= INTEL_PTE_PTA;
2109 template |= INTEL_PTE_NCACHE;
2110 }
2111
2112 if (pmap != kernel_pmap)
2113 template |= INTEL_PTE_USER;
2114 if (prot & VM_PROT_WRITE)
2115 template |= INTEL_PTE_WRITE;
2116
2117 if (set_NX == TRUE)
2118 template |= INTEL_PTE_NX;
2119
2120 if (wired) {
2121 template |= INTEL_PTE_WIRED;
2122 if (!iswired(*pte))
2123 pmap->stats.wired_count++;
2124 }
2125 else {
2126 if (iswired(*pte)) {
2127 assert(pmap->stats.wired_count >= 1);
2128 pmap->stats.wired_count--;
2129 }
2130 }
2131
2132 if (*pte & INTEL_PTE_MOD)
2133 template |= INTEL_PTE_MOD;
2134
2135 pmap_store_pte(pte, template);
2136 pte++;
2137
2138 need_tlbflush = TRUE;
2139 goto Done;
2140 }
2141
2142 /*
2143 * Outline of code from here:
2144 * 1) If va was mapped, update TLBs, remove the mapping
2145 * and remove old pvlist entry.
2146 * 2) Add pvlist entry for new mapping
2147 * 3) Enter new mapping.
2148 *
2149 * SHARING FAULTS IS HORRIBLY BROKEN
2150 * SHARING_FAULTS complicates this slightly in that it cannot
2151 * replace the mapping, but must remove it (because adding the
2152 * pvlist entry for the new mapping may remove others), and
2153 * hence always enters the new mapping at step 3)
2154 *
2155 * If the old physical page is not managed step 1) is skipped
2156 * (except for updating the TLBs), and the mapping is
2157 * overwritten at step 3). If the new physical page is not
2158 * managed, step 2) is skipped.
2159 */
2160
2161 if (old_pa != (pmap_paddr_t) 0) {
2162
2163 /*
2164 * Don't do anything to pages outside valid memory here.
2165 * Instead convince the code that enters a new mapping
2166 * to overwrite the old one.
2167 */
2168
2169 if (valid_page(i386_btop(old_pa))) {
2170
2171 pai = pa_index(old_pa);
2172 LOCK_PVH(pai);
2173
2174 assert(pmap->stats.resident_count >= 1);
2175 pmap->stats.resident_count--;
2176 if (iswired(*pte)) {
2177 assert(pmap->stats.wired_count >= 1);
2178 pmap->stats.wired_count--;
2179 }
2180
2181 pmap_phys_attributes[pai] |=
2182 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
2183
2184 pmap_store_pte(pte, 0);
2185 /*
2186 * Remove the mapping from the pvlist for
2187 * this physical page.
2188 */
2189 {
2190 register pv_entry_t prev, cur;
2191
2192 pv_h = pai_to_pvh(pai);
2193 if (pv_h->pmap == PMAP_NULL) {
2194 panic("pmap_enter: null pv_list!");
2195 }
2196 if (pv_h->va == vaddr && pv_h->pmap == pmap) {
2197 /*
2198 * Header is the pv_entry. Copy the next one
2199 * to header and free the next one (we cannot
2200 * free the header)
2201 */
2202 cur = pv_h->next;
2203 if (cur != PV_ENTRY_NULL) {
2204 *pv_h = *cur;
2205 pv_e = cur;
2206 }
2207 else {
2208 pv_h->pmap = PMAP_NULL;
2209 }
2210 }
2211 else {
2212 cur = pv_h;
2213 do {
2214 prev = cur;
2215 if ((cur = prev->next) == PV_ENTRY_NULL) {
2216 panic("pmap_enter: mapping not in pv_list!");
2217 }
2218 } while (cur->va != vaddr || cur->pmap != pmap);
2219 prev->next = cur->next;
2220 pv_e = cur;
2221 }
2222 }
2223 UNLOCK_PVH(pai);
2224 }
2225 else {
2226
2227 /*
2228 * old_pa is not managed. Pretend it's zero so code
2229 * at Step 3) will enter new mapping (overwriting old
2230 * one). Do removal part of accounting.
2231 */
2232 old_pa = (pmap_paddr_t) 0;
2233
2234 if (iswired(*pte)) {
2235 assert(pmap->stats.wired_count >= 1);
2236 pmap->stats.wired_count--;
2237 }
2238 }
2239 need_tlbflush = TRUE;
2240
2241 }
2242
2243 if (valid_page(i386_btop(pa))) {
2244
2245 /*
2246 * Step 2) Enter the mapping in the PV list for this
2247 * physical page.
2248 */
2249
2250 pai = pa_index(pa);
2251
2252
2253 #if SHARING_FAULTS /* this is horribly broken, do not enable */
2254 RetryPvList:
2255 /*
2256 * We can return here from the sharing fault code below
2257 * in case we removed the only entry on the pv list and thus
2258 * must enter the new one in the list header.
2259 */
2260 #endif /* SHARING_FAULTS */
2261 LOCK_PVH(pai);
2262 pv_h = pai_to_pvh(pai);
2263
2264 if (pv_h->pmap == PMAP_NULL) {
2265 /*
2266 * No mappings yet
2267 */
2268 pv_h->va = vaddr;
2269 pv_h->pmap = pmap;
2270 pv_h->next = PV_ENTRY_NULL;
2271 }
2272 else {
2273 #if DEBUG
2274 {
2275 /*
2276 * check that this mapping is not already there
2277 * or there is no alias for this mapping in the same map
2278 */
2279 pv_entry_t e = pv_h;
2280 while (e != PV_ENTRY_NULL) {
2281 if (e->pmap == pmap && e->va == vaddr)
2282 panic("pmap_enter: already in pv_list");
2283 e = e->next;
2284 }
2285 }
2286 #endif /* DEBUG */
2287 #if SHARING_FAULTS /* broken, do not enable */
2288 {
2289 /*
2290 * do sharing faults.
2291 * if we find an entry on this pv list in the same address
2292 * space, remove it. we know there will not be more
2293 * than one.
2294 */
2295 pv_entry_t e = pv_h;
2296 pt_entry_t *opte;
2297
2298 while (e != PV_ENTRY_NULL) {
2299 if (e->pmap == pmap) {
2300 /*
2301 * Remove it, drop pv list lock first.
2302 */
2303 UNLOCK_PVH(pai);
2304
2305 opte = pmap_pte(pmap, e->va);
2306 assert(opte != PT_ENTRY_NULL);
2307 /*
2308 * Invalidate the translation buffer,
2309 * then remove the mapping.
2310 */
2311 pmap_remove_range(pmap, e->va, opte,
2312 opte + 1);
2313
2314 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
2315
2316 /*
2317 * We could have removed the head entry, in which
2318 * case there may be no other entries and the new
2319 * mapping must go into the pv head entry itself.
2320 * So go back to the top and try this entry
2321 * again.
2322 */
2323 goto RetryPvList;
2324 }
2325 e = e->next;
2326 }
2327
2328 /*
2329 * check that this mapping is not already there
2330 */
2331 e = pv_h;
2332 while (e != PV_ENTRY_NULL) {
2333 if (e->pmap == pmap)
2334 panic("pmap_enter: alias in pv_list");
2335 e = e->next;
2336 }
2337 }
2338 #endif /* SHARING_FAULTS */
2339 #if DEBUG_ALIAS
2340 {
2341 /*
2342 * check for aliases within the same address space.
2343 */
2344 pv_entry_t e = pv_h;
2345 vm_offset_t rpc = get_rpc();
2346
2347 while (e != PV_ENTRY_NULL) {
2348 if (e->pmap == pmap) {
2349 /*
2350 * log this entry in the alias ring buffer
2351 * if it's not there already.
2352 */
2353 struct pmap_alias *pma;
2354 int ii, logit;
2355
2356 logit = TRUE;
2357 for (ii = 0; ii < pmap_alias_index; ii++) {
2358 if (pmap_aliasbuf[ii].rpc == rpc) {
2359 /* found it in the log already */
2360 logit = FALSE;
2361 break;
2362 }
2363 }
2364 if (logit) {
2365 pma = &pmap_aliasbuf[pmap_alias_index];
2366 pma->pmap = pmap;
2367 pma->va = vaddr;
2368 pma->rpc = rpc;
2369 pma->cookie = PMAP_ALIAS_COOKIE;
2370 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
2371 panic("pmap_enter: exhausted alias log");
2372 }
2373 }
2374 e = e->next;
2375 }
2376 }
2377 #endif /* DEBUG_ALIAS */
2378 /*
2379 * Add new pv_entry after header.
2380 */
2381 if (pv_e == PV_ENTRY_NULL) {
2382 PV_ALLOC(pv_e);
2383 if (pv_e == PV_ENTRY_NULL) {
2384 panic("pmap no pv_e's");
2385 }
2386 }
2387 pv_e->va = vaddr;
2388 pv_e->pmap = pmap;
2389 pv_e->next = pv_h->next;
2390 pv_h->next = pv_e;
2391 /*
2392 * Remember that we used the pvlist entry.
2393 */
2394 pv_e = PV_ENTRY_NULL;
2395 }
2396 UNLOCK_PVH(pai);
2397
2398 /*
2399 * only count the mapping
2400 * for 'managed memory'
2401 */
2402 pmap->stats.resident_count++;
2403 }
2404
2405 /*
2406 * Step 3) Enter the mapping.
2407 */
2408
2409
2410 /*
2411 * Build a template to speed up entering -
2412 * only the pfn changes.
2413 */
2414 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2415
2416 if(flags & VM_MEM_NOT_CACHEABLE) {
2417 if(!(flags & VM_MEM_GUARDED))
2418 template |= INTEL_PTE_PTA;
2419 template |= INTEL_PTE_NCACHE;
2420 }
2421
2422 if (pmap != kernel_pmap)
2423 template |= INTEL_PTE_USER;
2424 if (prot & VM_PROT_WRITE)
2425 template |= INTEL_PTE_WRITE;
2426
2427 if (set_NX == TRUE)
2428 template |= INTEL_PTE_NX;
2429
2430 if (wired) {
2431 template |= INTEL_PTE_WIRED;
2432 pmap->stats.wired_count++;
2433 }
2434 pmap_store_pte(pte, template);
2435
2436 Done:
2437 if (need_tlbflush == TRUE)
2438 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
2439
2440 if (pv_e != PV_ENTRY_NULL) {
2441 PV_FREE(pv_e);
2442 }
2443
2444 PMAP_READ_UNLOCK(pmap, spl);
2445 }
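/*
 * Usage sketch (illustrative; pmap, vaddr and pn are assumed caller
 * values): enter one wired, uncached, read/write mapping.  The flags
 * argument carries the cache attributes tested above
 * (VM_MEM_NOT_CACHEABLE, VM_MEM_GUARDED, VM_WIMG_USE_DEFAULT).
 *
 *	pmap_enter(pmap, vaddr, pn,
 *		   VM_PROT_READ | VM_PROT_WRITE,
 *		   VM_MEM_NOT_CACHEABLE,
 *		   TRUE);			// wired
 */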
2446
2447 /*
2448 * Routine: pmap_change_wiring
2449 * Function: Change the wiring attribute for a map/virtual-address
2450 * pair.
2451 * In/out conditions:
2452 * The mapping must already exist in the pmap.
2453 */
2454 void
2455 pmap_change_wiring(
2456 register pmap_t map,
2457 vm_map_offset_t vaddr,
2458 boolean_t wired)
2459 {
2460 register pt_entry_t *pte;
2461 spl_t spl;
2462
2463 #if 1
2464 /*
2465 * We must grab the pmap system lock because we may
2466 * change a pte_page queue.
2467 */
2468 PMAP_READ_LOCK(map, spl);
2469
2470 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
2471 panic("pmap_change_wiring: pte missing");
2472
2473 if (wired && !iswired(*pte)) {
2474 /*
2475 * wiring down mapping
2476 */
2477 map->stats.wired_count++;
2478 pmap_store_pte(pte, *pte | INTEL_PTE_WIRED);
2479 pte++;
2480 }
2481 else if (!wired && iswired(*pte)) {
2482 /*
2483 * unwiring mapping
2484 */
2485 assert(map->stats.wired_count >= 1);
2486 map->stats.wired_count--;
2487 pmap_store_pte(pte, *pte & ~INTEL_PTE_WIRED);
2488 pte++;
2489 }
2490
2491 PMAP_READ_UNLOCK(map, spl);
2492
2493 #else
2494 return;
2495 #endif
2496
2497 }
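/*
 * Usage sketch (illustrative; map and vaddr are assumed caller values,
 * and the mapping must already exist): temporarily wire a page so it
 * cannot be reclaimed, then unwire it again.
 *
 *	pmap_change_wiring(map, vaddr, TRUE);	// wire: bumps stats.wired_count
 *	pmap_change_wiring(map, vaddr, FALSE);	// unwire: drops stats.wired_count
 */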
2498
2499 ppnum_t
2500 pmap_find_phys(pmap_t pmap, addr64_t va)
2501 {
2502 pt_entry_t *ptp;
2503 ppnum_t ppn;
2504
2505 mp_disable_preemption();
2506
2507 ptp = pmap_pte(pmap, va);
2508 if (PT_ENTRY_NULL == ptp) {
2509 ppn = 0;
2510 } else {
2511 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2512 }
2513 mp_enable_preemption();
2514
2515 return ppn;
2516 }
2517
2518 /*
2519 * Routine: pmap_extract
2520 * Function:
2521 * Extract the physical page address associated
2522 * with the given map/virtual_address pair.
2523 * Changed to a shim for backwards compatibility; it will not
2524 * work on 64-bit systems. Some old drivers that we cannot
2525 * change still need it.
2526 */
2527
2528 vm_offset_t
2529 pmap_extract(
2530 register pmap_t pmap,
2531 vm_map_offset_t vaddr)
2532 {
2533 ppnum_t ppn;
2534 vm_offset_t paddr;
2535
2536 paddr = (vm_offset_t)0;
2537 ppn = pmap_find_phys(pmap, vaddr);
2538 if (ppn) {
2539 paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK);
2540 }
2541 return (paddr);
2542 }
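/*
 * Usage sketch (illustrative; pmap and vaddr are assumed caller
 * values): pmap_find_phys() returns a physical page number (0 if the
 * address is unmapped), while the legacy pmap_extract() shim folds the
 * page offset back in and truncates to vm_offset_t, so it is only safe
 * for physical addresses below 4GB.
 *
 *	ppnum_t     pn    = pmap_find_phys(pmap, vaddr);
 *	vm_offset_t paddr = 0;
 *	if (pn != 0)
 *		paddr = (vm_offset_t)i386_ptob(pn) | (vaddr & INTEL_OFFMASK);
 */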
2543
2544 void
2545 pmap_expand_pml4(
2546 pmap_t map,
2547 vm_map_offset_t vaddr)
2548 {
2549 register vm_page_t m;
2550 register pmap_paddr_t pa;
2551 uint64_t i;
2552 spl_t spl;
2553 ppnum_t pn;
2554 pml4_entry_t *pml4p;
2555
2556 if (kernel_pmap == map) panic("expand kernel pml4");
2557
2558 spl = splhigh();
2559 pml4p = pmap64_pml4(map, vaddr);
2560 splx(spl);
2561 if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
2562
2563 /*
2564 * Allocate a VM page for the pml4 page
2565 */
2566 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2567 VM_PAGE_WAIT();
2568
2569 /*
2570 * put the page into the pmap's obj list so it
2571 * can be found later.
2572 */
2573 pn = m->phys_page;
2574 pa = i386_ptob(pn);
2575 i = pml4idx(map, vaddr);
2576
2577 vm_object_lock(map->pm_obj_pml4);
2578 #if 0 /* DEBUG */
2579 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
2580 kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n",
2581 map, map->pm_obj_pml4, vaddr, i);
2582 }
2583 #endif
2584 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
2585
2586 vm_page_lock_queues();
2587 vm_page_wire(m);
2588
2589 vm_page_unlock_queues();
2590 vm_object_unlock(map->pm_obj_pml4);
2591 inuse_ptepages_count++;
2592 map->stats.resident_count++;
2593 map->stats.wired_count++;
2594
2595 /*
2596 * Zero the page.
2597 */
2598 pmap_zero_page(pn);
2599
2600 PMAP_READ_LOCK(map, spl);
2601 /*
2602 * See if someone else expanded us first
2603 */
2604 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
2605 PMAP_READ_UNLOCK(map, spl);
2606 vm_object_lock(map->pm_obj_pml4);
2607 vm_page_lock_queues();
2608 vm_page_free(m);
2609 inuse_ptepages_count--;
2610 map->stats.resident_count--;
2611 map->stats.wired_count--;
2612
2613 vm_page_unlock_queues();
2614 vm_object_unlock(map->pm_obj_pml4);
2615 return;
2616 }
2617
2618 /*
2619 * Set the pml4 entry to point at the newly
2620 * allocated pdpt page (only one hardware page
2621 * is allocated at this level).
2622 */
2623
2624 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
2625
2626 pmap_store_pte(pml4p, pa_to_pte(pa)
2627 | INTEL_PTE_VALID
2628 | INTEL_PTE_USER
2629 | INTEL_PTE_WRITE);
2630
2631 PMAP_READ_UNLOCK(map, spl);
2632
2633 return;
2634
2635 }
2636
2637 void
2638 pmap_expand_pdpt(
2639 pmap_t map,
2640 vm_map_offset_t vaddr)
2641 {
2642 register vm_page_t m;
2643 register pmap_paddr_t pa;
2644 uint64_t i;
2645 spl_t spl;
2646 ppnum_t pn;
2647 pdpt_entry_t *pdptp;
2648
2649 if (kernel_pmap == map) panic("expand kernel pdpt");
2650
2651 spl = splhigh();
2652 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
2653 splx(spl);
2654 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
2655 spl = splhigh();
2656 }
2657 splx(spl);
2658
2659
2660 /*
2661 * Allocate a VM page for the pdpt page
2662 */
2663 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2664 VM_PAGE_WAIT();
2665
2666 /*
2667 * put the page into the pmap's obj list so it
2668 * can be found later.
2669 */
2670 pn = m->phys_page;
2671 pa = i386_ptob(pn);
2672 i = pdptidx(map, vaddr);
2673
2674 vm_object_lock(map->pm_obj_pdpt);
2675 #if 0 /* DEBUG */
2676 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
2677 kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n",
2678 map, map->pm_obj_pdpt, vaddr, i);
2679 }
2680 #endif
2681 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
2682
2683 vm_page_lock_queues();
2684 vm_page_wire(m);
2685
2686 vm_page_unlock_queues();
2687 vm_object_unlock(map->pm_obj_pdpt);
2688 inuse_ptepages_count++;
2689 map->stats.resident_count++;
2690 map->stats.wired_count++;
2691
2692 /*
2693 * Zero the page.
2694 */
2695 pmap_zero_page(pn);
2696
2697 PMAP_READ_LOCK(map, spl);
2698 /*
2699 * See if someone else expanded us first
2700 */
2701 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
2702 PMAP_READ_UNLOCK(map, spl);
2703 vm_object_lock(map->pm_obj_pdpt);
2704 vm_page_lock_queues();
2705 vm_page_free(m);
2706 inuse_ptepages_count--;
2707 map->stats.resident_count--;
2708 map->stats.wired_count--;
2709
2710 vm_page_unlock_queues();
2711 vm_object_unlock(map->pm_obj_pdpt);
2712 return;
2713 }
2714
2715 /*
2716 * Set the pdpt entry to point at the newly
2717 * allocated page directory page (only one
2718 * hardware page is allocated at this level).
2719 */
2720
2721 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
2722
2723 pmap_store_pte(pdptp, pa_to_pte(pa)
2724 | INTEL_PTE_VALID
2725 | INTEL_PTE_USER
2726 | INTEL_PTE_WRITE);
2727
2728 PMAP_READ_UNLOCK(map, spl);
2729
2730 return;
2731
2732 }
2733
2734
2735
2736 /*
2737 * Routine: pmap_expand
2738 *
2739 * Expands a pmap to be able to map the specified virtual address.
2740 *
2741 * Allocates and zeroes a new page table page for the region
2742 * containing the given address, links it into the pmap's
2743 * page-table object, and installs it in the page directory.
2744 *
2745 * Must be called with the pmap system and the pmap unlocked,
2746 * since these must be unlocked to use vm_allocate or vm_deallocate.
2747 * Thus it must be called in a loop that checks whether the map
2748 * has been expanded enough.
2749 * (We won't loop forever, since page tables aren't shrunk.)
2750 */
2751 void
2752 pmap_expand(
2753 pmap_t map,
2754 vm_map_offset_t vaddr)
2755 {
2756 pt_entry_t *pdp;
2757 register vm_page_t m;
2758 register pmap_paddr_t pa;
2759 uint64_t i;
2760 spl_t spl;
2761 ppnum_t pn;
2762
2763 /*
2764 * If this is not the kernel map (while we are still in compat
2765 * kernel mode) and the cpu is 64-bit, propagate the expansion upward.
2766 */
2767
2768 if (cpu_64bit && (map != kernel_pmap)) {
2769 spl = splhigh();
2770 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
2771 splx(spl);
2772 pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
2773 spl = splhigh();
2774 }
2775 splx(spl);
2776 } else {
2777 pdp = pmap_pde(map, vaddr);
2778 }
2779
2780
2781 /*
2782 * Allocate a VM page for the pde entries.
2783 */
2784 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2785 VM_PAGE_WAIT();
2786
2787 /*
2788 * put the page into the pmap's obj list so it
2789 * can be found later.
2790 */
2791 pn = m->phys_page;
2792 pa = i386_ptob(pn);
2793 i = pdeidx(map, vaddr);
2794
2795 vm_object_lock(map->pm_obj);
2796 #if 0 /* DEBUG */
2797 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
2798 kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
2799 map, map->pm_obj, vaddr, i);
2800 }
2801 #endif
2802 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2803
2804 vm_page_lock_queues();
2805 vm_page_wire(m);
2806 inuse_ptepages_count++;
2807
2808 vm_page_unlock_queues();
2809 vm_object_unlock(map->pm_obj);
2810
2811 /*
2812 * Zero the page.
2813 */
2814 pmap_zero_page(pn);
2815
2816 PMAP_READ_LOCK(map, spl);
2817 /*
2818 * See if someone else expanded us first
2819 */
2820 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
2821 PMAP_READ_UNLOCK(map, spl);
2822 vm_object_lock(map->pm_obj);
2823
2824 vm_page_lock_queues();
2825 vm_page_free(m);
2826 inuse_ptepages_count--;
2827
2828 vm_page_unlock_queues();
2829 vm_object_unlock(map->pm_obj);
2830 return;
2831 }
2832
2833 pdp = pmap_pde(map, vaddr); /* refetch while locked */
2834
2835 /*
2836 * Set the page directory entry for this page table.
2837 * If we have allocated more than one hardware page,
2838 * set several page directory entries.
2839 */
2840
2841 pmap_store_pte(pdp, pa_to_pte(pa)
2842 | INTEL_PTE_VALID
2843 | INTEL_PTE_USER
2844 | INTEL_PTE_WRITE);
2845
2846
2847 PMAP_READ_UNLOCK(map, spl);
2848
2849 return;
2850 }
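/*
 * Usage sketch (illustrative): the contract described above -- callers
 * expand in a loop, re-checking under the lock, because another thread
 * may expand the same range first (pmap_enter() does exactly this).
 * map, vaddr and spl are assumed caller state.
 *
 *	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL) {
 *		PMAP_READ_UNLOCK(map, spl);
 *		pmap_expand(map, vaddr);
 *		PMAP_READ_LOCK(map, spl);
 *	}
 */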
2851
2852
2853 /*
2854 * pmap_sync_page_data_phys(ppnum_t pa)
2855 *
2856 * Invalidates all of the instruction cache on a physical page and
2857 * pushes any dirty data from the data cache for the same physical page.
2858 * Not required on i386.
2859 */
2860 void
2861 pmap_sync_page_data_phys(__unused ppnum_t pa)
2862 {
2863 return;
2864 }
2865
2866 /*
2867 * pmap_sync_page_attributes_phys(ppnum_t pa)
2868 *
2869 * Write back and invalidate all cachelines on a physical page.
2870 */
2871 void
2872 pmap_sync_page_attributes_phys(ppnum_t pa)
2873 {
2874 cache_flush_page_phys(pa);
2875 }
2876
2877 int collect_ref;
2878 int collect_unref;
2879
2880 /*
2881 * Routine: pmap_collect
2882 * Function:
2883 * Garbage collects the physical map system for
2884 * pages which are no longer used.
2885 * Success need not be guaranteed -- that is, there
2886 * may well be pages which are not referenced, but
2887 * others may be collected.
2888 * Usage:
2889 * Called by the pageout daemon when pages are scarce.
2890 */
2891 void
2892 pmap_collect(
2893 pmap_t p)
2894 {
2895 register pt_entry_t *pdp, *ptp;
2896 pt_entry_t *eptp;
2897 int wired;
2898 spl_t spl;
2899
2900 if (p == PMAP_NULL)
2901 return;
2902
2903 if (p == kernel_pmap)
2904 return;
2905
2906 /*
2907 * Garbage collect map.
2908 */
2909 PMAP_READ_LOCK(p, spl);
2910
2911 for (pdp = (pt_entry_t *)p->dirbase;
2912 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2913 pdp++)
2914 {
2915 if (*pdp & INTEL_PTE_VALID) {
2916 if(*pdp & INTEL_PTE_REF) {
2917 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
2918 collect_ref++;
2919 } else {
2920 collect_unref++;
2921 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2922 eptp = ptp + NPTEPG;
2923
2924 /*
2925 * If the pte page has any wired mappings, we cannot
2926 * free it.
2927 */
2928 wired = 0;
2929 {
2930 register pt_entry_t *ptep;
2931 for (ptep = ptp; ptep < eptp; ptep++) {
2932 if (iswired(*ptep)) {
2933 wired = 1;
2934 break;
2935 }
2936 }
2937 }
2938 if (!wired) {
2939 /*
2940 * Remove the virtual addresses mapped by this pte page.
2941 */
2942 pmap_remove_range(p,
2943 pdetova(pdp - (pt_entry_t *)p->dirbase),
2944 ptp,
2945 eptp);
2946
2947 /*
2948 * Invalidate the page directory pointer.
2949 */
2950 pmap_store_pte(pdp, 0x0);
2951
2952 PMAP_READ_UNLOCK(p, spl);
2953
2954 /*
2955 * And free the pte page itself.
2956 */
2957 {
2958 register vm_page_t m;
2959
2960 vm_object_lock(p->pm_obj);
2961 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
2962 if (m == VM_PAGE_NULL)
2963 panic("pmap_collect: pte page not in object");
2964 vm_page_lock_queues();
2965 vm_page_free(m);
2966 inuse_ptepages_count--;
2967 vm_page_unlock_queues();
2968 vm_object_unlock(p->pm_obj);
2969 }
2970
2971 PMAP_READ_LOCK(p, spl);
2972 }
2973 }
2974 }
2975 }
2976 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2977
2978 PMAP_READ_UNLOCK(p, spl);
2979 return;
2980
2981 }
2982
2983
2984 void
2985 pmap_copy_page(src, dst)
2986 ppnum_t src;
2987 ppnum_t dst;
2988 {
2989 bcopy_phys((addr64_t)i386_ptob(src),
2990 (addr64_t)i386_ptob(dst),
2991 PAGE_SIZE);
2992 }
2993
2994
2995 /*
2996 * Routine: pmap_pageable
2997 * Function:
2998 * Make the specified pages (by pmap, offset)
2999 * pageable (or not) as requested.
3000 *
3001 * A page which is not pageable may not take
3002 * a fault; therefore, its page table entry
3003 * must remain valid for the duration.
3004 *
3005 * This routine is merely advisory; pmap_enter
3006 * will specify that these pages are to be wired
3007 * down (or not) as appropriate.
3008 */
3009 void
3010 pmap_pageable(
3011 __unused pmap_t pmap,
3012 __unused vm_map_offset_t start_addr,
3013 __unused vm_map_offset_t end_addr,
3014 __unused boolean_t pageable)
3015 {
3016 #ifdef lint
3017 pmap++; start_addr++; end_addr++; pageable++;
3018 #endif /* lint */
3019 }
3020
3021 /*
3022 * Clear specified attribute bits.
3023 */
3024 void
3025 phys_attribute_clear(
3026 ppnum_t pn,
3027 int bits)
3028 {
3029 pv_entry_t pv_h;
3030 register pv_entry_t pv_e;
3031 register pt_entry_t *pte;
3032 int pai;
3033 register pmap_t pmap;
3034 spl_t spl;
3035 pmap_paddr_t phys;
3036
3037 assert(pn != vm_page_fictitious_addr);
3038 if (!valid_page(pn)) {
3039 /*
3040 * Not a managed page.
3041 */
3042 return;
3043 }
3044
3045 /*
3046 * Lock the pmap system first, since we will be changing
3047 * several pmaps.
3048 */
3049
3050 PMAP_WRITE_LOCK(spl);
3051 phys = i386_ptob(pn);
3052 pai = pa_index(phys);
3053 pv_h = pai_to_pvh(pai);
3054
3055 /*
3056 * Walk down PV list, clearing all modify or reference bits.
3057 * We do not have to lock the pv_list because we have
3058 * the entire pmap system locked.
3059 */
3060 if (pv_h->pmap != PMAP_NULL) {
3061 /*
3062 * There are some mappings.
3063 */
3064 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
3065
3066 pmap = pv_e->pmap;
3067 /*
3068 * Lock the pmap to block pmap_extract and similar routines.
3069 */
3070 simple_lock(&pmap->lock);
3071
3072 {
3073 register vm_map_offset_t va;
3074
3075 va = pv_e->va;
3076 pte = pmap_pte(pmap, va);
3077
3078 #if 0
3079 /*
3080 * Consistency checks.
3081 */
3082 assert(*pte & INTEL_PTE_VALID);
3083 /* assert(pte_to_phys(*pte) == phys); */
3084 #endif
3085
3086 /*
3087 * Clear modify or reference bits.
3088 */
3089
3090 pmap_store_pte(pte, *pte & ~bits);
3091 pte++;
3092 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
3093 }
3094 simple_unlock(&pmap->lock);
3095
3096 }
3097 }
3098
3099 pmap_phys_attributes[pai] &= ~bits;
3100
3101 PMAP_WRITE_UNLOCK(spl);
3102 }
3103
3104 /*
3105 * Check specified attribute bits.
3106 */
3107 boolean_t
3108 phys_attribute_test(
3109 ppnum_t pn,
3110 int bits)
3111 {
3112 pv_entry_t pv_h;
3113 register pv_entry_t pv_e;
3114 register pt_entry_t *pte;
3115 int pai;
3116 register pmap_t pmap;
3117 spl_t spl;
3118 pmap_paddr_t phys;
3119
3120 assert(pn != vm_page_fictitious_addr);
3121 if (!valid_page(pn)) {
3122 /*
3123 * Not a managed page.
3124 */
3125 return (FALSE);
3126 }
3127
3128 phys = i386_ptob(pn);
3129 pai = pa_index(phys);
3130 /*
3131 * Fast check: if the bits have already been collected,
3132 * no locks need to be taken.
3133 * If they are not set, recheck after taking the lock,
3134 * in case they were pulled in while we were waiting
3135 * for it.
3136 */
3137 if (pmap_phys_attributes[pai] & bits)
3138 return (TRUE);
3139 pv_h = pai_to_pvh(pai);
3140
3141 /*
3142 * Lock the pmap system first, since we will be checking
3143 * several pmaps.
3144 */
3145 PMAP_WRITE_LOCK(spl);
3146
3147 if (pmap_phys_attributes[pai] & bits) {
3148 PMAP_WRITE_UNLOCK(spl);
3149 return (TRUE);
3150 }
3151
3152 /*
3153 * Walk down PV list, checking all mappings.
3154 * We do not have to lock the pv_list because we have
3155 * the entire pmap system locked.
3156 */
3157 if (pv_h->pmap != PMAP_NULL) {
3158 /*
3159 * There are some mappings.
3160 */
3161 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
3162
3163 pmap = pv_e->pmap;
3164 /*
3165 * Lock the pmap to block pmap_extract and similar routines.
3166 */
3167 simple_lock(&pmap->lock);
3168
3169 {
3170 register vm_map_offset_t va;
3171
3172 va = pv_e->va;
3173 pte = pmap_pte(pmap, va);
3174
3175 #if 0
3176 /*
3177 * Consistency checks.
3178 */
3179 assert(*pte & INTEL_PTE_VALID);
3180 /* assert(pte_to_phys(*pte) == phys); */
3181 #endif
3182 }
3183
3184 /*
3185 * Check modify or reference bits.
3186 */
3187 {
3188 if (*pte++ & bits) {
3189 simple_unlock(&pmap->lock);
3190 PMAP_WRITE_UNLOCK(spl);
3191 return (TRUE);
3192 }
3193 }
3194 simple_unlock(&pmap->lock);
3195 }
3196 }
3197 PMAP_WRITE_UNLOCK(spl);
3198 return (FALSE);
3199 }
3200
3201 /*
3202 * Set specified attribute bits.
3203 */
3204 void
3205 phys_attribute_set(
3206 ppnum_t pn,
3207 int bits)
3208 {
3209 int spl;
3210 pmap_paddr_t phys;
3211
3212 assert(pn != vm_page_fictitious_addr);
3213 if (!valid_page(pn)) {
3214 /*
3215 * Not a managed page.
3216 */
3217 return;
3218 }
3219
3220 /*
3221 * Lock the pmap system and set the requested bits in
3222 * the phys attributes array. Don't need to bother with
3223 * ptes because the test routine looks here first.
3224 */
3225 phys = i386_ptob(pn);
3226 PMAP_WRITE_LOCK(spl);
3227 pmap_phys_attributes[pa_index(phys)] |= bits;
3228 PMAP_WRITE_UNLOCK(spl);
3229 }
3230
3231 /*
3232 * Set the modify bit on the specified physical page.
3233 */
3234
3235 void pmap_set_modify(
3236 ppnum_t pn)
3237 {
3238 phys_attribute_set(pn, PHYS_MODIFIED);
3239 }
3240
3241 /*
3242 * Clear the modify bits on the specified physical page.
3243 */
3244
3245 void
3246 pmap_clear_modify(
3247 ppnum_t pn)
3248 {
3249 phys_attribute_clear(pn, PHYS_MODIFIED);
3250 }
3251
3252 /*
3253 * pmap_is_modified:
3254 *
3255 * Return whether or not the specified physical page is modified
3256 * by any physical maps.
3257 */
3258
3259 boolean_t
3260 pmap_is_modified(
3261 ppnum_t pn)
3262 {
3263 return (phys_attribute_test(pn, PHYS_MODIFIED));
3264 }
3265
3266 /*
3267 * pmap_clear_reference:
3268 *
3269 * Clear the reference bit on the specified physical page.
3270 */
3271
3272 void
3273 pmap_clear_reference(
3274 ppnum_t pn)
3275 {
3276 phys_attribute_clear(pn, PHYS_REFERENCED);
3277 }
3278
3279 void
3280 pmap_set_reference(ppnum_t pn)
3281 {
3282 phys_attribute_set(pn, PHYS_REFERENCED);
3283 }
3284
3285 /*
3286 * pmap_is_referenced:
3287 *
3288 * Return whether or not the specified physical page is referenced
3289 * by any physical maps.
3290 */
3291
3292 boolean_t
3293 pmap_is_referenced(
3294 ppnum_t pn)
3295 {
3296 return (phys_attribute_test(pn, PHYS_REFERENCED));
3297 }
3298
3299 /*
3300 * pmap_get_refmod(phys)
3301 * returns the referenced and modified bits of the specified
3302 * physical page.
3303 */
3304 unsigned int
3305 pmap_get_refmod(ppnum_t pa)
3306 {
3307 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
3308 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
3309 }
3310
3311 /*
3312 * pmap_clear_refmod(phys, mask)
3313 * clears the referenced and modified bits as specified by the mask
3314 * of the specified physical page.
3315 */
3316 void
3317 pmap_clear_refmod(ppnum_t pa, unsigned int mask)
3318 {
3319 unsigned int x86Mask;
3320
3321 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
3322 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
3323 phys_attribute_clear(pa, x86Mask);
3324 }
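/*
 * Usage sketch (illustrative; pn is an assumed managed page number):
 * sample the generic ref/mod state and clear just the reference bit,
 * leaving the modify bit for a later cleaning pass.
 *
 *	unsigned int refmod = pmap_get_refmod(pn);
 *	if (refmod & VM_MEM_REFERENCED)
 *		pmap_clear_refmod(pn, VM_MEM_REFERENCED);
 */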
3325
3326 /*
3327 * Set the modify bit on the specified range
3328 * of this map as requested.
3329 *
3330 * This optimization stands only if each time the dirty bit
3331 * in vm_page_t is tested, it is also tested in the pmap.
3332 */
3333 void
3334 pmap_modify_pages(
3335 pmap_t map,
3336 vm_map_offset_t sva,
3337 vm_map_offset_t eva)
3338 {
3339 spl_t spl;
3340 register pt_entry_t *pde;
3341 register pt_entry_t *spte, *epte;
3342 vm_map_offset_t lva;
3343 vm_map_offset_t orig_sva;
3344
3345 if (map == PMAP_NULL)
3346 return;
3347
3348 PMAP_READ_LOCK(map, spl);
3349
3350 orig_sva = sva;
3351 while (sva && sva < eva) {
3352 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
3353 if (lva > eva)
3354 lva = eva;
3355 pde = pmap_pde(map, sva);
3356 if (pde && (*pde & INTEL_PTE_VALID)) {
3357 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
3358 if (lva) {
3359 spte = &spte[ptenum(sva)];
3360 epte = &spte[intel_btop(lva-sva)];
3361 } else {
3362 epte = &spte[intel_btop(pde_mapped_size)];
3363 spte = &spte[ptenum(sva)];
3364 }
3365 while (spte < epte) {
3366 if (*spte & INTEL_PTE_VALID) {
3367 pmap_store_pte(spte, *spte
3368 | INTEL_PTE_MOD
3369 | INTEL_PTE_WRITE);
3370 }
3371 spte++;
3372 }
3373 }
3374 sva = lva;
3375 pde++;
3376 }
3377 PMAP_UPDATE_TLBS(map, orig_sva, eva);
3378
3379 PMAP_READ_UNLOCK(map, spl);
3380 }
3381
3382
3383 void
3384 invalidate_icache(__unused vm_offset_t addr,
3385 __unused unsigned cnt,
3386 __unused int phys)
3387 {
3388 return;
3389 }
3390 void
3391 flush_dcache(__unused vm_offset_t addr,
3392 __unused unsigned count,
3393 __unused int phys)
3394 {
3395 return;
3396 }
3397
3398 #if MACH_KDB
3399
3400 /* show phys page mappings and attributes */
3401
3402 extern void db_show_page(pmap_paddr_t pa);
3403
3404 void
3405 db_show_page(pmap_paddr_t pa)
3406 {
3407 pv_entry_t pv_h;
3408 int pai;
3409 char attr;
3410
3411 pai = pa_index(pa);
3412 pv_h = pai_to_pvh(pai);
3413
3414 attr = pmap_phys_attributes[pai];
3415 printf("phys page %x ", pa);
3416 if (attr & PHYS_MODIFIED)
3417 printf("modified, ");
3418 if (attr & PHYS_REFERENCED)
3419 printf("referenced, ");
3420 if (pv_h->pmap || pv_h->next)
3421 printf(" mapped at\n");
3422 else
3423 printf(" not mapped\n");
3424 for (; pv_h; pv_h = pv_h->next)
3425 if (pv_h->pmap)
3426 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3427 }
3428
3429 #endif /* MACH_KDB */
3430
3431 #if MACH_KDB
3432 void db_kvtophys(vm_offset_t);
3433 void db_show_vaddrs(pt_entry_t *);
3434
3435 /*
3436 * print out the results of kvtophys(arg)
3437 */
3438 void
3439 db_kvtophys(
3440 vm_offset_t vaddr)
3441 {
3442 db_printf("0x%qx", kvtophys(vaddr));
3443 }
3444
3445 /*
3446 * Walk the page tables.
3447 */
3448 void
3449 db_show_vaddrs(
3450 pt_entry_t *dirbase)
3451 {
3452 pt_entry_t *ptep, *pdep, tmp;
3453 unsigned int x, y, pdecnt, ptecnt;
3454
3455 if (dirbase == 0) {
3456 dirbase = kernel_pmap->dirbase;
3457 }
3458 if (dirbase == 0) {
3459 db_printf("need a dirbase...\n");
3460 return;
3461 }
3462 dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
3463
3464 db_printf("dirbase: 0x%x\n", dirbase);
3465
3466 pdecnt = ptecnt = 0;
3467 pdep = &dirbase[0];
3468 for (y = 0; y < NPDEPG; y++, pdep++) {
3469 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3470 continue;
3471 }
3472 pdecnt++;
3473 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3474 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
3475 for (x = 0; x < NPTEPG; x++, ptep++) {
3476 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3477 continue;
3478 }
3479 ptecnt++;
3480 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3481 x,
3482 *ptep,
3483 (y << 22) | (x << 12),
3484 *ptep & ~INTEL_OFFMASK);
3485 }
3486 }
3487
3488 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3489
3490 }
3491 #endif /* MACH_KDB */
3492
3493 #include <mach_vm_debug.h>
3494 #if MACH_VM_DEBUG
3495 #include <vm/vm_debug.h>
3496
3497 int
3498 pmap_list_resident_pages(
3499 __unused pmap_t pmap,
3500 __unused vm_offset_t *listp,
3501 __unused int space)
3502 {
3503 return 0;
3504 }
3505 #endif /* MACH_VM_DEBUG */
3506
3507
3508
3509 /* temporary workaround */
3510 boolean_t
3511 coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
3512 {
3513 #if 0
3514 pt_entry_t *ptep;
3515
3516 ptep = pmap_pte(map->pmap, va);
3517 if (0 == ptep)
3518 return FALSE;
3519 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
3520 #else
3521 return TRUE;
3522 #endif
3523 }
3524
3525
3526 boolean_t
3527 phys_page_exists(
3528 ppnum_t pn)
3529 {
3530 pmap_paddr_t phys;
3531
3532 assert(pn != vm_page_fictitious_addr);
3533
3534 if (!pmap_initialized)
3535 return (TRUE);
3536 phys = (pmap_paddr_t) i386_ptob(pn);
3537 if (!pmap_valid_page(pn))
3538 return (FALSE);
3539
3540 return TRUE;
3541 }
3542
3543 void
3544 mapping_free_prime()
3545 {
3546 int i;
3547 pv_entry_t pv_e;
3548
3549 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3550 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3551 PV_FREE(pv_e);
3552 }
3553 }
3554
3555 void
3556 mapping_adjust()
3557 {
3558 pv_entry_t pv_e;
3559 int i;
3560 int spl;
3561
3562 if (mapping_adjust_call == NULL) {
3563 thread_call_setup(&mapping_adjust_call_data,
3564 (thread_call_func_t) mapping_adjust,
3565 (thread_call_param_t) NULL);
3566 mapping_adjust_call = &mapping_adjust_call_data;
3567 }
3568 /* XXX rethink best way to do locking here */
3569 if (pv_free_count < PV_LOW_WATER_MARK) {
3570 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3571 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3572 SPLVM(spl);
3573 PV_FREE(pv_e);
3574 SPLX(spl);
3575 }
3576 }
3577 mappingrecurse = 0;
3578 }
3579
3580 void
3581 pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
3582 {
3583 int i;
3584 pt_entry_t *opte, *npte;
3585 pt_entry_t pte;
3586
3587
3588 for (i = 0; i < cnt; i++) {
3589 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
3590 if (0 == opte) panic("kernel_commpage");
3591 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3592 pte &= ~INTEL_PTE_WRITE; // ensure read only
3593 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
3594 if (0 == npte) panic("user_commpage");
3595 pmap_store_pte(npte, pte);
3596 kernel_commpage += INTEL_PGBYTES;
3597 user_commpage += INTEL_PGBYTES;
3598 }
3599 }
3600
3601 #define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
3602 pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
3603
3604 void
3605 pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
3606 {
3607 spl_t s;
3608 int i;
3609 pt_entry_t *kptep;
3610
3611 s = splhigh();
3612 for (i = 0; i< cnt; i++) {
3613 kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
3614 if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte");
3615 pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
3616 }
3617 splx(s);
3618
3619 }
3620
3621 void
3622 pmap_map_sharedpage(__unused task_t task, pmap_t p)
3623 {
3624 pt_entry_t *ptep;
3625 spl_t s;
3626 int i;
3627
3628 if (!p->pm_64bit) return;
3629 /* setup high 64 bit commpage */
3630 s = splhigh();
3631 while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) {
3632 splx(s);
3633 pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS);
3634 s = splhigh();
3635 }
3636
3637 for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) {
3638 ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
3639 if (0 == ptep) panic("pmap_map_sharedpage");
3640 pmap_store_pte(ptep, pmap_commpage64_ptes[i]);
3641 }
3642 splx(s);
3643
3644 }
3645
3646 void
3647 pmap_unmap_sharedpage(pmap_t pmap)
3648 {
3649 spl_t s;
3650 pt_entry_t *ptep;
3651 int i;
3652
3653 if (!pmap->pm_64bit) return;
3654 s = splhigh();
3655 for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) {
3656 ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
3657 if (ptep) pmap_store_pte(ptep, 0);
3658 }
3659 splx(s);
3660 }
3661
3662 static cpu_pmap_t cpu_pmap_master;
3663
3664 struct cpu_pmap *
3665 pmap_cpu_alloc(boolean_t is_boot_cpu)
3666 {
3667 int ret;
3668 int i;
3669 cpu_pmap_t *cp;
3670 vm_offset_t address;
3671 vm_map_address_t mapaddr;
3672 vm_map_entry_t entry;
3673 pt_entry_t *pte;
3674
3675 if (is_boot_cpu) {
3676 cp = &cpu_pmap_master;
3677 } else {
3678 /*
3679 * The per-cpu pmap data structure itself.
3680 */
3681 ret = kmem_alloc(kernel_map,
3682 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3683 if (ret != KERN_SUCCESS) {
3684 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3685 return NULL;
3686 }
3687 bzero((void *)cp, sizeof(cpu_pmap_t));
3688
3689 /*
3690 * The temporary windows used for copy/zero - see loose_ends.c
3691 */
3692 ret = vm_map_find_space(kernel_map,
3693 &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
3694 if (ret != KERN_SUCCESS) {
3695 printf("pmap_cpu_alloc() "
3696 "vm_map_find_space ret=%d\n", ret);
3697 pmap_cpu_free(cp);
3698 return NULL;
3699 }
3700 address = (vm_offset_t)mapaddr;
3701
3702 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
3703 while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
3704 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
3705 * (int *) pte = 0;
3706 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
3707 cp->mapwindow[i].prv_CMAP = pte;
3708 }
3709 vm_map_unlock(kernel_map);
3710 }
3711
3712 cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
3713 cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
3714 cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
3715
3716 return cp;
3717 }
3718
3719 void
3720 pmap_cpu_free(struct cpu_pmap *cp)
3721 {
3722 if (cp != NULL && cp != &cpu_pmap_master) {
3723 kfree((void *) cp, sizeof(cpu_pmap_t));
3724 }
3725 }
3726
3727
3728 mapwindow_t *
3729 pmap_get_mapwindow(pt_entry_t pentry)
3730 {
3731 mapwindow_t *mp;
3732 int i;
3733 boolean_t istate;
3734
3735 /*
3736 * can be called from hardware interrupt context
3737 * so we need to protect the lookup process
3738 */
3739 istate = ml_set_interrupts_enabled(FALSE);
3740
3741 /*
3742 * Note: 0th map reserved for pmap_pte()
3743 */
3744 for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
3745 mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
3746
3747 if (*mp->prv_CMAP == 0) {
3748 *mp->prv_CMAP = pentry;
3749 break;
3750 }
3751 }
3752 if (i >= PMAP_NWINDOWS)
3753 mp = NULL;
3754 (void) ml_set_interrupts_enabled(istate);
3755
3756 return (mp);
3757 }
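/*
 * Usage sketch (illustrative): borrow a per-cpu mapping window to touch
 * an arbitrary physical page.  The pte template and the release step
 * (clearing prv_CMAP when done, after invalidating the window address)
 * are assumptions about how the copy/zero callers use these windows,
 * not something defined by this routine.
 *
 *	mapwindow_t *mp = pmap_get_mapwindow(pa_to_pte(pa)
 *					     | INTEL_PTE_VALID
 *					     | INTEL_PTE_REF
 *					     | INTEL_PTE_MOD
 *					     | INTEL_PTE_WRITE);
 *	if (mp != NULL) {
 *		invlpg((uintptr_t)mp->prv_CADDR);	// drop any stale translation
 *		bzero(mp->prv_CADDR, PAGE_SIZE);	// use the window
 *		*mp->prv_CMAP = 0;			// release it
 *	}
 */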
3758
3759
3760 /*
3761 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
3762 *
3763 * grand = the pmap that we will nest subord into
3764 * subord = the pmap that goes into the grand
3765 * vstart = start of the range in the grand pmap where the nesting is inserted
3766 * nstart = start of the corresponding range in the nested (subord) pmap
3767 * size = Size of nest area (up to 16TB)
3768 *
3769 * Inserts a pmap into another. This is used to implement shared segments.
3770 *
3771 * On x86 this is very limited right now: the range must be exactly one segment (256MB).
3772 *
3773 * Note that we depend upon higher level VM locks to ensure that things don't change while
3774 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting,
3775 * nor performing two nests at once.
3776 */
3777
3778
3779 kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {
3780
3781 vm_map_offset_t vaddr, nvaddr;
3782 pd_entry_t *pde,*npde;
3783 unsigned int i, need_flush;
3784 unsigned int num_pde;
3785 spl_t s;
3786
3787 // do validity tests
3788
3789 if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */
3790 if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */
3791 if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */
3792 if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */
3793 if(size == 0) {
3794 panic("pmap_nest: size is invalid - %016llX\n", size);
3795 }
3796 if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE);
3797
3798 // prepopulate subord pmap pde's if necessary
3799
3800 if (cpu_64bit) {
3801 s = splhigh();
3802 while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) {
3803 splx(s);
3804 pmap_expand(subord, nstart);
3805 s = splhigh();
3806 }
3807 splx(s);
3808 }
3809
3810 PMAP_READ_LOCK(subord,s);
3811 nvaddr = (vm_map_offset_t)nstart;
3812 need_flush = 0;
3813 num_pde = size >> PDESHIFT;
3814
3815 for (i=0;i<num_pde;i++) {
3816 npde = pmap_pde(subord, nvaddr);
3817 if ((0 == npde) || (*npde++ & INTEL_PTE_VALID) == 0) {
3818 PMAP_READ_UNLOCK(subord,s);
3819 pmap_expand(subord, nvaddr); // pmap_expand handles races
3820 PMAP_READ_LOCK(subord,s);
3821 need_flush++;
3822 }
3823 nvaddr += NBPDE;
3824 }
3825
3826 if (need_flush) {
3827 nvaddr = (vm_map_offset_t)nstart;
3828 PMAP_UPDATE_TLBS(subord, nvaddr, nvaddr + (1 << 28) -1 );
3829 }
3830 PMAP_READ_UNLOCK(subord,s);
3831
3832 // copy pde's from subord pmap into grand pmap
3833
3834 if (cpu_64bit) {
3835 s = splhigh();
3836 while (PD_ENTRY_NULL == (pde = pmap_pde(grand, vstart))) {
3837 splx(s);
3838 pmap_expand(grand, vstart);
3839 s = splhigh();
3840 }
3841 splx(s);
3842 }
3843
3844 PMAP_READ_LOCK(grand,s);
3845 vaddr = (vm_map_offset_t)vstart;
3846 for (i=0;i<num_pde;i++,pde++) {
3847 pd_entry_t tpde;
3848 npde = pmap_pde(subord, nstart);
3849 if (npde == 0) panic("pmap_nest: no npde, subord 0x%x nstart 0x%llx", subord, nstart);
3850 tpde = *npde;
3851 nstart += NBPDE;
3852 pde = pmap_pde(grand, vaddr);
3853 if (pde == 0) panic("pmap_nest: no pde, grand 0x%x vaddr 0x%llx", grand, vaddr);
3854 vaddr += NBPDE;
3855 pmap_store_pte(pde, tpde);
3856 }
3857 PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1 << 28) -1 );
3858
3859 PMAP_READ_UNLOCK(grand,s);
3860
3861 return KERN_SUCCESS;
3862 }
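/*
 * Usage sketch (illustrative; grand, subord and the 0x90000000 base are
 * assumed caller values): nest one 256MB shared segment, then remove it
 * again.  Both addresses must be 256MB aligned and size must be exactly
 * one segment, per the checks above.
 *
 *	kern_return_t kr;
 *	kr = pmap_nest(grand, subord,
 *		       (addr64_t)0x90000000ULL,		// vstart in grand
 *		       (addr64_t)0x90000000ULL,		// nstart in subord
 *		       (uint64_t)1 << 28);		// 256MB
 *	if (kr == KERN_SUCCESS)
 *		(void) pmap_unnest(grand, (addr64_t)0x90000000ULL);
 */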
3863
3864 /*
3865 * kern_return_t pmap_unnest(grand, vaddr)
3866 *
3867 * grand = the pmap from which the nested range is removed
3868 * vaddr = start of range in pmap to be unnested
3869 *
3870 * Removes a pmap from another. This is used to implement shared segments.
3871 * As with pmap_nest, this is limited to segment (256MB) aligned,
3872 * segment sized ranges.
3873 */
3874
3875 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) {
3876
3877 spl_t s;
3878 pd_entry_t *pde;
3879 unsigned int i;
3880 unsigned int num_pde;
3881
3882 PMAP_READ_LOCK(grand,s);
3883
3884 // invalidate all pdes for segment at vaddr in pmap grand
3885
3886 num_pde = (1<<28) >> PDESHIFT;
3887
3888 for (i=0;i<num_pde;i++,pde++) {
3889 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
3890 if (pde == 0) panic("pmap_unnest: no pde, grand 0x%x vaddr 0x%llx\n", grand, vaddr);
3891 pmap_store_pte(pde, (pd_entry_t)0);
3892 vaddr += NBPDE;
3893 }
3894 PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1<<28) -1 );
3895
3896 PMAP_READ_UNLOCK(grand,s);
3897
3898 return KERN_SUCCESS; /* Bye, bye, butterfly... */
3899 }
3900
3901 void
3902 pmap_switch(pmap_t tpmap)
3903 {
3904 spl_t s;
3905 int my_cpu;
3906
3907 s = splhigh(); /* Make sure interruptions are disabled */
3908 my_cpu = cpu_number();
3909
3910 set_dirbase(tpmap, my_cpu);
3911
3912 splx(s);
3913 }
3914
3915
3916 /*
3917 * disable no-execute capability on
3918 * the specified pmap
3919 */
3920 void pmap_disable_NX(pmap_t pmap) {
3921
3922 pmap->nx_enabled = 0;
3923 }
3924
3925 void
3926 pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
3927 vm_size_t *alloc_size, int *collectable, int *exhaustable)
3928 {
3929 *count = inuse_ptepages_count;
3930 *cur_size = PAGE_SIZE * inuse_ptepages_count;
3931 *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
3932 *elem_size = PAGE_SIZE;
3933 *alloc_size = PAGE_SIZE;
3934
3935 *collectable = 1;
3936 *exhaustable = 0;
3937 }
3938
3939 vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
3940 {
3941 enum high_fixed_addresses a;
3942 a = e + HIGH_CPU_END * cpu;
3943 return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
3944 }
3945
3946 vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
3947 {
3948 return pmap_cpu_high_map_vaddr(cpu_number(), e);
3949 }
3950
3951 vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
3952 {
3953 enum high_fixed_addresses a;
3954 vm_offset_t vaddr;
3955
3956 a = e + HIGH_CPU_END * cpu_number();
3957 vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
3958 *(pte_unique_base + a) = pte;
3959
3960 /* TLB flush for this page for this cpu */
3961 invlpg((uintptr_t)vaddr);
3962
3963 return vaddr;
3964 }
3965
3966
3967 /*
3968 * Called with pmap locked, we:
3969 * - scan through per-cpu data to see which other cpus need to flush
3970 * - send an IPI to each non-idle cpu to be flushed
3971 * - wait for all to signal back that they are inactive or we see that
3972 * they are in an interrupt handler or at a safe point
3973 * - flush the local tlb if it is active for this pmap
3974 * - return ... the caller will unlock the pmap
3975 */
3976 void
3977 pmap_flush_tlbs(pmap_t pmap)
3978 {
3979 unsigned int cpu;
3980 unsigned int cpu_bit;
3981 cpu_set cpus_to_signal;
3982 unsigned int my_cpu = cpu_number();
3983 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
3984 boolean_t flush_self = FALSE;
3985 uint64_t deadline;
3986
3987 assert(!ml_get_interrupts_enabled());
3988
3989 /*
3990 * Scan other cpus for matching active or task CR3.
3991 * For idle cpus (with no active map) we mark them invalid but
3992 * don't signal -- they'll check as they go busy.
3993 * Note: for the kernel pmap we look for 64-bit shared address maps.
3994 */
3995 cpus_to_signal = 0;
3996 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
3997 if (!cpu_datap(cpu)->cpu_running)
3998 continue;
3999 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
4000 (cpu_datap(cpu)->cpu_active_cr3 == pmap_cr3) ||
4001 ((pmap == kernel_pmap) &&
4002 (!CPU_CR3_IS_ACTIVE(cpu) ||
4003 cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
4004 if (cpu == my_cpu) {
4005 flush_self = TRUE;
4006 continue;
4007 }
4008 cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
4009 __asm__ volatile("mfence");
4010
4011 if (CPU_CR3_IS_ACTIVE(cpu)) {
4012 cpus_to_signal |= cpu_bit;
4013 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
4014 }
4015 }
4016 }
4017
4018 if (cpus_to_signal) {
4019 KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0);
4020
4021 deadline = mach_absolute_time() + LockTimeOut;
4022 /*
4023 * Wait for those other cpus to acknowledge
4024 */
4025 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4026 while ((cpus_to_signal & cpu_bit) != 0) {
4027 if (!cpu_datap(cpu)->cpu_running ||
4028 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
4029 !CPU_CR3_IS_ACTIVE(cpu)) {
4030 cpus_to_signal &= ~cpu_bit;
4031 break;
4032 }
4033 if (mach_absolute_time() > deadline)
4034 panic("pmap_flush_tlbs() "
4035 "timeout pmap=%p cpus_to_signal=%p",
4036 pmap, cpus_to_signal);
4037 cpu_pause();
4038 }
4039 if (cpus_to_signal == 0)
4040 break;
4041 }
4042 KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0);
4043 }
4044
4045 /*
4046 * Flush local tlb if required.
4047 * We need this flush even if the pmap being changed
4048 * is the user map... in case we do a copyin/out
4049 * before returning to user mode.
4050 */
4051 if (flush_self)
4052 flush_tlb();
4053
4054 }
4055
4056 void
4057 process_pmap_updates(void)
4058 {
4059 flush_tlb();
4060
4061 current_cpu_datap()->cpu_tlb_invalid = FALSE;
4062 __asm__ volatile("mfence");
4063 }
4064
4065 void
4066 pmap_update_interrupt(void)
4067 {
4068 KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0);
4069
4070 assert(!ml_get_interrupts_enabled());
4071
4072 process_pmap_updates();
4073
4074 KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0);
4075 }
4076
4077
4078 unsigned int pmap_cache_attributes(ppnum_t pn) {
4079
4080 if (!pmap_valid_page(pn))
4081 return (VM_WIMG_IO);
4082
4083 return (VM_WIMG_COPYBACK);
4084 }
4085
4086 #ifdef PMAP_DEBUG
4087 void
4088 pmap_dump(pmap_t p)
4089 {
4090 int i;
4091
4092 kprintf("pmap 0x%x\n",p);
4093
4094 kprintf(" pm_cr3 0x%llx\n",p->pm_cr3);
4095 kprintf(" pm_pml4 0x%x\n",p->pm_pml4);
4096 kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt);
4097
4098 kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4);
4099 for (i=0;i<8;i++)
4100 kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
4101 }
4102
4103 void pmap_dump_wrap(void)
4104 {
4105 pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
4106 }
4107
4108 void
4109 dump_4GB_pdpt(pmap_t p)
4110 {
4111 int spl;
4112 pdpt_entry_t *user_pdptp;
4113 pdpt_entry_t *kern_pdptp;
4114 pdpt_entry_t *pml4p;
4115
4116 spl = splhigh();
4117 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
4118 splx(spl);
4119 pmap_expand_pml4(p, 0x0);
4120 spl = splhigh();
4121 }
4122 kern_pdptp = kernel_pmap->pm_pdpt;
4123 if (kern_pdptp == NULL)
4124 panic("kern_pdptp == NULL");
4125 kprintf("dump_4GB_pdpt(%p)\n"
4126 "kern_pdptp=%p (phys=0x%016llx)\n"
4127 "\t 0x%08x: 0x%016llx\n"
4128 "\t 0x%08x: 0x%016llx\n"
4129 "\t 0x%08x: 0x%016llx\n"
4130 "\t 0x%08x: 0x%016llx\n"
4131 "\t 0x%08x: 0x%016llx\n"
4132 "user_pdptp=%p (phys=0x%016llx)\n"
4133 "\t 0x%08x: 0x%016llx\n"
4134 "\t 0x%08x: 0x%016llx\n"
4135 "\t 0x%08x: 0x%016llx\n"
4136 "\t 0x%08x: 0x%016llx\n"
4137 "\t 0x%08x: 0x%016llx\n",
4138 p, kern_pdptp, kvtophys(kern_pdptp),
4139 kern_pdptp+0, *(kern_pdptp+0),
4140 kern_pdptp+1, *(kern_pdptp+1),
4141 kern_pdptp+2, *(kern_pdptp+2),
4142 kern_pdptp+3, *(kern_pdptp+3),
4143 kern_pdptp+4, *(kern_pdptp+4),
4144 user_pdptp, kvtophys(user_pdptp),
4145 user_pdptp+0, *(user_pdptp+0),
4146 user_pdptp+1, *(user_pdptp+1),
4147 user_pdptp+2, *(user_pdptp+2),
4148 user_pdptp+3, *(user_pdptp+3),
4149 user_pdptp+4, *(user_pdptp+4));
4150 kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4151 p->pm_cr3, p->pm_hold, p->pm_pml4);
4152 pml4p = (pdpt_entry_t *)p->pm_hold;
4153 if (pml4p == NULL)
4154 panic("user pml4p == NULL");
4155 kprintf("\t 0x%08x: 0x%016llx\n"
4156 "\t 0x%08x: 0x%016llx\n",
4157 pml4p+0, *(pml4p),
4158 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
4159 kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4160 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
4161 pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
4162 if (pml4p == NULL)
4163 panic("kern pml4p == NULL");
4164 kprintf("\t 0x%08x: 0x%016llx\n"
4165 "\t 0x%08x: 0x%016llx\n",
4166 pml4p+0, *(pml4p),
4167 pml4p+511, *(pml4p+511));
4168 splx(spl);
4169 }
4170
4171 void dump_4GB_pdpt_thread(thread_t tp)
4172 {
4173 dump_4GB_pdpt(tp->map->pmap);
4174 }
4175
4176
4177 #endif