[apple/xnu.git] / osfmk / i386 / pmap.c (xnu-792.18.15)
1c79356b 1/*
89b3af67 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
8f6c56a5 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
8f6c56a5
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
8ad349bb 24 * limitations under the License.
8f6c56a5
A
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.c
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
63 *
64 * Physical Map management code for Intel i386, i486, and i860.
65 *
66 * Manages physical address maps.
67 *
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
74 *
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
80 * requested.
81 *
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
84 * this module may delay invalidate or reduced protection
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
89 */
90
1c79356b
A
91#include <string.h>
92#include <norma_vm.h>
93#include <mach_kdb.h>
94#include <mach_ldebug.h>
95
96#include <mach/machine/vm_types.h>
97
98#include <mach/boolean.h>
99#include <kern/thread.h>
100#include <kern/zalloc.h>
101
102#include <kern/lock.h>
91447636 103#include <kern/kalloc.h>
1c79356b
A
104#include <kern/spl.h>
105
106#include <vm/pmap.h>
107#include <vm/vm_map.h>
108#include <vm/vm_kern.h>
109#include <mach/vm_param.h>
110#include <mach/vm_prot.h>
111#include <vm/vm_object.h>
112#include <vm/vm_page.h>
113
114#include <mach/machine/vm_param.h>
115#include <machine/thread.h>
116
117#include <kern/misc_protos.h> /* prototyping */
118#include <i386/misc_protos.h>
119
120#include <i386/cpuid.h>
91447636 121#include <i386/cpu_data.h>
55e303ae
A
122#include <i386/cpu_number.h>
123#include <i386/machine_cpu.h>
91447636 124#include <i386/mp_slave_boot.h>
89b3af67
A
125#include <i386/seg.h>
126#include <i386/cpu_capabilities.h>
1c79356b
A
127
128#if MACH_KDB
129#include <ddb/db_command.h>
130#include <ddb/db_output.h>
131#include <ddb/db_sym.h>
132#include <ddb/db_print.h>
133#endif /* MACH_KDB */
134
135#include <kern/xpr.h>
136
91447636
A
137#include <vm/vm_protos.h>
138
139#include <i386/mp.h>
89b3af67
A
140#include <i386/mp_desc.h>
141
142#include <sys/kdebug.h>
143
144#ifdef IWANTTODEBUG
145#undef DEBUG
146#define DEBUG 1
147#define POSTCODE_DELAY 1
148#include <i386/postcode.h>
149#endif /* IWANTTODEBUG */
1c79356b
A
150
151/*
152 * Forward declarations for internal functions.
153 */
89b3af67
A
154void pmap_expand_pml4(
155 pmap_t map,
156 vm_map_offset_t v);
157
158void pmap_expand_pdpt(
159 pmap_t map,
160 vm_map_offset_t v);
161
91447636 162void pmap_expand(
1c79356b 163 pmap_t map,
89b3af67 164 vm_map_offset_t v);
1c79356b 165
89b3af67 166static void pmap_remove_range(
1c79356b 167 pmap_t pmap,
89b3af67 168 vm_map_offset_t va,
1c79356b
A
169 pt_entry_t *spte,
170 pt_entry_t *epte);
171
91447636
A
172void phys_attribute_clear(
173 ppnum_t phys,
1c79356b
A
174 int bits);
175
91447636
A
176boolean_t phys_attribute_test(
177 ppnum_t phys,
1c79356b
A
178 int bits);
179
91447636
A
180void phys_attribute_set(
181 ppnum_t phys,
1c79356b
A
182 int bits);
183
91447636
A
184void pmap_set_reference(
185 ppnum_t pn);
186
187void pmap_movepage(
188 unsigned long from,
189 unsigned long to,
190 vm_size_t size);
191
91447636
A
192boolean_t phys_page_exists(
193 ppnum_t pn);
1c79356b 194
89b3af67
A
195#ifdef PMAP_DEBUG
196void dump_pmap(pmap_t);
197void dump_4GB_pdpt(pmap_t p);
198void dump_4GB_pdpt_thread(thread_t tp);
199#endif
1c79356b 200
1c79356b
A
201#define iswired(pte) ((pte) & INTEL_PTE_WIRED)
202
89b3af67
A
203int nx_enabled = 1; /* enable no-execute protection */
204
205int cpu_64bit = 0;
1c79356b 206
55e303ae 207
1c79356b
A
208/*
209 * Private data structures.
210 */
211
212/*
213 * For each vm_page_t, there is a list of all currently
214 * valid virtual mappings of that page. An entry is
215 * a pv_entry_t; the list is the pv_table.
216 */
217
218typedef struct pv_entry {
219 struct pv_entry *next; /* next pv_entry */
220 pmap_t pmap; /* pmap where mapping lies */
89b3af67 221 vm_map_offset_t va; /* virtual address for mapping */
1c79356b
A
222} *pv_entry_t;
223
224#define PV_ENTRY_NULL ((pv_entry_t) 0)
225
226pv_entry_t pv_head_table; /* array of entries, one per page */
227
228/*
229 * pv_list entries are kept on a list that can only be accessed
230 * with the pmap system locked (at SPLVM, not in the cpus_active set).
231 * The list is refilled from the pv_list_zone if it becomes empty.
232 */
233pv_entry_t pv_free_list; /* free list at SPLVM */
234decl_simple_lock_data(,pv_free_list_lock)
91447636
A
235int pv_free_count = 0;
236#define PV_LOW_WATER_MARK 5000
237#define PV_ALLOC_CHUNK 2000
238thread_call_t mapping_adjust_call;
239static thread_call_data_t mapping_adjust_call_data;
240int mappingrecurse = 0;
1c79356b
A
241
242#define PV_ALLOC(pv_e) { \
243 simple_lock(&pv_free_list_lock); \
244 if ((pv_e = pv_free_list) != 0) { \
245 pv_free_list = pv_e->next; \
91447636
A
246 pv_free_count--; \
247 if (pv_free_count < PV_LOW_WATER_MARK) \
89b3af67 248 if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
91447636 249 thread_call_enter(mapping_adjust_call); \
1c79356b
A
250 } \
251 simple_unlock(&pv_free_list_lock); \
252}
253
254#define PV_FREE(pv_e) { \
255 simple_lock(&pv_free_list_lock); \
256 pv_e->next = pv_free_list; \
257 pv_free_list = pv_e; \
91447636 258 pv_free_count++; \
1c79356b
A
259 simple_unlock(&pv_free_list_lock); \
260}
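
/*
 * Illustrative sketch (not verbatim from this file) of how the two macros
 * above are typically paired by callers such as pmap_enter():  the free
 * list is tried first, and the pv_list_zone below is the fallback when it
 * is empty.
 *
 *	pv_entry_t	pv_e;
 *
 *	PV_ALLOC(pv_e);
 *	if (pv_e == PV_ENTRY_NULL)
 *		pv_e = (pv_entry_t) zalloc(pv_list_zone);
 *	...
 *	PV_FREE(pv_e);
 */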
261
262zone_t pv_list_zone; /* zone of pv_entry structures */
263
91447636 264static zone_t pdpt_zone;
91447636 265
1c79356b
A
266/*
267 * Each entry in the pv_head_table is locked by a bit in the
268 * pv_lock_table. The lock bits are accessed by the physical
269 * address of the page they lock.
270 */
271
272char *pv_lock_table; /* pointer to array of bits */
273#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
274
275/*
276 * First and last physical addresses that we maintain any information
277 * for. Initialized to zero so that pmap operations done before
278 * pmap_init won't touch any non-existent structures.
279 */
91447636
A
280pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
281pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
1c79356b
A
282boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
283
91447636
A
284static struct vm_object kptobj_object_store;
285static vm_object_t kptobj;
91447636 286
1c79356b
A
287/*
288 * Index into pv_head table, its lock bits, and the modify/reference
289 * bits starting at vm_first_phys.
290 */
291
91447636 292#define pa_index(pa) (i386_btop(pa - vm_first_phys))
1c79356b
A
293
294#define pai_to_pvh(pai) (&pv_head_table[pai])
295#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
296#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
297
298/*
 299 * Array of physical page attributes for managed pages.
300 * One byte per physical page.
301 */
302char *pmap_phys_attributes;
303
304/*
305 * Physical page attributes. Copy bits from PTE definition.
306 */
307#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
308#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
55e303ae 309#define PHYS_NCACHE INTEL_PTE_NCACHE
1c79356b
A
310
311/*
312 * Amount of virtual memory mapped by one
313 * page-directory entry.
314 */
315#define PDE_MAPPED_SIZE (pdetova(1))
89b3af67 316uint64_t pde_mapped_size;
1c79356b 317
1c79356b
A
318/*
319 * Locking and TLB invalidation
320 */
321
322/*
323 * Locking Protocols:
324 *
325 * There are two structures in the pmap module that need locking:
326 * the pmaps themselves, and the per-page pv_lists (which are locked
327 * by locking the pv_lock_table entry that corresponds to the pv_head
328 * for the list in question.) Most routines want to lock a pmap and
329 * then do operations in it that require pv_list locking -- however
330 * pmap_remove_all and pmap_copy_on_write operate on a physical page
331 * basis and want to do the locking in the reverse order, i.e. lock
332 * a pv_list and then go through all the pmaps referenced by that list.
333 * To protect against deadlock between these two cases, the pmap_lock
334 * is used. There are three different locking protocols as a result:
335 *
336 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
337 * the pmap.
338 *
339 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
340 * lock on the pmap_lock (shared read), then lock the pmap
341 * and finally the pv_lists as needed [i.e. pmap lock before
342 * pv_list lock.]
343 *
344 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
345 * Get a write lock on the pmap_lock (exclusive write); this
 346 * also guarantees exclusive access to the pv_lists. Lock the
347 * pmaps as needed.
348 *
349 * At no time may any routine hold more than one pmap lock or more than
350 * one pv_list lock. Because interrupt level routines can allocate
351 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
352 * kernel_pmap can only be held at splhigh.
353 */
354
1c79356b 355/*
55e303ae 356 * We raise the interrupt level to splvm, to block interprocessor
89b3af67
A
357 * interrupts during pmap operations. We mark the cpu's cr3 inactive
358 * while interrupts are blocked.
1c79356b 359 */
89b3af67
A
360#define SPLVM(spl) { \
361 spl = splhigh(); \
362 CPU_CR3_MARK_INACTIVE(); \
1c79356b
A
363}
364
89b3af67
A
365#define SPLX(spl) { \
366 if (current_cpu_datap()->cpu_tlb_invalid) \
367 process_pmap_updates(); \
368 CPU_CR3_MARK_ACTIVE(); \
369 splx(spl); \
1c79356b 370}
89b3af67 371
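/*
 * Note on the pair above:  marking this cpu's cr3 inactive at splhigh
 * tells remote TLB-shootdown logic that this processor need not be waited
 * on; SPLX() then processes any invalidation posted in the meantime
 * (cpu_tlb_invalid) before marking cr3 active again and dropping spl.
 */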
1c79356b
A
372/*
373 * Lock on pmap system
374 */
375lock_t pmap_system_lock;
376
377#define PMAP_READ_LOCK(pmap, spl) { \
378 SPLVM(spl); \
379 lock_read(&pmap_system_lock); \
380 simple_lock(&(pmap)->lock); \
381}
382
383#define PMAP_WRITE_LOCK(spl) { \
384 SPLVM(spl); \
385 lock_write(&pmap_system_lock); \
386}
387
388#define PMAP_READ_UNLOCK(pmap, spl) { \
389 simple_unlock(&(pmap)->lock); \
390 lock_read_done(&pmap_system_lock); \
391 SPLX(spl); \
392}
393
394#define PMAP_WRITE_UNLOCK(spl) { \
395 lock_write_done(&pmap_system_lock); \
396 SPLX(spl); \
397}
398
399#define PMAP_WRITE_TO_READ_LOCK(pmap) { \
400 simple_lock(&(pmap)->lock); \
401 lock_write_to_read(&pmap_system_lock); \
402}
403
404#define LOCK_PVH(index) lock_pvh_pai(index)
405
406#define UNLOCK_PVH(index) unlock_pvh_pai(index)
407
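/*
 * Hypothetical usage sketch following locking protocol 2 described above:
 * a pmap-based operation takes the shared system lock plus the pmap lock,
 * locks individual pv_heads as it touches them, and releases in reverse
 * order.
 *
 *	spl_t	spl;
 *
 *	PMAP_READ_LOCK(pmap, spl);
 *	...
 *	LOCK_PVH(pai);
 *	... edit the pv_list for this physical page ...
 *	UNLOCK_PVH(pai);
 *	...
 *	PMAP_READ_UNLOCK(pmap, spl);
 */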
55e303ae
A
408#if USLOCK_DEBUG
409extern int max_lock_loops;
91447636
A
410extern int disableSerialOuput;
411#define LOOP_VAR \
412 unsigned int loop_count; \
413 loop_count = disableSerialOuput ? max_lock_loops \
414 : max_lock_loops*100
55e303ae 415#define LOOP_CHECK(msg, pmap) \
91447636 416 if (--loop_count == 0) { \
55e303ae 417 mp_disable_preemption(); \
89b3af67
A
418 kprintf("%s: cpu %d pmap %x\n", \
419 msg, cpu_number(), pmap); \
55e303ae
A
420 Debugger("deadlock detection"); \
421 mp_enable_preemption(); \
91447636 422 loop_count = max_lock_loops; \
55e303ae
A
423 }
424#else /* USLOCK_DEBUG */
425#define LOOP_VAR
426#define LOOP_CHECK(msg, pmap)
427#endif /* USLOCK_DEBUG */
1c79356b 428
8ad349bb 429
89b3af67 430static void pmap_flush_tlbs(pmap_t pmap);
8ad349bb 431
89b3af67
A
432#define PMAP_UPDATE_TLBS(pmap, s, e) \
433 pmap_flush_tlbs(pmap)
8f6c56a5 434
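/*
 * Note: the (s, e) range arguments above are currently ignored --
 * pmap_flush_tlbs() shoots down the TLBs of every cpu using the pmap
 * no matter how small the affected range is.
 */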
55e303ae 435
89b3af67 436#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
8f6c56a5 437
1c79356b 438
91447636 439pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
1c79356b
A
440
441/*
442 * Other useful macros.
443 */
91447636 444#define current_pmap() (vm_map_pmap(current_thread()->map))
1c79356b
A
445
446struct pmap kernel_pmap_store;
447pmap_t kernel_pmap;
448
89b3af67
A
449pd_entry_t high_shared_pde;
450pd_entry_t commpage64_pde;
91447636 451
1c79356b
A
452struct zone *pmap_zone; /* zone of pmap structures */
453
454int pmap_debug = 0; /* flag for debugging prints */
91447636 455
1c79356b
A
456unsigned int inuse_ptepages_count = 0; /* debugging */
457
89b3af67
A
458addr64_t kernel64_cr3;
459boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */
460
1c79356b
A
461/*
462 * Pmap cache. Cache is threaded through ref_count field of pmap.
463 * Max will eventually be constant -- variable for experimentation.
464 */
465int pmap_cache_max = 32;
466int pmap_alloc_chunk = 8;
467pmap_t pmap_cache_list;
468int pmap_cache_count;
469decl_simple_lock_data(,pmap_cache_lock)
470
1c79356b
A
471extern char end;
472
91447636 473static int nkpt;
89b3af67
A
474extern uint32_t lowGlo;
475extern void *version;
91447636
A
476
477pt_entry_t *DMAP1, *DMAP2;
478caddr_t DADDR1;
479caddr_t DADDR2;
1c79356b
A
480
481#if DEBUG_ALIAS
482#define PMAP_ALIAS_MAX 32
483struct pmap_alias {
484 vm_offset_t rpc;
485 pmap_t pmap;
89b3af67 486 vm_map_offset_t va;
1c79356b
A
487 int cookie;
488#define PMAP_ALIAS_COOKIE 0xdeadbeef
489} pmap_aliasbuf[PMAP_ALIAS_MAX];
490int pmap_alias_index = 0;
491extern vm_offset_t get_rpc();
492
493#endif /* DEBUG_ALIAS */
494
89b3af67
A
495/*
496 * for legacy, returns the address of the pde entry.
497 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
498 * then returns the mapped address of the pde entry in that page
499 */
500pd_entry_t *
501pmap_pde(pmap_t m, vm_map_offset_t v)
5d5c5d0d 502{
89b3af67
A
503 pd_entry_t *pde;
504 if (!cpu_64bit || (m == kernel_pmap)) {
505 pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
506 } else {
507 assert(m);
508 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
509 pde = pmap64_pde(m, v);
510 }
511 return pde;
5d5c5d0d
A
512}
513
c0fea474 514
5d5c5d0d 515/*
89b3af67
A
516 * the single pml4 page per pmap is allocated at pmap create time and exists
517 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
 518 * level of page table dynamic mapping).
519 * this returns the address of the requested pml4 entry in the top level page.
5d5c5d0d 520 */
89b3af67
A
521static inline
522pml4_entry_t *
523pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
524{
525 return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
526}
527
528/*
529 * maps in the pml4 page, if any, containing the pdpt entry requested
530 * and returns the address of the pdpt entry in that mapped page
531 */
532pdpt_entry_t *
533pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
534{
535 pml4_entry_t newpf;
536 pml4_entry_t *pml4;
537 int i;
538
539 assert(pmap);
540 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
541 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
542 return(0);
5d5c5d0d 543 }
89b3af67
A
544
545 pml4 = pmap64_pml4(pmap, vaddr);
546
547 if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
548
549 newpf = *pml4 & PG_FRAME;
550
551
552 for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
553 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
554 return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
555 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
556 }
557 }
558
559 current_cpu_datap()->cpu_pmap->pdpt_window_index++;
560 if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
561 current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
562 pmap_store_pte(
563 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
564 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
565 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
566 return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
567 ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
568 }
569
570 return (0);
5d5c5d0d
A
571}
572
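/*
 * The window loop above implements a small per-cpu cache of temporary
 * mappings:  if the pdpt page named by the pml4 entry is already mapped
 * through one of this cpu's PMAP_PDPT windows it is reused; otherwise the
 * next window is recycled round-robin, its prv_CMAP pte is rewritten, and
 * the stale translation is flushed with invlpg().  pmap64_pde() and
 * pmap_pte() below use the same scheme with their own window ranges.
 */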
89b3af67
A
573/*
574 * maps in the pdpt page, if any, containing the pde entry requested
575 * and returns the address of the pde entry in that mapped page
576 */
577pd_entry_t *
578pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
5d5c5d0d 579{
89b3af67
A
580 pdpt_entry_t newpf;
581 pdpt_entry_t *pdpt;
582 int i;
5d5c5d0d 583
89b3af67
A
584 assert(pmap);
585 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
586 if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
587 return(0);
588 }
589
590 /* if (vaddr & (1ULL << 63)) panic("neg addr");*/
591 pdpt = pmap64_pdpt(pmap, vaddr);
592
593 if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
594
595 newpf = *pdpt & PG_FRAME;
596
597 for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
598 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
599 return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
600 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
601 }
5d5c5d0d 602 }
89b3af67
A
603
604 current_cpu_datap()->cpu_pmap->pde_window_index++;
605 if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
606 current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
607 pmap_store_pte(
608 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
609 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
610 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
611 return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
612 ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
8ad349bb 613 }
5d5c5d0d 614
89b3af67
A
615 return (0);
616}
617
618
619
620/*
621 * return address of mapped pte for vaddr va in pmap pmap.
622 * must be called with pre-emption or interrupts disabled
623 * if targeted pmap is not the kernel pmap
624 * since we may be passing back a virtual address that is
625 * associated with this cpu... pre-emption or interrupts
626 * must remain disabled until the caller is done using
 627 * the pointer that was passed back.
628 *
629 * maps the pde page, if any, containing the pte in and returns
630 * the address of the pte in that mapped page
631 */
632pt_entry_t *
633pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
634{
635 pd_entry_t *pde;
636 pd_entry_t newpf;
637 int i;
638
639 assert(pmap);
640 pde = pmap_pde(pmap,vaddr);
641
642 if (pde && ((*pde & INTEL_PTE_VALID))) {
643 if (pmap == kernel_pmap) {
644 return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
645 }
646
647 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
648
649 newpf = *pde & PG_FRAME;
650
651 for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
652 if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
653 return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
654 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
655 }
656 }
657
658 current_cpu_datap()->cpu_pmap->pte_window_index++;
659 if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
660 current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
661 pmap_store_pte(
662 (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
663 newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
664 invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
665 return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
666 ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
8f6c56a5 667 }
89b3af67
A
668
669 return(0);
1c79356b 670}
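/*
 * Hypothetical caller sketch for pmap_pte() on a non-kernel pmap
 * (user_pmap and vaddr are placeholders):  interrupts or preemption must
 * stay disabled for as long as the returned pointer is used, since it may
 * point into a per-cpu mapping window.
 *
 *	spl_t		s = splhigh();
 *	pt_entry_t	*ptep = pmap_pte(user_pmap, vaddr);
 *
 *	if (ptep != PT_ENTRY_NULL)
 *		pmap_store_pte(ptep, *ptep & ~INTEL_PTE_WRITE);
 *	splx(s);
 */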
89b3af67 671
1c79356b
A
672
673/*
674 * Map memory at initialization. The physical addresses being
675 * mapped are not managed and are never unmapped.
676 *
677 * For now, VM is already on, we only need to map the
678 * specified memory.
679 */
680vm_offset_t
681pmap_map(
89b3af67
A
682 vm_offset_t virt,
683 vm_map_offset_t start_addr,
684 vm_map_offset_t end_addr,
685 vm_prot_t prot,
686 unsigned int flags)
1c79356b 687{
89b3af67 688 int ps;
1c79356b
A
689
690 ps = PAGE_SIZE;
91447636 691 while (start_addr < end_addr) {
89b3af67
A
692 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
693 (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
1c79356b 694 virt += ps;
91447636 695 start_addr += ps;
1c79356b
A
696 }
697 return(virt);
698}
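
/*
 * Illustrative (hypothetical) bootstrap use of pmap_map():  wire an
 * already-known physical range read/write into kernel VA and advance the
 * virtual cursor.  The addresses are placeholders, not taken from this
 * file.
 *
 *	vm_offset_t next_va;
 *
 *	next_va = pmap_map(virtual_avail,
 *			   (vm_map_offset_t)0x100000,
 *			   (vm_map_offset_t)0x104000,
 *			   VM_PROT_READ | VM_PROT_WRITE, 0);
 */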
699
700/*
701 * Back-door routine for mapping kernel VM at initialization.
 702 * Useful for mapping memory outside the range
 703 * [vm_first_phys, vm_last_phys) (i.e., devices).
 704 * Sets no-cache, A, D.
705 * Otherwise like pmap_map.
706 */
707vm_offset_t
708pmap_map_bd(
89b3af67
A
709 vm_offset_t virt,
710 vm_map_offset_t start_addr,
711 vm_map_offset_t end_addr,
712 vm_prot_t prot,
713 unsigned int flags)
1c79356b 714{
89b3af67
A
715 pt_entry_t template;
716 pt_entry_t *pte;
1c79356b 717
91447636 718 template = pa_to_pte(start_addr)
1c79356b
A
719 | INTEL_PTE_REF
720 | INTEL_PTE_MOD
721 | INTEL_PTE_WIRED
722 | INTEL_PTE_VALID;
89b3af67
A
723
724 if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
725 template |= INTEL_PTE_NCACHE;
726 if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
727 template |= INTEL_PTE_PTA;
728 }
729
1c79356b
A
730 if (prot & VM_PROT_WRITE)
731 template |= INTEL_PTE_WRITE;
732
91447636 733 while (start_addr < end_addr) {
89b3af67 734 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
91447636 735 if (pte == PT_ENTRY_NULL) {
1c79356b 736 panic("pmap_map_bd: Invalid kernel address\n");
91447636 737 }
89b3af67 738 pmap_store_pte(pte, template);
1c79356b
A
739 pte_increment_pa(template);
740 virt += PAGE_SIZE;
91447636 741 start_addr += PAGE_SIZE;
1c79356b
A
742 }
743
55e303ae 744 flush_tlb();
1c79356b
A
745 return(virt);
746}
747
1c79356b
A
748extern char *first_avail;
749extern vm_offset_t virtual_avail, virtual_end;
91447636
A
750extern pmap_paddr_t avail_start, avail_end;
751extern vm_offset_t etext;
752extern void *sectHIBB;
753extern int sectSizeHIB;
1c79356b 754
89b3af67
A
755
756vm_offset_t
757pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
758{
759 vm_offset_t ve = pmap_index_to_virt(e);
760 pt_entry_t *ptep;
761 pmap_paddr_t pa;
762 int i;
763
764 assert(0 == (va & PAGE_MASK)); /* expecting page aligned */
765 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);
766
767 for (i=0; i< sz; i++) {
768 pa = (pmap_paddr_t) kvtophys(va);
769 pmap_store_pte(ptep, (pa & PG_FRAME)
770 | INTEL_PTE_VALID
771 | INTEL_PTE_GLOBAL
772 | INTEL_PTE_RW
773 | INTEL_PTE_REF
774 | INTEL_PTE_MOD);
775 va+= PAGE_SIZE;
776 ptep++;
777 }
778 return ve;
779}
780
781vm_offset_t
782pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
783{
784 enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
785 return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
786}
787
788void pmap_init_high_shared(void);
789
790extern vm_offset_t gdtptr, idtptr;
791
792extern uint32_t low_intstack;
793
794extern struct fake_descriptor ldt_desc_pattern;
795extern struct fake_descriptor tss_desc_pattern;
796
797extern char hi_remap_text, hi_remap_etext;
798extern char t_zero_div;
799
800pt_entry_t *pte_unique_base;
801
802void
803pmap_init_high_shared(void)
804{
805
806 vm_offset_t haddr;
807 struct __gdt_desc_struct gdt_desc = {0,0,0};
808 struct __idt_desc_struct idt_desc = {0,0,0};
809#if MACH_KDB
810 struct i386_tss *ttss;
811#endif
812
813 kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
814 HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
815 pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
816
817 if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
818 HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
819 panic("tramps too large");
820 haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
821 (vm_offset_t) &hi_remap_text, 3);
822 kprintf("tramp: 0x%x, ",haddr);
823 printf("hi mem tramps at 0x%x\n",haddr);
824 /* map gdt up high and update ptr for reload */
825 haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
826 (vm_offset_t) master_gdt, 1);
827 __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
828 gdt_desc.address = haddr;
829 kprintf("GDT: 0x%x, ",haddr);
830 /* map ldt up high */
831 haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
832 (vm_offset_t) master_ldt,
833 HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
834 kprintf("LDT: 0x%x, ",haddr);
835 /* put new ldt addr into gdt */
836 master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern;
837 master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr;
838 fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1);
839 master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern;
840 master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr;
841 fix_desc(&master_gdt[sel_idx(USER_LDT)], 1);
842
843 /* map idt up high */
844 haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
845 (vm_offset_t) master_idt, 1);
846 __asm__ __volatile__("sidt %0" : "=m" (idt_desc));
847 idt_desc.address = haddr;
848 kprintf("IDT: 0x%x, ", haddr);
849 /* remap ktss up high and put new high addr into gdt */
850 haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
851 (vm_offset_t) &master_ktss, 1);
852 master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern;
853 master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr;
854 fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1);
855 kprintf("KTSS: 0x%x, ",haddr);
856#if MACH_KDB
857 /* remap dbtss up high and put new high addr into gdt */
858 haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
859 (vm_offset_t) &master_dbtss, 1);
860 master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
861 master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr;
862 fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1);
863 ttss = (struct i386_tss *)haddr;
864 kprintf("DBTSS: 0x%x, ",haddr);
865#endif /* MACH_KDB */
866
867 /* remap dftss up high and put new high addr into gdt */
868 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
869 (vm_offset_t) &master_dftss, 1);
870 master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern;
871 master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr;
872 fix_desc(&master_gdt[sel_idx(DF_TSS)], 1);
873 kprintf("DFTSS: 0x%x\n",haddr);
874
875 /* remap mctss up high and put new high addr into gdt */
876 haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
877 (vm_offset_t) &master_mctss, 1);
878 master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern;
879 master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr;
880 fix_desc(&master_gdt[sel_idx(MC_TSS)], 1);
881 kprintf("MCTSS: 0x%x\n",haddr);
882
883 __asm__ __volatile__("lgdt %0": "=m" (gdt_desc));
884 __asm__ __volatile__("lidt %0": "=m" (idt_desc));
885 kprintf("gdt/idt reloaded, ");
886 set_tr(KERNEL_TSS);
887 kprintf("tr reset to KERNEL_TSS\n");
888}
889
890
1c79356b
A
891/*
892 * Bootstrap the system enough to run with virtual memory.
893 * Map the kernel's code and data, and allocate the system page table.
894 * Called with mapping OFF. Page_size must already be set.
895 *
896 * Parameters:
897 * load_start: PA where kernel was loaded
898 * avail_start PA of first available physical page -
899 * after kernel page tables
900 * avail_end PA of last available physical page
901 * virtual_avail VA of first available page -
902 * after kernel page tables
903 * virtual_end VA of last available page -
904 * end of kernel address space
905 *
906 * &start_text start of kernel text
907 * &etext end of kernel text
908 */
909
910void
911pmap_bootstrap(
89b3af67
A
912 __unused vm_offset_t load_start,
913 boolean_t IA32e)
1c79356b 914{
91447636
A
915 vm_offset_t va;
916 pt_entry_t *pte;
917 int i;
918 int wpkernel, boot_arg;
89b3af67 919 pdpt_entry_t *pdpt;
1c79356b 920
91447636
A
921 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
922 * known to VM */
1c79356b
A
923 /*
924 * The kernel's pmap is statically allocated so we don't
925 * have to use pmap_create, which is unlikely to work
926 * correctly at this part of the boot sequence.
927 */
928
89b3af67 929
1c79356b 930 kernel_pmap = &kernel_pmap_store;
91447636 931 kernel_pmap->ref_count = 1;
89b3af67
A
932 kernel_pmap->nx_enabled = FALSE;
933 kernel_pmap->pm_64bit = 0;
91447636
A
934 kernel_pmap->pm_obj = (vm_object_t) NULL;
935 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
89b3af67
A
936 kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
937 pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
938 kernel_pmap->pm_pdpt = pdpt;
939 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);
1c79356b 940
91447636
A
941 va = (vm_offset_t)kernel_pmap->dirbase;
942 /* setup self referential mapping(s) */
89b3af67 943 for (i = 0; i< NPGPTD; i++, pdpt++) {
91447636
A
944 pmap_paddr_t pa;
945 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
89b3af67
A
946 pmap_store_pte(
947 (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
91447636 948 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
89b3af67
A
949 INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
950 pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
91447636 951 }
1c79356b 952
89b3af67
A
953 cpu_64bit = IA32e;
954
955 lo_kernel_cr3 = kernel_pmap->pm_cr3;
956 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
957
958 /* save the value we stuff into created pmaps to share the gdts etc */
959 high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
960 /* make sure G bit is on for high shared pde entry */
961 high_shared_pde |= INTEL_PTE_GLOBAL;
962 pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
963
91447636 964 nkpt = NKPT;
89b3af67 965 inuse_ptepages_count += NKPT;
1c79356b 966
91447636
A
967 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
968 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
1c79356b
A
969
970 /*
91447636
A
971 * Reserve some special page table entries/VA space for temporary
972 * mapping of pages.
1c79356b 973 */
91447636 974#define SYSMAP(c, p, v, n) \
89b3af67 975 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
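/*
 * SYSMAP carves n pages of VA out of the cursor `va' and records both the
 * chosen address (v, cast to type c) and the pte that maps it (p).  It is
 * used just below to set up the per-cpu mapping windows and the debugger
 * DMAP entries.
 */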
91447636
A
976
977 va = virtual_avail;
89b3af67 978 pte = vtopte(va);
8ad349bb 979
89b3af67
A
980 for (i=0; i<PMAP_NWINDOWS; i++) {
981 SYSMAP(caddr_t,
982 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
983 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
984 1);
985 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
986 }
1c79356b 987
91447636
A
988 /* DMAP user for debugger */
989 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
990 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
1c79356b 991
1c79356b 992
91447636
A
993 lock_init(&pmap_system_lock,
994 FALSE, /* NOT a sleep lock */
995 0, 0);
1c79356b 996
91447636 997 virtual_avail = va;
1c79356b 998
91447636 999 wpkernel = 1;
89b3af67
A
1000 if (PE_parse_boot_arg("wpkernel", &boot_arg)) {
1001 if (boot_arg == 0)
1002 wpkernel = 0;
1c79356b
A
1003 }
1004
89b3af67
A
1005 /* Remap kernel text readonly unless the "wpkernel" boot-arg is present
1006 * and set to 0.
1007 */
91447636
A
1008 if (wpkernel)
1009 {
1010 vm_offset_t myva;
1011 pt_entry_t *ptep;
1012
89b3af67 1013 for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
91447636
A
1014 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
1015 continue;
89b3af67 1016 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
91447636 1017 if (ptep)
89b3af67 1018 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
91447636 1019 }
91447636 1020 }
1c79356b 1021
89b3af67
A
1022 /* no matter what, kernel page zero is not accessible */
1023 pte = pmap_pte(kernel_pmap, 0);
1024 pmap_store_pte(pte, INTEL_PTE_INVALID);
1025
1026 /* map lowmem global page into fixed addr 0x2000 */
1027 if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte");
1028
1029 pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW);
1030 flush_tlb();
1031
91447636
A
1032 simple_lock_init(&kernel_pmap->lock, 0);
1033 simple_lock_init(&pv_free_list_lock, 0);
1c79356b 1034
89b3af67
A
1035 pmap_init_high_shared();
1036
1037 pde_mapped_size = PDE_MAPPED_SIZE;
1038
1039 if (cpu_64bit) {
1040 pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT;
1041 pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
1042 pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
1043 int istate = ml_set_interrupts_enabled(FALSE);
1044
1045 /*
1046 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
1047 * with page bits set for the correct IA-32e operation and so that
1048 * the legacy-mode IdlePDPT is retained for slave processor start-up.
1049 * This is necessary due to the incompatible use of page bits between
1050 * 64-bit and legacy modes.
1051 */
1052 kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
1053 kernel_pmap->pm_pml4 = IdlePML4;
1054 kernel_pmap->pm_pdpt = (pd_entry_t *)
1055 ((unsigned int)IdlePDPT64 | KERNBASE );
1056#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
1057 pmap_store_pte(kernel_pmap->pm_pml4,
1058 (uint32_t)IdlePDPT64 | PAGE_BITS);
1059 pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
1060 pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
1061 pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
1062 pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);
1063
1064 /*
 1065 * The kernel is also mapped in the uber-space, in the 4GB region starting
 1066 * at 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
1067 */
1068 pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));
1069
1070 kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
1071 cpu_IA32e_enable(current_cpu_datap());
1072 current_cpu_datap()->cpu_is64bit = TRUE;
1073 /* welcome to a 64 bit world */
1074
1075 /* Re-initialize and load descriptors */
1076 cpu_desc_init64(&cpu_data_master, TRUE);
1077 cpu_desc_load64(&cpu_data_master);
1078 fast_syscall_init64();
1079
1080 pde_mapped_size = 512*4096 ;
1081
1082 ml_set_interrupts_enabled(istate);
1083
1084 }
1085 kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;
1c79356b 1086
91447636
A
1087 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
1088 VADDR(KPTDI,0), virtual_end);
8f6c56a5 1089 printf("PAE enabled\n");
89b3af67
A
1090 if (cpu_64bit){
1091 printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
1092
1093 kprintf("Available physical space from 0x%llx to 0x%llx\n",
8f6c56a5 1094 avail_start, avail_end);
89b3af67
A
1095
1096 /*
1097 * By default for 64-bit users loaded at 4GB, share kernel mapping.
1098 * But this may be overridden by the -no_shared_cr3 boot-arg.
1099 */
1100 if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) {
1101 kprintf("Shared kernel address space disabled\n");
1102 }
1c79356b
A
1103}
1104
1105void
1106pmap_virtual_space(
1107 vm_offset_t *startp,
1108 vm_offset_t *endp)
1109{
1110 *startp = virtual_avail;
1111 *endp = virtual_end;
1112}
1113
1114/*
1115 * Initialize the pmap module.
1116 * Called by vm_init, to initialize any structures that the pmap
1117 * system needs to map virtual memory.
1118 */
1119void
1120pmap_init(void)
1121{
1122 register long npages;
1123 vm_offset_t addr;
1124 register vm_size_t s;
89b3af67
A
1125 vm_map_offset_t vaddr;
1126 ppnum_t ppn;
1c79356b
A
1127
1128 /*
1129 * Allocate memory for the pv_head_table and its lock bits,
1130 * the modify bit array, and the pte_page table.
1131 */
1132
91447636
A
1133 /* zero bias all these arrays now instead of off avail_start
1134 so we cover all memory */
1135 npages = i386_btop(avail_end);
1c79356b
A
1136 s = (vm_size_t) (sizeof(struct pv_entry) * npages
1137 + pv_lock_table_size(npages)
1138 + npages);
1139
1140 s = round_page(s);
1141 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
1142 panic("pmap_init");
1143
1144 memset((char *)addr, 0, s);
1145
1146 /*
1147 * Allocate the structures first to preserve word-alignment.
1148 */
1149 pv_head_table = (pv_entry_t) addr;
1150 addr = (vm_offset_t) (pv_head_table + npages);
1151
1152 pv_lock_table = (char *) addr;
1153 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1154
1155 pmap_phys_attributes = (char *) addr;
1156
1157 /*
1158 * Create the zone of physical maps,
1159 * and of the physical-to-virtual entries.
1160 */
1161 s = (vm_size_t) sizeof(struct pmap);
1162 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
1163 s = (vm_size_t) sizeof(struct pv_entry);
1164 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
91447636
A
1165 s = 63;
1166 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
55e303ae 1167
1c79356b
A
1168 /*
1169 * Only now, when all of the data structures are allocated,
1170 * can we set vm_first_phys and vm_last_phys. If we set them
1171 * too soon, the kmem_alloc_wired above will try to use these
1172 * data structures and blow up.
1173 */
1174
91447636
A
1175 /* zero bias this now so we cover all memory */
1176 vm_first_phys = 0;
1c79356b 1177 vm_last_phys = avail_end;
91447636 1178
91447636
A
1179 kptobj = &kptobj_object_store;
1180 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
1181 kernel_pmap->pm_obj = kptobj;
91447636
A
1182
1183 /* create pv entries for kernel pages mapped by low level
1184 startup code. these have to exist so we can pmap_remove()
1185 e.g. kext pages from the middle of our addr space */
1186
89b3af67 1187 vaddr = (vm_map_offset_t)0;
91447636
A
1188 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
1189 pv_entry_t pv_e;
1190
1191 pv_e = pai_to_pvh(ppn);
1192 pv_e->va = vaddr;
1193 vaddr += PAGE_SIZE;
89b3af67 1194 kernel_pmap->stats.resident_count++;
91447636
A
1195 pv_e->pmap = kernel_pmap;
1196 pv_e->next = PV_ENTRY_NULL;
1197 }
1198
1c79356b
A
1199 pmap_initialized = TRUE;
1200
1201 /*
 1202 * Initialize pmap cache.
1203 */
1204 pmap_cache_list = PMAP_NULL;
1205 pmap_cache_count = 0;
91447636 1206 simple_lock_init(&pmap_cache_lock, 0);
1c79356b
A
1207}
1208
91447636
A
1209void
1210x86_lowmem_free(void)
1211{
1212 /* free lowmem pages back to the vm system. we had to defer doing this
1213 until the vm system was fully up.
1214 the actual pages that are released are determined by which
1215 pages the memory sizing code puts into the region table */
1c79356b 1216
89b3af67 1217 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base),
91447636
A
1218 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
1219}
1c79356b
A
1220
1221
1222#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1223
1224boolean_t
1225pmap_verify_free(
55e303ae 1226 ppnum_t pn)
1c79356b 1227{
91447636 1228 pmap_paddr_t phys;
1c79356b
A
1229 pv_entry_t pv_h;
1230 int pai;
1231 spl_t spl;
1232 boolean_t result;
1233
55e303ae 1234 assert(pn != vm_page_fictitious_addr);
91447636 1235 phys = (pmap_paddr_t)i386_ptob(pn);
1c79356b
A
1236 if (!pmap_initialized)
1237 return(TRUE);
1238
91447636 1239 if (!pmap_valid_page(pn))
1c79356b
A
1240 return(FALSE);
1241
1242 PMAP_WRITE_LOCK(spl);
1243
1244 pai = pa_index(phys);
1245 pv_h = pai_to_pvh(pai);
1246
1247 result = (pv_h->pmap == PMAP_NULL);
1248 PMAP_WRITE_UNLOCK(spl);
1249
1250 return(result);
1251}
1252
1253/*
1254 * Create and return a physical map.
1255 *
1256 * If the size specified for the map
1257 * is zero, the map is an actual physical
1258 * map, and may be referenced by the
1259 * hardware.
1260 *
1261 * If the size specified is non-zero,
1262 * the map will be used in software only, and
1263 * is bounded by that size.
1264 */
1265pmap_t
1266pmap_create(
89b3af67
A
1267 vm_map_size_t sz,
1268 boolean_t is_64bit)
1c79356b 1269{
89b3af67
A
1270 register pmap_t p;
1271 int i;
1272 vm_offset_t va;
1273 vm_size_t size;
1274 pdpt_entry_t *pdpt;
1275 pml4_entry_t *pml4p;
1276 int template;
1277 pd_entry_t *pdp;
1278 spl_t s;
1279
1280 size = (vm_size_t) sz;
1c79356b
A
1281
1282 /*
1283 * A software use-only map doesn't even need a map.
1284 */
1285
1286 if (size != 0) {
1287 return(PMAP_NULL);
1288 }
1289
91447636
A
1290 p = (pmap_t) zalloc(pmap_zone);
1291 if (PMAP_NULL == p)
1292 panic("pmap_create zalloc");
8f6c56a5 1293
89b3af67
A
1294 /* init counts now since we'll be bumping some */
1295 simple_lock_init(&p->lock, 0);
1c79356b
A
1296 p->stats.resident_count = 0;
1297 p->stats.wired_count = 0;
1c79356b 1298 p->ref_count = 1;
89b3af67
A
1299 p->nx_enabled = 1;
1300 p->pm_64bit = is_64bit;
1301 p->pm_kernel_cr3 = FALSE;
1302 p->pm_shared = FALSE;
1303
1304 if (!cpu_64bit) {
1305 /* legacy 32 bit setup */
1306 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
1307 * entry covers 1GB of addr space */
1308 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1309 panic("pmap_create kmem_alloc_wired");
1310 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1311 if ((vm_offset_t)NULL == p->pm_hold) {
1312 panic("pdpt zalloc");
1313 }
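		/*
		 * PAE requires the 4-entry (32-byte) pdpt to be 32-byte
		 * aligned; the pdpt_zone element is 63 bytes, so rounding
		 * pm_hold up to the next 32-byte boundary always yields a
		 * suitably aligned table within the allocation.
		 */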
1314 pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1315 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
1316 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
1317 panic("pmap_create vm_object_allocate");
1318
1319 memset((char *)p->dirbase, 0, NBPTD);
1320
1321 va = (vm_offset_t)p->dirbase;
1322 p->pdirbase = kvtophys(va);
1323
1324 template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
1325 for (i = 0; i< NPGPTD; i++, pdpt++) {
1326 pmap_paddr_t pa;
1327 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1328 pmap_store_pte(pdpt, pa | template);
1329 }
1330
1331 /* map the high shared pde */
1332 pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
c0fea474 1333
89b3af67 1334 } else {
5d5c5d0d 1335
89b3af67 1336 /* 64 bit setup */
5d5c5d0d 1337
89b3af67
A
1338 /* alloc the pml4 page in kernel vm */
1339 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
1340 panic("pmap_create kmem_alloc_wired pml4");
1341
1342 memset((char *)p->pm_hold, 0, PAGE_SIZE);
1343 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
1344
1345 inuse_ptepages_count++;
1346 p->stats.resident_count++;
1347 p->stats.wired_count++;
1348
1349 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
1350
1351 if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
 1352 panic("pmap_create pml4 obj");
1353
1354 if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
1355 panic("pmap_create pdpt obj");
1356
1357 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
1358 panic("pmap_create pte obj");
1359
1360 /* uber space points to uber mapped kernel */
1361 s = splhigh();
1362 pml4p = pmap64_pml4(p, 0ULL);
1363 pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
1364 if (!is_64bit) {
1365 while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
1366 splx(s);
1367 pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
1368 s = splhigh();
1369 }
1370 pmap_store_pte(pdp, high_shared_pde);
1371 }
1372
1373 splx(s);
1374 }
1c79356b
A
1375
1376 return(p);
1377}
1378
89b3af67
A
1379void
1380pmap_set_4GB_pagezero(pmap_t p)
1381{
1382 int spl;
1383 pdpt_entry_t *user_pdptp;
1384 pdpt_entry_t *kern_pdptp;
1385
1386 assert(p->pm_64bit);
1387
1388 /* Kernel-shared cr3 may be disabled by boot arg. */
1389 if (no_shared_cr3)
1390 return;
1391
1392 /*
1393 * Set the bottom 4 3rd-level pte's to be the kernel's.
1394 */
1395 spl = splhigh();
1396 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
1397 splx(spl);
1398 pmap_expand_pml4(p, 0x0);
1399 spl = splhigh();
1400 }
1401 kern_pdptp = kernel_pmap->pm_pdpt;
1402 pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
1403 pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
1404 pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
1405 pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
1406
1407 p->pm_kernel_cr3 = TRUE;
1408
1409 splx(spl);
1410
1411}
1412
1413void
1414pmap_load_kernel_cr3(void)
1415{
1416 uint32_t kernel_cr3;
1417
1418 assert(!ml_get_interrupts_enabled());
1419
1420 /*
1421 * Reload cr3 with the true kernel cr3.
1422 * Note: kernel's pml4 resides below 4GB physical.
1423 */
1424 kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
1425 set_cr3(kernel_cr3);
1426 current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
1427 current_cpu_datap()->cpu_task_map = TASK_MAP_32BIT;
1428 current_cpu_datap()->cpu_tlb_invalid = FALSE;
1429 __asm__ volatile("mfence");
1430}
1431
1432void
1433pmap_clear_4GB_pagezero(pmap_t p)
1434{
1435 int spl;
1436 pdpt_entry_t *user_pdptp;
1437
1438 if (!p->pm_kernel_cr3)
1439 return;
1440
1441 spl = splhigh();
1442 user_pdptp = pmap64_pdpt(p, 0x0);
1443 pmap_store_pte(user_pdptp+0, 0);
1444 pmap_store_pte(user_pdptp+1, 0);
1445 pmap_store_pte(user_pdptp+2, 0);
1446 pmap_store_pte(user_pdptp+3, 0);
1447
1448 p->pm_kernel_cr3 = FALSE;
1449
1450 pmap_load_kernel_cr3();
1451
1452 splx(spl);
1453}
1454
1c79356b
A
1455/*
1456 * Retire the given physical map from service.
1457 * Should only be called if the map contains
1458 * no valid mappings.
1459 */
1460
1461void
1462pmap_destroy(
1463 register pmap_t p)
1464{
1c79356b
A
1465 register int c;
1466 spl_t s;
89b3af67
A
1467#if 0
1468 register pt_entry_t *pdep;
1c79356b 1469 register vm_page_t m;
91447636 1470#endif
1c79356b
A
1471
1472 if (p == PMAP_NULL)
1473 return;
1c79356b
A
1474 SPLVM(s);
1475 simple_lock(&p->lock);
1476 c = --p->ref_count;
1477 if (c == 0) {
1c79356b
A
1478 /*
1479 * If some cpu is not using the physical pmap pointer that it
1480 * is supposed to be (see set_dirbase), we might be using the
1481 * pmap that is being destroyed! Make sure we are
1482 * physically on the right pmap:
1483 */
55e303ae
A
1484 PMAP_UPDATE_TLBS(p,
1485 VM_MIN_ADDRESS,
1486 VM_MAX_KERNEL_ADDRESS);
1c79356b 1487
1c79356b
A
1488 }
1489 simple_unlock(&p->lock);
1490 SPLX(s);
1491
1492 if (c != 0) {
1493 return; /* still in use */
1494 }
1495
1496 /*
1497 * Free the memory maps, then the
1498 * pmap structure.
1499 */
91447636 1500
89b3af67
A
1501 if (!cpu_64bit) {
1502#if 0
91447636
A
1503 pdep = (pt_entry_t *)p->dirbase;
1504
1505 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
89b3af67
A
1506 int ind;
1507
1c79356b 1508 if (*pdep & INTEL_PTE_VALID) {
89b3af67
A
1509 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1510
91447636
A
1511 vm_object_lock(p->pm_obj);
1512 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1513 if (m == VM_PAGE_NULL) {
1c79356b 1514 panic("pmap_destroy: pte page not in object");
91447636 1515 }
1c79356b
A
1516 vm_page_lock_queues();
1517 vm_page_free(m);
1518 inuse_ptepages_count--;
89b3af67 1519
91447636 1520 vm_object_unlock(p->pm_obj);
1c79356b
A
1521 vm_page_unlock_queues();
1522
1523 /*
1524 * Clear pdes, this might be headed for the cache.
1525 */
89b3af67
A
1526 pmap_store_pte(pdep, 0);
1527 pdep++;
1c79356b
A
1528 }
1529 else {
89b3af67
A
1530 pmap_store_pte(pdep, 0);
1531 pdep++;
1c79356b
A
1532 }
1533
1534 }
89b3af67
A
1535#else
1536 inuse_ptepages_count -= p->pm_obj->resident_page_count;
8f6c56a5 1537#endif
89b3af67
A
1538 vm_object_deallocate(p->pm_obj);
1539 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1540 zfree(pdpt_zone, (void *)p->pm_hold);
1541 } else {
1542
1543 /* 64 bit */
1544
1545 pmap_unmap_sharedpage(p);
1546
1547 /* free 64 bit mode structs */
1548 inuse_ptepages_count--;
1549 kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
1550
1551 inuse_ptepages_count -= p->pm_obj_pml4->resident_page_count;
1552 vm_object_deallocate(p->pm_obj_pml4);
1553
1554 inuse_ptepages_count -= p->pm_obj_pdpt->resident_page_count;
1555 vm_object_deallocate(p->pm_obj_pdpt);
1556
1557 inuse_ptepages_count -= p->pm_obj->resident_page_count;
1558 vm_object_deallocate(p->pm_obj);
1559
1560 }
1561
91447636 1562 zfree(pmap_zone, p);
1c79356b
A
1563}
1564
1565/*
1566 * Add a reference to the specified pmap.
1567 */
1568
1569void
1570pmap_reference(
1571 register pmap_t p)
1572{
1573 spl_t s;
1574
1575 if (p != PMAP_NULL) {
1576 SPLVM(s);
1577 simple_lock(&p->lock);
1578 p->ref_count++;
1579 simple_unlock(&p->lock);
1580 SPLX(s);
1581 }
1582}
1583
1584/*
1585 * Remove a range of hardware page-table entries.
1586 * The entries given are the first (inclusive)
1587 * and last (exclusive) entries for the VM pages.
1588 * The virtual address is the va for the first pte.
1589 *
1590 * The pmap must be locked.
1591 * If the pmap is not the kernel pmap, the range must lie
1592 * entirely within one pte-page. This is NOT checked.
1593 * Assumes that the pte-page exists.
1594 */
1595
89b3af67 1596static void
1c79356b
A
1597pmap_remove_range(
1598 pmap_t pmap,
89b3af67 1599 vm_map_offset_t vaddr,
1c79356b
A
1600 pt_entry_t *spte,
1601 pt_entry_t *epte)
1602{
1603 register pt_entry_t *cpte;
1604 int num_removed, num_unwired;
1605 int pai;
91447636 1606 pmap_paddr_t pa;
1c79356b 1607
1c79356b
A
1608 num_removed = 0;
1609 num_unwired = 0;
1610
1611 for (cpte = spte; cpte < epte;
89b3af67 1612 cpte++, vaddr += PAGE_SIZE) {
1c79356b
A
1613
1614 pa = pte_to_pa(*cpte);
1615 if (pa == 0)
1616 continue;
1617
1c79356b
A
1618 if (iswired(*cpte))
1619 num_unwired++;
1620
91447636 1621 if (!valid_page(i386_btop(pa))) {
1c79356b
A
1622
1623 /*
1624 * Outside range of managed physical memory.
1625 * Just remove the mappings.
1626 */
1c79356b 1627 register pt_entry_t *lpte = cpte;
91447636 1628
89b3af67 1629 pmap_store_pte(lpte, 0);
1c79356b
A
1630 continue;
1631 }
89b3af67 1632 num_removed++;
1c79356b
A
1633
1634 pai = pa_index(pa);
1635 LOCK_PVH(pai);
1636
1637 /*
1638 * Get the modify and reference bits.
1639 */
1640 {
1c79356b
A
1641 register pt_entry_t *lpte;
1642
1c79356b 1643 lpte = cpte;
89b3af67 1644 pmap_phys_attributes[pai] |=
1c79356b 1645 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
89b3af67 1646 pmap_store_pte(lpte, 0);
91447636 1647
1c79356b
A
1648 }
1649
1650 /*
1651 * Remove the mapping from the pvlist for
1652 * this physical page.
1653 */
1654 {
1655 register pv_entry_t pv_h, prev, cur;
1656
1657 pv_h = pai_to_pvh(pai);
1658 if (pv_h->pmap == PMAP_NULL) {
1659 panic("pmap_remove: null pv_list!");
1660 }
89b3af67 1661 if (pv_h->va == vaddr && pv_h->pmap == pmap) {
1c79356b
A
1662 /*
1663 * Header is the pv_entry. Copy the next one
1664 * to header and free the next one (we cannot
1665 * free the header)
1666 */
1667 cur = pv_h->next;
1668 if (cur != PV_ENTRY_NULL) {
1669 *pv_h = *cur;
1670 PV_FREE(cur);
1671 }
1672 else {
1673 pv_h->pmap = PMAP_NULL;
1674 }
1675 }
1676 else {
1677 cur = pv_h;
1678 do {
1679 prev = cur;
1680 if ((cur = prev->next) == PV_ENTRY_NULL) {
55e303ae 1681 panic("pmap-remove: mapping not in pv_list!");
1c79356b 1682 }
89b3af67 1683 } while (cur->va != vaddr || cur->pmap != pmap);
1c79356b
A
1684 prev->next = cur->next;
1685 PV_FREE(cur);
1686 }
1687 UNLOCK_PVH(pai);
1688 }
1689 }
1690
1691 /*
1692 * Update the counts
1693 */
1694 assert(pmap->stats.resident_count >= num_removed);
1695 pmap->stats.resident_count -= num_removed;
1696 assert(pmap->stats.wired_count >= num_unwired);
1697 pmap->stats.wired_count -= num_unwired;
1698}
1699
0b4e3aa0
A
1700/*
1701 * Remove phys addr if mapped in specified map
1702 *
1703 */
1704void
1705pmap_remove_some_phys(
91447636
A
1706 __unused pmap_t map,
1707 __unused ppnum_t pn)
0b4e3aa0
A
1708{
1709
1710/* Implement to support working set code */
1711
1712}
1713
1c79356b
A
1714/*
1715 * Remove the given range of addresses
1716 * from the specified map.
1717 *
1718 * It is assumed that the start and end are properly
1719 * rounded to the hardware page size.
1720 */
1721
55e303ae 1722
1c79356b
A
1723void
1724pmap_remove(
1725 pmap_t map,
55e303ae
A
1726 addr64_t s64,
1727 addr64_t e64)
1c79356b
A
1728{
1729 spl_t spl;
1730 register pt_entry_t *pde;
1731 register pt_entry_t *spte, *epte;
89b3af67
A
1732 addr64_t l64;
1733 addr64_t orig_s64;
1c79356b 1734
89b3af67 1735 if (map == PMAP_NULL || s64 == e64)
1c79356b
A
1736 return;
1737
1738 PMAP_READ_LOCK(map, spl);
1739
89b3af67
A
1740 orig_s64 = s64;
1741
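	/*
	 * Walk the range one page-directory entry's span at a time:
	 * l64 is rounded up to the next pde boundary (pde_mapped_size
	 * bytes) and clipped to e64; when the pde is valid, the run of
	 * pte's covering [s64, l64) is handed to pmap_remove_range().
	 * A single TLB flush for the whole range follows the loop.
	 */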
1742 while (s64 < e64) {
1743 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
1744 if (l64 > e64)
1745 l64 = e64;
1746 pde = pmap_pde(map, s64);
1747 if (pde && (*pde & INTEL_PTE_VALID)) {
1748 spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
1749 spte = &spte[ptenum(s64)];
1750 epte = &spte[intel_btop(l64-s64)];
1751 pmap_remove_range(map, s64, spte, epte);
1c79356b 1752 }
89b3af67 1753 s64 = l64;
1c79356b
A
1754 pde++;
1755 }
89b3af67 1756 PMAP_UPDATE_TLBS(map, orig_s64, e64);
91447636 1757
1c79356b
A
1758 PMAP_READ_UNLOCK(map, spl);
1759}
1760
1761/*
1762 * Routine: pmap_page_protect
1763 *
1764 * Function:
1765 * Lower the permission for all mappings to a given
1766 * page.
1767 */
1768void
1769pmap_page_protect(
55e303ae 1770 ppnum_t pn,
1c79356b
A
1771 vm_prot_t prot)
1772{
1773 pv_entry_t pv_h, prev;
1774 register pv_entry_t pv_e;
1775 register pt_entry_t *pte;
1776 int pai;
1777 register pmap_t pmap;
1778 spl_t spl;
1779 boolean_t remove;
89b3af67 1780 pmap_paddr_t phys;
1c79356b 1781
55e303ae 1782 assert(pn != vm_page_fictitious_addr);
89b3af67 1783
91447636 1784 if (!valid_page(pn)) {
1c79356b
A
1785 /*
1786 * Not a managed page.
1787 */
1788 return;
1789 }
1790
1791 /*
1792 * Determine the new protection.
1793 */
1794 switch (prot) {
1795 case VM_PROT_READ:
1796 case VM_PROT_READ|VM_PROT_EXECUTE:
1797 remove = FALSE;
1798 break;
1799 case VM_PROT_ALL:
1800 return; /* nothing to do */
1801 default:
1802 remove = TRUE;
1803 break;
1804 }
89b3af67
A
1805 phys = (pmap_paddr_t)i386_ptob(pn);
1806 pai = pa_index(phys);
1807 pv_h = pai_to_pvh(pai);
1808
1c79356b
A
1809
1810 /*
1811 * Lock the pmap system first, since we will be changing
1812 * several pmaps.
1813 */
1c79356b
A
1814 PMAP_WRITE_LOCK(spl);
1815
1c79356b
A
1816 /*
1817 * Walk down PV list, changing or removing all mappings.
1818 * We do not have to lock the pv_list because we have
1819 * the entire pmap system locked.
1820 */
1821 if (pv_h->pmap != PMAP_NULL) {
1822
89b3af67 1823 prev = pv_e = pv_h;
c0fea474 1824
89b3af67
A
1825 do {
1826 register vm_map_offset_t vaddr;
5d5c5d0d 1827
89b3af67
A
1828 pmap = pv_e->pmap;
1829 /*
1830 * Lock the pmap to block pmap_extract and similar routines.
1831 */
1832 simple_lock(&pmap->lock);
5d5c5d0d 1833
89b3af67
A
1834 vaddr = pv_e->va;
1835 pte = pmap_pte(pmap, vaddr);
1836 if(0 == pte) {
1837 kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr);
1838 panic("pmap_page_protect");
1839 }
8f6c56a5 1840 /*
89b3af67 1841 * Consistency checks.
8f6c56a5 1842 */
89b3af67
A
1843 /* assert(*pte & INTEL_PTE_VALID); XXX */
1844 /* assert(pte_to_phys(*pte) == phys); */
1845
1846
8f6c56a5 1847 /*
89b3af67
A
1848 * Remove the mapping if new protection is NONE
1849 * or if write-protecting a kernel mapping.
8f6c56a5 1850 */
89b3af67
A
1851 if (remove || pmap == kernel_pmap) {
1852 /*
1853 * Remove the mapping, collecting any modify bits.
1854 */
1855 pmap_store_pte(pte, *pte & ~INTEL_PTE_VALID);
5d5c5d0d 1856
89b3af67 1857 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
5d5c5d0d 1858
89b3af67 1859 pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
5d5c5d0d 1860
89b3af67 1861 pmap_store_pte(pte, 0);
5d5c5d0d 1862
89b3af67
A
1863
1864 //XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1);
1865 pmap->stats.resident_count--;
1866
1867 /*
1868 * Remove the pv_entry.
1869 */
1870 if (pv_e == pv_h) {
1871 /*
1872 * Fix up head later.
1873 */
1874 pv_h->pmap = PMAP_NULL;
1875 }
1876 else {
1877 /*
1878 * Delete this entry.
1879 */
1880 prev->next = pv_e->next;
1881 PV_FREE(pv_e);
1882 }
1883 } else {
1884 /*
1885 * Write-protect.
1886 */
1887 pmap_store_pte(pte, *pte & ~INTEL_PTE_WRITE);
1888
1889 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1890 /*
1891 * Advance prev.
1892 */
1893 prev = pv_e;
1894 }
1895
1896 simple_unlock(&pmap->lock);
1897
1898 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1899
1900 /*
1901 * If pv_head mapping was removed, fix it up.
1902 */
1903 if (pv_h->pmap == PMAP_NULL) {
1904 pv_e = pv_h->next;
1905
1906 if (pv_e != PV_ENTRY_NULL) {
1907 *pv_h = *pv_e;
1908 PV_FREE(pv_e);
1909 }
1c79356b 1910 }
1c79356b 1911 }
1c79356b
A
1912 PMAP_WRITE_UNLOCK(spl);
1913}
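
/*
 * Illustrative sketch (added for exposition, not compiled): how the prot
 * argument selects between write-protecting and removing every mapping of a
 * physical page. The page number is hypothetical.
 */
#if 0	/* example only */
static void
example_page_protect(ppnum_t pn)
{
	/*
	 * VM_PROT_READ: user mappings of pn are write-protected
	 * (kernel mappings are removed instead, per the code above).
	 */
	pmap_page_protect(pn, VM_PROT_READ);

	/* VM_PROT_NONE (default case above): every mapping of pn is removed */
	pmap_page_protect(pn, VM_PROT_NONE);
}
#endif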
1914
91447636
A
1915/*
1916 * Routine:
1917 * pmap_disconnect
1918 *
1919 * Function:
1920 * Disconnect all mappings for this page and return reference and change status
1921 * in generic format.
1922 *
1923 */
1924unsigned int pmap_disconnect(
1925 ppnum_t pa)
1926{
1927 pmap_page_protect(pa, 0); /* disconnect the page */
1928 return (pmap_get_refmod(pa)); /* return ref/chg status */
1929}
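
/*
 * Illustrative sketch (added for exposition, not compiled): decoding the
 * generic ref/mod status returned by pmap_disconnect(). The helper and the
 * page number are hypothetical.
 */
#if 0	/* example only */
static void
example_disconnect(ppnum_t pn)
{
	unsigned int refmod;

	/* drop all mappings of pn, then report its accumulated state */
	refmod = pmap_disconnect(pn);

	if (refmod & VM_MEM_MODIFIED)
		kprintf("page 0x%x was modified\n", pn);
	if (refmod & VM_MEM_REFERENCED)
		kprintf("page 0x%x was referenced\n", pn);
}
#endif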
1930
1c79356b
A
1931/*
1932 * Set the physical protection on the
1933 * specified range of this map as requested.
1934 * Will not increase permissions.
1935 */
1936void
1937pmap_protect(
1938 pmap_t map,
89b3af67
A
1939 vm_map_offset_t sva,
1940 vm_map_offset_t eva,
1c79356b
A
1941 vm_prot_t prot)
1942{
1943 register pt_entry_t *pde;
1944 register pt_entry_t *spte, *epte;
89b3af67
A
1945 vm_map_offset_t lva;
1946 vm_map_offset_t orig_sva;
1c79356b 1947 spl_t spl;
89b3af67 1948 boolean_t set_NX;
1c79356b
A
1949
1950 if (map == PMAP_NULL)
1951 return;
1952
89b3af67
A
1953 if (prot == VM_PROT_NONE) {
1954 pmap_remove(map, sva, eva);
1c79356b
A
1955 return;
1956 }
1957
89b3af67
A
1958 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
1959 set_NX = FALSE;
1960 else
1961 set_NX = TRUE;
1962
1c79356b
A
1963 SPLVM(spl);
1964 simple_lock(&map->lock);
1965
89b3af67
A
1966 orig_sva = sva;
1967 while (sva < eva) {
1968 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
1969 if (lva > eva)
1970 lva = eva;
1971 pde = pmap_pde(map, sva);
1972 if (pde && (*pde & INTEL_PTE_VALID)) {
1973 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
1974 spte = &spte[ptenum(sva)];
1975 epte = &spte[intel_btop(lva-sva)];
1c79356b
A
1976
1977 while (spte < epte) {
89b3af67
A
1978 if (*spte & INTEL_PTE_VALID) {
1979
1980 if (prot & VM_PROT_WRITE)
1981 pmap_store_pte(spte, *spte | INTEL_PTE_WRITE);
1982 else
1983 pmap_store_pte(spte, *spte & ~INTEL_PTE_WRITE);
1984
1985 if (set_NX == TRUE)
1986 pmap_store_pte(spte, *spte | INTEL_PTE_NX);
1987 else
1988 pmap_store_pte(spte, *spte & ~INTEL_PTE_NX);
1989
1990 }
1c79356b
A
1991 spte++;
1992 }
1993 }
89b3af67 1994 sva = lva;
1c79356b
A
1995 pde++;
1996 }
89b3af67 1997 PMAP_UPDATE_TLBS(map, orig_sva, eva);
91447636 1998
1c79356b
A
1999 simple_unlock(&map->lock);
2000 SPLX(spl);
2001}
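
/*
 * Illustrative sketch (added for exposition, not compiled): lowering the
 * protection on a range. The helper and range are hypothetical.
 */
#if 0	/* example only */
static void
example_write_protect(pmap_t p, vm_map_offset_t sva, vm_map_offset_t eva)
{
	/* clears INTEL_PTE_WRITE (and applies the NX policy) on [sva, eva) */
	pmap_protect(p, sva, eva, VM_PROT_READ);

	/* VM_PROT_NONE degenerates to pmap_remove() over the same range */
	pmap_protect(p, sva, eva, VM_PROT_NONE);
}
#endif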
2002
89b3af67
A
2003/* Map a (possibly) autogenned block */
2004void
2005pmap_map_block(
2006 pmap_t pmap,
2007 addr64_t va,
2008 ppnum_t pa,
2009 uint32_t size,
2010 vm_prot_t prot,
2011 int attr,
2012 __unused unsigned int flags)
2013{
2014 uint32_t page;
2015
2016 for (page = 0; page < size; page++) {
2017 pmap_enter(pmap, va, pa, prot, attr, TRUE);
2018 va += PAGE_SIZE;
2019 pa++;
2020 }
2021}
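
/*
 * Illustrative sketch (added for exposition, not compiled): mapping a block
 * of consecutive physical pages; each page is entered wired, as the loop
 * above passes TRUE to pmap_enter(). Values are hypothetical.
 */
#if 0	/* example only */
static void
example_map_block(pmap_t p, addr64_t va, ppnum_t first_page)
{
	/* map 16 consecutive physical pages with default cache attributes */
	pmap_map_block(p, va, first_page, 16,
		       VM_PROT_READ | VM_PROT_WRITE, 0, 0);
}
#endif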
1c79356b
A
2022
2023
2024/*
2025 * Insert the given physical page (p) at
2026 * the specified virtual address (v) in the
2027 * target physical map with the protection requested.
2028 *
2029 * If specified, the page will be wired down, meaning
2030 * that the related pte cannot be reclaimed.
2031 *
2032 * NB: This is the only routine which MAY NOT lazy-evaluate
2033 * or lose information. That is, this routine must actually
2034 * insert this page into the given map NOW.
2035 */
2036void
2037pmap_enter(
2038 register pmap_t pmap,
89b3af67 2039 vm_map_offset_t vaddr,
55e303ae 2040 ppnum_t pn,
1c79356b 2041 vm_prot_t prot,
9bccf70c 2042 unsigned int flags,
1c79356b
A
2043 boolean_t wired)
2044{
2045 register pt_entry_t *pte;
2046 register pv_entry_t pv_h;
91447636 2047 register int pai;
1c79356b
A
2048 pv_entry_t pv_e;
2049 pt_entry_t template;
2050 spl_t spl;
91447636 2051 pmap_paddr_t old_pa;
89b3af67
A
2052 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
2053 boolean_t need_tlbflush = FALSE;
2054 boolean_t set_NX;
1c79356b 2055
89b3af67 2056 XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n",
91447636 2057 current_thread(),
1c79356b 2058 current_thread(),
89b3af67 2059 pmap, vaddr, pn);
1c79356b 2060
55e303ae 2061 assert(pn != vm_page_fictitious_addr);
1c79356b 2062 if (pmap_debug)
89b3af67 2063 printf("pmap(%qx, %x)\n", vaddr, pn);
1c79356b
A
2064 if (pmap == PMAP_NULL)
2065 return;
2066
89b3af67
A
2067 if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
2068 set_NX = FALSE;
2069 else
2070 set_NX = TRUE;
2071
1c79356b
A
2072 /*
2073 * Must allocate a new pvlist entry while we're unlocked;
2074 * zalloc may cause pageout (which will lock the pmap system).
2075 * If we determine we need a pvlist entry, we will unlock
 2076	 *	and allocate one.  Then we will retry, throwing away
2077 * the allocated entry later (if we no longer need it).
2078 */
2079 pv_e = PV_ENTRY_NULL;
91447636 2080
1c79356b
A
2081 PMAP_READ_LOCK(pmap, spl);
2082
2083 /*
2084 * Expand pmap to include this pte. Assume that
2085 * pmap is always expanded to include enough hardware
2086 * pages to map one VM page.
2087 */
2088
89b3af67 2089 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
1c79356b
A
2090 /*
2091 * Must unlock to expand the pmap.
2092 */
2093 PMAP_READ_UNLOCK(pmap, spl);
2094
89b3af67 2095 pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
1c79356b
A
2096
2097 PMAP_READ_LOCK(pmap, spl);
2098 }
2099 /*
2100 * Special case if the physical page is already mapped
2101 * at this address.
2102 */
2103 old_pa = pte_to_pa(*pte);
2104 if (old_pa == pa) {
2105 /*
2106 * May be changing its wired attribute or protection
2107 */
55e303ae 2108
1c79356b 2109 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae 2110
89b3af67 2111 if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
55e303ae
A
2112 if(!(flags & VM_MEM_GUARDED))
2113 template |= INTEL_PTE_PTA;
2114 template |= INTEL_PTE_NCACHE;
2115 }
2116
1c79356b
A
2117 if (pmap != kernel_pmap)
2118 template |= INTEL_PTE_USER;
2119 if (prot & VM_PROT_WRITE)
2120 template |= INTEL_PTE_WRITE;
89b3af67
A
2121
2122 if (set_NX == TRUE)
2123 template |= INTEL_PTE_NX;
2124
1c79356b
A
2125 if (wired) {
2126 template |= INTEL_PTE_WIRED;
2127 if (!iswired(*pte))
2128 pmap->stats.wired_count++;
2129 }
2130 else {
2131 if (iswired(*pte)) {
2132 assert(pmap->stats.wired_count >= 1);
2133 pmap->stats.wired_count--;
2134 }
2135 }
2136
1c79356b
A
2137 if (*pte & INTEL_PTE_MOD)
2138 template |= INTEL_PTE_MOD;
1c79356b 2139
89b3af67
A
2140 pmap_store_pte(pte, template);
2141 pte++;
2142
2143 need_tlbflush = TRUE;
1c79356b
A
2144 goto Done;
2145 }
2146
2147 /*
2148 * Outline of code from here:
2149 * 1) If va was mapped, update TLBs, remove the mapping
2150 * and remove old pvlist entry.
2151 * 2) Add pvlist entry for new mapping
2152 * 3) Enter new mapping.
2153 *
89b3af67 2154 * SHARING FAULTS IS HORRIBLY BROKEN
1c79356b
A
2155 * SHARING_FAULTS complicates this slightly in that it cannot
2156 * replace the mapping, but must remove it (because adding the
2157 * pvlist entry for the new mapping may remove others), and
2158 * hence always enters the new mapping at step 3)
2159 *
2160 * If the old physical page is not managed step 1) is skipped
2161 * (except for updating the TLBs), and the mapping is
2162 * overwritten at step 3). If the new physical page is not
2163 * managed, step 2) is skipped.
2164 */
2165
91447636 2166 if (old_pa != (pmap_paddr_t) 0) {
1c79356b 2167
1c79356b
A
2168 /*
2169 * Don't do anything to pages outside valid memory here.
2170 * Instead convince the code that enters a new mapping
2171 * to overwrite the old one.
2172 */
2173
91447636 2174 if (valid_page(i386_btop(old_pa))) {
1c79356b
A
2175
2176 pai = pa_index(old_pa);
2177 LOCK_PVH(pai);
2178
2179 assert(pmap->stats.resident_count >= 1);
2180 pmap->stats.resident_count--;
2181 if (iswired(*pte)) {
2182 assert(pmap->stats.wired_count >= 1);
2183 pmap->stats.wired_count--;
2184 }
91447636 2185
1c79356b
A
2186 pmap_phys_attributes[pai] |=
2187 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1c79356b 2188
89b3af67 2189 pmap_store_pte(pte, 0);
1c79356b
A
2190 /*
2191 * Remove the mapping from the pvlist for
2192 * this physical page.
2193 */
2194 {
2195 register pv_entry_t prev, cur;
2196
2197 pv_h = pai_to_pvh(pai);
2198 if (pv_h->pmap == PMAP_NULL) {
2199 panic("pmap_enter: null pv_list!");
2200 }
89b3af67
A
2201
2202 if (pv_h->va == vaddr && pv_h->pmap == pmap) {
1c79356b
A
2203 /*
2204 * Header is the pv_entry. Copy the next one
2205 * to header and free the next one (we cannot
2206 * free the header)
2207 */
2208 cur = pv_h->next;
2209 if (cur != PV_ENTRY_NULL) {
2210 *pv_h = *cur;
2211 pv_e = cur;
2212 }
2213 else {
2214 pv_h->pmap = PMAP_NULL;
2215 }
2216 }
2217 else {
2218 cur = pv_h;
2219 do {
2220 prev = cur;
2221 if ((cur = prev->next) == PV_ENTRY_NULL) {
2222 panic("pmap_enter: mapping not in pv_list!");
2223 }
89b3af67 2224 } while (cur->va != vaddr || cur->pmap != pmap);
1c79356b
A
2225 prev->next = cur->next;
2226 pv_e = cur;
2227 }
2228 }
2229 UNLOCK_PVH(pai);
2230 }
2231 else {
2232
2233 /*
2234 * old_pa is not managed. Pretend it's zero so code
2235 * at Step 3) will enter new mapping (overwriting old
2236 * one). Do removal part of accounting.
2237 */
91447636 2238 old_pa = (pmap_paddr_t) 0;
89b3af67 2239
1c79356b
A
2240 if (iswired(*pte)) {
2241 assert(pmap->stats.wired_count >= 1);
2242 pmap->stats.wired_count--;
2243 }
2244 }
89b3af67 2245 need_tlbflush = TRUE;
91447636 2246
1c79356b
A
2247 }
2248
91447636 2249 if (valid_page(i386_btop(pa))) {
1c79356b
A
2250
2251 /*
2252 * Step 2) Enter the mapping in the PV list for this
2253 * physical page.
2254 */
2255
2256 pai = pa_index(pa);
2257
2258
89b3af67 2259#if SHARING_FAULTS /* this is horribly broken , do not enable */
1c79356b
A
2260RetryPvList:
2261 /*
2262 * We can return here from the sharing fault code below
2263 * in case we removed the only entry on the pv list and thus
2264 * must enter the new one in the list header.
2265 */
2266#endif /* SHARING_FAULTS */
2267 LOCK_PVH(pai);
2268 pv_h = pai_to_pvh(pai);
2269
2270 if (pv_h->pmap == PMAP_NULL) {
2271 /*
2272 * No mappings yet
2273 */
89b3af67 2274 pv_h->va = vaddr;
1c79356b
A
2275 pv_h->pmap = pmap;
2276 pv_h->next = PV_ENTRY_NULL;
2277 }
2278 else {
2279#if DEBUG
2280 {
2281 /*
2282 * check that this mapping is not already there
2283 * or there is no alias for this mapping in the same map
2284 */
2285 pv_entry_t e = pv_h;
2286 while (e != PV_ENTRY_NULL) {
89b3af67 2287 if (e->pmap == pmap && e->va == vaddr)
1c79356b
A
2288 panic("pmap_enter: already in pv_list");
2289 e = e->next;
2290 }
2291 }
2292#endif /* DEBUG */
89b3af67 2293#if SHARING_FAULTS /* broken, do not enable */
1c79356b
A
2294 {
2295 /*
2296 * do sharing faults.
2297 * if we find an entry on this pv list in the same address
2298 * space, remove it. we know there will not be more
2299 * than one.
2300 */
2301 pv_entry_t e = pv_h;
2302 pt_entry_t *opte;
2303
2304 while (e != PV_ENTRY_NULL) {
2305 if (e->pmap == pmap) {
2306 /*
2307 * Remove it, drop pv list lock first.
2308 */
2309 UNLOCK_PVH(pai);
2310
2311 opte = pmap_pte(pmap, e->va);
2312 assert(opte != PT_ENTRY_NULL);
2313 /*
2314 * Invalidate the translation buffer,
2315 * then remove the mapping.
2316 */
1c79356b 2317 pmap_remove_range(pmap, e->va, opte,
91447636 2318 opte + 1);
89b3af67 2319
91447636
A
2320 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
2321
1c79356b
A
2322 /*
 2323	                        * We could have removed the head entry,
 2324	                        * so there could be no more entries
 2325	                        * and so we have to use the pv head entry.
 2326	                        * So, go back to the top and try the entry
 2327	                        * again.
2328 */
2329 goto RetryPvList;
2330 }
2331 e = e->next;
2332 }
2333
2334 /*
2335 * check that this mapping is not already there
2336 */
2337 e = pv_h;
2338 while (e != PV_ENTRY_NULL) {
2339 if (e->pmap == pmap)
2340 panic("pmap_enter: alias in pv_list");
2341 e = e->next;
2342 }
2343 }
2344#endif /* SHARING_FAULTS */
2345#if DEBUG_ALIAS
2346 {
2347 /*
2348 * check for aliases within the same address space.
2349 */
2350 pv_entry_t e = pv_h;
2351 vm_offset_t rpc = get_rpc();
2352
2353 while (e != PV_ENTRY_NULL) {
2354 if (e->pmap == pmap) {
2355 /*
2356 * log this entry in the alias ring buffer
2357 * if it's not there already.
2358 */
2359 struct pmap_alias *pma;
2360 int ii, logit;
2361
2362 logit = TRUE;
2363 for (ii = 0; ii < pmap_alias_index; ii++) {
2364 if (pmap_aliasbuf[ii].rpc == rpc) {
2365 /* found it in the log already */
2366 logit = FALSE;
2367 break;
2368 }
2369 }
2370 if (logit) {
2371 pma = &pmap_aliasbuf[pmap_alias_index];
2372 pma->pmap = pmap;
89b3af67 2373 pma->va = vaddr;
1c79356b
A
2374 pma->rpc = rpc;
2375 pma->cookie = PMAP_ALIAS_COOKIE;
2376 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
2377 panic("pmap_enter: exhausted alias log");
2378 }
2379 }
2380 e = e->next;
2381 }
2382 }
2383#endif /* DEBUG_ALIAS */
2384 /*
2385 * Add new pv_entry after header.
2386 */
2387 if (pv_e == PV_ENTRY_NULL) {
2388 PV_ALLOC(pv_e);
2389 if (pv_e == PV_ENTRY_NULL) {
91447636 2390 panic("pmap no pv_e's");
1c79356b
A
2391 }
2392 }
89b3af67 2393 pv_e->va = vaddr;
1c79356b
A
2394 pv_e->pmap = pmap;
2395 pv_e->next = pv_h->next;
2396 pv_h->next = pv_e;
2397 /*
2398 * Remember that we used the pvlist entry.
2399 */
2400 pv_e = PV_ENTRY_NULL;
2401 }
2402 UNLOCK_PVH(pai);
89b3af67
A
2403
2404 /*
2405 * only count the mapping
2406 * for 'managed memory'
2407 */
2408 pmap->stats.resident_count++;
1c79356b
A
2409 }
2410
2411 /*
89b3af67 2412 * Step 3) Enter the mapping.
1c79356b
A
2413 */
2414
1c79356b
A
2415
2416 /*
2417 * Build a template to speed up entering -
2418 * only the pfn changes.
2419 */
2420 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae
A
2421
2422 if(flags & VM_MEM_NOT_CACHEABLE) {
2423 if(!(flags & VM_MEM_GUARDED))
2424 template |= INTEL_PTE_PTA;
2425 template |= INTEL_PTE_NCACHE;
2426 }
2427
1c79356b
A
2428 if (pmap != kernel_pmap)
2429 template |= INTEL_PTE_USER;
2430 if (prot & VM_PROT_WRITE)
2431 template |= INTEL_PTE_WRITE;
89b3af67
A
2432
2433 if (set_NX == TRUE)
2434 template |= INTEL_PTE_NX;
2435
1c79356b
A
2436 if (wired) {
2437 template |= INTEL_PTE_WIRED;
2438 pmap->stats.wired_count++;
2439 }
89b3af67 2440 pmap_store_pte(pte, template);
91447636 2441
1c79356b 2442Done:
89b3af67
A
2443 if (need_tlbflush == TRUE)
2444 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
91447636 2445
1c79356b 2446 if (pv_e != PV_ENTRY_NULL) {
89b3af67 2447 PV_FREE(pv_e);
1c79356b
A
2448 }
2449
2450 PMAP_READ_UNLOCK(pmap, spl);
2451}
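
/*
 * Illustrative sketch (added for exposition, not compiled): typical
 * pmap_enter() calls, one cacheable and pageable, one uncached and wired.
 * The helper and its arguments are hypothetical.
 */
#if 0	/* example only */
static void
example_enter(pmap_t p, vm_map_offset_t va, ppnum_t pn)
{
	/* ordinary cacheable, pageable mapping */
	pmap_enter(p, va, pn, VM_PROT_READ | VM_PROT_WRITE, 0, FALSE);

	/* uncached (e.g. device memory) and wired down */
	pmap_enter(p, va + PAGE_SIZE, pn + 1,
		   VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE, TRUE);
}
#endif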
2452
2453/*
2454 * Routine: pmap_change_wiring
2455 * Function: Change the wiring attribute for a map/virtual-address
2456 * pair.
2457 * In/out conditions:
2458 * The mapping must already exist in the pmap.
2459 */
2460void
2461pmap_change_wiring(
2462 register pmap_t map,
89b3af67 2463 vm_map_offset_t vaddr,
1c79356b
A
2464 boolean_t wired)
2465{
2466 register pt_entry_t *pte;
1c79356b
A
2467 spl_t spl;
2468
55e303ae 2469#if 1
1c79356b
A
2470 /*
2471 * We must grab the pmap system lock because we may
2472 * change a pte_page queue.
2473 */
2474 PMAP_READ_LOCK(map, spl);
2475
89b3af67 2476 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1c79356b
A
2477 panic("pmap_change_wiring: pte missing");
2478
2479 if (wired && !iswired(*pte)) {
2480 /*
2481 * wiring down mapping
2482 */
2483 map->stats.wired_count++;
89b3af67
A
2484 pmap_store_pte(pte, *pte | INTEL_PTE_WIRED);
2485 pte++;
1c79356b
A
2486 }
2487 else if (!wired && iswired(*pte)) {
2488 /*
2489 * unwiring mapping
2490 */
2491 assert(map->stats.wired_count >= 1);
2492 map->stats.wired_count--;
89b3af67
A
2493 pmap_store_pte(pte, *pte & ~INTEL_PTE_WIRED);
2494 pte++;
1c79356b
A
2495 }
2496
2497 PMAP_READ_UNLOCK(map, spl);
9bccf70c
A
2498
2499#else
2500 return;
2501#endif
2502
1c79356b
A
2503}
2504
91447636 2505ppnum_t
55e303ae
A
2506pmap_find_phys(pmap_t pmap, addr64_t va)
2507{
91447636 2508 pt_entry_t *ptp;
91447636
A
2509 ppnum_t ppn;
2510
89b3af67
A
2511 mp_disable_preemption();
2512
2513 ptp = pmap_pte(pmap, va);
91447636
A
2514 if (PT_ENTRY_NULL == ptp) {
2515 ppn = 0;
2516 } else {
2517 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2518 }
89b3af67
A
2519 mp_enable_preemption();
2520
91447636 2521 return ppn;
55e303ae
A
2522}
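
/*
 * Illustrative sketch (added for exposition, not compiled): a 64-bit safe
 * virtual-to-physical translation built on pmap_find_phys(); this is
 * essentially what the 32-bit pmap_extract() shim below does.
 */
#if 0	/* example only */
static addr64_t
example_virt_to_phys(pmap_t p, addr64_t va)
{
	ppnum_t pn;

	pn = pmap_find_phys(p, va);
	if (pn == 0)
		return 0;	/* no valid mapping at va */

	/* page frame back to a byte address, plus the offset within the page */
	return ((addr64_t)i386_ptob(pn)) | (va & INTEL_OFFMASK);
}
#endif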
2523
1c79356b
A
2524/*
2525 * Routine: pmap_extract
2526 * Function:
2527 * Extract the physical page address associated
2528 * with the given map/virtual_address pair.
91447636
A
 2529 *	Changed to a shim for backwards compatibility, but it will not
 2530 *	work for 64-bit systems. Some old drivers that we cannot
 2531 *	change need this.
1c79356b
A
2532 */
2533
2534vm_offset_t
2535pmap_extract(
2536 register pmap_t pmap,
89b3af67 2537 vm_map_offset_t vaddr)
1c79356b 2538{
89b3af67
A
2539 ppnum_t ppn;
2540 vm_offset_t paddr;
91447636 2541
89b3af67
A
2542 paddr = (vm_offset_t)0;
2543 ppn = pmap_find_phys(pmap, vaddr);
2544 if (ppn) {
2545 paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK);
2546 }
2547 return (paddr);
1c79356b
A
2548}
2549
1c79356b 2550void
89b3af67
A
2551pmap_expand_pml4(
2552 pmap_t map,
2553 vm_map_offset_t vaddr)
1c79356b 2554{
1c79356b 2555 register vm_page_t m;
91447636 2556 register pmap_paddr_t pa;
89b3af67 2557 uint64_t i;
1c79356b 2558 spl_t spl;
55e303ae 2559 ppnum_t pn;
89b3af67 2560 pml4_entry_t *pml4p;
c0fea474 2561
89b3af67
A
2562 if (kernel_pmap == map) panic("expand kernel pml4");
2563
2564 spl = splhigh();
2565 pml4p = pmap64_pml4(map, vaddr);
2566 splx(spl);
2567 if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
1c79356b
A
2568
2569 /*
89b3af67 2570 * Allocate a VM page for the pml4 page
1c79356b
A
2571 */
2572 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2573 VM_PAGE_WAIT();
2574
2575 /*
91447636 2576 * put the page into the pmap's obj list so it
1c79356b
A
2577 * can be found later.
2578 */
55e303ae
A
2579 pn = m->phys_page;
2580 pa = i386_ptob(pn);
89b3af67
A
2581 i = pml4idx(map, vaddr);
2582
2583 vm_object_lock(map->pm_obj_pml4);
2584#if 0 /* DEBUG */
2585 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
2586 kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n",
2587 map, map->pm_obj_pml4, vaddr, i);
2588 }
2589#endif
2590 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
2591
1c79356b
A
2592 vm_page_lock_queues();
2593 vm_page_wire(m);
89b3af67 2594
8f6c56a5 2595 vm_page_unlock_queues();
89b3af67
A
2596 vm_object_unlock(map->pm_obj_pml4);
2597 inuse_ptepages_count++;
2598 map->stats.resident_count++;
2599 map->stats.wired_count++;
1c79356b
A
2600
2601 /*
2602 * Zero the page.
2603 */
91447636 2604 pmap_zero_page(pn);
1c79356b
A
2605
2606 PMAP_READ_LOCK(map, spl);
2607 /*
2608 * See if someone else expanded us first
2609 */
89b3af67 2610 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
1c79356b 2611 PMAP_READ_UNLOCK(map, spl);
89b3af67 2612 vm_object_lock(map->pm_obj_pml4);
1c79356b
A
2613 vm_page_lock_queues();
2614 vm_page_free(m);
2615 inuse_ptepages_count--;
89b3af67
A
2616 map->stats.resident_count--;
2617 map->stats.wired_count--;
2618
1c79356b 2619 vm_page_unlock_queues();
89b3af67 2620 vm_object_unlock(map->pm_obj_pml4);
1c79356b
A
2621 return;
2622 }
2623
2624 /*
2625 * Set the page directory entry for this page table.
2626 * If we have allocated more than one hardware page,
2627 * set several page directory entries.
2628 */
2629
89b3af67 2630 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
55e303ae 2631
89b3af67
A
2632 pmap_store_pte(pml4p, pa_to_pte(pa)
2633 | INTEL_PTE_VALID
2634 | INTEL_PTE_USER
2635 | INTEL_PTE_WRITE);
1c79356b 2636
89b3af67 2637 PMAP_READ_UNLOCK(map, spl);
1c79356b 2638
8f6c56a5 2639 return;
89b3af67 2640
8f6c56a5 2641}
c0fea474 2642
8f6c56a5 2643void
89b3af67
A
2644pmap_expand_pdpt(
2645 pmap_t map,
2646 vm_map_offset_t vaddr)
8f6c56a5 2647{
89b3af67
A
2648 register vm_page_t m;
2649 register pmap_paddr_t pa;
2650 uint64_t i;
2651 spl_t spl;
2652 ppnum_t pn;
2653 pdpt_entry_t *pdptp;
c0fea474 2654
89b3af67 2655 if (kernel_pmap == map) panic("expand kernel pdpt");
c0fea474 2656
89b3af67
A
2657 spl = splhigh();
2658 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
2659 splx(spl);
2660 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
2661 spl = splhigh();
2662 }
2663 splx(spl);
c0fea474 2664
5d5c5d0d 2665
89b3af67
A
2666 /*
2667 * Allocate a VM page for the pdpt page
2668 */
2669 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2670 VM_PAGE_WAIT();
5d5c5d0d
A
2671
2672 /*
89b3af67
A
2673 * put the page into the pmap's obj list so it
2674 * can be found later.
5d5c5d0d 2675 */
89b3af67
A
2676 pn = m->phys_page;
2677 pa = i386_ptob(pn);
2678 i = pdptidx(map, vaddr);
5d5c5d0d 2679
89b3af67
A
2680 vm_object_lock(map->pm_obj_pdpt);
2681#if 0 /* DEBUG */
2682 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
2683 kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n",
2684 map, map->pm_obj_pdpt, vaddr, i);
2685 }
2686#endif
2687 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
5d5c5d0d 2688
89b3af67
A
2689 vm_page_lock_queues();
2690 vm_page_wire(m);
2691
2692 vm_page_unlock_queues();
2693 vm_object_unlock(map->pm_obj_pdpt);
2694 inuse_ptepages_count++;
2695 map->stats.resident_count++;
2696 map->stats.wired_count++;
2697
2698 /*
2699 * Zero the page.
2700 */
2701 pmap_zero_page(pn);
2702
2703 PMAP_READ_LOCK(map, spl);
2704 /*
2705 * See if someone else expanded us first
2706 */
2707 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
2708 PMAP_READ_UNLOCK(map, spl);
2709 vm_object_lock(map->pm_obj_pdpt);
2710 vm_page_lock_queues();
2711 vm_page_free(m);
2712 inuse_ptepages_count--;
2713 map->stats.resident_count--;
2714 map->stats.wired_count--;
2715
2716 vm_page_unlock_queues();
2717 vm_object_unlock(map->pm_obj_pdpt);
2718 return;
2719 }
2720
2721 /*
2722 * Set the page directory entry for this page table.
2723 * If we have allocated more than one hardware page,
2724 * set several page directory entries.
2725 */
2726
2727 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
2728
2729 pmap_store_pte(pdptp, pa_to_pte(pa)
2730 | INTEL_PTE_VALID
2731 | INTEL_PTE_USER
2732 | INTEL_PTE_WRITE);
2733
2734 PMAP_READ_UNLOCK(map, spl);
2735
2736 return;
2737
2738}
2739
2740
2741
2742/*
2743 * Routine: pmap_expand
2744 *
2745 * Expands a pmap to be able to map the specified virtual address.
2746 *
2747 * Allocates new virtual memory for the P0 or P1 portion of the
2748 * pmap, then re-maps the physical pages that were in the old
2749 * pmap to be in the new pmap.
2750 *
2751 * Must be called with the pmap system and the pmap unlocked,
2752 * since these must be unlocked to use vm_allocate or vm_deallocate.
2753 * Thus it must be called in a loop that checks whether the map
2754 * has been expanded enough.
2755 * (We won't loop forever, since page tables aren't shrunk.)
2756 */
2757void
2758pmap_expand(
2759 pmap_t map,
2760 vm_map_offset_t vaddr)
2761{
2762 pt_entry_t *pdp;
2763 register vm_page_t m;
2764 register pmap_paddr_t pa;
2765 uint64_t i;
2766 spl_t spl;
2767 ppnum_t pn;
2768
2769 /*
 2770	 * If this is not the kernel map (while we are still in compat kernel mode)
 2771	 * and we are running 64-bit, propagate the expansion upwards.
2772 */
2773
2774 if (cpu_64bit && (map != kernel_pmap)) {
2775 spl = splhigh();
2776 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
2777 splx(spl);
2778 pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
2779 spl = splhigh();
2780 }
2781 splx(spl);
2782 } else {
2783 pdp = pmap_pde(map, vaddr);
2784 }
2785
2786
2787 /*
2788 * Allocate a VM page for the pde entries.
2789 */
2790 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2791 VM_PAGE_WAIT();
2792
2793 /*
2794 * put the page into the pmap's obj list so it
2795 * can be found later.
2796 */
2797 pn = m->phys_page;
2798 pa = i386_ptob(pn);
2799 i = pdeidx(map, vaddr);
2800
2801 vm_object_lock(map->pm_obj);
2802#if 0 /* DEBUG */
2803 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
2804 kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
2805 map, map->pm_obj, vaddr, i);
2806 }
2807#endif
2808 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2809
2810 vm_page_lock_queues();
2811 vm_page_wire(m);
2812 inuse_ptepages_count++;
2813
2814 vm_page_unlock_queues();
2815 vm_object_unlock(map->pm_obj);
2816
2817 /*
2818 * Zero the page.
2819 */
2820 pmap_zero_page(pn);
2821
2822 PMAP_READ_LOCK(map, spl);
2823 /*
2824 * See if someone else expanded us first
2825 */
2826 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
2827 PMAP_READ_UNLOCK(map, spl);
2828 vm_object_lock(map->pm_obj);
2829
2830 vm_page_lock_queues();
2831 vm_page_free(m);
2832 inuse_ptepages_count--;
2833
2834 vm_page_unlock_queues();
2835 vm_object_unlock(map->pm_obj);
2836 return;
2837 }
2838
2839 pdp = pmap_pde(map, vaddr); /* refetch while locked */
2840
2841 /*
2842 * Set the page directory entry for this page table.
2843 * If we have allocated more than one hardware page,
2844 * set several page directory entries.
2845 */
2846
2847 pmap_store_pte(pdp, pa_to_pte(pa)
2848 | INTEL_PTE_VALID
2849 | INTEL_PTE_USER
2850 | INTEL_PTE_WRITE);
2851
2852
2853 PMAP_READ_UNLOCK(map, spl);
2854
2855 return;
2856}
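
/*
 * Illustrative sketch (added for exposition, not compiled): the retry loop
 * callers such as pmap_enter() use around pmap_expand(). Locking is omitted
 * here; real callers drop the pmap read lock before expanding.
 */
#if 0	/* example only */
static pt_entry_t *
example_pte_with_expand(pmap_t p, vm_map_offset_t va)
{
	pt_entry_t *pte;

	/* loop until the pte exists; pmap_expand() tolerates racing expanders */
	while ((pte = pmap_pte(p, va)) == PT_ENTRY_NULL)
		pmap_expand(p, va);

	return pte;
}
#endif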
2857
2858
2859/*
2860 * pmap_sync_page_data_phys(ppnum_t pa)
2861 *
2862 * Invalidates all of the instruction cache on a physical page and
2863 * pushes any dirty data from the data cache for the same physical page
2864 * Not required in i386.
2865 */
2866void
2867pmap_sync_page_data_phys(__unused ppnum_t pa)
2868{
2869 return;
2870}
2871
2872/*
2873 * pmap_sync_page_attributes_phys(ppnum_t pa)
2874 *
2875 * Write back and invalidate all cachelines on a physical page.
2876 */
2877void
2878pmap_sync_page_attributes_phys(ppnum_t pa)
2879{
2880 cache_flush_page_phys(pa);
2881}
2882
2883int collect_ref;
2884int collect_unref;
2885
2886/*
2887 * Routine: pmap_collect
2888 * Function:
2889 * Garbage collects the physical map system for
2890 * pages which are no longer used.
2891 * Success need not be guaranteed -- that is, there
2892 * may well be pages which are not referenced, but
2893 * others may be collected.
2894 * Usage:
2895 * Called by the pageout daemon when pages are scarce.
2896 */
2897void
2898pmap_collect(
2899 pmap_t p)
2900{
2901 register pt_entry_t *pdp, *ptp;
2902 pt_entry_t *eptp;
2903 int wired;
2904 spl_t spl;
2905
2906 if (p == PMAP_NULL)
2907 return;
2908
2909 if (p == kernel_pmap)
2910 return;
2911
2912 /*
2913 * Garbage collect map.
2914 */
2915 PMAP_READ_LOCK(p, spl);
2916
2917 for (pdp = (pt_entry_t *)p->dirbase;
2918 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2919 pdp++)
2920 {
2921 if (*pdp & INTEL_PTE_VALID) {
2922 if(*pdp & INTEL_PTE_REF) {
2923 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
2924 collect_ref++;
2925 } else {
2926 collect_unref++;
2927 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2928 eptp = ptp + NPTEPG;
2929
2930 /*
2931 * If the pte page has any wired mappings, we cannot
2932 * free it.
2933 */
2934 wired = 0;
2935 {
2936 register pt_entry_t *ptep;
2937 for (ptep = ptp; ptep < eptp; ptep++) {
2938 if (iswired(*ptep)) {
2939 wired = 1;
5d5c5d0d 2940 break;
1c79356b
A
2941 }
2942 }
2943 }
2944 if (!wired) {
2945 /*
2946 * Remove the virtual addresses mapped by this pte page.
2947 */
2948 pmap_remove_range(p,
91447636 2949 pdetova(pdp - (pt_entry_t *)p->dirbase),
1c79356b
A
2950 ptp,
2951 eptp);
2952
2953 /*
2954 * Invalidate the page directory pointer.
2955 */
89b3af67 2956 pmap_store_pte(pdp, 0x0);
91447636 2957
1c79356b
A
2958 PMAP_READ_UNLOCK(p, spl);
2959
2960 /*
2961 * And free the pte page itself.
2962 */
2963 {
2964 register vm_page_t m;
2965
91447636
A
2966 vm_object_lock(p->pm_obj);
2967 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1c79356b
A
2968 if (m == VM_PAGE_NULL)
2969 panic("pmap_collect: pte page not in object");
2970 vm_page_lock_queues();
2971 vm_page_free(m);
2972 inuse_ptepages_count--;
2973 vm_page_unlock_queues();
91447636 2974 vm_object_unlock(p->pm_obj);
1c79356b
A
2975 }
2976
2977 PMAP_READ_LOCK(p, spl);
2978 }
91447636
A
2979 }
2980 }
1c79356b 2981 }
91447636 2982 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
89b3af67 2983
1c79356b
A
2984 PMAP_READ_UNLOCK(p, spl);
2985 return;
2986
2987}
2988
1c79356b 2989
1c79356b 2990void
91447636
A
 2991pmap_copy_page(
 2992	ppnum_t src,
 2993	ppnum_t dst)
1c79356b 2994{
89b3af67
A
2995 bcopy_phys((addr64_t)i386_ptob(src),
2996 (addr64_t)i386_ptob(dst),
2997 PAGE_SIZE);
1c79356b 2998}
1c79356b 2999
1c79356b
A
3000
3001/*
3002 * Routine: pmap_pageable
3003 * Function:
3004 * Make the specified pages (by pmap, offset)
3005 * pageable (or not) as requested.
3006 *
3007 * A page which is not pageable may not take
3008 * a fault; therefore, its page table entry
3009 * must remain valid for the duration.
3010 *
3011 * This routine is merely advisory; pmap_enter
3012 * will specify that these pages are to be wired
3013 * down (or not) as appropriate.
3014 */
3015void
3016pmap_pageable(
91447636 3017 __unused pmap_t pmap,
89b3af67
A
3018 __unused vm_map_offset_t start_addr,
3019 __unused vm_map_offset_t end_addr,
91447636 3020 __unused boolean_t pageable)
1c79356b
A
3021{
3022#ifdef lint
91447636 3023 pmap++; start_addr++; end_addr++; pageable++;
1c79356b
A
3024#endif /* lint */
3025}
3026
3027/*
3028 * Clear specified attribute bits.
3029 */
3030void
3031phys_attribute_clear(
91447636 3032 ppnum_t pn,
1c79356b
A
3033 int bits)
3034{
3035 pv_entry_t pv_h;
3036 register pv_entry_t pv_e;
3037 register pt_entry_t *pte;
3038 int pai;
3039 register pmap_t pmap;
3040 spl_t spl;
91447636 3041 pmap_paddr_t phys;
1c79356b 3042
91447636
A
3043 assert(pn != vm_page_fictitious_addr);
3044 if (!valid_page(pn)) {
1c79356b
A
3045 /*
3046 * Not a managed page.
3047 */
3048 return;
3049 }
3050
3051 /*
3052 * Lock the pmap system first, since we will be changing
3053 * several pmaps.
3054 */
3055
3056 PMAP_WRITE_LOCK(spl);
91447636 3057 phys = i386_ptob(pn);
1c79356b
A
3058 pai = pa_index(phys);
3059 pv_h = pai_to_pvh(pai);
3060
3061 /*
3062 * Walk down PV list, clearing all modify or reference bits.
3063 * We do not have to lock the pv_list because we have
3064 * the entire pmap system locked.
3065 */
3066 if (pv_h->pmap != PMAP_NULL) {
3067 /*
3068 * There are some mappings.
3069 */
3070 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
3071
3072 pmap = pv_e->pmap;
3073 /*
3074 * Lock the pmap to block pmap_extract and similar routines.
3075 */
3076 simple_lock(&pmap->lock);
3077
3078 {
89b3af67 3079 register vm_map_offset_t va;
1c79356b
A
3080
3081 va = pv_e->va;
3082 pte = pmap_pte(pmap, va);
3083
3084#if 0
3085 /*
3086 * Consistency checks.
3087 */
3088 assert(*pte & INTEL_PTE_VALID);
3089 /* assert(pte_to_phys(*pte) == phys); */
3090#endif
3091
1c79356b
A
3092 /*
3093 * Clear modify or reference bits.
3094 */
91447636 3095
89b3af67
A
3096 pmap_store_pte(pte, *pte & ~bits);
3097 pte++;
91447636 3098 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b
A
3099 }
3100 simple_unlock(&pmap->lock);
91447636 3101
1c79356b
A
3102 }
3103 }
3104
3105 pmap_phys_attributes[pai] &= ~bits;
3106
3107 PMAP_WRITE_UNLOCK(spl);
3108}
3109
3110/*
3111 * Check specified attribute bits.
3112 */
3113boolean_t
3114phys_attribute_test(
91447636 3115 ppnum_t pn,
1c79356b
A
3116 int bits)
3117{
3118 pv_entry_t pv_h;
3119 register pv_entry_t pv_e;
3120 register pt_entry_t *pte;
3121 int pai;
3122 register pmap_t pmap;
3123 spl_t spl;
91447636 3124 pmap_paddr_t phys;
1c79356b 3125
91447636
A
3126 assert(pn != vm_page_fictitious_addr);
3127 if (!valid_page(pn)) {
1c79356b
A
3128 /*
3129 * Not a managed page.
3130 */
3131 return (FALSE);
3132 }
3133
89b3af67
A
3134 phys = i386_ptob(pn);
3135 pai = pa_index(phys);
3136 /*
3137 * super fast check... if bits already collected
3138 * no need to take any locks...
3139 * if not set, we need to recheck after taking
3140 * the lock in case they got pulled in while
3141 * we were waiting for the lock
3142 */
3143 if (pmap_phys_attributes[pai] & bits)
3144 return (TRUE);
3145 pv_h = pai_to_pvh(pai);
3146
1c79356b
A
3147 /*
3148 * Lock the pmap system first, since we will be checking
3149 * several pmaps.
3150 */
1c79356b 3151 PMAP_WRITE_LOCK(spl);
1c79356b
A
3152
3153 if (pmap_phys_attributes[pai] & bits) {
3154 PMAP_WRITE_UNLOCK(spl);
3155 return (TRUE);
3156 }
3157
3158 /*
3159 * Walk down PV list, checking all mappings.
3160 * We do not have to lock the pv_list because we have
3161 * the entire pmap system locked.
3162 */
3163 if (pv_h->pmap != PMAP_NULL) {
3164 /*
3165 * There are some mappings.
3166 */
3167 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
3168
3169 pmap = pv_e->pmap;
3170 /*
3171 * Lock the pmap to block pmap_extract and similar routines.
3172 */
3173 simple_lock(&pmap->lock);
3174
3175 {
89b3af67 3176 register vm_map_offset_t va;
1c79356b
A
3177
3178 va = pv_e->va;
3179 pte = pmap_pte(pmap, va);
3180
3181#if 0
3182 /*
3183 * Consistency checks.
3184 */
3185 assert(*pte & INTEL_PTE_VALID);
3186 /* assert(pte_to_phys(*pte) == phys); */
3187#endif
3188 }
3189
3190 /*
3191 * Check modify or reference bits.
3192 */
3193 {
1c79356b
A
3194 if (*pte++ & bits) {
3195 simple_unlock(&pmap->lock);
3196 PMAP_WRITE_UNLOCK(spl);
3197 return (TRUE);
3198 }
1c79356b
A
3199 }
3200 simple_unlock(&pmap->lock);
3201 }
3202 }
3203 PMAP_WRITE_UNLOCK(spl);
3204 return (FALSE);
3205}
3206
3207/*
3208 * Set specified attribute bits.
3209 */
3210void
3211phys_attribute_set(
91447636 3212 ppnum_t pn,
1c79356b
A
3213 int bits)
3214{
3215 int spl;
91447636 3216 pmap_paddr_t phys;
1c79356b 3217
91447636
A
3218 assert(pn != vm_page_fictitious_addr);
3219 if (!valid_page(pn)) {
1c79356b
A
3220 /*
3221 * Not a managed page.
3222 */
3223 return;
3224 }
3225
3226 /*
3227 * Lock the pmap system and set the requested bits in
3228 * the phys attributes array. Don't need to bother with
3229 * ptes because the test routine looks here first.
3230 */
91447636 3231 phys = i386_ptob(pn);
1c79356b
A
3232 PMAP_WRITE_LOCK(spl);
3233 pmap_phys_attributes[pa_index(phys)] |= bits;
3234 PMAP_WRITE_UNLOCK(spl);
3235}
3236
3237/*
3238 * Set the modify bit on the specified physical page.
3239 */
3240
3241void pmap_set_modify(
55e303ae 3242 ppnum_t pn)
1c79356b 3243{
91447636 3244 phys_attribute_set(pn, PHYS_MODIFIED);
1c79356b
A
3245}
3246
3247/*
3248 * Clear the modify bits on the specified physical page.
3249 */
3250
3251void
3252pmap_clear_modify(
55e303ae 3253 ppnum_t pn)
1c79356b 3254{
91447636 3255 phys_attribute_clear(pn, PHYS_MODIFIED);
1c79356b
A
3256}
3257
3258/*
3259 * pmap_is_modified:
3260 *
3261 * Return whether or not the specified physical page is modified
3262 * by any physical maps.
3263 */
3264
3265boolean_t
3266pmap_is_modified(
55e303ae 3267 ppnum_t pn)
1c79356b 3268{
91447636 3269 return (phys_attribute_test(pn, PHYS_MODIFIED));
1c79356b
A
3270}
3271
3272/*
3273 * pmap_clear_reference:
3274 *
3275 * Clear the reference bit on the specified physical page.
3276 */
3277
3278void
3279pmap_clear_reference(
55e303ae 3280 ppnum_t pn)
1c79356b 3281{
91447636
A
3282 phys_attribute_clear(pn, PHYS_REFERENCED);
3283}
3284
3285void
3286pmap_set_reference(ppnum_t pn)
3287{
3288 phys_attribute_set(pn, PHYS_REFERENCED);
1c79356b
A
3289}
3290
3291/*
3292 * pmap_is_referenced:
3293 *
3294 * Return whether or not the specified physical page is referenced
3295 * by any physical maps.
3296 */
3297
3298boolean_t
3299pmap_is_referenced(
55e303ae 3300 ppnum_t pn)
1c79356b 3301{
91447636
A
3302 return (phys_attribute_test(pn, PHYS_REFERENCED));
3303}
3304
3305/*
3306 * pmap_get_refmod(phys)
3307 * returns the referenced and modified bits of the specified
3308 * physical page.
3309 */
3310unsigned int
3311pmap_get_refmod(ppnum_t pa)
3312{
3313 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
3314 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
3315}
3316
3317/*
3318 * pmap_clear_refmod(phys, mask)
3319 * clears the referenced and modified bits as specified by the mask
3320 * of the specified physical page.
3321 */
3322void
3323pmap_clear_refmod(ppnum_t pa, unsigned int mask)
3324{
3325 unsigned int x86Mask;
3326
3327 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
3328 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
3329 phys_attribute_clear(pa, x86Mask);
1c79356b
A
3330}
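
/*
 * Illustrative sketch (added for exposition, not compiled): the generic
 * VM_MEM_* bits round-tripping through pmap_get_refmod()/pmap_clear_refmod().
 * The helper is hypothetical.
 */
#if 0	/* example only */
static boolean_t
example_test_and_clear_dirty(ppnum_t pn)
{
	boolean_t was_dirty;

	was_dirty = (pmap_get_refmod(pn) & VM_MEM_MODIFIED) ? TRUE : FALSE;

	/* clear only the modified bit, leaving the referenced bit alone */
	pmap_clear_refmod(pn, VM_MEM_MODIFIED);

	return (was_dirty);
}
#endif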
3331
3332/*
3333 * Set the modify bit on the specified range
3334 * of this map as requested.
3335 *
3336 * This optimization stands only if each time the dirty bit
3337 * in vm_page_t is tested, it is also tested in the pmap.
3338 */
3339void
3340pmap_modify_pages(
3341 pmap_t map,
89b3af67
A
3342 vm_map_offset_t sva,
3343 vm_map_offset_t eva)
1c79356b
A
3344{
3345 spl_t spl;
3346 register pt_entry_t *pde;
3347 register pt_entry_t *spte, *epte;
89b3af67
A
3348 vm_map_offset_t lva;
3349 vm_map_offset_t orig_sva;
1c79356b
A
3350
3351 if (map == PMAP_NULL)
3352 return;
3353
3354 PMAP_READ_LOCK(map, spl);
3355
89b3af67
A
3356 orig_sva = sva;
3357 while (sva && sva < eva) {
3358 lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
3359 if (lva > eva)
3360 lva = eva;
3361 pde = pmap_pde(map, sva);
3362 if (pde && (*pde & INTEL_PTE_VALID)) {
3363 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
3364 if (lva) {
3365 spte = &spte[ptenum(sva)];
3366 epte = &spte[intel_btop(lva-sva)];
1c79356b 3367 } else {
89b3af67
A
3368 epte = &spte[intel_btop(pde_mapped_size)];
3369 spte = &spte[ptenum(sva)];
1c79356b
A
3370 }
3371 while (spte < epte) {
3372 if (*spte & INTEL_PTE_VALID) {
89b3af67
A
3373 pmap_store_pte(spte, *spte
3374 | INTEL_PTE_MOD
3375 | INTEL_PTE_WRITE);
1c79356b
A
3376 }
3377 spte++;
3378 }
3379 }
89b3af67 3380 sva = lva;
1c79356b
A
3381 pde++;
3382 }
89b3af67
A
3383 PMAP_UPDATE_TLBS(map, orig_sva, eva);
3384
1c79356b
A
3385 PMAP_READ_UNLOCK(map, spl);
3386}
3387
3388
3389void
91447636
A
3390invalidate_icache(__unused vm_offset_t addr,
3391 __unused unsigned cnt,
3392 __unused int phys)
1c79356b
A
3393{
3394 return;
3395}
3396void
91447636
A
3397flush_dcache(__unused vm_offset_t addr,
3398 __unused unsigned count,
3399 __unused int phys)
1c79356b
A
3400{
3401 return;
3402}
3403
89b3af67 3404#if MACH_KDB
8f6c56a5 3405
89b3af67 3406/* show phys page mappings and attributes */
8f6c56a5 3407
89b3af67 3408extern void db_show_page(pmap_paddr_t pa);
8f6c56a5
A
3409
3410void
89b3af67 3411db_show_page(pmap_paddr_t pa)
8f6c56a5 3412{
89b3af67
A
3413 pv_entry_t pv_h;
3414 int pai;
3415 char attr;
3416
3417 pai = pa_index(pa);
3418 pv_h = pai_to_pvh(pai);
1c79356b
A
3419
3420 attr = pmap_phys_attributes[pai];
3421 printf("phys page %x ", pa);
3422 if (attr & PHYS_MODIFIED)
3423 printf("modified, ");
3424 if (attr & PHYS_REFERENCED)
3425 printf("referenced, ");
3426 if (pv_h->pmap || pv_h->next)
3427 printf(" mapped at\n");
3428 else
3429 printf(" not mapped\n");
3430 for (; pv_h; pv_h = pv_h->next)
3431 if (pv_h->pmap)
3432 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3433}
3434
3435#endif /* MACH_KDB */
3436
3437#if MACH_KDB
3438void db_kvtophys(vm_offset_t);
3439void db_show_vaddrs(pt_entry_t *);
3440
3441/*
3442 * print out the results of kvtophys(arg)
3443 */
3444void
3445db_kvtophys(
3446 vm_offset_t vaddr)
3447{
89b3af67 3448 db_printf("0x%qx", kvtophys(vaddr));
1c79356b
A
3449}
3450
3451/*
 3452 * Walk the page tables.
3453 */
3454void
3455db_show_vaddrs(
3456 pt_entry_t *dirbase)
3457{
3458 pt_entry_t *ptep, *pdep, tmp;
89b3af67 3459 unsigned int x, y, pdecnt, ptecnt;
1c79356b
A
3460
3461 if (dirbase == 0) {
3462 dirbase = kernel_pmap->dirbase;
3463 }
3464 if (dirbase == 0) {
3465 db_printf("need a dirbase...\n");
3466 return;
3467 }
89b3af67 3468 dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
1c79356b
A
3469
3470 db_printf("dirbase: 0x%x\n", dirbase);
3471
3472 pdecnt = ptecnt = 0;
3473 pdep = &dirbase[0];
91447636 3474 for (y = 0; y < NPDEPG; y++, pdep++) {
1c79356b
A
3475 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3476 continue;
3477 }
3478 pdecnt++;
3479 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3480 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
91447636 3481 for (x = 0; x < NPTEPG; x++, ptep++) {
1c79356b
A
3482 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3483 continue;
3484 }
3485 ptecnt++;
3486 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3487 x,
3488 *ptep,
3489 (y << 22) | (x << 12),
3490 *ptep & ~INTEL_OFFMASK);
3491 }
3492 }
3493
3494 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3495
3496}
3497#endif /* MACH_KDB */
3498
3499#include <mach_vm_debug.h>
3500#if MACH_VM_DEBUG
3501#include <vm/vm_debug.h>
3502
3503int
3504pmap_list_resident_pages(
91447636
A
3505 __unused pmap_t pmap,
3506 __unused vm_offset_t *listp,
3507 __unused int space)
1c79356b
A
3508{
3509 return 0;
3510}
3511#endif /* MACH_VM_DEBUG */
3512
8f6c56a5 3513
1c79356b 3514
91447636
A
3515/* temporary workaround */
3516boolean_t
89b3af67 3517coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
91447636 3518{
89b3af67 3519#if 0
91447636 3520 pt_entry_t *ptep;
1c79356b 3521
91447636
A
3522 ptep = pmap_pte(map->pmap, va);
3523 if (0 == ptep)
3524 return FALSE;
3525 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
89b3af67
A
3526#else
3527 return TRUE;
1c79356b 3528#endif
1c79356b
A
3529}
3530
1c79356b 3531
9bccf70c 3532boolean_t
91447636
A
3533phys_page_exists(
3534 ppnum_t pn)
9bccf70c 3535{
91447636
A
3536 pmap_paddr_t phys;
3537
3538 assert(pn != vm_page_fictitious_addr);
3539
3540 if (!pmap_initialized)
3541 return (TRUE);
3542 phys = (pmap_paddr_t) i386_ptob(pn);
3543 if (!pmap_valid_page(pn))
3544 return (FALSE);
3545
3546 return TRUE;
3547}
3548
3549void
3550mapping_free_prime()
3551{
3552 int i;
3553 pv_entry_t pv_e;
3554
3555 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3556 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3557 PV_FREE(pv_e);
3558 }
3559}
3560
3561void
3562mapping_adjust()
3563{
3564 pv_entry_t pv_e;
3565 int i;
3566 int spl;
3567
3568 if (mapping_adjust_call == NULL) {
3569 thread_call_setup(&mapping_adjust_call_data,
3570 (thread_call_func_t) mapping_adjust,
3571 (thread_call_param_t) NULL);
3572 mapping_adjust_call = &mapping_adjust_call_data;
3573 }
3574 /* XXX rethink best way to do locking here */
3575 if (pv_free_count < PV_LOW_WATER_MARK) {
3576 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3577 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3578 SPLVM(spl);
3579 PV_FREE(pv_e);
3580 SPLX(spl);
3581 }
3582 }
3583 mappingrecurse = 0;
3584}
3585
3586void
89b3af67 3587pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
91447636
A
3588{
3589 int i;
3590 pt_entry_t *opte, *npte;
3591 pt_entry_t pte;
3592
89b3af67 3593
91447636 3594 for (i = 0; i < cnt; i++) {
89b3af67 3595 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
91447636 3596 if (0 == opte) panic("kernel_commpage");
91447636
A
3597 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3598 pte &= ~INTEL_PTE_WRITE; // ensure read only
89b3af67
A
3599 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
3600 if (0 == npte) panic("user_commpage");
3601 pmap_store_pte(npte, pte);
91447636
A
3602 kernel_commpage += INTEL_PGBYTES;
3603 user_commpage += INTEL_PGBYTES;
3604 }
3605}
3606
89b3af67
A
3607#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
3608pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
3609
3610void
3611pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
3612{
3613 spl_t s;
3614 int i;
3615 pt_entry_t *kptep;
3616
3617 s = splhigh();
3618 for (i = 0; i< cnt; i++) {
3619 kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
3620 if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte");
3621 pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
3622 }
3623 splx(s);
3624
3625}
3626
3627void
3628pmap_map_sharedpage(__unused task_t task, pmap_t p)
3629{
3630 pt_entry_t *ptep;
3631 spl_t s;
3632 int i;
3633
3634 if (!p->pm_64bit) return;
3635 /* setup high 64 bit commpage */
3636 s = splhigh();
3637 while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) {
3638 splx(s);
3639 pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS);
3640 s = splhigh();
3641 }
3642
3643 for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) {
3644 ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
3645 if (0 == ptep) panic("pmap_map_sharedpage");
3646 pmap_store_pte(ptep, pmap_commpage64_ptes[i]);
3647 }
3648 splx(s);
3649
3650}
3651
3652void
3653pmap_unmap_sharedpage(pmap_t pmap)
3654{
3655 spl_t s;
3656 pt_entry_t *ptep;
3657 int i;
3658
3659 if (!pmap->pm_64bit) return;
3660 s = splhigh();
3661 for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) {
3662 ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE));
3663 if (ptep) pmap_store_pte(ptep, 0);
3664 }
3665 splx(s);
3666}
3667
91447636 3668static cpu_pmap_t cpu_pmap_master;
91447636
A
3669
3670struct cpu_pmap *
3671pmap_cpu_alloc(boolean_t is_boot_cpu)
3672{
3673 int ret;
3674 int i;
3675 cpu_pmap_t *cp;
91447636 3676 vm_offset_t address;
89b3af67 3677 vm_map_address_t mapaddr;
91447636 3678 vm_map_entry_t entry;
89b3af67 3679 pt_entry_t *pte;
91447636
A
3680
3681 if (is_boot_cpu) {
3682 cp = &cpu_pmap_master;
91447636
A
3683 } else {
3684 /*
3685 * The per-cpu pmap data structure itself.
3686 */
3687 ret = kmem_alloc(kernel_map,
3688 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3689 if (ret != KERN_SUCCESS) {
3690 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3691 return NULL;
3692 }
3693 bzero((void *)cp, sizeof(cpu_pmap_t));
3694
3695 /*
89b3af67 3696 * The temporary windows used for copy/zero - see loose_ends.c
91447636 3697 */
89b3af67
A
3698 ret = vm_map_find_space(kernel_map,
3699 &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
91447636 3700 if (ret != KERN_SUCCESS) {
89b3af67
A
3701 printf("pmap_cpu_alloc() "
3702 "vm_map_find_space ret=%d\n", ret);
91447636
A
3703 pmap_cpu_free(cp);
3704 return NULL;
3705 }
89b3af67 3706 address = (vm_offset_t)mapaddr;
5d5c5d0d 3707
89b3af67
A
3708 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
3709 while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
3710 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
3711 * (int *) pte = 0;
8f6c56a5 3712 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
89b3af67 3713 cp->mapwindow[i].prv_CMAP = pte;
5d5c5d0d 3714 }
89b3af67 3715 vm_map_unlock(kernel_map);
5d5c5d0d
A
3716 }
3717
89b3af67
A
3718 cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
3719 cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
3720 cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
5d5c5d0d 3721
8f6c56a5 3722 return cp;
5d5c5d0d
A
3723}
3724
3725void
8f6c56a5 3726pmap_cpu_free(struct cpu_pmap *cp)
5d5c5d0d 3727{
8f6c56a5 3728 if (cp != NULL && cp != &cpu_pmap_master) {
8f6c56a5 3729 kfree((void *) cp, sizeof(cpu_pmap_t));
5d5c5d0d 3730 }
5d5c5d0d 3731}
89b3af67
A
3732
3733
3734mapwindow_t *
3735pmap_get_mapwindow(pt_entry_t pentry)
3736{
3737 mapwindow_t *mp;
3738 int i;
3739 boolean_t istate;
3740
3741 /*
3742 * can be called from hardware interrupt context
3743 * so we need to protect the lookup process
3744 */
3745 istate = ml_set_interrupts_enabled(FALSE);
3746
3747 /*
3748 * Note: 0th map reserved for pmap_pte()
3749 */
3750 for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
3751 mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
3752
3753 if (*mp->prv_CMAP == 0) {
3754 *mp->prv_CMAP = pentry;
3755 break;
3756 }
3757 }
3758 if (i >= PMAP_NWINDOWS)
3759 mp = NULL;
3760 (void) ml_set_interrupts_enabled(istate);
3761
3762 return (mp);
3763}
3764
3765
3766/*
 3767 *	kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
 3768 *
 3769 *	grand  = the pmap that we will nest subord into
 3770 *	subord = the pmap that goes into the grand
 3771 *	vstart = start of the range in grand to be nested
 3772 *	nstart = start of the corresponding range in the nested (subord) pmap
 3773 *	size   = Size of nest area (up to 16TB)
3774 *
3775 * Inserts a pmap into another. This is used to implement shared segments.
3776 *
3777 * on x86 this is very limited right now. must be exactly 1 segment.
3778 *
 3779 *	Note that we depend upon higher level VM locks to ensure that things don't change while
3780 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
3781 * or do 2 nests at once.
3782 */
3783
3784
3785kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {
3786
3787 vm_map_offset_t vaddr, nvaddr;
3788 pd_entry_t *pde,*npde;
3789 unsigned int i, need_flush;
3790 unsigned int num_pde;
3791 spl_t s;
3792
3793 // do validity tests
3794
3795 if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */
3796 if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */
3797 if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */
3798 if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */
3799 if(size == 0) {
3800 panic("pmap_nest: size is invalid - %016llX\n", size);
3801 }
3802 if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE);
3803
3804 subord->pm_shared = TRUE;
3805
3806 // prepopulate subord pmap pde's if necessary
3807
3808 if (cpu_64bit) {
3809 s = splhigh();
3810 while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) {
3811 splx(s);
3812 pmap_expand(subord, nstart);
3813 s = splhigh();
3814 }
3815 splx(s);
3816 }
3817
3818 PMAP_READ_LOCK(subord,s);
3819 nvaddr = (vm_map_offset_t)nstart;
3820 need_flush = 0;
3821 num_pde = size >> PDESHIFT;
3822
3823 for (i=0;i<num_pde;i++) {
3824 npde = pmap_pde(subord, nvaddr);
3825 if ((0 == npde) || (*npde++ & INTEL_PTE_VALID) == 0) {
3826 PMAP_READ_UNLOCK(subord,s);
3827 pmap_expand(subord, nvaddr); // pmap_expand handles races
3828 PMAP_READ_LOCK(subord,s);
3829 need_flush++;
3830 }
3831 nvaddr += NBPDE;
3832 }
3833
3834 if (need_flush) {
3835 nvaddr = (vm_map_offset_t)nstart;
3836 PMAP_UPDATE_TLBS(subord, nvaddr, nvaddr + (1 << 28) -1 );
3837 }
3838 PMAP_READ_UNLOCK(subord,s);
3839
3840 // copy pde's from subord pmap into grand pmap
3841
3842 if (cpu_64bit) {
3843 s = splhigh();
3844 while (PD_ENTRY_NULL == (pde = pmap_pde(grand, vstart))) {
3845 splx(s);
3846 pmap_expand(grand, vstart);
3847 s = splhigh();
3848 }
3849 splx(s);
3850 }
3851
3852 PMAP_READ_LOCK(grand,s);
3853 vaddr = (vm_map_offset_t)vstart;
3854 for (i=0;i<num_pde;i++,pde++) {
3855 pd_entry_t tpde;
3856 npde = pmap_pde(subord, nstart);
3857 if (npde == 0) panic("pmap_nest: no npde, subord 0x%x nstart 0x%llx", subord, nstart);
3858 tpde = *npde;
3859 nstart += NBPDE;
3860 pde = pmap_pde(grand, vaddr);
3861 if (pde == 0) panic("pmap_nest: no pde, grand 0x%x vaddr 0x%llx", grand, vaddr);
3862 vaddr += NBPDE;
3863 pmap_store_pte(pde, tpde);
3864 }
3865 PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1 << 28) -1 );
3866
3867 PMAP_READ_UNLOCK(grand,s);
3868
3869 return KERN_SUCCESS;
3870}
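
/*
 * Illustrative sketch (added for exposition, not compiled): nesting one
 * 256MB shared segment, which satisfies the alignment and size checks at
 * the top of pmap_nest(). The addresses are hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_nest_shared_segment(pmap_t grand, pmap_t subord)
{
	addr64_t vstart = 0x90000000ULL;	/* 256MB aligned in grand */
	addr64_t nstart = 0x90000000ULL;	/* 256MB aligned in subord */
	uint64_t size   = 1ULL << 28;		/* exactly one 256MB segment */

	/* copies subord's pde's for [nstart, nstart+size) into grand at vstart */
	return pmap_nest(grand, subord, vstart, nstart, size);
}
#endif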
3871
3872/*
3873 * kern_return_t pmap_unnest(grand, vaddr)
3874 *
 3875 *	grand = the pmap that the nested pmap will be removed from
3876 * vaddr = start of range in pmap to be unnested
3877 *
3878 * Removes a pmap from another. This is used to implement shared segments.
 3879 *	This is currently limited to segment (256MB) aligned,
 3880 *	segment-sized ranges.
3881 */
3882
3883kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) {
3884
3885 spl_t s;
3886 pd_entry_t *pde;
3887 unsigned int i;
3888 unsigned int num_pde;
3889
3890 PMAP_READ_LOCK(grand,s);
3891
3892 // invalidate all pdes for segment at vaddr in pmap grand
3893
3894 num_pde = (1<<28) >> PDESHIFT;
3895
3896 for (i=0;i<num_pde;i++,pde++) {
3897 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
3898 if (pde == 0) panic("pmap_unnest: no pde, grand 0x%x vaddr 0x%llx\n", grand, vaddr);
3899 pmap_store_pte(pde, (pd_entry_t)0);
3900 vaddr += NBPDE;
3901 }
3902 PMAP_UPDATE_TLBS(grand, vaddr, vaddr + (1<<28) -1 );
3903
3904 PMAP_READ_UNLOCK(grand,s);
3905
3906 return KERN_SUCCESS; /* Bye, bye, butterfly... */
3907}
3908
3909void
3910pmap_switch(pmap_t tpmap)
3911{
3912 spl_t s;
3913 int my_cpu;
3914
3915 s = splhigh(); /* Make sure interruptions are disabled */
3916 my_cpu = cpu_number();
3917
3918 set_dirbase(tpmap, my_cpu);
3919
3920 splx(s);
3921}
3922
3923
3924/*
3925 * disable no-execute capability on
3926 * the specified pmap
3927 */
3928void pmap_disable_NX(pmap_t pmap) {
3929
3930 pmap->nx_enabled = 0;
3931}
3932
3933void
3934pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
3935 vm_size_t *alloc_size, int *collectable, int *exhaustable)
3936{
3937 *count = inuse_ptepages_count;
3938 *cur_size = PAGE_SIZE * inuse_ptepages_count;
3939 *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
3940 *elem_size = PAGE_SIZE;
3941 *alloc_size = PAGE_SIZE;
3942
3943 *collectable = 1;
3944 *exhaustable = 0;
3945}
3946
3947vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
3948{
3949 enum high_fixed_addresses a;
3950 a = e + HIGH_CPU_END * cpu;
3951 return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
3952}
3953
3954vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
3955{
3956 return pmap_cpu_high_map_vaddr(cpu_number(), e);
3957}
3958
3959vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
3960{
3961 enum high_fixed_addresses a;
3962 vm_offset_t vaddr;
3963
3964 a = e + HIGH_CPU_END * cpu_number();
3965 vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
3966 *(pte_unique_base + a) = pte;
3967
3968 /* TLB flush for this page for this cpu */
3969 invlpg((uintptr_t)vaddr);
3970
3971 return vaddr;
3972}
3973
3974
3975/*
3976 * Called with pmap locked, we:
3977 * - scan through per-cpu data to see which other cpus need to flush
3978 * - send an IPI to each non-idle cpu to be flushed
3979 * - wait for all to signal back that they are inactive or we see that
3980 * they are in an interrupt handler or at a safe point
 3981 *  - flush the local tlb if it is active for this pmap
3982 * - return ... the caller will unlock the pmap
3983 */
3984void
3985pmap_flush_tlbs(pmap_t pmap)
3986{
3987 unsigned int cpu;
3988 unsigned int cpu_bit;
3989 cpu_set cpus_to_signal;
3990 unsigned int my_cpu = cpu_number();
3991 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
3992 boolean_t flush_self = FALSE;
3993 uint64_t deadline;
3994
3995 assert(!ml_get_interrupts_enabled());
3996
3997 /*
3998 * Scan other cpus for matching active or task CR3.
3999 * For idle cpus (with no active map) we mark them invalid but
4000 * don't signal -- they'll check as they go busy.
4001 * Note: for the kernel pmap we look for 64-bit shared address maps.
4002 */
4003 cpus_to_signal = 0;
4004 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4005 if (!cpu_datap(cpu)->cpu_running)
4006 continue;
4007 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
4008 (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) ||
4009 (pmap->pm_shared) ||
4010 ((pmap == kernel_pmap) &&
4011 (!CPU_CR3_IS_ACTIVE(cpu) ||
4012 cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
4013 if (cpu == my_cpu) {
4014 flush_self = TRUE;
4015 continue;
4016 }
4017 cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
4018 __asm__ volatile("mfence");
4019
4020 if (CPU_CR3_IS_ACTIVE(cpu)) {
4021 cpus_to_signal |= cpu_bit;
4022 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
4023 }
4024 }
4025 }
4026
4027 if (cpus_to_signal) {
4028 KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0);
4029
4030 deadline = mach_absolute_time() + LockTimeOut;
4031 /*
4032 * Wait for those other cpus to acknowledge
4033 */
4034 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
4035 while ((cpus_to_signal & cpu_bit) != 0) {
4036 if (!cpu_datap(cpu)->cpu_running ||
4037 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
4038 !CPU_CR3_IS_ACTIVE(cpu)) {
4039 cpus_to_signal &= ~cpu_bit;
4040 break;
4041 }
4042 if (mach_absolute_time() > deadline)
4043 panic("pmap_flush_tlbs() "
4044 "timeout pmap=%p cpus_to_signal=%p",
4045 pmap, cpus_to_signal);
4046 cpu_pause();
4047 }
4048 if (cpus_to_signal == 0)
4049 break;
4050 }
4051 KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0);
4052 }
4053
4054 /*
4055 * Flush local tlb if required.
4056 * We need this flush even if the pmap being changed
4057 * is the user map... in case we do a copyin/out
4058 * before returning to user mode.
4059 */
4060 if (flush_self)
4061 flush_tlb();
4062
4063}
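
/*
 * Illustration only: the calling pattern the comment above describes --
 * mappings are changed with the pmap locked, the TLB shootdown is issued
 * before the lock is dropped, and the caller performs the unlock. The
 * helper name is hypothetical; PMAP_UPDATE_TLBS() is assumed to funnel
 * into pmap_flush_tlbs() as elsewhere in this file.
 */
#if 0	/* editorial sketch, not compiled */
static void
clear_one_pde_sketch(pmap_t pmap, vm_map_offset_t vaddr)
{
	spl_t		s;
	pd_entry_t	*pde;

	PMAP_READ_LOCK(pmap, s);
	pde = pmap_pde(pmap, vaddr);
	if (pde) {
		pmap_store_pte(pde, (pd_entry_t)0);
		/* shoot down the range on every cpu that may cache it */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + NBPDE - 1);
	}
	PMAP_READ_UNLOCK(pmap, s);
}
#endif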
4064
4065void
4066process_pmap_updates(void)
4067{
4068 flush_tlb();
4069
4070 current_cpu_datap()->cpu_tlb_invalid = FALSE;
4071 __asm__ volatile("mfence");
4072}
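
/*
 * Illustration only: the deferred side of the shootdown protocol -- a cpu
 * that was not signalled (because its CR3 was not active) is expected to
 * notice cpu_tlb_invalid when it next goes busy and flush locally. The
 * exact call site of that check (e.g. the path that re-activates CR3) is
 * not in this file, so this sketches only the check itself.
 */
#if 0	/* editorial sketch, not compiled */
static inline void
tlb_going_busy_check_sketch(void)
{
	if (current_cpu_datap()->cpu_tlb_invalid)
		process_pmap_updates();	/* flush_tlb() and clear the flag */
}
#endif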
4073
4074void
4075pmap_update_interrupt(void)
4076{
4077 KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0);
4078
4079 assert(!ml_get_interrupts_enabled());
4080
4081 process_pmap_updates();
4082
4083 KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0);
4084}
4085
4086
4087unsigned int pmap_cache_attributes(ppnum_t pn) {
4088
4089 if (!pmap_valid_page(pn))
4090 return (VM_WIMG_IO);
4091
4092 return (VM_WIMG_COPYBACK);
4093}
4094
4095#ifdef PMAP_DEBUG
4096void
4097pmap_dump(pmap_t p)
4098{
4099 int i;
4100
4101	kprintf("pmap %p\n", p);
4102
4103 kprintf(" pm_cr3 0x%llx\n",p->pm_cr3);
4104	kprintf(" pm_pml4 %p\n", p->pm_pml4);
4105	kprintf(" pm_pdpt %p\n", p->pm_pdpt);
4106
4107 kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4);
4108 for (i=0;i<8;i++)
4109 kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
4110}
4111
4112void pmap_dump_wrap(void)
4113{
4114 pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
4115}
4116
4117void
4118dump_4GB_pdpt(pmap_t p)
4119{
4120 int spl;
4121 pdpt_entry_t *user_pdptp;
4122 pdpt_entry_t *kern_pdptp;
4123 pdpt_entry_t *pml4p;
4124
4125 spl = splhigh();
4126 while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
4127 splx(spl);
4128 pmap_expand_pml4(p, 0x0);
4129 spl = splhigh();
4130 }
4131 kern_pdptp = kernel_pmap->pm_pdpt;
4132 if (kern_pdptp == NULL)
4133 panic("kern_pdptp == NULL");
4134 kprintf("dump_4GB_pdpt(%p)\n"
4135 "kern_pdptp=%p (phys=0x%016llx)\n"
4136 "\t 0x%08x: 0x%016llx\n"
4137 "\t 0x%08x: 0x%016llx\n"
4138 "\t 0x%08x: 0x%016llx\n"
4139 "\t 0x%08x: 0x%016llx\n"
4140 "\t 0x%08x: 0x%016llx\n"
4141 "user_pdptp=%p (phys=0x%016llx)\n"
4142 "\t 0x%08x: 0x%016llx\n"
4143 "\t 0x%08x: 0x%016llx\n"
4144 "\t 0x%08x: 0x%016llx\n"
4145 "\t 0x%08x: 0x%016llx\n"
4146 "\t 0x%08x: 0x%016llx\n",
4147 p, kern_pdptp, kvtophys(kern_pdptp),
4148 kern_pdptp+0, *(kern_pdptp+0),
4149 kern_pdptp+1, *(kern_pdptp+1),
4150 kern_pdptp+2, *(kern_pdptp+2),
4151 kern_pdptp+3, *(kern_pdptp+3),
4152 kern_pdptp+4, *(kern_pdptp+4),
4153 user_pdptp, kvtophys(user_pdptp),
4154 user_pdptp+0, *(user_pdptp+0),
4155 user_pdptp+1, *(user_pdptp+1),
4156 user_pdptp+2, *(user_pdptp+2),
4157 user_pdptp+3, *(user_pdptp+3),
4158 user_pdptp+4, *(user_pdptp+4));
4159 kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4160 p->pm_cr3, p->pm_hold, p->pm_pml4);
4161 pml4p = (pdpt_entry_t *)p->pm_hold;
4162 if (pml4p == NULL)
4163 panic("user pml4p == NULL");
4164 kprintf("\t 0x%08x: 0x%016llx\n"
4165 "\t 0x%08x: 0x%016llx\n",
4166 pml4p+0, *(pml4p),
4167 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
4168 kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
4169 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
4170 pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
4171 if (pml4p == NULL)
4172 panic("kern pml4p == NULL");
4173 kprintf("\t 0x%08x: 0x%016llx\n"
4174 "\t 0x%08x: 0x%016llx\n",
4175 pml4p+0, *(pml4p),
4176 pml4p+511, *(pml4p+511));
4177 splx(spl);
4178}
4179
4180void dump_4GB_pdpt_thread(thread_t tp)
4181{
4182 dump_4GB_pdpt(tp->map->pmap);
4183}
4184
4185
4186#endif