1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * File: pmap.c
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
63 *
64 * Physical Map management code for Intel i386, i486, and i860.
65 *
66 * Manages physical address maps.
67 *
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
74 *
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
80 * requested.
81 *
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
84 * this module may delay invalidate or reduce-protection
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
89 */
90
91 #include <string.h>
92 #include <norma_vm.h>
93 #include <mach_kdb.h>
94 #include <mach_ldebug.h>
95
96 #include <mach/machine/vm_types.h>
97
98 #include <mach/boolean.h>
99 #include <kern/thread.h>
100 #include <kern/zalloc.h>
101
102 #include <kern/lock.h>
103 #include <kern/kalloc.h>
104 #include <kern/spl.h>
105
106 #include <vm/pmap.h>
107 #include <vm/vm_map.h>
108 #include <vm/vm_kern.h>
109 #include <mach/vm_param.h>
110 #include <mach/vm_prot.h>
111 #include <vm/vm_object.h>
112 #include <vm/vm_page.h>
113
114 #include <mach/machine/vm_param.h>
115 #include <machine/thread.h>
116
117 #include <kern/misc_protos.h> /* prototyping */
118 #include <i386/misc_protos.h>
119
120 #include <i386/cpuid.h>
121 #include <i386/cpu_data.h>
122 #include <i386/cpu_number.h>
123 #include <i386/machine_cpu.h>
124 #include <i386/mp_slave_boot.h>
125
126 #if MACH_KDB
127 #include <ddb/db_command.h>
128 #include <ddb/db_output.h>
129 #include <ddb/db_sym.h>
130 #include <ddb/db_print.h>
131 #endif /* MACH_KDB */
132
133 #include <kern/xpr.h>
134
135 #include <vm/vm_protos.h>
136
137 #include <i386/mp.h>
138
139 /*
140 * Forward declarations for internal functions.
141 */
142 void pmap_expand(
143 pmap_t map,
144 vm_offset_t v);
145
146 extern void pmap_remove_range(
147 pmap_t pmap,
148 vm_offset_t va,
149 pt_entry_t *spte,
150 pt_entry_t *epte);
151
152 void phys_attribute_clear(
153 ppnum_t phys,
154 int bits);
155
156 boolean_t phys_attribute_test(
157 ppnum_t phys,
158 int bits);
159
160 void phys_attribute_set(
161 ppnum_t phys,
162 int bits);
163
164 void pmap_growkernel(
165 vm_offset_t addr);
166
167 void pmap_set_reference(
168 ppnum_t pn);
169
170 void pmap_movepage(
171 unsigned long from,
172 unsigned long to,
173 vm_size_t size);
174
175 pt_entry_t * pmap_mapgetpte(
176 vm_map_t map,
177 vm_offset_t v);
178
179 boolean_t phys_page_exists(
180 ppnum_t pn);
181
182 #ifndef set_dirbase
183 void set_dirbase(vm_offset_t dirbase);
184 #endif /* set_dirbase */
185
186 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
187
188 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
189 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
190
191 #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL)
192 #define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
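/*
 * Example: for 0x100001000ULL, value_64bit() is non-zero (the address
 * needs more than 32 bits) and low32() yields 0x00001000; pmap_remove()
 * and pmap_find_phys() use this pair to reject addresses this 32-bit
 * pmap cannot represent.
 */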
193
194 /*
195 * Private data structures.
196 */
197
198 /*
199 * For each vm_page_t, there is a list of all currently
200 * valid virtual mappings of that page. An entry is
201 * a pv_entry_t; the list is the pv_table.
202 */
203
204 typedef struct pv_entry {
205 struct pv_entry *next; /* next pv_entry */
206 pmap_t pmap; /* pmap where mapping lies */
207 vm_offset_t va; /* virtual address for mapping */
208 } *pv_entry_t;
209
210 #define PV_ENTRY_NULL ((pv_entry_t) 0)
211
212 pv_entry_t pv_head_table; /* array of entries, one per page */
213
214 /*
215 * pv_list entries are kept on a list that can only be accessed
216 * with the pmap system locked (at SPLVM, not in the cpus_active set).
217 * The list is refilled from the pv_list_zone if it becomes empty.
218 */
219 pv_entry_t pv_free_list; /* free list at SPLVM */
220 decl_simple_lock_data(,pv_free_list_lock)
221 int pv_free_count = 0;
222 #define PV_LOW_WATER_MARK 5000
223 #define PV_ALLOC_CHUNK 2000
224 thread_call_t mapping_adjust_call;
225 static thread_call_data_t mapping_adjust_call_data;
226 int mappingrecurse = 0;
227
228 #define PV_ALLOC(pv_e) { \
229 simple_lock(&pv_free_list_lock); \
230 if ((pv_e = pv_free_list) != 0) { \
231 pv_free_list = pv_e->next; \
232 pv_free_count--; \
233 if (pv_free_count < PV_LOW_WATER_MARK) \
234 if (hw_compare_and_store(0,1,&mappingrecurse)) \
235 thread_call_enter(mapping_adjust_call); \
236 } \
237 simple_unlock(&pv_free_list_lock); \
238 }
239
240 #define PV_FREE(pv_e) { \
241 simple_lock(&pv_free_list_lock); \
242 pv_e->next = pv_free_list; \
243 pv_free_list = pv_e; \
244 pv_free_count++; \
245 simple_unlock(&pv_free_list_lock); \
246 }
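/*
 * Example (sketch) of the allocate/free pattern: PV_ALLOC() pops an
 * entry off the SPLVM free list and, when the count drops below
 * PV_LOW_WATER_MARK, schedules the mapping_adjust thread call to
 * refill it; an empty list is treated as fatal by pmap_enter().
 */
#if 0
	pv_entry_t pv_e;

	PV_ALLOC(pv_e);
	if (pv_e == PV_ENTRY_NULL)
		panic("no pv entries available");
	/* ... link pv_e onto the page's pv_list ... */
	PV_FREE(pv_e);			/* only if the entry went unused */
#endif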
247
248 zone_t pv_list_zone; /* zone of pv_entry structures */
249
250 #ifdef PAE
251 static zone_t pdpt_zone;
252 #endif
253
254
255 /*
256 * Each entry in the pv_head_table is locked by a bit in the
257 * pv_lock_table. The lock bits are accessed by the physical
258 * address of the page they lock.
259 */
260
261 char *pv_lock_table; /* pointer to array of bits */
262 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
263
264 /*
265 * First and last physical addresses that we maintain any information
266 * for. Initialized to zero so that pmap operations done before
267 * pmap_init won't touch any non-existent structures.
268 */
269 pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
270 pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
271 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
272
273 pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0;
274
275 #define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
276 #if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */
277 static struct vm_object kptobj_object_store;
278 static vm_object_t kptobj;
279 #endif
280
281
282 /*
283 * Index into pv_head table, its lock bits, and the modify/reference
284 * bits starting at vm_first_phys.
285 */
286
287 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
288
289 #define pai_to_pvh(pai) (&pv_head_table[pai])
290 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
291 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
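/*
 * Example (sketch): walking the pv_list for a managed page to count
 * its current mappings.  Assumes the caller holds the pmap system
 * write lock, which makes the per-page lock bit unnecessary.
 */
#if 0
static int
pmap_count_mappings(ppnum_t pn)
{
	int		pai = pa_index((pmap_paddr_t)i386_ptob(pn));
	pv_entry_t	pv_e = pai_to_pvh(pai);
	int		count = 0;

	if (pv_e->pmap == PMAP_NULL)
		return (0);			/* page has no mappings */
	for (; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next)
		count++;			/* one entry per (pmap, va) */
	return (count);
}
#endif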
292
293 /*
294 * Array of physical page attributes for managed pages.
295 * One byte per physical page.
296 */
297 char *pmap_phys_attributes;
298
299 /*
300 * Physical page attributes. Copy bits from PTE definition.
301 */
302 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
303 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
304 #define PHYS_NCACHE INTEL_PTE_NCACHE
305
306 /*
307 * Amount of virtual memory mapped by one
308 * page-directory entry.
309 */
310 #define PDE_MAPPED_SIZE (pdetova(1))
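/*
 * Example: with 4K pages this comes to 4MB per directory entry in the
 * non-PAE layout and 2MB under PAE (pdetova() shifts by PDESHIFT).
 */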
311
312 /*
313 * Locking and TLB invalidation
314 */
315
316 /*
317 * Locking Protocols:
318 *
319 * There are two structures in the pmap module that need locking:
320 * the pmaps themselves, and the per-page pv_lists (which are locked
321 * by locking the pv_lock_table entry that corresponds to the pv_head
322 * for the list in question.) Most routines want to lock a pmap and
323 * then do operations in it that require pv_list locking -- however
324 * pmap_remove_all and pmap_copy_on_write operate on a physical page
325 * basis and want to do the locking in the reverse order, i.e. lock
326 * a pv_list and then go through all the pmaps referenced by that list.
327 * To protect against deadlock between these two cases, the pmap_lock
328 * is used. There are three different locking protocols as a result:
329 *
330 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
331 * the pmap.
332 *
333 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
334 * lock on the pmap_lock (shared read), then lock the pmap
335 * and finally the pv_lists as needed [i.e. pmap lock before
336 * pv_list lock.]
337 *
338 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
339 * Get a write lock on the pmap_lock (exclusive write); this
340 * also guarantees exclusive access to the pv_lists. Lock the
341 * pmaps as needed.
342 *
343 * At no time may any routine hold more than one pmap lock or more than
344 * one pv_list lock. Because interrupt level routines can allocate
345 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
346 * kernel_pmap can only be held at splhigh.
347 */
348
349 /*
350 * We raise the interrupt level to splvm, to block interprocessor
351 * interrupts during pmap operations. We must take the CPU out of
352 * the cpus_active set while interrupts are blocked.
353 */
354 #define SPLVM(spl) { \
355 spl = splhigh(); \
356 mp_disable_preemption(); \
357 i_bit_clear(cpu_number(), &cpus_active); \
358 mp_enable_preemption(); \
359 }
360
361 #define SPLX(spl) { \
362 mp_disable_preemption(); \
363 i_bit_set(cpu_number(), &cpus_active); \
364 mp_enable_preemption(); \
365 splx(spl); \
366 }
367
368 /*
369 * Lock on pmap system
370 */
371 lock_t pmap_system_lock;
372
373 #define PMAP_READ_LOCK(pmap, spl) { \
374 SPLVM(spl); \
375 lock_read(&pmap_system_lock); \
376 simple_lock(&(pmap)->lock); \
377 }
378
379 #define PMAP_WRITE_LOCK(spl) { \
380 SPLVM(spl); \
381 lock_write(&pmap_system_lock); \
382 }
383
384 #define PMAP_READ_UNLOCK(pmap, spl) { \
385 simple_unlock(&(pmap)->lock); \
386 lock_read_done(&pmap_system_lock); \
387 SPLX(spl); \
388 }
389
390 #define PMAP_WRITE_UNLOCK(spl) { \
391 lock_write_done(&pmap_system_lock); \
392 SPLX(spl); \
393 }
394
395 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
396 simple_lock(&(pmap)->lock); \
397 lock_write_to_read(&pmap_system_lock); \
398 }
399
400 #define LOCK_PVH(index) lock_pvh_pai(index)
401
402 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
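/*
 * Sketch of locking protocol 2 above (a pmap-based operation), with
 * "pmap" and "pa" standing for the map and managed physical address
 * being operated on: system lock (shared) and pmap lock first, then
 * the per-page pv_list lock bit, released in reverse order.
 */
#if 0
	spl_t	spl;
	int	pai;

	PMAP_READ_LOCK(pmap, spl);	/* SPLVM + read lock + pmap lock */
	pai = pa_index(pa);
	LOCK_PVH(pai);			/* pv_list lock bit taken last */
	/* ... update the pte and pv_list for this page ... */
	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);	/* drops pmap lock, read lock, spl */
#endif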
403
404 #if USLOCK_DEBUG
405 extern int max_lock_loops;
406 extern int disableSerialOuput;
407 #define LOOP_VAR \
408 unsigned int loop_count; \
409 loop_count = disableSerialOuput ? max_lock_loops \
410 : max_lock_loops*100
411 #define LOOP_CHECK(msg, pmap) \
412 if (--loop_count == 0) { \
413 mp_disable_preemption(); \
414 kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \
415 msg, cpu_number(), pmap, cpus_active); \
416 Debugger("deadlock detection"); \
417 mp_enable_preemption(); \
418 loop_count = max_lock_loops; \
419 }
420 #else /* USLOCK_DEBUG */
421 #define LOOP_VAR
422 #define LOOP_CHECK(msg, pmap)
423 #endif /* USLOCK_DEBUG */
424
425 #define PMAP_UPDATE_TLBS(pmap, s, e) \
426 { \
427 cpu_set cpu_mask; \
428 cpu_set users; \
429 \
430 mp_disable_preemption(); \
431 cpu_mask = 1 << cpu_number(); \
432 \
433 /* Since the pmap is locked, other updates are locked */ \
434 /* out, and any pmap_activate has finished. */ \
435 \
436 /* find other cpus using the pmap */ \
437 users = (pmap)->cpus_using & ~cpu_mask; \
438 if (users) { \
439 LOOP_VAR; \
440 /* signal them, and wait for them to finish */ \
441 /* using the pmap */ \
442 signal_cpus(users, (pmap), (s), (e)); \
443 while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
444 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \
445 cpu_pause(); \
446 } \
447 } \
448 /* invalidate our own TLB if pmap is in use */ \
449 \
450 if ((pmap)->cpus_using & cpu_mask) { \
451 INVALIDATE_TLB((pmap), (s), (e)); \
452 } \
453 \
454 mp_enable_preemption(); \
455 }
456
457 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
458
459 #define INVALIDATE_TLB(m, s, e) { \
460 flush_tlb(); \
461 }
462
463 /*
464 * Structures to keep track of pending TLB invalidations
465 */
466 cpu_set cpus_active;
467 cpu_set cpus_idle;
468
469 #define UPDATE_LIST_SIZE 4
470
471 struct pmap_update_item {
472 pmap_t pmap; /* pmap to invalidate */
473 vm_offset_t start; /* start address to invalidate */
474 vm_offset_t end; /* end address to invalidate */
475 };
476
477 typedef struct pmap_update_item *pmap_update_item_t;
478
479 /*
480 * List of pmap updates. If the list overflows,
481 * the last entry is changed to invalidate all.
482 */
483 struct pmap_update_list {
484 decl_simple_lock_data(,lock)
485 int count;
486 struct pmap_update_item item[UPDATE_LIST_SIZE];
487 } ;
488 typedef struct pmap_update_list *pmap_update_list_t;
489
490 extern void signal_cpus(
491 cpu_set use_list,
492 pmap_t pmap,
493 vm_offset_t start,
494 vm_offset_t end);
495
496 pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
497
498 /*
499 * Other useful macros.
500 */
501 #define current_pmap() (vm_map_pmap(current_thread()->map))
502 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
503
504 struct pmap kernel_pmap_store;
505 pmap_t kernel_pmap;
506
507 #ifdef PMAP_QUEUE
508 decl_simple_lock_data(,free_pmap_lock)
509 #endif
510
511 struct zone *pmap_zone; /* zone of pmap structures */
512
513 int pmap_debug = 0; /* flag for debugging prints */
514
515 unsigned int inuse_ptepages_count = 0; /* debugging */
516
517 /*
518 * Pmap cache. Cache is threaded through ref_count field of pmap.
519 * Max will eventually be constant -- variable for experimentation.
520 */
521 int pmap_cache_max = 32;
522 int pmap_alloc_chunk = 8;
523 pmap_t pmap_cache_list;
524 int pmap_cache_count;
525 decl_simple_lock_data(,pmap_cache_lock)
526
527 extern vm_offset_t hole_start, hole_end;
528
529 extern char end;
530
531 static int nkpt;
532
533 pt_entry_t *DMAP1, *DMAP2;
534 caddr_t DADDR1;
535 caddr_t DADDR2;
536
537 #if DEBUG_ALIAS
538 #define PMAP_ALIAS_MAX 32
539 struct pmap_alias {
540 vm_offset_t rpc;
541 pmap_t pmap;
542 vm_offset_t va;
543 int cookie;
544 #define PMAP_ALIAS_COOKIE 0xdeadbeef
545 } pmap_aliasbuf[PMAP_ALIAS_MAX];
546 int pmap_alias_index = 0;
547 extern vm_offset_t get_rpc();
548
549 #endif /* DEBUG_ALIAS */
550
551 #define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
552 #define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT])
553
554 static __inline int
555 pmap_is_current(pmap_t pmap)
556 {
557 return (pmap == kernel_pmap ||
558 (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
559 }
560
561
562 /*
563 * return address of mapped pte for vaddr va in pmap pmap.
564 */
565 pt_entry_t *
566 pmap_pte(pmap_t pmap, vm_offset_t va)
567 {
568 pd_entry_t *pde;
569 pd_entry_t newpf;
570
571 pde = pmap_pde(pmap, va);
572 if (*pde != 0) {
573 if (pmap_is_current(pmap))
574 return( vtopte(va));
575 newpf = *pde & PG_FRAME;
576 if (((*CM4) & PG_FRAME) != newpf) {
577 *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
578 invlpg((u_int)CA4);
579 }
580 return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
581 }
582 return(0);
583 }
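/*
 * Example (sketch): using pmap_pte() to test whether a virtual address
 * is currently mapped writable in a pmap.  The caller is assumed to
 * hold the pmap lock so the returned pte cannot be reclaimed under us.
 */
#if 0
static boolean_t
pmap_is_writable(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte = pmap_pte(pmap, va);

	if (pte == PT_ENTRY_NULL || !(*pte & INTEL_PTE_VALID))
		return FALSE;			/* no page table or not present */
	return (*pte & INTEL_PTE_WRITE) != 0;
}
#endif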
584
585 #define DEBUG_PTE_PAGE 0
586
587 #if DEBUG_PTE_PAGE
588 void
589 ptep_check(
590 ptep_t ptep)
591 {
592 register pt_entry_t *pte, *epte;
593 int ctu, ctw;
594
595 /* check the use and wired counts */
596 if (ptep == PTE_PAGE_NULL)
597 return;
598 pte = pmap_pte(ptep->pmap, ptep->va);
599 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
600 ctu = 0;
601 ctw = 0;
602 while (pte < epte) {
603 if (pte->pfn != 0) {
604 ctu++;
605 if (pte->wired)
606 ctw++;
607 }
608 pte++;
609 }
610
611 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
612 printf("use %d wired %d - actual use %d wired %d\n",
613 ptep->use_count, ptep->wired_count, ctu, ctw);
614 panic("pte count");
615 }
616 }
617 #endif /* DEBUG_PTE_PAGE */
618
619 /*
620 * Map memory at initialization. The physical addresses being
621 * mapped are not managed and are never unmapped.
622 *
623 * For now, VM is already on, we only need to map the
624 * specified memory.
625 */
626 vm_offset_t
627 pmap_map(
628 register vm_offset_t virt,
629 register vm_offset_t start_addr,
630 register vm_offset_t end_addr,
631 register vm_prot_t prot)
632 {
633 register int ps;
634
635 ps = PAGE_SIZE;
636 while (start_addr < end_addr) {
637 pmap_enter(kernel_pmap,
638 virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
639 virt += ps;
640 start_addr += ps;
641 }
642 return(virt);
643 }
644
645 /*
646 * Back-door routine for mapping kernel VM at initialization.
647 * Useful for mapping memory outside the range
648 * [vm_first_phys, vm_last_phys) (i.e., devices).
649 * Sets no-cache, A, D.
650 * Otherwise like pmap_map.
651 */
652 vm_offset_t
653 pmap_map_bd(
654 register vm_offset_t virt,
655 register vm_offset_t start_addr,
656 register vm_offset_t end_addr,
657 vm_prot_t prot)
658 {
659 register pt_entry_t template;
660 register pt_entry_t *pte;
661
662 template = pa_to_pte(start_addr)
663 | INTEL_PTE_NCACHE
664 | INTEL_PTE_REF
665 | INTEL_PTE_MOD
666 | INTEL_PTE_WIRED
667 | INTEL_PTE_VALID;
668 if (prot & VM_PROT_WRITE)
669 template |= INTEL_PTE_WRITE;
670
671 /* XXX move pmap_pte out of loop, once one pte mapped, all are */
672 while (start_addr < end_addr) {
673 pte = pmap_pte(kernel_pmap, virt);
674 if (pte == PT_ENTRY_NULL) {
675 panic("pmap_map_bd: Invalid kernel address\n");
676 }
677 WRITE_PTE_FAST(pte, template)
678 pte_increment_pa(template);
679 virt += PAGE_SIZE;
680 start_addr += PAGE_SIZE;
681 }
682
683 flush_tlb();
684 return(virt);
685 }
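/*
 * Example (sketch): one plausible early-boot use of pmap_map_bd() to
 * map a device register page uncached; the physical range shown is
 * only illustrative.
 */
#if 0
	vm_offset_t regs_va;

	regs_va = virtual_avail;
	virtual_avail = pmap_map_bd(regs_va,
			0xfee00000, 0xfee00000 + PAGE_SIZE,	/* illustrative range */
			VM_PROT_READ | VM_PROT_WRITE);
	/* regs_va now maps the range wired, uncached, with A and D preset */
#endif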
686
687 extern char *first_avail;
688 extern vm_offset_t virtual_avail, virtual_end;
689 extern pmap_paddr_t avail_start, avail_end;
690 extern vm_offset_t etext;
691 extern void *sectHIBB;
692 extern int sectSizeHIB;
693
694 /*
695 * Bootstrap the system enough to run with virtual memory.
696 * Map the kernel's code and data, and allocate the system page table.
697 * Called with mapping OFF. Page_size must already be set.
698 *
699 * Parameters:
700 * load_start: PA where kernel was loaded
701 * avail_start PA of first available physical page -
702 * after kernel page tables
703 * avail_end PA of last available physical page
704 * virtual_avail VA of first available page -
705 * after kernel page tables
706 * virtual_end VA of last available page -
707 * end of kernel address space
708 *
709 * &start_text start of kernel text
710 * &etext end of kernel text
711 */
712
713 void
714 pmap_bootstrap(
715 __unused vm_offset_t load_start)
716 {
717 vm_offset_t va;
718 pt_entry_t *pte;
719 int i;
720 int wpkernel, boot_arg;
721
722 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
723 * known to VM */
724
725 /*
726 * The kernel's pmap is statically allocated so we don't
727 * have to use pmap_create, which is unlikely to work
728 * correctly at this part of the boot sequence.
729 */
730
731 kernel_pmap = &kernel_pmap_store;
732 #ifdef PMAP_QUEUE
733 kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */
734 kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */
735 #endif
736 kernel_pmap->ref_count = 1;
737 kernel_pmap->pm_obj = (vm_object_t) NULL;
738 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
739 kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD;
740 #ifdef PAE
741 kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
742 kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT;
743 #endif
744
745 va = (vm_offset_t)kernel_pmap->dirbase;
746 /* setup self referential mapping(s) */
747 for (i = 0; i< NPGPTD; i++ ) {
748 pmap_paddr_t pa;
749 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
750 * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) =
751 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
752 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
753 #ifdef PAE
754 kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID;
755 #endif
756 }
757
758 nkpt = NKPT;
759
760 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
761 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
762
763 /*
764 * Reserve some special page table entries/VA space for temporary
765 * mapping of pages.
766 */
767 #define SYSMAP(c, p, v, n) \
768 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
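	/*
	 * For example, SYSMAP(caddr_t, CM1, CA1, 1) expands to
	 *	CA1 = (caddr_t)va; va += 1*INTEL_PGBYTES; CM1 = pte; pte += 1;
	 * i.e. it reserves one page of VA (CA1) and records a pointer to
	 * the pte that maps it (CM1).
	 */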
769
770 va = virtual_avail;
771 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
772
773 /*
774 * CMAP1/CMAP2 are used for zeroing and copying pages.
775 * CMAP3 is used for ml_phys_read/write.
776 */
777 SYSMAP(caddr_t, CM1, CA1, 1)
778 * (pt_entry_t *) CM1 = 0;
779 SYSMAP(caddr_t, CM2, CA2, 1)
780 * (pt_entry_t *) CM2 = 0;
781 SYSMAP(caddr_t, CM3, CA3, 1)
782 * (pt_entry_t *) CM3 = 0;
783
784 /* used by pmap_pte */
785 SYSMAP(caddr_t, CM4, CA4, 1)
786 * (pt_entry_t *) CM4 = 0;
787
788 /* DMAP used for debugger */
789 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
790 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
791
792
793 lock_init(&pmap_system_lock,
794 FALSE, /* NOT a sleep lock */
795 0, 0);
796
797 virtual_avail = va;
798
799 wpkernel = 1;
800 if (PE_parse_boot_arg("debug", &boot_arg)) {
801 if (boot_arg & DB_PRT) wpkernel = 0;
802 if (boot_arg & DB_NMI) wpkernel = 0;
803 }
804
805 /* remap kernel text readonly if not debugging or kprintfing */
806 if (wpkernel)
807 {
808 vm_offset_t myva;
809 pt_entry_t *ptep;
810
811 for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
812 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
813 continue;
814 ptep = pmap_pte(kernel_pmap, myva);
815 if (ptep)
816 *ptep &= ~INTEL_PTE_RW;
817 }
818 flush_tlb();
819 }
820
821 simple_lock_init(&kernel_pmap->lock, 0);
822 simple_lock_init(&pv_free_list_lock, 0);
823
824 /* invalidate user virtual addresses */
825 memset((char *)kernel_pmap->dirbase,
826 0,
827 (KPTDI) * sizeof(pd_entry_t));
828
829 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
830 VADDR(KPTDI,0), virtual_end);
831 #ifdef PAE
832 kprintf("Available physical space from 0x%llx to 0x%llx\n",
833 avail_start, avail_end);
834 printf("PAE enabled\n");
835 #else
836 kprintf("Available physical space from 0x%x to 0x%x\n",
837 avail_start, avail_end);
838 #endif
839 }
840
841 void
842 pmap_virtual_space(
843 vm_offset_t *startp,
844 vm_offset_t *endp)
845 {
846 *startp = virtual_avail;
847 *endp = virtual_end;
848 }
849
850 /*
851 * Initialize the pmap module.
852 * Called by vm_init, to initialize any structures that the pmap
853 * system needs to map virtual memory.
854 */
855 void
856 pmap_init(void)
857 {
858 register long npages;
859 vm_offset_t addr;
860 register vm_size_t s;
861 vm_offset_t vaddr;
862 ppnum_t ppn;
863
864 /*
865 * Allocate memory for the pv_head_table and its lock bits,
866 * the modify bit array, and the pte_page table.
867 */
868
869 /* zero bias all these arrays now instead of off avail_start
870 so we cover all memory */
871 npages = i386_btop(avail_end);
872 s = (vm_size_t) (sizeof(struct pv_entry) * npages
873 + pv_lock_table_size(npages)
874 + npages);
875
876 s = round_page(s);
877 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
878 panic("pmap_init");
879
880 memset((char *)addr, 0, s);
881
882 /*
883 * Allocate the structures first to preserve word-alignment.
884 */
885 pv_head_table = (pv_entry_t) addr;
886 addr = (vm_offset_t) (pv_head_table + npages);
887
888 pv_lock_table = (char *) addr;
889 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
890
891 pmap_phys_attributes = (char *) addr;
892
893 /*
894 * Create the zone of physical maps,
895 * and of the physical-to-virtual entries.
896 */
897 s = (vm_size_t) sizeof(struct pmap);
898 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
899 s = (vm_size_t) sizeof(struct pv_entry);
900 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
901 #ifdef PAE
902 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
903 s = 63;
904 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
905 #endif
906
907 /*
908 * Only now, when all of the data structures are allocated,
909 * can we set vm_first_phys and vm_last_phys. If we set them
910 * too soon, the kmem_alloc_wired above will try to use these
911 * data structures and blow up.
912 */
913
914 /* zero bias this now so we cover all memory */
915 vm_first_phys = 0;
916 vm_last_phys = avail_end;
917
918 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
919 kptobj = &kptobj_object_store;
920 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
921 kernel_pmap->pm_obj = kptobj;
922 #endif
923
924 /* create pv entries for kernel pages mapped by low level
925 startup code. these have to exist so we can pmap_remove()
926 e.g. kext pages from the middle of our addr space */
927
928 vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS;
929 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
930 pv_entry_t pv_e;
931
932 pv_e = pai_to_pvh(ppn);
933 pv_e->va = vaddr;
934 vaddr += PAGE_SIZE;
935 pv_e->pmap = kernel_pmap;
936 pv_e->next = PV_ENTRY_NULL;
937 }
938
939 pmap_initialized = TRUE;
940
941 /*
942 * Initialize pmap cache.
943 */
944 pmap_cache_list = PMAP_NULL;
945 pmap_cache_count = 0;
946 simple_lock_init(&pmap_cache_lock, 0);
947 #ifdef PMAP_QUEUE
948 simple_lock_init(&free_pmap_lock, 0);
949 #endif
950
951 }
952
953 void
954 x86_lowmem_free(void)
955 {
956 /* free lowmem pages back to the vm system. we had to defer doing this
957 until the vm system was fully up.
958 the actual pages that are released are determined by which
959 pages the memory sizing code puts into the region table */
960
961 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS,
962 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
963 }
964
965
966 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
967
968 boolean_t
969 pmap_verify_free(
970 ppnum_t pn)
971 {
972 pmap_paddr_t phys;
973 pv_entry_t pv_h;
974 int pai;
975 spl_t spl;
976 boolean_t result;
977
978 assert(pn != vm_page_fictitious_addr);
979 phys = (pmap_paddr_t)i386_ptob(pn);
980 if (!pmap_initialized)
981 return(TRUE);
982
983 if (!pmap_valid_page(pn))
984 return(FALSE);
985
986 PMAP_WRITE_LOCK(spl);
987
988 pai = pa_index(phys);
989 pv_h = pai_to_pvh(pai);
990
991 result = (pv_h->pmap == PMAP_NULL);
992 PMAP_WRITE_UNLOCK(spl);
993
994 return(result);
995 }
996
997 /*
998 * Create and return a physical map.
999 *
1000 * If the size specified for the map
1001 * is zero, the map is an actual physical
1002 * map, and may be referenced by the
1003 * hardware.
1004 *
1005 * If the size specified is non-zero,
1006 * the map will be used in software only, and
1007 * is bounded by that size.
1008 */
1009 pmap_t
1010 pmap_create(
1011 vm_size_t size)
1012 {
1013 register pmap_t p;
1014 #ifdef PMAP_QUEUE
1015 register pmap_t pro;
1016 spl_t s;
1017 #endif
1018 register int i;
1019 register vm_offset_t va;
1020
1021 /*
1022 * A software use-only map doesn't even need a map.
1023 */
1024
1025 if (size != 0) {
1026 return(PMAP_NULL);
1027 }
1028
1029 p = (pmap_t) zalloc(pmap_zone);
1030 if (PMAP_NULL == p)
1031 panic("pmap_create zalloc");
1032 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1033 panic("pmap_create kmem_alloc_wired");
1034 #ifdef PAE
1035 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1036 if ((vm_offset_t)NULL == p->pm_hold) {
1037 panic("pdpt zalloc");
1038 }
1039 p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1040 p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */
1041 #endif
1042 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG))))
1043 panic("pmap_create vm_object_allocate");
1044 memcpy(p->dirbase,
1045 (void *)((unsigned int)IdlePTD | KERNBASE),
1046 NBPTD);
1047 va = (vm_offset_t)p->dirbase;
1048 p->pdirbase = (pd_entry_t *)(kvtophys(va));
1049 simple_lock_init(&p->lock, 0);
1050
1051 /* setup self referential mapping(s) */
1052 for (i = 0; i< NPGPTD; i++ ) {
1053 pmap_paddr_t pa;
1054 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1055 * (pd_entry_t *) (p->dirbase + PTDPTDI + i) =
1056 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
1057 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
1058 #ifdef PAE
1059 p->pm_pdpt[i] = pa | INTEL_PTE_VALID;
1060 #endif
1061 }
1062
1063 p->cpus_using = 0;
1064 p->stats.resident_count = 0;
1065 p->stats.wired_count = 0;
1066 p->ref_count = 1;
1067
1068 #ifdef PMAP_QUEUE
1069 /* insert new pmap at head of queue hanging off kernel_pmap */
1070 SPLVM(s);
1071 simple_lock(&free_pmap_lock);
1072 p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next;
1073 kernel_pmap->pmap_link.next = (queue_t)p;
1074
1075 pro = (pmap_t) p->pmap_link.next;
1076 p->pmap_link.prev = (queue_t)pro->pmap_link.prev;
1077 pro->pmap_link.prev = (queue_t)p;
1078
1079
1080 simple_unlock(&free_pmap_lock);
1081 SPLX(s);
1082 #endif
1083
1084 return(p);
1085 }
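/*
 * Example (sketch): reference counting for a task pmap.  pmap_create(0)
 * returns a hardware-referenced map (or panics internally on failure);
 * a non-zero size asks for a software-only map, which this
 * implementation refuses by returning PMAP_NULL.
 */
#if 0
	pmap_t new_pmap;

	new_pmap = pmap_create(0);	/* ref_count starts at 1 */
	pmap_reference(new_pmap);	/* extra reference, e.g. a second user */
	pmap_destroy(new_pmap);		/* drops one reference */
	pmap_destroy(new_pmap);		/* last reference: page tables freed */
#endif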
1086
1087 /*
1088 * Retire the given physical map from service.
1089 * Should only be called if the map contains
1090 * no valid mappings.
1091 */
1092
1093 void
1094 pmap_destroy(
1095 register pmap_t p)
1096 {
1097 register pt_entry_t *pdep;
1098 register int c;
1099 spl_t s;
1100 register vm_page_t m;
1101 #ifdef PMAP_QUEUE
1102 register pmap_t pre,pro;
1103 #endif
1104
1105 if (p == PMAP_NULL)
1106 return;
1107
1108 SPLVM(s);
1109 simple_lock(&p->lock);
1110 c = --p->ref_count;
1111 if (c == 0) {
1112 register int my_cpu;
1113
1114 mp_disable_preemption();
1115 my_cpu = cpu_number();
1116
1117 /*
1118 * If some cpu is not using the physical pmap pointer that it
1119 * is supposed to use (see set_dirbase), we might be using the
1120 * pmap that is being destroyed! Make sure we are
1121 * physically on the right pmap:
1122 */
1123 /* force pmap/cr3 update */
1124 PMAP_UPDATE_TLBS(p,
1125 VM_MIN_ADDRESS,
1126 VM_MAX_KERNEL_ADDRESS);
1127
1128 if (PMAP_REAL(my_cpu) == p) {
1129 PMAP_CPU_CLR(p, my_cpu);
1130 PMAP_REAL(my_cpu) = kernel_pmap;
1131 #ifdef PAE
1132 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
1133 #else
1134 set_cr3((unsigned int)kernel_pmap->pdirbase);
1135 #endif
1136 }
1137 mp_enable_preemption();
1138 }
1139 simple_unlock(&p->lock);
1140 SPLX(s);
1141
1142 if (c != 0) {
1143 return; /* still in use */
1144 }
1145
1146 #ifdef PMAP_QUEUE
1147 /* remove from pmap queue */
1148 SPLVM(s);
1149 simple_lock(&free_pmap_lock);
1150
1151 pre = (pmap_t)p->pmap_link.prev;
1152 pre->pmap_link.next = (queue_t)p->pmap_link.next;
1153 pro = (pmap_t)p->pmap_link.next;
1154 pro->pmap_link.prev = (queue_t)p->pmap_link.prev;
1155
1156 simple_unlock(&free_pmap_lock);
1157 SPLX(s);
1158 #endif
1159
1160 /*
1161 * Free the memory maps, then the
1162 * pmap structure.
1163 */
1164
1165 pdep = (pt_entry_t *)p->dirbase;
1166
1167 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
1168 int ind;
1169 if (*pdep & INTEL_PTE_VALID) {
1170 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1171 vm_object_lock(p->pm_obj);
1172 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1173 if (m == VM_PAGE_NULL) {
1174 panic("pmap_destroy: pte page not in object");
1175 }
1176 vm_page_lock_queues();
1177 vm_page_free(m);
1178 inuse_ptepages_count--;
1179 vm_object_unlock(p->pm_obj);
1180 vm_page_unlock_queues();
1181
1182 /*
1183 * Clear pdes, this might be headed for the cache.
1184 */
1185 *pdep++ = 0;
1186 }
1187 else {
1188 *pdep++ = 0;
1189 }
1190
1191 }
1192
1193 vm_object_deallocate(p->pm_obj);
1194 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1195 #ifdef PAE
1196 zfree(pdpt_zone, (void *)p->pm_hold);
1197 #endif
1198 zfree(pmap_zone, p);
1199 }
1200
1201 /*
1202 * Add a reference to the specified pmap.
1203 */
1204
1205 void
1206 pmap_reference(
1207 register pmap_t p)
1208 {
1209 spl_t s;
1210
1211 if (p != PMAP_NULL) {
1212 SPLVM(s);
1213 simple_lock(&p->lock);
1214 p->ref_count++;
1215 simple_unlock(&p->lock);
1216 SPLX(s);
1217 }
1218 }
1219
1220 /*
1221 * Remove a range of hardware page-table entries.
1222 * The entries given are the first (inclusive)
1223 * and last (exclusive) entries for the VM pages.
1224 * The virtual address is the va for the first pte.
1225 *
1226 * The pmap must be locked.
1227 * If the pmap is not the kernel pmap, the range must lie
1228 * entirely within one pte-page. This is NOT checked.
1229 * Assumes that the pte-page exists.
1230 */
1231
1232 /* static */
1233 void
1234 pmap_remove_range(
1235 pmap_t pmap,
1236 vm_offset_t va,
1237 pt_entry_t *spte,
1238 pt_entry_t *epte)
1239 {
1240 register pt_entry_t *cpte;
1241 int num_removed, num_unwired;
1242 int pai;
1243 pmap_paddr_t pa;
1244
1245 #if DEBUG_PTE_PAGE
1246 if (pmap != kernel_pmap)
1247 ptep_check(get_pte_page(spte));
1248 #endif /* DEBUG_PTE_PAGE */
1249 num_removed = 0;
1250 num_unwired = 0;
1251
1252 for (cpte = spte; cpte < epte;
1253 cpte++, va += PAGE_SIZE) {
1254
1255 pa = pte_to_pa(*cpte);
1256 if (pa == 0)
1257 continue;
1258
1259 num_removed++;
1260 if (iswired(*cpte))
1261 num_unwired++;
1262
1263 if (!valid_page(i386_btop(pa))) {
1264
1265 /*
1266 * Outside range of managed physical memory.
1267 * Just remove the mappings.
1268 */
1269 register pt_entry_t *lpte = cpte;
1270
1271 *lpte = 0;
1272 continue;
1273 }
1274
1275 pai = pa_index(pa);
1276 LOCK_PVH(pai);
1277
1278 /*
1279 * Get the modify and reference bits.
1280 */
1281 {
1282 register pt_entry_t *lpte;
1283
1284 lpte = cpte;
1285 pmap_phys_attributes[pai] |=
1286 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1287 *lpte = 0;
1288
1289 }
1290
1291 /*
1292 * Remove the mapping from the pvlist for
1293 * this physical page.
1294 */
1295 {
1296 register pv_entry_t pv_h, prev, cur;
1297
1298 pv_h = pai_to_pvh(pai);
1299 if (pv_h->pmap == PMAP_NULL) {
1300 panic("pmap_remove: null pv_list!");
1301 }
1302 if (pv_h->va == va && pv_h->pmap == pmap) {
1303 /*
1304 * Header is the pv_entry. Copy the next one
1305 * to header and free the next one (we cannot
1306 * free the header)
1307 */
1308 cur = pv_h->next;
1309 if (cur != PV_ENTRY_NULL) {
1310 *pv_h = *cur;
1311 PV_FREE(cur);
1312 }
1313 else {
1314 pv_h->pmap = PMAP_NULL;
1315 }
1316 }
1317 else {
1318 cur = pv_h;
1319 do {
1320 prev = cur;
1321 if ((cur = prev->next) == PV_ENTRY_NULL) {
1322 panic("pmap-remove: mapping not in pv_list!");
1323 }
1324 } while (cur->va != va || cur->pmap != pmap);
1325 prev->next = cur->next;
1326 PV_FREE(cur);
1327 }
1328 UNLOCK_PVH(pai);
1329 }
1330 }
1331
1332 /*
1333 * Update the counts
1334 */
1335 assert(pmap->stats.resident_count >= num_removed);
1336 pmap->stats.resident_count -= num_removed;
1337 assert(pmap->stats.wired_count >= num_unwired);
1338 pmap->stats.wired_count -= num_unwired;
1339 }
1340
1341 /*
1342 * Remove phys addr if mapped in specified map
1343 *
1344 */
1345 void
1346 pmap_remove_some_phys(
1347 __unused pmap_t map,
1348 __unused ppnum_t pn)
1349 {
1350
1351 /* Implement to support working set code */
1352
1353 }
1354
1355 /*
1356 * Remove the given range of addresses
1357 * from the specified map.
1358 *
1359 * It is assumed that the start and end are properly
1360 * rounded to the hardware page size.
1361 */
1362
1363
1364 void
1365 pmap_remove(
1366 pmap_t map,
1367 addr64_t s64,
1368 addr64_t e64)
1369 {
1370 spl_t spl;
1371 register pt_entry_t *pde;
1372 register pt_entry_t *spte, *epte;
1373 vm_offset_t l;
1374 vm_offset_t s, e;
1375 vm_offset_t orig_s;
1376
1377 if (map == PMAP_NULL)
1378 return;
1379
1380 PMAP_READ_LOCK(map, spl);
1381
1382 if (value_64bit(s64) || value_64bit(e64)) {
1383 panic("pmap_remove addr overflow");
1384 }
1385
1386 orig_s = s = (vm_offset_t)low32(s64);
1387 e = (vm_offset_t)low32(e64);
1388
1389 pde = pmap_pde(map, s);
1390
1391 while (s < e) {
1392 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1393 if (l > e)
1394 l = e;
1395 if (*pde & INTEL_PTE_VALID) {
1396 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1397 spte = &spte[ptenum(s)];
1398 epte = &spte[intel_btop(l-s)];
1399 pmap_remove_range(map, s, spte, epte);
1400 }
1401 s = l;
1402 pde++;
1403 }
1404
1405 PMAP_UPDATE_TLBS(map, orig_s, e);
1406
1407 PMAP_READ_UNLOCK(map, spl);
1408 }
1409
1410 /*
1411 * Routine: pmap_page_protect
1412 *
1413 * Function:
1414 * Lower the permission for all mappings to a given
1415 * page.
1416 */
1417 void
1418 pmap_page_protect(
1419 ppnum_t pn,
1420 vm_prot_t prot)
1421 {
1422 pv_entry_t pv_h, prev;
1423 register pv_entry_t pv_e;
1424 register pt_entry_t *pte;
1425 int pai;
1426 register pmap_t pmap;
1427 spl_t spl;
1428 boolean_t remove;
1429 pmap_paddr_t phys;
1430
1431 assert(pn != vm_page_fictitious_addr);
1432 phys = (pmap_paddr_t)i386_ptob(pn);
1433 if (!valid_page(pn)) {
1434 /*
1435 * Not a managed page.
1436 */
1437 return;
1438 }
1439
1440 /*
1441 * Determine the new protection.
1442 */
1443 switch (prot) {
1444 case VM_PROT_READ:
1445 case VM_PROT_READ|VM_PROT_EXECUTE:
1446 remove = FALSE;
1447 break;
1448 case VM_PROT_ALL:
1449 return; /* nothing to do */
1450 default:
1451 remove = TRUE;
1452 break;
1453 }
1454
1455 /*
1456 * Lock the pmap system first, since we will be changing
1457 * several pmaps.
1458 */
1459
1460 PMAP_WRITE_LOCK(spl);
1461
1462 pai = pa_index(phys);
1463 pv_h = pai_to_pvh(pai);
1464
1465 /*
1466 * Walk down PV list, changing or removing all mappings.
1467 * We do not have to lock the pv_list because we have
1468 * the entire pmap system locked.
1469 */
1470 if (pv_h->pmap != PMAP_NULL) {
1471
1472 prev = pv_e = pv_h;
1473 do {
1474 register vm_offset_t va;
1475 pmap = pv_e->pmap;
1476 /*
1477 * Lock the pmap to block pmap_extract and similar routines.
1478 */
1479 simple_lock(&pmap->lock);
1480
1481 {
1482
1483 va = pv_e->va;
1484 pte = pmap_pte(pmap, va);
1485
1486 /*
1487 * Consistency checks.
1488 */
1489 /* assert(*pte & INTEL_PTE_VALID); XXX */
1490 /* assert(pte_to_phys(*pte) == phys); */
1491
1492 }
1493
1494 /*
1495 * Remove the mapping if new protection is NONE
1496 * or if write-protecting a kernel mapping.
1497 */
1498 if (remove || pmap == kernel_pmap) {
1499 /*
1500 * Remove the mapping, collecting any modify bits.
1501 */
1502 {
1503 pmap_phys_attributes[pai] |=
1504 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1505 *pte++ = 0;
1506 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1507 }
1508
1509 assert(pmap->stats.resident_count >= 1);
1510 pmap->stats.resident_count--;
1511
1512 /*
1513 * Remove the pv_entry.
1514 */
1515 if (pv_e == pv_h) {
1516 /*
1517 * Fix up head later.
1518 */
1519 pv_h->pmap = PMAP_NULL;
1520 }
1521 else {
1522 /*
1523 * Delete this entry.
1524 */
1525 prev->next = pv_e->next;
1526 PV_FREE(pv_e);
1527 }
1528 }
1529 else {
1530 /*
1531 * Write-protect.
1532 */
1533
1534 *pte &= ~INTEL_PTE_WRITE;
1535 pte++;
1536 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1537 /*
1538 * Advance prev.
1539 */
1540 prev = pv_e;
1541 }
1542
1543 simple_unlock(&pmap->lock);
1544
1545 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1546
1547 /*
1548 * If pv_head mapping was removed, fix it up.
1549 */
1550 if (pv_h->pmap == PMAP_NULL) {
1551 pv_e = pv_h->next;
1552 if (pv_e != PV_ENTRY_NULL) {
1553 *pv_h = *pv_e;
1554 PV_FREE(pv_e);
1555 }
1556 }
1557 }
1558
1559 PMAP_WRITE_UNLOCK(spl);
1560 }
1561
1562 /*
1563 * Routine:
1564 * pmap_disconnect
1565 *
1566 * Function:
1567 * Disconnect all mappings for this page and return reference and change status
1568 * in generic format.
1569 *
1570 */
1571 unsigned int pmap_disconnect(
1572 ppnum_t pa)
1573 {
1574 pmap_page_protect(pa, 0); /* disconnect the page */
1575 return (pmap_get_refmod(pa)); /* return ref/chg status */
1576 }
1577
1578 /*
1579 * Set the physical protection on the
1580 * specified range of this map as requested.
1581 * Will not increase permissions.
1582 */
1583 void
1584 pmap_protect(
1585 pmap_t map,
1586 vm_offset_t s,
1587 vm_offset_t e,
1588 vm_prot_t prot)
1589 {
1590 register pt_entry_t *pde;
1591 register pt_entry_t *spte, *epte;
1592 vm_offset_t l;
1593 spl_t spl;
1594 vm_offset_t orig_s = s;
1595
1596
1597 if (map == PMAP_NULL)
1598 return;
1599
1600 /*
1601 * Determine the new protection.
1602 */
1603 switch (prot) {
1604 case VM_PROT_READ:
1605 case VM_PROT_READ|VM_PROT_EXECUTE:
1606 break;
1607 case VM_PROT_READ|VM_PROT_WRITE:
1608 case VM_PROT_ALL:
1609 return; /* nothing to do */
1610 default:
1611 pmap_remove(map, (addr64_t)s, (addr64_t)e);
1612 return;
1613 }
1614
1615 SPLVM(spl);
1616 simple_lock(&map->lock);
1617
1618 pde = pmap_pde(map, s);
1619 while (s < e) {
1620 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1621 if (l > e)
1622 l = e;
1623 if (*pde & INTEL_PTE_VALID) {
1624 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1625 spte = &spte[ptenum(s)];
1626 epte = &spte[intel_btop(l-s)];
1627
1628 while (spte < epte) {
1629 if (*spte & INTEL_PTE_VALID)
1630 *spte &= ~INTEL_PTE_WRITE;
1631 spte++;
1632 }
1633 }
1634 s = l;
1635 pde++;
1636 }
1637
1638 PMAP_UPDATE_TLBS(map, orig_s, e);
1639
1640 simple_unlock(&map->lock);
1641 SPLX(spl);
1642 }
1643
1644
1645
1646 /*
1647 * Insert the given physical page (p) at
1648 * the specified virtual address (v) in the
1649 * target physical map with the protection requested.
1650 *
1651 * If specified, the page will be wired down, meaning
1652 * that the related pte cannot be reclaimed.
1653 *
1654 * NB: This is the only routine which MAY NOT lazy-evaluate
1655 * or lose information. That is, this routine must actually
1656 * insert this page into the given map NOW.
1657 */
1658 void
1659 pmap_enter(
1660 register pmap_t pmap,
1661 vm_offset_t v,
1662 ppnum_t pn,
1663 vm_prot_t prot,
1664 unsigned int flags,
1665 boolean_t wired)
1666 {
1667 register pt_entry_t *pte;
1668 register pv_entry_t pv_h;
1669 register int pai;
1670 pv_entry_t pv_e;
1671 pt_entry_t template;
1672 spl_t spl;
1673 pmap_paddr_t old_pa;
1674 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
1675
1676 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1677 current_thread(),
1678 current_thread(),
1679 pmap, v, pn);
1680
1681 assert(pn != vm_page_fictitious_addr);
1682 if (pmap_debug)
1683 printf("pmap(%x, %x)\n", v, pn);
1684 if (pmap == PMAP_NULL)
1685 return;
1686
1687 /*
1688 * Must allocate a new pvlist entry while we're unlocked;
1689 * zalloc may cause pageout (which will lock the pmap system).
1690 * If we determine we need a pvlist entry, we will unlock
1691 * and allocate one. Then we will retry, throwing away
1692 * the allocated entry later (if we no longer need it).
1693 */
1694 pv_e = PV_ENTRY_NULL;
1695
1696 PMAP_READ_LOCK(pmap, spl);
1697
1698 /*
1699 * Expand pmap to include this pte. Assume that
1700 * pmap is always expanded to include enough hardware
1701 * pages to map one VM page.
1702 */
1703
1704 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1705 /*
1706 * Must unlock to expand the pmap.
1707 */
1708 PMAP_READ_UNLOCK(pmap, spl);
1709
1710 pmap_expand(pmap, v);
1711
1712 PMAP_READ_LOCK(pmap, spl);
1713 }
1714 /*
1715 * Special case if the physical page is already mapped
1716 * at this address.
1717 */
1718 old_pa = pte_to_pa(*pte);
1719 if (old_pa == pa) {
1720 /*
1721 * May be changing its wired attribute or protection
1722 */
1723
1724 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1725
1726 if(flags & VM_MEM_NOT_CACHEABLE) {
1727 if(!(flags & VM_MEM_GUARDED))
1728 template |= INTEL_PTE_PTA;
1729 template |= INTEL_PTE_NCACHE;
1730 }
1731
1732 if (pmap != kernel_pmap)
1733 template |= INTEL_PTE_USER;
1734 if (prot & VM_PROT_WRITE)
1735 template |= INTEL_PTE_WRITE;
1736 if (wired) {
1737 template |= INTEL_PTE_WIRED;
1738 if (!iswired(*pte))
1739 pmap->stats.wired_count++;
1740 }
1741 else {
1742 if (iswired(*pte)) {
1743 assert(pmap->stats.wired_count >= 1);
1744 pmap->stats.wired_count--;
1745 }
1746 }
1747
1748 if (*pte & INTEL_PTE_MOD)
1749 template |= INTEL_PTE_MOD;
1750 WRITE_PTE(pte, template)
1751 pte++;
1752
1753 goto Done;
1754 }
1755
1756 /*
1757 * Outline of code from here:
1758 * 1) If va was mapped, update TLBs, remove the mapping
1759 * and remove old pvlist entry.
1760 * 2) Add pvlist entry for new mapping
1761 * 3) Enter new mapping.
1762 *
1763 * SHARING_FAULTS complicates this slightly in that it cannot
1764 * replace the mapping, but must remove it (because adding the
1765 * pvlist entry for the new mapping may remove others), and
1766 * hence always enters the new mapping at step 3)
1767 *
1768 * If the old physical page is not managed step 1) is skipped
1769 * (except for updating the TLBs), and the mapping is
1770 * overwritten at step 3). If the new physical page is not
1771 * managed, step 2) is skipped.
1772 */
1773
1774 if (old_pa != (pmap_paddr_t) 0) {
1775
1776
1777 #if DEBUG_PTE_PAGE
1778 if (pmap != kernel_pmap)
1779 ptep_check(get_pte_page(pte));
1780 #endif /* DEBUG_PTE_PAGE */
1781
1782 /*
1783 * Don't do anything to pages outside valid memory here.
1784 * Instead convince the code that enters a new mapping
1785 * to overwrite the old one.
1786 */
1787
1788 if (valid_page(i386_btop(old_pa))) {
1789
1790 pai = pa_index(old_pa);
1791 LOCK_PVH(pai);
1792
1793 assert(pmap->stats.resident_count >= 1);
1794 pmap->stats.resident_count--;
1795 if (iswired(*pte)) {
1796 assert(pmap->stats.wired_count >= 1);
1797 pmap->stats.wired_count--;
1798 }
1799
1800 pmap_phys_attributes[pai] |=
1801 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1802 WRITE_PTE(pte, 0)
1803
1804 /*
1805 * Remove the mapping from the pvlist for
1806 * this physical page.
1807 */
1808 {
1809 register pv_entry_t prev, cur;
1810
1811 pv_h = pai_to_pvh(pai);
1812 if (pv_h->pmap == PMAP_NULL) {
1813 panic("pmap_enter: null pv_list!");
1814 }
1815 if (pv_h->va == v && pv_h->pmap == pmap) {
1816 /*
1817 * Header is the pv_entry. Copy the next one
1818 * to header and free the next one (we cannot
1819 * free the header)
1820 */
1821 cur = pv_h->next;
1822 if (cur != PV_ENTRY_NULL) {
1823 *pv_h = *cur;
1824 pv_e = cur;
1825 }
1826 else {
1827 pv_h->pmap = PMAP_NULL;
1828 }
1829 }
1830 else {
1831 cur = pv_h;
1832 do {
1833 prev = cur;
1834 if ((cur = prev->next) == PV_ENTRY_NULL) {
1835 panic("pmap_enter: mapping not in pv_list!");
1836 }
1837 } while (cur->va != v || cur->pmap != pmap);
1838 prev->next = cur->next;
1839 pv_e = cur;
1840 }
1841 }
1842 UNLOCK_PVH(pai);
1843 }
1844 else {
1845
1846 /*
1847 * old_pa is not managed. Pretend it's zero so code
1848 * at Step 3) will enter new mapping (overwriting old
1849 * one). Do removal part of accounting.
1850 */
1851 old_pa = (pmap_paddr_t) 0;
1852 assert(pmap->stats.resident_count >= 1);
1853 pmap->stats.resident_count--;
1854 if (iswired(*pte)) {
1855 assert(pmap->stats.wired_count >= 1);
1856 pmap->stats.wired_count--;
1857 }
1858 }
1859
1860 }
1861
1862 if (valid_page(i386_btop(pa))) {
1863
1864 /*
1865 * Step 2) Enter the mapping in the PV list for this
1866 * physical page.
1867 */
1868
1869 pai = pa_index(pa);
1870
1871
1872 #if SHARING_FAULTS
1873 RetryPvList:
1874 /*
1875 * We can return here from the sharing fault code below
1876 * in case we removed the only entry on the pv list and thus
1877 * must enter the new one in the list header.
1878 */
1879 #endif /* SHARING_FAULTS */
1880 LOCK_PVH(pai);
1881 pv_h = pai_to_pvh(pai);
1882
1883 if (pv_h->pmap == PMAP_NULL) {
1884 /*
1885 * No mappings yet
1886 */
1887 pv_h->va = v;
1888 pv_h->pmap = pmap;
1889 pv_h->next = PV_ENTRY_NULL;
1890 }
1891 else {
1892 #if DEBUG
1893 {
1894 /*
1895 * check that this mapping is not already there
1896 * or there is no alias for this mapping in the same map
1897 */
1898 pv_entry_t e = pv_h;
1899 while (e != PV_ENTRY_NULL) {
1900 if (e->pmap == pmap && e->va == v)
1901 panic("pmap_enter: already in pv_list");
1902 e = e->next;
1903 }
1904 }
1905 #endif /* DEBUG */
1906 #if SHARING_FAULTS
1907 {
1908 /*
1909 * do sharing faults.
1910 * if we find an entry on this pv list in the same address
1911 * space, remove it. we know there will not be more
1912 * than one.
1913 */
1914 pv_entry_t e = pv_h;
1915 pt_entry_t *opte;
1916
1917 while (e != PV_ENTRY_NULL) {
1918 if (e->pmap == pmap) {
1919 /*
1920 * Remove it, drop pv list lock first.
1921 */
1922 UNLOCK_PVH(pai);
1923
1924 opte = pmap_pte(pmap, e->va);
1925 assert(opte != PT_ENTRY_NULL);
1926 /*
1927 * Invalidate the translation buffer,
1928 * then remove the mapping.
1929 */
1930 pmap_remove_range(pmap, e->va, opte,
1931 opte + 1);
1932 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
1933
1934 /*
1935 * We could have removed the head entry,
1936 * so there could be no more entries
1937 * and so we have to use the pv head entry.
1938 * So, go back to the top and try the entry
1939 * again.
1940 */
1941 goto RetryPvList;
1942 }
1943 e = e->next;
1944 }
1945
1946 /*
1947 * check that this mapping is not already there
1948 */
1949 e = pv_h;
1950 while (e != PV_ENTRY_NULL) {
1951 if (e->pmap == pmap)
1952 panic("pmap_enter: alias in pv_list");
1953 e = e->next;
1954 }
1955 }
1956 #endif /* SHARING_FAULTS */
1957 #if DEBUG_ALIAS
1958 {
1959 /*
1960 * check for aliases within the same address space.
1961 */
1962 pv_entry_t e = pv_h;
1963 vm_offset_t rpc = get_rpc();
1964
1965 while (e != PV_ENTRY_NULL) {
1966 if (e->pmap == pmap) {
1967 /*
1968 * log this entry in the alias ring buffer
1969 * if it's not there already.
1970 */
1971 struct pmap_alias *pma;
1972 int ii, logit;
1973
1974 logit = TRUE;
1975 for (ii = 0; ii < pmap_alias_index; ii++) {
1976 if (pmap_aliasbuf[ii].rpc == rpc) {
1977 /* found it in the log already */
1978 logit = FALSE;
1979 break;
1980 }
1981 }
1982 if (logit) {
1983 pma = &pmap_aliasbuf[pmap_alias_index];
1984 pma->pmap = pmap;
1985 pma->va = v;
1986 pma->rpc = rpc;
1987 pma->cookie = PMAP_ALIAS_COOKIE;
1988 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
1989 panic("pmap_enter: exhausted alias log");
1990 }
1991 }
1992 e = e->next;
1993 }
1994 }
1995 #endif /* DEBUG_ALIAS */
1996 /*
1997 * Add new pv_entry after header.
1998 */
1999 if (pv_e == PV_ENTRY_NULL) {
2000 PV_ALLOC(pv_e);
2001 if (pv_e == PV_ENTRY_NULL) {
2002 panic("pmap no pv_e's");
2003 }
2004 }
2005 pv_e->va = v;
2006 pv_e->pmap = pmap;
2007 pv_e->next = pv_h->next;
2008 pv_h->next = pv_e;
2009 /*
2010 * Remember that we used the pvlist entry.
2011 */
2012 pv_e = PV_ENTRY_NULL;
2013 }
2014 UNLOCK_PVH(pai);
2015 }
2016
2017 /*
2018 * Step 3) Enter and count the mapping.
2019 */
2020
2021 pmap->stats.resident_count++;
2022
2023 /*
2024 * Build a template to speed up entering -
2025 * only the pfn changes.
2026 */
2027 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2028
2029 if(flags & VM_MEM_NOT_CACHEABLE) {
2030 if(!(flags & VM_MEM_GUARDED))
2031 template |= INTEL_PTE_PTA;
2032 template |= INTEL_PTE_NCACHE;
2033 }
2034
2035 if (pmap != kernel_pmap)
2036 template |= INTEL_PTE_USER;
2037 if (prot & VM_PROT_WRITE)
2038 template |= INTEL_PTE_WRITE;
2039 if (wired) {
2040 template |= INTEL_PTE_WIRED;
2041 pmap->stats.wired_count++;
2042 }
2043
2044 WRITE_PTE(pte, template)
2045
2046 Done:
2047 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2048
2049 if (pv_e != PV_ENTRY_NULL) {
2050 PV_FREE(pv_e);
2051 }
2052
2053 PMAP_READ_UNLOCK(pmap, spl);
2054 }
2055
2056 /*
2057 * Routine: pmap_change_wiring
2058 * Function: Change the wiring attribute for a map/virtual-address
2059 * pair.
2060 * In/out conditions:
2061 * The mapping must already exist in the pmap.
2062 */
2063 void
2064 pmap_change_wiring(
2065 register pmap_t map,
2066 vm_offset_t v,
2067 boolean_t wired)
2068 {
2069 register pt_entry_t *pte;
2070 spl_t spl;
2071
2072 #if 1
2073 /*
2074 * We must grab the pmap system lock because we may
2075 * change a pte_page queue.
2076 */
2077 PMAP_READ_LOCK(map, spl);
2078
2079 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2080 panic("pmap_change_wiring: pte missing");
2081
2082 if (wired && !iswired(*pte)) {
2083 /*
2084 * wiring down mapping
2085 */
2086 map->stats.wired_count++;
2087 *pte++ |= INTEL_PTE_WIRED;
2088 }
2089 else if (!wired && iswired(*pte)) {
2090 /*
2091 * unwiring mapping
2092 */
2093 assert(map->stats.wired_count >= 1);
2094 map->stats.wired_count--;
2095 *pte++ &= ~INTEL_PTE_WIRED;
2096 }
2097
2098 PMAP_READ_UNLOCK(map, spl);
2099
2100 #else
2101 return;
2102 #endif
2103
2104 }
2105
2106 ppnum_t
2107 pmap_find_phys(pmap_t pmap, addr64_t va)
2108 {
2109 pt_entry_t *ptp;
2110 vm_offset_t a32;
2111 ppnum_t ppn;
2112
2113 if (value_64bit(va))
2114 panic("pmap_find_phys 64 bit value");
2115 a32 = (vm_offset_t) low32(va);
2116 ptp = pmap_pte(pmap, a32);
2117 if (PT_ENTRY_NULL == ptp) {
2118 ppn = 0;
2119 } else {
2120 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2121 }
2122 return ppn;
2123 }
2124
2125 /*
2126 * Routine: pmap_extract
2127 * Function:
2128 * Extract the physical page address associated
2129 * with the given map/virtual_address pair.
2130 * Changed to a shim for backwards compatibility, but it will not
2131 * work for 64 bit systems. Some old drivers that we cannot
2132 * change need this.
2133 */
2134
2135 vm_offset_t
2136 pmap_extract(
2137 register pmap_t pmap,
2138 vm_offset_t va)
2139 {
2140 ppnum_t ppn;
2141 vm_offset_t vaddr;
2142
2143 vaddr = (vm_offset_t)0;
2144 ppn = pmap_find_phys(pmap, (addr64_t)va);
2145 if (ppn) {
2146 vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK);
2147 }
2148 return (vaddr);
2149 }
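/*
 * Example (sketch, with "vaddr" standing for some mapped kernel VA):
 * prefer pmap_find_phys(), which returns a page number; pmap_extract()
 * is a 32-bit shim kept for old drivers and cannot represent physical
 * addresses above 4GB.
 */
#if 0
	ppnum_t		pn;
	vm_offset_t	pa32;

	pn   = pmap_find_phys(kernel_pmap, (addr64_t)vaddr);	/* page number, 0 if unmapped */
	pa32 = pmap_extract(kernel_pmap, vaddr);		/* legacy 32-bit physical address */
#endif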
2150
2151
2152 /*
2153 * Routine: pmap_expand
2154 *
2155 * Expands a pmap to be able to map the specified virtual address.
2156 *
2157 * Allocates new virtual memory for the P0 or P1 portion of the
2158 * pmap, then re-maps the physical pages that were in the old
2159 * pmap to be in the new pmap.
2160 *
2161 * Must be called with the pmap system and the pmap unlocked,
2162 * since these must be unlocked to use vm_allocate or vm_deallocate.
2163 * Thus it must be called in a loop that checks whether the map
2164 * has been expanded enough.
2165 * (We won't loop forever, since page tables aren't shrunk.)
2166 */
2167 void
2168 pmap_expand(
2169 register pmap_t map,
2170 register vm_offset_t v)
2171 {
2172 pt_entry_t *pdp;
2173 register vm_page_t m;
2174 register pmap_paddr_t pa;
2175 register int i;
2176 spl_t spl;
2177 ppnum_t pn;
2178
2179 if (map == kernel_pmap) {
2180 pmap_growkernel(v);
2181 return;
2182 }
2183
2184 /*
2185 * Allocate a VM page for the level 2 page table entries.
2186 */
2187 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2188 VM_PAGE_WAIT();
2189
2190 /*
2191 * put the page into the pmap's obj list so it
2192 * can be found later.
2193 */
2194 pn = m->phys_page;
2195 pa = i386_ptob(pn);
2196 i = pdenum(map, v);
2197 vm_object_lock(map->pm_obj);
2198 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2199 vm_page_lock_queues();
2200 vm_page_wire(m);
2201 inuse_ptepages_count++;
2202 vm_object_unlock(map->pm_obj);
2203 vm_page_unlock_queues();
2204
2205 /*
2206 * Zero the page.
2207 */
2208 pmap_zero_page(pn);
2209
2210 PMAP_READ_LOCK(map, spl);
2211 /*
2212 * See if someone else expanded us first
2213 */
2214 if (pmap_pte(map, v) != PT_ENTRY_NULL) {
2215 PMAP_READ_UNLOCK(map, spl);
2216 vm_object_lock(map->pm_obj);
2217 vm_page_lock_queues();
2218 vm_page_free(m);
2219 inuse_ptepages_count--;
2220 vm_page_unlock_queues();
2221 vm_object_unlock(map->pm_obj);
2222 return;
2223 }
2224
2225 /*
2226 * Set the page directory entry for this page table.
2227 * If we have allocated more than one hardware page,
2228 * set several page directory entries.
2229 */
2230
2231 pdp = &map->dirbase[pdenum(map, v)];
2232 *pdp = pa_to_pte(pa)
2233 | INTEL_PTE_VALID
2234 | INTEL_PTE_USER
2235 | INTEL_PTE_WRITE;
2236
2237 PMAP_READ_UNLOCK(map, spl);
2238 return;
2239 }
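/*
 * Illustrative only: a sketch of the caller-side retry loop described in
 * the block comment above (the pattern pmap_enter() and similar callers
 * are expected to use); not an additional interface of this file.
 *
 *	while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL)
 *		pmap_expand(pmap, v);	-- may block; pmap must be unlocked
 */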
2240
2241 /*
2242 * Copy the range specified by src_addr/len
2243 * from the source map to the range dst_addr/len
2244 * in the destination map.
2245 *
2246 * This routine is only advisory and need not do anything.
2247 */
2248 #if 0
2249 void
2250 pmap_copy(
2251 pmap_t dst_pmap,
2252 pmap_t src_pmap,
2253 vm_offset_t dst_addr,
2254 vm_size_t len,
2255 vm_offset_t src_addr)
2256 {
2257 #ifdef lint
2258 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2259 #endif /* lint */
2260 }
2261 #endif/* 0 */
2262
2263 /*
2264 * pmap_sync_page_data_phys(ppnum_t pa)
2265 *
2266 * Invalidates all of the instruction cache on a physical page and
2267 * pushes any dirty data from the data cache for the same physical page.
2268 * Not required on i386.
2269 */
2270 void
2271 pmap_sync_page_data_phys(__unused ppnum_t pa)
2272 {
2273 return;
2274 }
2275
2276 /*
2277 * pmap_sync_page_attributes_phys(ppnum_t pa)
2278 *
2279 * Write back and invalidate all cachelines on a physical page.
2280 */
2281 void
2282 pmap_sync_page_attributes_phys(ppnum_t pa)
2283 {
2284 cache_flush_page_phys(pa);
2285 }
2286
2287 int collect_ref;
2288 int collect_unref;
2289
2290 /*
2291 * Routine: pmap_collect
2292 * Function:
2293 * Garbage collects the physical map system for
2294 * pages which are no longer used.
2295 * Success need not be guaranteed -- that is, there
2296 * may well be pages which are not referenced, but
2297 * others may be collected.
2298 * Usage:
2299 * Called by the pageout daemon when pages are scarce.
2300 */
2301 void
2302 pmap_collect(
2303 pmap_t p)
2304 {
2305 register pt_entry_t *pdp, *ptp;
2306 pt_entry_t *eptp;
2307 int wired;
2308 spl_t spl;
2309
2310 if (p == PMAP_NULL)
2311 return;
2312
2313 if (p == kernel_pmap)
2314 return;
2315
2316 /*
2317 * Garbage collect map.
2318 */
2319 PMAP_READ_LOCK(p, spl);
2320
2321 for (pdp = (pt_entry_t *)p->dirbase;
2322 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2323 pdp++)
2324 {
2325 if (*pdp & INTEL_PTE_VALID) {
2326 if(*pdp & INTEL_PTE_REF) {
2327 *pdp &= ~INTEL_PTE_REF;
2328 collect_ref++;
2329 } else {
2330 collect_unref++;
2331 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2332 eptp = ptp + NPTEPG;
2333
2334 /*
2335 * If the pte page has any wired mappings, we cannot
2336 * free it.
2337 */
2338 wired = 0;
2339 {
2340 register pt_entry_t *ptep;
2341 for (ptep = ptp; ptep < eptp; ptep++) {
2342 if (iswired(*ptep)) {
2343 wired = 1;
2344 break;
2345 }
2346 }
2347 }
2348 if (!wired) {
2349 /*
2350 * Remove the virtual addresses mapped by this pte page.
2351 */
2352 pmap_remove_range(p,
2353 pdetova(pdp - (pt_entry_t *)p->dirbase),
2354 ptp,
2355 eptp);
2356
2357 /*
2358 * Invalidate the page directory pointer.
2359 */
2360 *pdp = 0x0;
2361
2362 PMAP_READ_UNLOCK(p, spl);
2363
2364 /*
2365 * And free the pte page itself.
2366 */
2367 {
2368 register vm_page_t m;
2369
2370 vm_object_lock(p->pm_obj);
2371 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
2372 if (m == VM_PAGE_NULL)
2373 panic("pmap_collect: pte page not in object");
2374 vm_page_lock_queues();
2375 vm_page_free(m);
2376 inuse_ptepages_count--;
2377 vm_page_unlock_queues();
2378 vm_object_unlock(p->pm_obj);
2379 }
2380
2381 PMAP_READ_LOCK(p, spl);
2382 }
2383 }
2384 }
2385 }
2386 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2387 PMAP_READ_UNLOCK(p, spl);
2388 return;
2389
2390 }
2391
2392 /*
2393 * Routine: pmap_kernel
2394 * Function:
2395 * Returns the physical map handle for the kernel.
2396 */
2397 #if 0
2398 pmap_t
2399 pmap_kernel(void)
2400 {
2401 return (kernel_pmap);
2402 }
2403 #endif/* 0 */
2404
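/*
 *	Routine:	pmap_copy_page
 *	Function:
 *		Copy the contents of physical page src to physical page dst
 *		using bcopy_phys().
 */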
2405 void
2406 pmap_copy_page(
2407 ppnum_t src,
2408 ppnum_t dst)
2409 {
2410 bcopy_phys((addr64_t)i386_ptob(src),
2411 (addr64_t)i386_ptob(dst),
2412 PAGE_SIZE);
2413 }
2414
2415
2416 /*
2417 * Routine: pmap_pageable
2418 * Function:
2419 * Make the specified pages (by pmap, offset)
2420 * pageable (or not) as requested.
2421 *
2422 * A page which is not pageable may not take
2423 * a fault; therefore, its page table entry
2424 * must remain valid for the duration.
2425 *
2426 * This routine is merely advisory; pmap_enter
2427 * will specify that these pages are to be wired
2428 * down (or not) as appropriate.
2429 */
2430 void
2431 pmap_pageable(
2432 __unused pmap_t pmap,
2433 __unused vm_offset_t start_addr,
2434 __unused vm_offset_t end_addr,
2435 __unused boolean_t pageable)
2436 {
2437 #ifdef lint
2438 pmap++; start_addr++; end_addr++; pageable++;
2439 #endif /* lint */
2440 }
2441
2442 /*
2443 * Clear specified attribute bits.
2444 */
2445 void
2446 phys_attribute_clear(
2447 ppnum_t pn,
2448 int bits)
2449 {
2450 pv_entry_t pv_h;
2451 register pv_entry_t pv_e;
2452 register pt_entry_t *pte;
2453 int pai;
2454 register pmap_t pmap;
2455 spl_t spl;
2456 pmap_paddr_t phys;
2457
2458 assert(pn != vm_page_fictitious_addr);
2459 if (!valid_page(pn)) {
2460 /*
2461 * Not a managed page.
2462 */
2463 return;
2464 }
2465
2466 /*
2467 * Lock the pmap system first, since we will be changing
2468 * several pmaps.
2469 */
2470
2471 PMAP_WRITE_LOCK(spl);
2472 phys = i386_ptob(pn);
2473 pai = pa_index(phys);
2474 pv_h = pai_to_pvh(pai);
2475
2476 /*
2477 * Walk down PV list, clearing all modify or reference bits.
2478 * We do not have to lock the pv_list because we have
2479 * the entire pmap system locked.
2480 */
2481 if (pv_h->pmap != PMAP_NULL) {
2482 /*
2483 * There are some mappings.
2484 */
2485 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2486
2487 pmap = pv_e->pmap;
2488 /*
2489 * Lock the pmap to block pmap_extract and similar routines.
2490 */
2491 simple_lock(&pmap->lock);
2492
2493 {
2494 register vm_offset_t va;
2495
2496 va = pv_e->va;
2497 pte = pmap_pte(pmap, va);
2498
2499 #if 0
2500 /*
2501 * Consistency checks.
2502 */
2503 assert(*pte & INTEL_PTE_VALID);
2504 /* assert(pte_to_phys(*pte) == phys); */
2505 #endif
2506
2507 /*
2508 * Clear modify or reference bits.
2509 */
2510
2511 *pte++ &= ~bits;
2512 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
2513 }
2514 simple_unlock(&pmap->lock);
2515
2516 }
2517 }
2518
2519 pmap_phys_attributes[pai] &= ~bits;
2520
2521 PMAP_WRITE_UNLOCK(spl);
2522 }
2523
2524 /*
2525 * Check specified attribute bits.
2526 */
2527 boolean_t
2528 phys_attribute_test(
2529 ppnum_t pn,
2530 int bits)
2531 {
2532 pv_entry_t pv_h;
2533 register pv_entry_t pv_e;
2534 register pt_entry_t *pte;
2535 int pai;
2536 register pmap_t pmap;
2537 spl_t spl;
2538 pmap_paddr_t phys;
2539
2540 assert(pn != vm_page_fictitious_addr);
2541 if (!valid_page(pn)) {
2542 /*
2543 * Not a managed page.
2544 */
2545 return (FALSE);
2546 }
2547
2548 /*
2549 * Lock the pmap system first, since we will be checking
2550 * several pmaps.
2551 */
2552
2553 PMAP_WRITE_LOCK(spl);
2554 phys = i386_ptob(pn);
2555 pai = pa_index(phys);
2556 pv_h = pai_to_pvh(pai);
2557
2558 if (pmap_phys_attributes[pai] & bits) {
2559 PMAP_WRITE_UNLOCK(spl);
2560 return (TRUE);
2561 }
2562
2563 /*
2564 * Walk down PV list, checking all mappings.
2565 * We do not have to lock the pv_list because we have
2566 * the entire pmap system locked.
2567 */
2568 if (pv_h->pmap != PMAP_NULL) {
2569 /*
2570 * There are some mappings.
2571 */
2572 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2573
2574 pmap = pv_e->pmap;
2575 /*
2576 * Lock the pmap to block pmap_extract and similar routines.
2577 */
2578 simple_lock(&pmap->lock);
2579
2580 {
2581 register vm_offset_t va;
2582
2583 va = pv_e->va;
2584 pte = pmap_pte(pmap, va);
2585
2586 #if 0
2587 /*
2588 * Consistency checks.
2589 */
2590 assert(*pte & INTEL_PTE_VALID);
2591 /* assert(pte_to_phys(*pte) == phys); */
2592 #endif
2593 }
2594
2595 /*
2596 * Check modify or reference bits.
2597 */
2598 {
2599 if (*pte++ & bits) {
2600 simple_unlock(&pmap->lock);
2601 PMAP_WRITE_UNLOCK(spl);
2602 return (TRUE);
2603 }
2604 }
2605 simple_unlock(&pmap->lock);
2606 }
2607 }
2608 PMAP_WRITE_UNLOCK(spl);
2609 return (FALSE);
2610 }
2611
2612 /*
2613 * Set specified attribute bits.
2614 */
2615 void
2616 phys_attribute_set(
2617 ppnum_t pn,
2618 int bits)
2619 {
2620 int spl;
2621 pmap_paddr_t phys;
2622
2623 assert(pn != vm_page_fictitious_addr);
2624 if (!valid_page(pn)) {
2625 /*
2626 * Not a managed page.
2627 */
2628 return;
2629 }
2630
2631 /*
2632 * Lock the pmap system and set the requested bits in
2633 * the phys attributes array. Don't need to bother with
2634 * ptes because the test routine looks here first.
2635 */
2636 phys = i386_ptob(pn);
2637 PMAP_WRITE_LOCK(spl);
2638 pmap_phys_attributes[pa_index(phys)] |= bits;
2639 PMAP_WRITE_UNLOCK(spl);
2640 }
2641
2642 /*
2643 * Set the modify bit on the specified physical page.
2644 */
2645
2646 void pmap_set_modify(
2647 ppnum_t pn)
2648 {
2649 phys_attribute_set(pn, PHYS_MODIFIED);
2650 }
2651
2652 /*
2653 * Clear the modify bits on the specified physical page.
2654 */
2655
2656 void
2657 pmap_clear_modify(
2658 ppnum_t pn)
2659 {
2660 phys_attribute_clear(pn, PHYS_MODIFIED);
2661 }
2662
2663 /*
2664 * pmap_is_modified:
2665 *
2666 * Return whether or not the specified physical page is modified
2667 * by any physical maps.
2668 */
2669
2670 boolean_t
2671 pmap_is_modified(
2672 ppnum_t pn)
2673 {
2674 return (phys_attribute_test(pn, PHYS_MODIFIED));
2675 }
2676
2677 /*
2678 * pmap_clear_reference:
2679 *
2680 * Clear the reference bit on the specified physical page.
2681 */
2682
2683 void
2684 pmap_clear_reference(
2685 ppnum_t pn)
2686 {
2687 phys_attribute_clear(pn, PHYS_REFERENCED);
2688 }
2689
2690 void
2691 pmap_set_reference(ppnum_t pn)
2692 {
2693 phys_attribute_set(pn, PHYS_REFERENCED);
2694 }
2695
2696 /*
2697 * pmap_is_referenced:
2698 *
2699 * Return whether or not the specified physical page is referenced
2700 * by any physical maps.
2701 */
2702
2703 boolean_t
2704 pmap_is_referenced(
2705 ppnum_t pn)
2706 {
2707 return (phys_attribute_test(pn, PHYS_REFERENCED));
2708 }
2709
2710 /*
2711 * pmap_get_refmod(phys)
2712 * returns the referenced and modified bits of the specified
2713 * physical page.
2714 */
2715 unsigned int
2716 pmap_get_refmod(ppnum_t pa)
2717 {
2718 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
2719 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
2720 }
2721
2722 /*
2723 * pmap_clear_refmod(phys, mask)
2724 * clears the referenced and modified bits as specified by the mask
2725 * of the specified physical page.
2726 */
2727 void
2728 pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2729 {
2730 unsigned int x86Mask;
2731
2732 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2733 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2734 phys_attribute_clear(pa, x86Mask);
2735 }
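/*
 * Illustrative only: clearing just the reference bit of a page, while
 * leaving its modify bit alone, is
 *
 *	pmap_clear_refmod(pn, VM_MEM_REFERENCED);
 *
 * which reduces to phys_attribute_clear(pn, PHYS_REFERENCED).
 */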
2736
2737 /*
2738 * Set the modify bit on the specified range
2739 * of this map as requested.
2740 *
2741 * This optimization stands only if each time the dirty bit
2742 * in vm_page_t is tested, it is also tested in the pmap.
2743 */
2744 void
2745 pmap_modify_pages(
2746 pmap_t map,
2747 vm_offset_t s,
2748 vm_offset_t e)
2749 {
2750 spl_t spl;
2751 register pt_entry_t *pde;
2752 register pt_entry_t *spte, *epte;
2753 vm_offset_t l;
2754 vm_offset_t orig_s = s;
2755
2756 if (map == PMAP_NULL)
2757 return;
2758
2759 PMAP_READ_LOCK(map, spl);
2760
2761 pde = pmap_pde(map, s);
2762 while (s && s < e) {
2763 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
2764 if (l > e)
2765 l = e;
2766 if (*pde & INTEL_PTE_VALID) {
2767 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
2768 if (l) {
2769 spte = &spte[ptenum(s)];
2770 epte = &spte[intel_btop(l-s)];
2771 } else {
2772 epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
2773 spte = &spte[ptenum(s)];
2774 }
2775 while (spte < epte) {
2776 if (*spte & INTEL_PTE_VALID) {
2777 *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
2778 }
2779 spte++;
2780 }
2781 }
2782 s = l;
2783 pde++;
2784 }
2785 PMAP_UPDATE_TLBS(map, orig_s, e);
2786 PMAP_READ_UNLOCK(map, spl);
2787 }
2788
2789
2790 void
2791 invalidate_icache(__unused vm_offset_t addr,
2792 __unused unsigned cnt,
2793 __unused int phys)
2794 {
2795 return;
2796 }
2797 void
2798 flush_dcache(__unused vm_offset_t addr,
2799 __unused unsigned count,
2800 __unused int phys)
2801 {
2802 return;
2803 }
2804
2805 /*
2806 * TLB Coherence Code (TLB "shootdown" code)
2807 *
2808 * Threads that belong to the same task share the same address space and
2809 * hence share a pmap. However, they may run on distinct cpus and thus
2810 * have distinct TLBs that cache page table entries. In order to guarantee
2811 * the TLBs are consistent, whenever a pmap is changed, all threads that
2812 * are active in that pmap must have their TLB updated. To keep track of
2813 * this information, the set of cpus that are currently using a pmap is
2814 * maintained within each pmap structure (cpus_using). Pmap_activate() and
2815 * pmap_deactivate() add and remove, respectively, a cpu from this set.
2816 * Since the TLBs are not addressable over the bus, each processor must
2817 * flush its own TLB; a processor that needs to invalidate another TLB
2818 * needs to interrupt the processor that owns that TLB to signal the
2819 * update.
2820 *
2821 * Whenever a pmap is updated, the lock on that pmap is locked, and all
2822 * cpus using the pmap are signaled to invalidate. All threads that need
2823 * to activate a pmap must wait for the lock to clear to await any updates
2824 * in progress before using the pmap. They must ACQUIRE the lock to add
2825 * their cpu to the cpus_using set. An implicit assumption made
2826 * throughout the TLB code is that all kernel code that runs at or higher
2827 * than splvm blocks out update interrupts, and that such code does not
2828 * touch pageable pages.
2829 *
2830 * A shootdown interrupt serves another function besides signaling a
2831 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
2832 * waits for both the pmap lock (and the kernel pmap lock) to clear,
2833 * preventing user code from making implicit pmap updates while the
2834 * sending processor is performing its update. (This could happen via a
2835 * user data write reference that turns on the modify bit in the page
2836 * table). It must wait for any kernel updates that may have started
2837 * concurrently with a user pmap update because the IPC code
2838 * changes mappings.
2839 * Spinning on the VALUES of the locks is sufficient (rather than
2840 * having to acquire the locks) because any updates that occur subsequent
2841 * to finding the lock unlocked will be signaled via another interrupt.
2842 * (This assumes the interrupt is cleared before the low level interrupt code
2843 * calls pmap_update_interrupt()).
2844 *
2845 * The signaling processor must wait for any implicit updates in progress
2846 * to terminate before continuing with its update. Thus it must wait for an
2847 * acknowledgement of the interrupt from each processor for which such
2848 * references could be made. For maintaining this information, a set
2849 * cpus_active is used. A cpu is in this set if and only if it can
2850 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2851 * this set; when all such cpus are removed, it is safe to update.
2852 *
2853 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
2854 * be at least at the priority of the interprocessor interrupt
2855 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2856 * kernel update; it would spin forever in pmap_update_interrupt() trying
2857 * to acquire the user pmap lock it had already acquired. Furthermore A
2858 * must remove itself from cpus_active. Otherwise, another cpu holding
2859 * the lock (B) could be in the process of sending an update signal to A,
2860 * and thus be waiting for A to remove itself from cpus_active. If A is
2861 * spinning on the lock at that priority, this will never happen and a
2862 * deadlock will result.
2863 */
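/*
 * In outline (a summary of the protocol described above, not an
 * additional interface):
 *
 *	1. The updating cpu locks the pmap and changes the ptes.
 *	2. signal_cpus() queues the affected (pmap, start, end) range on the
 *	   update list of each cpu using the pmap and interrupts it with
 *	   MP_TLB_FLUSH (idle cpus are skipped unless the pmap is the kernel
 *	   pmap or that cpu's current pmap).
 *	3. Each interrupted cpu enters pmap_update_interrupt(), leaves
 *	   cpus_active, waits for the pmap locks to clear, and drains its
 *	   update list in process_pmap_updates().
 *	4. The updating cpu waits for the interrupts to be acknowledged
 *	   before completing its update and unlocking the pmap.
 */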
2864
2865 /*
2866 * Signal the cpus in use_list that they must flush their TLBs
2867 */
2868 void
2869 signal_cpus(
2870 cpu_set use_list,
2871 pmap_t pmap,
2872 vm_offset_t start_addr,
2873 vm_offset_t end_addr)
2874 {
2875 register int which_cpu, j;
2876 register pmap_update_list_t update_list_p;
2877
2878 while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
2879 which_cpu -= 1; /* convert to 0 origin */
2880
2881 update_list_p = cpu_update_list(which_cpu);
2882 simple_lock(&update_list_p->lock);
2883
2884 j = update_list_p->count;
2885 if (j >= UPDATE_LIST_SIZE) {
2886 /*
2887 * list overflowed. Change last item to
2888 * indicate overflow.
2889 */
2890 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2891 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2892 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2893 }
2894 else {
2895 update_list_p->item[j].pmap = pmap;
2896 update_list_p->item[j].start = start_addr;
2897 update_list_p->item[j].end = end_addr;
2898 update_list_p->count = j+1;
2899 }
2900 cpu_update_needed(which_cpu) = TRUE;
2901 simple_unlock(&update_list_p->lock);
2902
2903 /* if it's the kernel pmap, ignore cpus_idle */
2904 if (((cpus_idle & (1 << which_cpu)) == 0) ||
2905 (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
2906 {
2907 i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
2908 }
2909 use_list &= ~(1 << which_cpu);
2910 }
2911 }
2912
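/*
 * Drain this cpu's TLB update list: for each queued (pmap, start, end)
 * entry that refers to the given active pmap or to the kernel pmap,
 * invalidate the corresponding TLB range; if that pmap has been destroyed
 * (ref_count <= 0), switch this cpu back to the kernel pmap instead.
 * Called from pmap_update_interrupt().
 */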
2913 void
2914 process_pmap_updates(
2915 register pmap_t my_pmap)
2916 {
2917 register int my_cpu;
2918 register pmap_update_list_t update_list_p;
2919 register int j;
2920 register pmap_t pmap;
2921
2922 mp_disable_preemption();
2923 my_cpu = cpu_number();
2924 update_list_p = cpu_update_list(my_cpu);
2925 simple_lock(&update_list_p->lock);
2926
2927 for (j = 0; j < update_list_p->count; j++) {
2928 pmap = update_list_p->item[j].pmap;
2929 if (pmap == my_pmap ||
2930 pmap == kernel_pmap) {
2931
2932 if (pmap->ref_count <= 0) {
2933 PMAP_CPU_CLR(pmap, my_cpu);
2934 PMAP_REAL(my_cpu) = kernel_pmap;
2935 #ifdef PAE
2936 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
2937 #else
2938 set_cr3((unsigned int)kernel_pmap->pdirbase);
2939 #endif
2940 } else
2941 INVALIDATE_TLB(pmap,
2942 update_list_p->item[j].start,
2943 update_list_p->item[j].end);
2944 }
2945 }
2946 update_list_p->count = 0;
2947 cpu_update_needed(my_cpu) = FALSE;
2948 simple_unlock(&update_list_p->lock);
2949 mp_enable_preemption();
2950 }
2951
2952 /*
2953 * Interrupt routine for TBIA requested from other processor.
2954 * This routine can also be called at interrupt time if
2955 * the cpu was idle. Some driver interrupt routines might access
2956 * newly allocated vm (this is the case for hd).
2957 */
2958 void
2959 pmap_update_interrupt(void)
2960 {
2961 register int my_cpu;
2962 spl_t s;
2963 register pmap_t my_pmap;
2964
2965 mp_disable_preemption();
2966 my_cpu = cpu_number();
2967
2968 /*
2969 * Raise spl (to splhigh, above splip) to block out pmap_extract
2970 * from IO code (which would put this cpu back in the active
2971 * set).
2972 */
2973 s = splhigh();
2974
2975 my_pmap = PMAP_REAL(my_cpu);
2976
2977 if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
2978 my_pmap = kernel_pmap;
2979
2980 do {
2981 LOOP_VAR;
2982
2983 /*
2984 * Indicate that we're not using either user or kernel
2985 * pmap.
2986 */
2987 i_bit_clear(my_cpu, &cpus_active);
2988
2989 /*
2990 * Wait for any pmap updates in progress, on either user
2991 * or kernel pmap.
2992 */
2993 while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
2994 *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
2995 LOOP_CHECK("pmap_update_interrupt", my_pmap);
2996 cpu_pause();
2997 }
2998
2999 process_pmap_updates(my_pmap);
3000
3001 i_bit_set(my_cpu, &cpus_active);
3002
3003 } while (cpu_update_needed(my_cpu));
3004
3005 splx(s);
3006 mp_enable_preemption();
3007 }
3008
3009 #if MACH_KDB
3010
3011 /* show phys page mappings and attributes */
3012
3013 extern void db_show_page(pmap_paddr_t pa);
3014
3015 void
3016 db_show_page(pmap_paddr_t pa)
3017 {
3018 pv_entry_t pv_h;
3019 int pai;
3020 char attr;
3021
3022 pai = pa_index(pa);
3023 pv_h = pai_to_pvh(pai);
3024
3025 attr = pmap_phys_attributes[pai];
3026 printf("phys page %x ", pa);
3027 if (attr & PHYS_MODIFIED)
3028 printf("modified, ");
3029 if (attr & PHYS_REFERENCED)
3030 printf("referenced, ");
3031 if (pv_h->pmap || pv_h->next)
3032 printf(" mapped at\n");
3033 else
3034 printf(" not mapped\n");
3035 for (; pv_h; pv_h = pv_h->next)
3036 if (pv_h->pmap)
3037 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3038 }
3039
3040 #endif /* MACH_KDB */
3041
3042 #if MACH_KDB
3043 void db_kvtophys(vm_offset_t);
3044 void db_show_vaddrs(pt_entry_t *);
3045
3046 /*
3047 * print out the results of kvtophys(arg)
3048 */
3049 void
3050 db_kvtophys(
3051 vm_offset_t vaddr)
3052 {
3053 db_printf("0x%x", kvtophys(vaddr));
3054 }
3055
3056 /*
3057 * Walk the page tables.
3058 */
3059 void
3060 db_show_vaddrs(
3061 pt_entry_t *dirbase)
3062 {
3063 pt_entry_t *ptep, *pdep, tmp;
3064 int x, y, pdecnt, ptecnt;
3065
3066 if (dirbase == 0) {
3067 dirbase = kernel_pmap->dirbase;
3068 }
3069 if (dirbase == 0) {
3070 db_printf("need a dirbase...\n");
3071 return;
3072 }
3073 dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
3074
3075 db_printf("dirbase: 0x%x\n", dirbase);
3076
3077 pdecnt = ptecnt = 0;
3078 pdep = &dirbase[0];
3079 for (y = 0; y < NPDEPG; y++, pdep++) {
3080 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3081 continue;
3082 }
3083 pdecnt++;
3084 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3085 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
3086 for (x = 0; x < NPTEPG; x++, ptep++) {
3087 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3088 continue;
3089 }
3090 ptecnt++;
3091 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3092 x,
3093 *ptep,
3094 (y << 22) | (x << 12),
3095 *ptep & ~INTEL_OFFMASK);
3096 }
3097 }
3098
3099 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3100
3101 }
3102 #endif /* MACH_KDB */
3103
3104 #include <mach_vm_debug.h>
3105 #if MACH_VM_DEBUG
3106 #include <vm/vm_debug.h>
3107
3108 int
3109 pmap_list_resident_pages(
3110 __unused pmap_t pmap,
3111 __unused vm_offset_t *listp,
3112 __unused int space)
3113 {
3114 return 0;
3115 }
3116 #endif /* MACH_VM_DEBUG */
3117
3118 #ifdef MACH_BSD
3119 /*
3120 * pmap_movepage (pmap_pagemove)
3121 *
3122 * BSD support routine to reassign virtual addresses.
3123 */
3124
3125 void
3126 pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
3127 {
3128 spl_t spl;
3129 pt_entry_t *pte, saved_pte;
3130
3131 /* Lock the kernel map */
3132 PMAP_READ_LOCK(kernel_pmap, spl);
3133
3134
3135 while (size > 0) {
3136 pte = pmap_pte(kernel_pmap, from);
3137 if (pte == NULL)
3138 panic("pmap_pagemove from pte NULL");
3139 saved_pte = *pte;
3140 PMAP_READ_UNLOCK(kernel_pmap, spl);
3141
3142 pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
3143 VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);
3144
3145 pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));
3146
3147 PMAP_READ_LOCK(kernel_pmap, spl);
3148 pte = pmap_pte(kernel_pmap, to);
3149 if (pte == NULL)
3150 panic("pmap_pagemove 'to' pte NULL");
3151
3152 *pte = saved_pte;
3153
3154 from += PAGE_SIZE;
3155 to += PAGE_SIZE;
3156 size -= PAGE_SIZE;
3157 }
3158
3159 /* Get the processors to update the TLBs */
3160 PMAP_UPDATE_TLBS(kernel_pmap, from, from+size);
3161 PMAP_UPDATE_TLBS(kernel_pmap, to, to+size);
3162
3163 PMAP_READ_UNLOCK(kernel_pmap, spl);
3164
3165 }
3166 #endif /* MACH_BSD */
3167
3168 /* temporary workaround */
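/*
 * Returns TRUE if the page at va may be touched for a core dump: FALSE if
 * no pte exists, otherwise TRUE unless the mapping is both non-cached and
 * wired.
 */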
3169 boolean_t
3170 coredumpok(vm_map_t map, vm_offset_t va)
3171 {
3172 pt_entry_t *ptep;
3173
3174 ptep = pmap_pte(map->pmap, va);
3175 if (0 == ptep)
3176 return FALSE;
3177 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
3178 }
3179
3180 /*
3181 * grow the number of kernel page table entries, if needed
3182 */
3183 void
3184 pmap_growkernel(vm_offset_t addr)
3185 {
3186 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
3187 struct pmap *pmap;
3188 int s;
3189 vm_offset_t ptppaddr;
3190 ppnum_t ppn;
3191 vm_page_t nkpg;
3192 pd_entry_t newpdir = 0;
3193
3194 /*
3195 * Serialize.
3196 * Losers return to try again until the winner completes the work.
3197 */
3198 if (kptobj == 0) panic("growkernel 0");
3199 if (!vm_object_lock_try(kptobj)) {
3200 return;
3201 }
3202
3203 vm_page_lock_queues();
3204
3205 s = splhigh();
3206
3207 /*
3208 * If this is the first time through, locate the end of the
3209 * kernel page table entries and set nkpt to the current
3210 * number of kernel page table pages.
3211 */
3212
3213 if (kernel_vm_end == 0) {
3214 kernel_vm_end = KERNBASE;
3215 nkpt = 0;
3216
3217 while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3218 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3219 nkpt++;
3220 }
3221 }
3222
3223 /*
3224 * Now allocate and map the required number of page tables
3225 */
3226 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3227 while (kernel_vm_end < addr) {
3228 if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3229 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3230 continue; /* someone already filled this one */
3231 }
3232
3233 nkpg = vm_page_alloc(kptobj, nkpt);
3234 if (!nkpg)
3235 panic("pmap_growkernel: no memory to grow kernel");
3236
3237 nkpt++;
3238 vm_page_wire(nkpg);
3239 ppn = nkpg->phys_page;
3240 pmap_zero_page(ppn);
3241 ptppaddr = i386_ptob(ppn);
3242 newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
3243 INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
3244 pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;
3245
3246 simple_lock(&free_pmap_lock);
3247 for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
3248 pmap != kernel_pmap ;
3249 pmap = (struct pmap *)pmap->pmap_link.next ) {
3250 *pmap_pde(pmap, kernel_vm_end) = newpdir;
3251 }
3252 simple_unlock(&free_pmap_lock);
3253 }
3254 splx(s);
3255 vm_page_unlock_queues();
3256 vm_object_unlock(kptobj);
3257 #endif
3258 }
3259
3260 pt_entry_t *
3261 pmap_mapgetpte(vm_map_t map, vm_offset_t v)
3262 {
3263 return pmap_pte(map->pmap, v);
3264 }
3265
3266 boolean_t
3267 phys_page_exists(
3268 ppnum_t pn)
3269 {
3270 pmap_paddr_t phys;
3271
3272 assert(pn != vm_page_fictitious_addr);
3273
3274 if (!pmap_initialized)
3275 return (TRUE);
3276 phys = (pmap_paddr_t) i386_ptob(pn);
3277 if (!pmap_valid_page(pn))
3278 return (FALSE);
3279
3280 return TRUE;
3281 }
3282
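/*
 * Pre-allocate a pool of pv entries at startup and place them on the
 * free list.
 */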
3283 void
3284 mapping_free_prime(void)
3285 {
3286 int i;
3287 pv_entry_t pv_e;
3288
3289 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3290 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3291 PV_FREE(pv_e);
3292 }
3293 }
3294
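/*
 * Replenish the pv entry free list when it falls below
 * PV_LOW_WATER_MARK; the first invocation also sets itself up as a
 * thread call (mapping_adjust_call).
 */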
3295 void
3296 mapping_adjust(void)
3297 {
3298 pv_entry_t pv_e;
3299 int i;
3300 int spl;
3301
3302 if (mapping_adjust_call == NULL) {
3303 thread_call_setup(&mapping_adjust_call_data,
3304 (thread_call_func_t) mapping_adjust,
3305 (thread_call_param_t) NULL);
3306 mapping_adjust_call = &mapping_adjust_call_data;
3307 }
3308 /* XXX rethink best way to do locking here */
3309 if (pv_free_count < PV_LOW_WATER_MARK) {
3310 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3311 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3312 SPLVM(spl);
3313 PV_FREE(pv_e);
3314 SPLX(spl);
3315 }
3316 }
3317 mappingrecurse = 0;
3318 }
3319
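/*
 * Map cnt pages of the kernel commpage at user_commpage by copying the
 * kernel ptes, adding user and global access, and forcing the mappings
 * read-only.
 */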
3320 void
3321 pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
3322 {
3323 int i;
3324 pt_entry_t *opte, *npte;
3325 pt_entry_t pte;
3326
3327 for (i = 0; i < cnt; i++) {
3328 opte = pmap_pte(kernel_pmap, kernel_commpage);
3329 if (0 == opte) panic("kernel_commpage");
3330 npte = pmap_pte(kernel_pmap, user_commpage);
3331 if (0 == npte) panic("user_commpage");
3332 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3333 pte &= ~INTEL_PTE_WRITE; // ensure read only
3334 WRITE_PTE_FAST(npte, pte);
3335 kernel_commpage += INTEL_PGBYTES;
3336 user_commpage += INTEL_PGBYTES;
3337 }
3338 }
3339
3340 static cpu_pmap_t cpu_pmap_master;
3341 static struct pmap_update_list cpu_update_list_master;
3342
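/*
 * Allocate and initialize the per-cpu pmap data for a cpu: the cpu_pmap_t
 * itself, its TLB-flush update list, and the PMAP_NWINDOWS temporary
 * copy/zero windows.  The boot cpu uses the statically allocated
 * structures above.
 */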
3343 struct cpu_pmap *
3344 pmap_cpu_alloc(boolean_t is_boot_cpu)
3345 {
3346 int ret;
3347 int i;
3348 cpu_pmap_t *cp;
3349 pmap_update_list_t up;
3350 vm_offset_t address;
3351 vm_map_entry_t entry;
3352
3353 if (is_boot_cpu) {
3354 cp = &cpu_pmap_master;
3355 up = &cpu_update_list_master;
3356 } else {
3357 /*
3358 * The per-cpu pmap data structure itself.
3359 */
3360 ret = kmem_alloc(kernel_map,
3361 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3362 if (ret != KERN_SUCCESS) {
3363 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3364 return NULL;
3365 }
3366 bzero((void *)cp, sizeof(cpu_pmap_t));
3367
3368 /*
3369 * The tlb flush update list.
3370 */
3371 ret = kmem_alloc(kernel_map,
3372 (vm_offset_t *) &up, sizeof(*up));
3373 if (ret != KERN_SUCCESS) {
3374 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3375 pmap_cpu_free(cp);
3376 return NULL;
3377 }
3378
3379 /*
3380 * The temporary windows used for copy/zero - see loose_ends.c
3381 */
3382 for (i = 0; i < PMAP_NWINDOWS; i++) {
3383 ret = vm_map_find_space(kernel_map,
3384 &address, PAGE_SIZE, 0, &entry);
3385 if (ret != KERN_SUCCESS) {
3386 printf("pmap_cpu_alloc() "
3387 "vm_map_find_space ret=%d\n", ret);
3388 pmap_cpu_free(cp);
3389 return NULL;
3390 }
3391 vm_map_unlock(kernel_map);
3392
3393 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
3394 cp->mapwindow[i].prv_CMAP = vtopte(address);
3395 * (int *) cp->mapwindow[i].prv_CMAP = 0;
3396
3397 kprintf("pmap_cpu_alloc() "
3398 "window=%d CADDR=0x%x CMAP=0x%x\n",
3399 i, address, vtopte(address));
3400 }
3401 }
3402
3403 /*
3404 * Set up the pmap request list
3405 */
3406 cp->update_list = up;
3407 simple_lock_init(&up->lock, 0);
3408 up->count = 0;
3409
3410 return cp;
3411 }
3412
3413 void
3414 pmap_cpu_free(struct cpu_pmap *cp)
3415 {
3416 if (cp != NULL && cp != &cpu_pmap_master) {
3417 if (cp->update_list != NULL)
3418 kfree((void *) cp->update_list,
3419 sizeof(*cp->update_list));
3420 kfree((void *) cp, sizeof(cpu_pmap_t));
3421 }
3422 }