1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53 /*
54 */
55
56 /*
57 * File: pmap.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young
59 * (These guys wrote the Vax version)
60 *
61 * Physical Map management code for Intel i386, i486, and i860.
62 *
63 * Manages physical address maps.
64 *
65 * In addition to hardware address maps, this
66 * module is called upon to provide software-use-only
67 * maps which may or may not be stored in the same
68 * form as hardware maps. These pseudo-maps are
69 * used to store intermediate results from copy
70 * operations to and from address spaces.
71 *
72 * Since the information managed by this module is
73 * also stored by the logical address mapping module,
74 * this module may throw away valid virtual-to-physical
75 * mappings at almost any time. However, invalidations
76 * of virtual-to-physical mappings must be done as
77 * requested.
78 *
79 * In order to cope with hardware architectures which
80 * make virtual-to-physical map invalidates expensive,
81 * this module may delay invalidate or reduced-protection
82 * operations until such time as they are actually
83 * necessary. This module is given full information as
84 * to which processors are currently using which maps,
85 * and to when physical maps must be made correct.
86 */
87
88 #include <cpus.h>
89
90 #include <string.h>
91 #include <norma_vm.h>
92 #include <mach_kdb.h>
93 #include <mach_ldebug.h>
94
95 #include <mach/machine/vm_types.h>
96
97 #include <mach/boolean.h>
98 #include <kern/thread.h>
99 #include <kern/zalloc.h>
100
101 #include <kern/lock.h>
102 #include <kern/spl.h>
103
104 #include <vm/pmap.h>
105 #include <vm/vm_map.h>
106 #include <vm/vm_kern.h>
107 #include <mach/vm_param.h>
108 #include <mach/vm_prot.h>
109 #include <vm/vm_object.h>
110 #include <vm/vm_page.h>
111
112 #include <mach/machine/vm_param.h>
113 #include <machine/thread.h>
114
115 #include <kern/misc_protos.h> /* prototyping */
116 #include <i386/misc_protos.h>
117
118 #include <i386/cpuid.h>
119
120 #if MACH_KDB
121 #include <ddb/db_command.h>
122 #include <ddb/db_output.h>
123 #include <ddb/db_sym.h>
124 #include <ddb/db_print.h>
125 #endif /* MACH_KDB */
126
127 #include <kern/xpr.h>
128
129 #if NCPUS > 1
130 #include <i386/AT386/mp/mp_events.h>
131 #endif
132
133 /*
134 * Forward declarations for internal functions.
135 */
136 void pmap_expand(
137 pmap_t map,
138 vm_offset_t v);
139
140 extern void pmap_remove_range(
141 pmap_t pmap,
142 vm_offset_t va,
143 pt_entry_t *spte,
144 pt_entry_t *epte);
145
146 void phys_attribute_clear(
147 vm_offset_t phys,
148 int bits);
149
150 boolean_t phys_attribute_test(
151 vm_offset_t phys,
152 int bits);
153
154 void pmap_set_modify(vm_offset_t phys);
155
156 void phys_attribute_set(
157 vm_offset_t phys,
158 int bits);
159
160
161 #ifndef set_dirbase
162 void set_dirbase(vm_offset_t dirbase);
163 #endif /* set_dirbase */
164
165 #define PA_TO_PTE(pa) (pa_to_pte((pa) - VM_MIN_KERNEL_ADDRESS))
166 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
167
168 pmap_t real_pmap[NCPUS];
169
170 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
171 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
172
173 /*
174 * Private data structures.
175 */
176
177 /*
178 * For each vm_page_t, there is a list of all currently
179 * valid virtual mappings of that page. An entry is
180 * a pv_entry_t; the list is the pv_table.
181 */
182
183 typedef struct pv_entry {
184 struct pv_entry *next; /* next pv_entry */
185 pmap_t pmap; /* pmap where mapping lies */
186 vm_offset_t va; /* virtual address for mapping */
187 } *pv_entry_t;
188
189 #define PV_ENTRY_NULL ((pv_entry_t) 0)
190
191 pv_entry_t pv_head_table; /* array of entries, one per page */
192
193 /*
194 * pv_list entries are kept on a list that can only be accessed
195 * with the pmap system locked (at SPLVM, not in the cpus_active set).
196 * The list is refilled from the pv_list_zone if it becomes empty.
197 */
198 pv_entry_t pv_free_list; /* free list at SPLVM */
199 decl_simple_lock_data(,pv_free_list_lock)
200
201 #define PV_ALLOC(pv_e) { \
202 simple_lock(&pv_free_list_lock); \
203 if ((pv_e = pv_free_list) != 0) { \
204 pv_free_list = pv_e->next; \
205 } \
206 simple_unlock(&pv_free_list_lock); \
207 }
208
209 #define PV_FREE(pv_e) { \
210 simple_lock(&pv_free_list_lock); \
211 pv_e->next = pv_free_list; \
212 pv_free_list = pv_e; \
213 simple_unlock(&pv_free_list_lock); \
214 }
215
216 zone_t pv_list_zone; /* zone of pv_entry structures */
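
/*
 * Illustrative sketch (not part of the original source): the usual way a
 * pv_entry is obtained, as pmap_enter does further below.  PV_ALLOC is
 * tried first; if the free list is empty, the caller drops its pmap locks,
 * refills from pv_list_zone with zalloc, and retries.  The function name
 * is an example only, and lock handling is elided.
 */
#if 0	/* example only */
static pv_entry_t
pv_entry_alloc_example(void)
{
	pv_entry_t	pv_e;

	PV_ALLOC(pv_e);			/* try the free list first */
	if (pv_e == PV_ENTRY_NULL) {
		/* caller must not hold pmap locks across zalloc */
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
	}
	return pv_e;
}
#endif	/* example only */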
217
218 /*
219 * Each entry in the pv_head_table is locked by a bit in the
220 * pv_lock_table. The lock bits are accessed by the physical
221 * address of the page they lock.
222 */
223
224 char *pv_lock_table; /* pointer to array of bits */
225 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
226
227 /*
228 * First and last physical addresses that we maintain any information
229 * for. Initialized to zero so that pmap operations done before
230 * pmap_init won't touch any non-existent structures.
231 */
232 vm_offset_t vm_first_phys = (vm_offset_t) 0;
233 vm_offset_t vm_last_phys = (vm_offset_t) 0;
234 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
235
236 /*
237 * Index into pv_head table, its lock bits, and the modify/reference
238 * bits starting at vm_first_phys.
239 */
240
241 #define pa_index(pa) (atop(pa - vm_first_phys))
242
243 #define pai_to_pvh(pai) (&pv_head_table[pai])
244 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
245 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
246
247 /*
248 * Array of physical page attributes for managed pages.
249 * One byte per physical page.
250 */
251 char *pmap_phys_attributes;
252
253 /*
254 * Physical page attributes. Copy bits from PTE definition.
255 */
256 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
257 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
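
/*
 * Illustrative sketch (not part of the original source): turning a managed
 * physical address into its pv_head_table slot and its attribute byte.
 * The function name is an example only; the range check follows the
 * vm_first_phys/vm_last_phys bounds described above.
 */
#if 0	/* example only */
static boolean_t
phys_page_referenced_example(vm_offset_t phys)
{
	int		pai;
	boolean_t	referenced;

	if (phys < vm_first_phys || phys >= vm_last_phys)
		return FALSE;		/* no information kept for this page */

	pai = pa_index(phys);		/* page index from vm_first_phys */
	lock_pvh_pai(pai);		/* bit lock for this pv_head entry */

	/* pai_to_pvh(pai) would give the head of this page's pv list;
	 * the same index selects the page's modify/reference byte. */
	referenced = (pmap_phys_attributes[pai] & PHYS_REFERENCED) != 0;

	unlock_pvh_pai(pai);
	return referenced;
}
#endif	/* example only */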
258
259 /*
260 * Amount of virtual memory mapped by one
261 * page-directory entry.
262 */
263 #define PDE_MAPPED_SIZE (pdetova(1))
264
265 /*
266 * We allocate page table pages directly from the VM system
267 * through this object. It maps physical memory.
268 */
269 vm_object_t pmap_object = VM_OBJECT_NULL;
270
271 /*
272 * Locking and TLB invalidation
273 */
274
275 /*
276 * Locking Protocols:
277 *
278 * There are two structures in the pmap module that need locking:
279 * the pmaps themselves, and the per-page pv_lists (which are locked
280 * by locking the pv_lock_table entry that corresponds to the pv_head
281 * for the list in question.) Most routines want to lock a pmap and
282 * then do operations in it that require pv_list locking -- however
283 * pmap_remove_all and pmap_copy_on_write operate on a physical page
284 * basis and want to do the locking in the reverse order, i.e. lock
285 * a pv_list and then go through all the pmaps referenced by that list.
286 * To protect against deadlock between these two cases, the pmap_lock
287 * is used. There are three different locking protocols as a result:
288 *
289 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
290 * the pmap.
291 *
292 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
293 * lock on the pmap_lock (shared read), then lock the pmap
294 * and finally the pv_lists as needed [i.e. pmap lock before
295 * pv_list lock.]
296 *
297 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
298 * Get a write lock on the pmap_lock (exclusive write); this
299 * also guarantees exclusive access to the pv_lists. Lock the
300 * pmaps as needed.
301 *
302 * At no time may any routine hold more than one pmap lock or more than
303 * one pv_list lock. Because interrupt level routines can allocate
304 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
305 * kernel_pmap can only be held at splhigh.
306 */
307
308 #if NCPUS > 1
309 /*
310 * We raise the interrupt level to splhigh, to block interprocessor
311 * interrupts during pmap operations. We must take the CPU out of
312 * the cpus_active set while interrupts are blocked.
313 */
314 #define SPLVM(spl) { \
315 spl = splhigh(); \
316 mp_disable_preemption(); \
317 i_bit_clear(cpu_number(), &cpus_active); \
318 mp_enable_preemption(); \
319 }
320
321 #define SPLX(spl) { \
322 mp_disable_preemption(); \
323 i_bit_set(cpu_number(), &cpus_active); \
324 mp_enable_preemption(); \
325 splx(spl); \
326 }
327
328 /*
329 * Lock on pmap system
330 */
331 lock_t pmap_system_lock;
332
333 #define PMAP_READ_LOCK(pmap, spl) { \
334 SPLVM(spl); \
335 lock_read(&pmap_system_lock); \
336 simple_lock(&(pmap)->lock); \
337 }
338
339 #define PMAP_WRITE_LOCK(spl) { \
340 SPLVM(spl); \
341 lock_write(&pmap_system_lock); \
342 }
343
344 #define PMAP_READ_UNLOCK(pmap, spl) { \
345 simple_unlock(&(pmap)->lock); \
346 lock_read_done(&pmap_system_lock); \
347 SPLX(spl); \
348 }
349
350 #define PMAP_WRITE_UNLOCK(spl) { \
351 lock_write_done(&pmap_system_lock); \
352 SPLX(spl); \
353 }
354
355 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
356 simple_lock(&(pmap)->lock); \
357 lock_write_to_read(&pmap_system_lock); \
358 }
359
360 #define LOCK_PVH(index) lock_pvh_pai(index)
361
362 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
363
364 #define PMAP_FLUSH_TLBS() \
365 { \
366 flush_tlb(); \
367 i386_signal_cpus(MP_TLB_FLUSH); \
368 }
369
370 #define PMAP_RELOAD_TLBS() { \
371 i386_signal_cpus(MP_TLB_RELOAD); \
372 set_cr3(kernel_pmap->pdirbase); \
373 }
374
375 #define PMAP_INVALIDATE_PAGE(map, addr) { \
376 if (map == kernel_pmap) \
377 invlpg((vm_offset_t) addr); \
378 else \
379 flush_tlb(); \
380 i386_signal_cpus(MP_TLB_FLUSH); \
381 }
382
383 #else /* NCPUS > 1 */
384
385 #if MACH_RT
386 #define SPLVM(spl) { (spl) = splhigh(); }
387 #define SPLX(spl) splx (spl)
388 #else /* MACH_RT */
389 #define SPLVM(spl)
390 #define SPLX(spl)
391 #endif /* MACH_RT */
392
393 #define PMAP_READ_LOCK(pmap, spl) SPLVM(spl)
394 #define PMAP_WRITE_LOCK(spl) SPLVM(spl)
395 #define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl)
396 #define PMAP_WRITE_UNLOCK(spl) SPLX(spl)
397 #define PMAP_WRITE_TO_READ_LOCK(pmap)
398
399 #if MACH_RT
400 #define LOCK_PVH(index) disable_preemption()
401 #define UNLOCK_PVH(index) enable_preemption()
402 #else /* MACH_RT */
403 #define LOCK_PVH(index)
404 #define UNLOCK_PVH(index)
405 #endif /* MACH_RT */
406
407 #define PMAP_FLUSH_TLBS() flush_tlb()
408 #define PMAP_RELOAD_TLBS() set_cr3(kernel_pmap->pdirbase)
409 #define PMAP_INVALIDATE_PAGE(map, addr) { \
410 if (map == kernel_pmap) \
411 invlpg((vm_offset_t) addr); \
412 else \
413 flush_tlb(); \
414 }
415
416 #endif /* NCPUS > 1 */
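
/*
 * Illustrative sketch (not part of the original source): locking protocol 2
 * from the comment above, as followed by pmap-based operations such as
 * pmap_enter and pmap_remove.  PMAP_READ_LOCK raises SPL, takes the pmap
 * system lock for read and locks the pmap; any pv_list lock (LOCK_PVH) is
 * taken after that, and everything is released in reverse order.
 */
#if 0	/* example only */
static void
pmap_locking_protocol2_example(pmap_t pmap, vm_offset_t phys)
{
	spl_t	spl;
	int	pai;

	PMAP_READ_LOCK(pmap, spl);	/* system lock (read) + pmap lock */

	pai = pa_index(phys);
	LOCK_PVH(pai);			/* pv_list lock, always taken last */

	/* ... examine or update the page's pv list and ptes here ... */

	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);	/* drop locks, restore SPL */
}
#endif	/* example only */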
417
418 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
419
420 #if NCPUS > 1
421 /*
422 * Structures to keep track of pending TLB invalidations
423 */
424 cpu_set cpus_active;
425 cpu_set cpus_idle;
426 volatile boolean_t cpu_update_needed[NCPUS];
427
428
429 #endif /* NCPUS > 1 */
430
431 /*
432 * Other useful macros.
433 */
434 #define current_pmap() (vm_map_pmap(current_act()->map))
435 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
436
437 struct pmap kernel_pmap_store;
438 pmap_t kernel_pmap;
439
440 struct zone *pmap_zone; /* zone of pmap structures */
441
442 int pmap_debug = 0; /* flag for debugging prints */
443 int ptes_per_vm_page; /* number of hardware ptes needed
444 to map one VM page. */
445 unsigned int inuse_ptepages_count = 0; /* debugging */
446
447 /*
448 * Pmap cache. Cache is threaded through ref_count field of pmap.
449 * Max will eventually be constant -- variable for experimentation.
450 */
451 int pmap_cache_max = 32;
452 int pmap_alloc_chunk = 8;
453 pmap_t pmap_cache_list;
454 int pmap_cache_count;
455 decl_simple_lock_data(,pmap_cache_lock)
456
457 extern vm_offset_t hole_start, hole_end;
458
459 extern char end;
460
461 /*
462 * Page directory for kernel.
463 */
464 pt_entry_t *kpde = 0; /* set by start.s - keep out of bss */
465
466 #if DEBUG_ALIAS
467 #define PMAP_ALIAS_MAX 32
468 struct pmap_alias {
469 vm_offset_t rpc;
470 pmap_t pmap;
471 vm_offset_t va;
472 int cookie;
473 #define PMAP_ALIAS_COOKIE 0xdeadbeef
474 } pmap_aliasbuf[PMAP_ALIAS_MAX];
475 int pmap_alias_index = 0;
476 extern vm_offset_t get_rpc();
477
478 #endif /* DEBUG_ALIAS */
479
480 /*
481 * Given an offset and a map, compute the address of the
482 * pte. If the address is invalid with respect to the map
483 * then PT_ENTRY_NULL is returned (and the map may need to grow).
484 *
485 * This is only used in machine-dependent code.
486 */
487
488 pt_entry_t *
489 pmap_pte(
490 register pmap_t pmap,
491 register vm_offset_t addr)
492 {
493 register pt_entry_t *ptp;
494 register pt_entry_t pte;
495
496 pte = pmap->dirbase[pdenum(pmap, addr)];
497 if ((pte & INTEL_PTE_VALID) == 0)
498 return(PT_ENTRY_NULL);
499 ptp = (pt_entry_t *)ptetokv(pte);
500 return(&ptp[ptenum(addr)]);
501
502 }
503
504 #define pmap_pde(pmap, addr) (&(pmap)->dirbase[pdenum(pmap, addr)])
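
/*
 * Illustrative sketch (not part of the original source): typical use of
 * pmap_pde/pmap_pte.  pmap_pde always yields a page directory slot, but
 * pmap_pte returns PT_ENTRY_NULL when the covering page table has not been
 * allocated, so callers check for that (and, if needed, call pmap_expand)
 * before dereferencing.  The pmap is assumed to be locked by the caller.
 */
#if 0	/* example only */
static boolean_t
pmap_is_mapped_example(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t	*pte;

	if ((*pmap_pde(pmap, va) & INTEL_PTE_VALID) == 0)
		return FALSE;			/* no page table for this va */

	pte = pmap_pte(pmap, va);
	if (pte == PT_ENTRY_NULL)
		return FALSE;
	return (*pte & INTEL_PTE_VALID) != 0;	/* is the page present? */
}
#endif	/* example only */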
505
506 #define DEBUG_PTE_PAGE 0
507
508 #if DEBUG_PTE_PAGE
509 void
510 ptep_check(
511 ptep_t ptep)
512 {
513 register pt_entry_t *pte, *epte;
514 int ctu, ctw;
515
516 /* check the use and wired counts */
517 if (ptep == PTE_PAGE_NULL)
518 return;
519 pte = pmap_pte(ptep->pmap, ptep->va);
520 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
521 ctu = 0;
522 ctw = 0;
523 while (pte < epte) {
524 if (pte->pfn != 0) {
525 ctu++;
526 if (pte->wired)
527 ctw++;
528 }
529 pte += ptes_per_vm_page;
530 }
531
532 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
533 printf("use %d wired %d - actual use %d wired %d\n",
534 ptep->use_count, ptep->wired_count, ctu, ctw);
535 panic("pte count");
536 }
537 }
538 #endif /* DEBUG_PTE_PAGE */
539
540 /*
541 * Map memory at initialization. The physical addresses being
542 * mapped are not managed and are never unmapped.
543 *
544 * For now, VM is already on, we only need to map the
545 * specified memory.
546 */
547 vm_offset_t
548 pmap_map(
549 register vm_offset_t virt,
550 register vm_offset_t start,
551 register vm_offset_t end,
552 register vm_prot_t prot)
553 {
554 register int ps;
555
556 ps = PAGE_SIZE;
557 while (start < end) {
558 pmap_enter(kernel_pmap, virt, start, prot, 0, FALSE);
559 virt += ps;
560 start += ps;
561 }
562 return(virt);
563 }
564
565 /*
566 * Back-door routine for mapping kernel VM at initialization.
567 * Useful for mapping memory outside the range
568 * [vm_first_phys, vm_last_phys) (i.e., devices).
569 * Sets no-cache, A, D.
570 * Otherwise like pmap_map.
571 */
572 vm_offset_t
573 pmap_map_bd(
574 register vm_offset_t virt,
575 register vm_offset_t start,
576 register vm_offset_t end,
577 vm_prot_t prot)
578 {
579 register pt_entry_t template;
580 register pt_entry_t *pte;
581
582 template = pa_to_pte(start)
583 | INTEL_PTE_NCACHE
584 | INTEL_PTE_REF
585 | INTEL_PTE_MOD
586 | INTEL_PTE_WIRED
587 | INTEL_PTE_VALID;
588 if (prot & VM_PROT_WRITE)
589 template |= INTEL_PTE_WRITE;
590
591 while (start < end) {
592 pte = pmap_pte(kernel_pmap, virt);
593 if (pte == PT_ENTRY_NULL)
594 panic("pmap_map_bd: Invalid kernel address\n");
595 WRITE_PTE_FAST(pte, template)
596 pte_increment_pa(template);
597 virt += PAGE_SIZE;
598 start += PAGE_SIZE;
599 }
600
601 PMAP_FLUSH_TLBS();
602
603 return(virt);
604 }
605
606 extern int cnvmem;
607 extern char *first_avail;
608 extern vm_offset_t virtual_avail, virtual_end;
609 extern vm_offset_t avail_start, avail_end, avail_next;
610
611 /*
612 * Bootstrap the system enough to run with virtual memory.
613 * Map the kernel's code and data, and allocate the system page table.
614 * Called with mapping OFF. Page_size must already be set.
615 *
616 * Parameters:
617 * load_start: PA where kernel was loaded
618 * avail_start PA of first available physical page -
619 * after kernel page tables
620 * avail_end PA of last available physical page
621 * virtual_avail VA of first available page -
622 * after kernel page tables
623 * virtual_end VA of last available page -
624 * end of kernel address space
625 *
626 * &start_text start of kernel text
627 * &etext end of kernel text
628 */
629
630 void
631 pmap_bootstrap(
632 vm_offset_t load_start)
633 {
634 vm_offset_t va, tva, paddr;
635 pt_entry_t template;
636 pt_entry_t *pde, *pte, *ptend;
637 vm_size_t morevm; /* VM space for kernel map */
638
639 /*
640 * Set ptes_per_vm_page for general use.
641 */
642 ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;
643
644 /*
645 * The kernel's pmap is statically allocated so we don't
646 * have to use pmap_create, which is unlikely to work
647 * correctly at this part of the boot sequence.
648 */
649
650 kernel_pmap = &kernel_pmap_store;
651
652 #if NCPUS > 1
653 lock_init(&pmap_system_lock,
654 FALSE, /* NOT a sleep lock */
655 ETAP_VM_PMAP_SYS,
656 ETAP_VM_PMAP_SYS_I);
657 #endif /* NCPUS > 1 */
658
659 simple_lock_init(&kernel_pmap->lock, ETAP_VM_PMAP_KERNEL);
660 simple_lock_init(&pv_free_list_lock, ETAP_VM_PMAP_FREE);
661
662 kernel_pmap->ref_count = 1;
663
664 /*
665 * The kernel page directory has been allocated;
666 * its virtual address is in kpde.
667 *
668 * Enough kernel page table pages have been allocated
669 * to map low system memory, kernel text, kernel data/bss,
670 * kdb's symbols, and the page directory and page tables.
671 *
672 * No other physical memory has been allocated.
673 */
674
675 /*
676 * Start mapping virtual memory to physical memory, 1-1,
677 * at end of mapped memory.
678 */
679
680 virtual_avail = phystokv(avail_start);
681 virtual_end = phystokv(avail_end);
682
683 pde = kpde;
684 pde += pdenum(kernel_pmap, virtual_avail);
685
686 if (pte_to_pa(*pde) == 0) {
687 /* This pte has not been allocated */
688 pte = 0; ptend = 0;
689 }
690 else {
691 pte = (pt_entry_t *)ptetokv(*pde);
692 /* first pte of page */
693 ptend = pte+NPTES; /* last pte of page */
694 pte += ptenum(virtual_avail); /* point to pte that
695 maps first avail VA */
696 pde++; /* point pde to first empty slot */
697 }
698
699 template = pa_to_pte(avail_start)
700 | INTEL_PTE_VALID
701 | INTEL_PTE_WRITE;
702
703 for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) {
704 if (pte >= ptend) {
705 pte = (pt_entry_t *)phystokv(virtual_avail);
706 ptend = pte + NPTES;
707 virtual_avail = (vm_offset_t)ptend;
708 if (virtual_avail == hole_start)
709 virtual_avail = hole_end;
710 *pde = PA_TO_PTE((vm_offset_t) pte)
711 | INTEL_PTE_VALID
712 | INTEL_PTE_WRITE;
713 pde++;
714 }
715 WRITE_PTE_FAST(pte, template)
716 pte++;
717 pte_increment_pa(template);
718 }
719
720 avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS;
721 avail_next = avail_start;
722
723 /*
724 * Figure out maximum kernel address.
725 * Kernel virtual space is:
726 * - at least three times physical memory
727 * - at least VM_MIN_KERNEL_ADDRESS
728 * - limited by VM_MAX_KERNEL_ADDRESS
729 */
730
731 morevm = 3*avail_end;
732 if (virtual_end + morevm > VM_MAX_KERNEL_ADDRESS)
733 morevm = VM_MAX_KERNEL_ADDRESS - virtual_end + 1;
734
735 /*
736 * startup requires additional virtual memory (for tables, buffers,
737 * etc.). The kd driver may also require some of that memory to
738 * access the graphics board.
739 *
740 */
741 *(int *)&template = 0;
742
743 /*
744 * Leave room for kernel-loaded servers, which have been linked at
745 * addresses from VM_MIN_KERNEL_LOADED_ADDRESS to
746 * VM_MAX_KERNEL_LOADED_ADDRESS.
747 */
748 if (virtual_end + morevm < VM_MAX_KERNEL_LOADED_ADDRESS + 1)
749 morevm = VM_MAX_KERNEL_LOADED_ADDRESS + 1 - virtual_end;
750
751
752 virtual_end += morevm;
753 for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) {
754 if (pte >= ptend) {
755 pmap_next_page(&paddr);
756 pte = (pt_entry_t *)phystokv(paddr);
757 ptend = pte + NPTES;
758 *pde = PA_TO_PTE((vm_offset_t) pte)
759 | INTEL_PTE_VALID
760 | INTEL_PTE_WRITE;
761 pde++;
762 }
763 WRITE_PTE_FAST(pte, template)
764 pte++;
765 }
766
767 virtual_avail = va;
768
769 /* Push the virtual avail address above hole_end */
770 if (virtual_avail < hole_end)
771 virtual_avail = hole_end;
772
773 /*
774 * c.f. comment above
775 *
776 */
777 virtual_end = va + morevm;
778 while (pte < ptend)
779 *pte++ = 0;
780
781 /*
782 * invalidate user virtual addresses
783 */
784 memset((char *)kpde,
785 0,
786 pdenum(kernel_pmap,VM_MIN_KERNEL_ADDRESS)*sizeof(pt_entry_t));
787 kernel_pmap->dirbase = kpde;
788 printf("Kernel virtual space from 0x%x to 0x%x.\n",
789 VM_MIN_KERNEL_ADDRESS, virtual_end);
790
791 avail_start = avail_next;
792 printf("Available physical space from 0x%x to 0x%x\n",
793 avail_start, avail_end);
794
795 kernel_pmap->pdirbase = kvtophys((vm_offset_t)kernel_pmap->dirbase);
796
797 }
798
799 void
800 pmap_virtual_space(
801 vm_offset_t *startp,
802 vm_offset_t *endp)
803 {
804 *startp = virtual_avail;
805 *endp = virtual_end;
806 }
807
808 /*
809 * Initialize the pmap module.
810 * Called by vm_init, to initialize any structures that the pmap
811 * system needs to map virtual memory.
812 */
813 void
814 pmap_init(void)
815 {
816 register long npages;
817 vm_offset_t addr;
818 register vm_size_t s;
819 int i;
820
821 /*
822 * Allocate memory for the pv_head_table and its lock bits,
823 * the modify bit array, and the pte_page table.
824 */
825
826 npages = atop(avail_end - avail_start);
827 s = (vm_size_t) (sizeof(struct pv_entry) * npages
828 + pv_lock_table_size(npages)
829 + npages);
830
831 s = round_page(s);
832 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
833 panic("pmap_init");
834
835 memset((char *)addr, 0, s);
836
837 /*
838 * Allocate the structures first to preserve word-alignment.
839 */
840 pv_head_table = (pv_entry_t) addr;
841 addr = (vm_offset_t) (pv_head_table + npages);
842
843 pv_lock_table = (char *) addr;
844 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
845
846 pmap_phys_attributes = (char *) addr;
847
848 /*
849 * Create the zone of physical maps,
850 * and of the physical-to-virtual entries.
851 */
852 s = (vm_size_t) sizeof(struct pmap);
853 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
854 s = (vm_size_t) sizeof(struct pv_entry);
855 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
856
857 /*
858 * Only now, when all of the data structures are allocated,
859 * can we set vm_first_phys and vm_last_phys. If we set them
860 * too soon, the kmem_alloc_wired above will try to use these
861 * data structures and blow up.
862 */
863
864 vm_first_phys = avail_start;
865 vm_last_phys = avail_end;
866 pmap_initialized = TRUE;
867
868 /*
869 * Initialize the pmap cache.
870 */
871 pmap_cache_list = PMAP_NULL;
872 pmap_cache_count = 0;
873 simple_lock_init(&pmap_cache_lock, ETAP_VM_PMAP_CACHE);
874 }
875
876
877 #define pmap_valid_page(x) ((avail_start <= x) && (x < avail_end))
878
879
880 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
881
882 boolean_t
883 pmap_verify_free(
884 vm_offset_t phys)
885 {
886 pv_entry_t pv_h;
887 int pai;
888 spl_t spl;
889 boolean_t result;
890
891 assert(phys != vm_page_fictitious_addr);
892 if (!pmap_initialized)
893 return(TRUE);
894
895 if (!pmap_valid_page(phys))
896 return(FALSE);
897
898 PMAP_WRITE_LOCK(spl);
899
900 pai = pa_index(phys);
901 pv_h = pai_to_pvh(pai);
902
903 result = (pv_h->pmap == PMAP_NULL);
904 PMAP_WRITE_UNLOCK(spl);
905
906 return(result);
907 }
908
909 /*
910 * Create and return a physical map.
911 *
912 * If the size specified for the map
913 * is zero, the map is an actual physical
914 * map, and may be referenced by the
915 * hardware.
916 *
917 * If the size specified is non-zero,
918 * the map will be used in software only, and
919 * is bounded by that size.
920 */
921 pmap_t
922 pmap_create(
923 vm_size_t size)
924 {
925 register pmap_t p;
926 register pmap_statistics_t stats;
927
928 /*
929 * A software use-only map doesn't even need a pmap.
930 */
931
932 if (size != 0) {
933 return(PMAP_NULL);
934 }
935
936 /*
937 * Try to get a cached pmap; if this fails,
938 * allocate a pmap struct from the pmap_zone. Then allocate
939 * the page descriptor table from the pd_zone.
940 */
941
942 simple_lock(&pmap_cache_lock);
943 while ((p = pmap_cache_list) == PMAP_NULL) {
944
945 vm_offset_t dirbases;
946 register int i;
947
948 simple_unlock(&pmap_cache_lock);
949
950 #if NCPUS > 1
951 /*
952 * XXX NEEDS MP DOING ALLOC logic so that if multiple processors
953 * XXX get here, only one allocates a chunk of pmaps.
954 * (for now we'll just let it go - safe but wasteful)
955 */
956 #endif
957
958 /*
959 * Allocate a chunk of pmaps. A single kmem_alloc_wired
960 * operation reduces kernel map fragmentation.
961 */
962
963 if (kmem_alloc_wired(kernel_map, &dirbases,
964 pmap_alloc_chunk * INTEL_PGBYTES)
965 != KERN_SUCCESS)
966 panic("pmap_create.1");
967
968 for (i = pmap_alloc_chunk; i > 0 ; i--) {
969 p = (pmap_t) zalloc(pmap_zone);
970 if (p == PMAP_NULL)
971 panic("pmap_create.2");
972
973 /*
974 * Initialize pmap. Don't bother with
975 * ref count as cache list is threaded
976 * through it. It'll be set on cache removal.
977 */
978 p->dirbase = (pt_entry_t *) dirbases;
979 dirbases += INTEL_PGBYTES;
980 memcpy(p->dirbase, kpde, INTEL_PGBYTES);
981 p->pdirbase = kvtophys((vm_offset_t)p->dirbase);
982
983 simple_lock_init(&p->lock, ETAP_VM_PMAP);
984 p->cpus_using = 0;
985
986 /*
987 * Initialize statistics.
988 */
989 stats = &p->stats;
990 stats->resident_count = 0;
991 stats->wired_count = 0;
992
993 /*
994 * Insert into cache
995 */
996 simple_lock(&pmap_cache_lock);
997 p->ref_count = (int) pmap_cache_list;
998 pmap_cache_list = p;
999 pmap_cache_count++;
1000 simple_unlock(&pmap_cache_lock);
1001 }
1002 simple_lock(&pmap_cache_lock);
1003 }
1004
1005 assert(p->stats.resident_count == 0);
1006 assert(p->stats.wired_count == 0);
1007 p->stats.resident_count = 0;
1008 p->stats.wired_count = 0;
1009
1010 pmap_cache_list = (pmap_t) p->ref_count;
1011 p->ref_count = 1;
1012 pmap_cache_count--;
1013 simple_unlock(&pmap_cache_lock);
1014
1015 return(p);
1016 }
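
/*
 * Illustrative sketch (not part of the original source): the typical
 * lifecycle of a pmap as described above.  Only a size of zero yields a
 * real, hardware-usable map; the map is reference counted and is retired
 * by pmap_destroy when the last reference goes away.
 */
#if 0	/* example only */
static void
pmap_lifecycle_example(void)
{
	pmap_t	p;

	p = pmap_create((vm_size_t) 0);	/* non-zero size returns PMAP_NULL */
	if (p == PMAP_NULL)
		return;

	pmap_reference(p);		/* ref_count: 1 -> 2 */

	/* ... install translations with pmap_enter, use the map ... */

	pmap_destroy(p);		/* drop the extra reference */
	pmap_destroy(p);		/* last reference: map is retired */
}
#endif	/* example only */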
1017
1018 /*
1019 * Retire the given physical map from service.
1020 * Should only be called if the map contains
1021 * no valid mappings.
1022 */
1023
1024 void
1025 pmap_destroy(
1026 register pmap_t p)
1027 {
1028 register pt_entry_t *pdep;
1029 register vm_offset_t pa;
1030 register int c;
1031 spl_t s;
1032 register vm_page_t m;
1033
1034 if (p == PMAP_NULL)
1035 return;
1036
1037 SPLVM(s);
1038 simple_lock(&p->lock);
1039 c = --p->ref_count;
1040 if (c == 0) {
1041 register int my_cpu;
1042
1043 mp_disable_preemption();
1044 my_cpu = cpu_number();
1045
1046 /*
1047 * If some cpu is not using the physical pmap pointer that it
1048 * is supposed to be (see set_dirbase), we might be using the
1049 * pmap that is being destroyed! Make sure we are
1050 * physically on the right pmap:
1051 */
1052
1053
1054 if (real_pmap[my_cpu] == p) {
1055 PMAP_CPU_CLR(p, my_cpu);
1056 real_pmap[my_cpu] = kernel_pmap;
1057 PMAP_RELOAD_TLBS();
1058 }
1059 mp_enable_preemption();
1060 }
1061 simple_unlock(&p->lock);
1062 SPLX(s);
1063
1064 if (c != 0) {
1065 return; /* still in use */
1066 }
1067
1068 /*
1069 * Free the memory maps, then the
1070 * pmap structure.
1071 */
1072 pdep = p->dirbase;
1073 while (pdep < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)]) {
1074 if (*pdep & INTEL_PTE_VALID) {
1075 pa = pte_to_pa(*pdep);
1076 vm_object_lock(pmap_object);
1077 m = vm_page_lookup(pmap_object, pa);
1078 if (m == VM_PAGE_NULL)
1079 panic("pmap_destroy: pte page not in object");
1080 vm_page_lock_queues();
1081 vm_page_free(m);
1082 inuse_ptepages_count--;
1083 vm_object_unlock(pmap_object);
1084 vm_page_unlock_queues();
1085
1086 /*
1087 * Clear pdes, this might be headed for the cache.
1088 */
1089 c = ptes_per_vm_page;
1090 do {
1091 *pdep = 0;
1092 pdep++;
1093 } while (--c > 0);
1094 }
1095 else {
1096 pdep += ptes_per_vm_page;
1097 }
1098
1099 }
1100 assert(p->stats.resident_count == 0);
1101 assert(p->stats.wired_count == 0);
1102
1103 /*
1104 * Add to cache if not already full
1105 */
1106 simple_lock(&pmap_cache_lock);
1107 if (pmap_cache_count <= pmap_cache_max) {
1108 p->ref_count = (int) pmap_cache_list;
1109 pmap_cache_list = p;
1110 pmap_cache_count++;
1111 simple_unlock(&pmap_cache_lock);
1112 }
1113 else {
1114 simple_unlock(&pmap_cache_lock);
1115 kmem_free(kernel_map, (vm_offset_t)p->dirbase, INTEL_PGBYTES);
1116 zfree(pmap_zone, (vm_offset_t) p);
1117 }
1118 }
1119
1120 /*
1121 * Add a reference to the specified pmap.
1122 */
1123
1124 void
1125 pmap_reference(
1126 register pmap_t p)
1127 {
1128 spl_t s;
1129
1130 if (p != PMAP_NULL) {
1131 SPLVM(s);
1132 simple_lock(&p->lock);
1133 p->ref_count++;
1134 simple_unlock(&p->lock);
1135 SPLX(s);
1136 }
1137 }
1138
1139 /*
1140 * Remove a range of hardware page-table entries.
1141 * The entries given are the first (inclusive)
1142 * and last (exclusive) entries for the VM pages.
1143 * The virtual address is the va for the first pte.
1144 *
1145 * The pmap must be locked.
1146 * If the pmap is not the kernel pmap, the range must lie
1147 * entirely within one pte-page. This is NOT checked.
1148 * Assumes that the pte-page exists.
1149 */
1150
1151 /* static */
1152 void
1153 pmap_remove_range(
1154 pmap_t pmap,
1155 vm_offset_t va,
1156 pt_entry_t *spte,
1157 pt_entry_t *epte)
1158 {
1159 register pt_entry_t *cpte;
1160 int num_removed, num_unwired;
1161 int pai;
1162 vm_offset_t pa;
1163
1164 #if DEBUG_PTE_PAGE
1165 if (pmap != kernel_pmap)
1166 ptep_check(get_pte_page(spte));
1167 #endif /* DEBUG_PTE_PAGE */
1168 num_removed = 0;
1169 num_unwired = 0;
1170
1171 for (cpte = spte; cpte < epte;
1172 cpte += ptes_per_vm_page, va += PAGE_SIZE) {
1173
1174 pa = pte_to_pa(*cpte);
1175 if (pa == 0)
1176 continue;
1177
1178 num_removed++;
1179 if (iswired(*cpte))
1180 num_unwired++;
1181
1182 if (!valid_page(pa)) {
1183
1184 /*
1185 * Outside range of managed physical memory.
1186 * Just remove the mappings.
1187 */
1188 register int i = ptes_per_vm_page;
1189 register pt_entry_t *lpte = cpte;
1190 do {
1191 *lpte = 0;
1192 lpte++;
1193 } while (--i > 0);
1194 continue;
1195 }
1196
1197 pai = pa_index(pa);
1198 LOCK_PVH(pai);
1199
1200 /*
1201 * Get the modify and reference bits.
1202 */
1203 {
1204 register int i;
1205 register pt_entry_t *lpte;
1206
1207 i = ptes_per_vm_page;
1208 lpte = cpte;
1209 do {
1210 pmap_phys_attributes[pai] |=
1211 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1212 *lpte = 0;
1213 lpte++;
1214 } while (--i > 0);
1215 }
1216
1217 /*
1218 * Remove the mapping from the pvlist for
1219 * this physical page.
1220 */
1221 {
1222 register pv_entry_t pv_h, prev, cur;
1223
1224 pv_h = pai_to_pvh(pai);
1225 if (pv_h->pmap == PMAP_NULL) {
1226 panic("pmap_remove: null pv_list!");
1227 }
1228 if (pv_h->va == va && pv_h->pmap == pmap) {
1229 /*
1230 * Header is the pv_entry. Copy the next one
1231 * to header and free the next one (we cannot
1232 * free the header)
1233 */
1234 cur = pv_h->next;
1235 if (cur != PV_ENTRY_NULL) {
1236 *pv_h = *cur;
1237 PV_FREE(cur);
1238 }
1239 else {
1240 pv_h->pmap = PMAP_NULL;
1241 }
1242 }
1243 else {
1244 cur = pv_h;
1245 do {
1246 prev = cur;
1247 if ((cur = prev->next) == PV_ENTRY_NULL) {
1248 panic("pmap-remove: mapping not in pv_list!");
1249 }
1250 } while (cur->va != va || cur->pmap != pmap);
1251 prev->next = cur->next;
1252 PV_FREE(cur);
1253 }
1254 UNLOCK_PVH(pai);
1255 }
1256 }
1257
1258 /*
1259 * Update the counts
1260 */
1261 assert(pmap->stats.resident_count >= num_removed);
1262 pmap->stats.resident_count -= num_removed;
1263 assert(pmap->stats.wired_count >= num_unwired);
1264 pmap->stats.wired_count -= num_unwired;
1265 }
1266
1267 /*
1268 * Remove phys addr if mapped in specified map
1269 *
1270 */
1271 void
1272 pmap_remove_some_phys(
1273 pmap_t map,
1274 vm_offset_t phys_addr)
1275 {
1276
1277 /* Implement to support working set code */
1278
1279 }
1280
1281
1282 /*
1283 * Remove the given range of addresses
1284 * from the specified map.
1285 *
1286 * It is assumed that the start and end are properly
1287 * rounded to the hardware page size.
1288 */
1289
1290 void
1291 pmap_remove(
1292 pmap_t map,
1293 vm_offset_t s,
1294 vm_offset_t e)
1295 {
1296 spl_t spl;
1297 register pt_entry_t *pde;
1298 register pt_entry_t *spte, *epte;
1299 vm_offset_t l;
1300
1301 if (map == PMAP_NULL)
1302 return;
1303
1304 PMAP_READ_LOCK(map, spl);
1305
1306 pde = pmap_pde(map, s);
1307
1308 while (s < e) {
1309 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1310 if (l > e)
1311 l = e;
1312 if (*pde & INTEL_PTE_VALID) {
1313 spte = (pt_entry_t *)ptetokv(*pde);
1314 spte = &spte[ptenum(s)];
1315 epte = &spte[intel_btop(l-s)];
1316 pmap_remove_range(map, s, spte, epte);
1317 }
1318 s = l;
1319 pde++;
1320 }
1321
1322 PMAP_FLUSH_TLBS();
1323
1324 PMAP_READ_UNLOCK(map, spl);
1325 }
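
/*
 * Illustrative sketch (not part of the original source): removing one
 * page worth of mappings.  As noted above, the start and end addresses
 * passed to pmap_remove must already be rounded to the hardware page
 * size (assumed here to be a power of two).
 */
#if 0	/* example only */
static void
pmap_remove_one_page_example(pmap_t map, vm_offset_t va)
{
	va &= ~((vm_offset_t)PAGE_SIZE - 1);	/* page-align the start */
	pmap_remove(map, va, va + PAGE_SIZE);	/* removes [va, va+PAGE_SIZE) */
}
#endif	/* example only */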
1326
1327 /*
1328 * Routine: pmap_page_protect
1329 *
1330 * Function:
1331 * Lower the permission for all mappings to a given
1332 * page.
1333 */
1334 void
1335 pmap_page_protect(
1336 vm_offset_t phys,
1337 vm_prot_t prot)
1338 {
1339 pv_entry_t pv_h, prev;
1340 register pv_entry_t pv_e;
1341 register pt_entry_t *pte;
1342 int pai;
1343 register pmap_t pmap;
1344 spl_t spl;
1345 boolean_t remove;
1346
1347 assert(phys != vm_page_fictitious_addr);
1348 if (!valid_page(phys)) {
1349 /*
1350 * Not a managed page.
1351 */
1352 return;
1353 }
1354
1355 /*
1356 * Determine the new protection.
1357 */
1358 switch (prot) {
1359 case VM_PROT_READ:
1360 case VM_PROT_READ|VM_PROT_EXECUTE:
1361 remove = FALSE;
1362 break;
1363 case VM_PROT_ALL:
1364 return; /* nothing to do */
1365 default:
1366 remove = TRUE;
1367 break;
1368 }
1369
1370 /*
1371 * Lock the pmap system first, since we will be changing
1372 * several pmaps.
1373 */
1374
1375 PMAP_WRITE_LOCK(spl);
1376
1377 pai = pa_index(phys);
1378 pv_h = pai_to_pvh(pai);
1379
1380 /*
1381 * Walk down PV list, changing or removing all mappings.
1382 * We do not have to lock the pv_list because we have
1383 * the entire pmap system locked.
1384 */
1385 if (pv_h->pmap != PMAP_NULL) {
1386
1387 prev = pv_e = pv_h;
1388 do {
1389 pmap = pv_e->pmap;
1390 /*
1391 * Lock the pmap to block pmap_extract and similar routines.
1392 */
1393 simple_lock(&pmap->lock);
1394
1395 {
1396 register vm_offset_t va;
1397
1398 va = pv_e->va;
1399 pte = pmap_pte(pmap, va);
1400
1401 /*
1402 * Consistency checks.
1403 */
1404 /* assert(*pte & INTEL_PTE_VALID); XXX */
1405 /* assert(pte_to_phys(*pte) == phys); */
1406
1407 /*
1408 * Invalidate TLBs for all CPUs using this mapping.
1409 */
1410 PMAP_INVALIDATE_PAGE(pmap, va);
1411 }
1412
1413 /*
1414 * Remove the mapping if new protection is NONE
1415 * or if write-protecting a kernel mapping.
1416 */
1417 if (remove || pmap == kernel_pmap) {
1418 /*
1419 * Remove the mapping, collecting any modify bits.
1420 */
1421 {
1422 register int i = ptes_per_vm_page;
1423
1424 do {
1425 pmap_phys_attributes[pai] |=
1426 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1427 *pte++ = 0;
1428 } while (--i > 0);
1429 }
1430
1431 assert(pmap->stats.resident_count >= 1);
1432 pmap->stats.resident_count--;
1433
1434 /*
1435 * Remove the pv_entry.
1436 */
1437 if (pv_e == pv_h) {
1438 /*
1439 * Fix up head later.
1440 */
1441 pv_h->pmap = PMAP_NULL;
1442 }
1443 else {
1444 /*
1445 * Delete this entry.
1446 */
1447 prev->next = pv_e->next;
1448 PV_FREE(pv_e);
1449 }
1450 }
1451 else {
1452 /*
1453 * Write-protect.
1454 */
1455 register int i = ptes_per_vm_page;
1456
1457 do {
1458 *pte &= ~INTEL_PTE_WRITE;
1459 pte++;
1460 } while (--i > 0);
1461
1462 /*
1463 * Advance prev.
1464 */
1465 prev = pv_e;
1466 }
1467
1468 simple_unlock(&pmap->lock);
1469
1470 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1471
1472 /*
1473 * If pv_head mapping was removed, fix it up.
1474 */
1475 if (pv_h->pmap == PMAP_NULL) {
1476 pv_e = pv_h->next;
1477 if (pv_e != PV_ENTRY_NULL) {
1478 *pv_h = *pv_e;
1479 PV_FREE(pv_e);
1480 }
1481 }
1482 }
1483
1484 PMAP_WRITE_UNLOCK(spl);
1485 }
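
/*
 * Illustrative sketch (not part of the original source): the two common
 * uses of pmap_page_protect on a managed physical page.  Read-only
 * protections write-protect every mapping of the page; VM_PROT_NONE (or
 * any protection not handled above) removes the mappings entirely, and
 * VM_PROT_ALL is a no-op.
 */
#if 0	/* example only */
static void
pmap_page_protect_example(vm_offset_t phys)
{
	/* write-protect every existing mapping of this page */
	pmap_page_protect(phys, VM_PROT_READ);

	/* remove every mapping of this page, e.g. before freeing it */
	pmap_page_protect(phys, VM_PROT_NONE);
}
#endif	/* example only */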
1486
1487 /*
1488 * Set the physical protection on the
1489 * specified range of this map as requested.
1490 * Will not increase permissions.
1491 */
1492 void
1493 pmap_protect(
1494 pmap_t map,
1495 vm_offset_t s,
1496 vm_offset_t e,
1497 vm_prot_t prot)
1498 {
1499 register pt_entry_t *pde;
1500 register pt_entry_t *spte, *epte;
1501 vm_offset_t l;
1502 spl_t spl;
1503
1504
1505 if (map == PMAP_NULL)
1506 return;
1507
1508 /*
1509 * Determine the new protection.
1510 */
1511 switch (prot) {
1512 case VM_PROT_READ:
1513 case VM_PROT_READ|VM_PROT_EXECUTE:
1514 break;
1515 case VM_PROT_READ|VM_PROT_WRITE:
1516 case VM_PROT_ALL:
1517 return; /* nothing to do */
1518 default:
1519 pmap_remove(map, s, e);
1520 return;
1521 }
1522
1523 /*
1524 * If write-protecting in the kernel pmap,
1525 * remove the mappings; the i386 ignores
1526 * the write-permission bit in kernel mode.
1527 *
1528 * XXX should be #if'd for i386
1529 */
1530
1531 if (cpuid_family == CPUID_FAMILY_386)
1532 if (map == kernel_pmap) {
1533 pmap_remove(map, s, e);
1534 return;
1535 }
1536
1537 SPLVM(spl);
1538 simple_lock(&map->lock);
1539
1540
1541 pde = pmap_pde(map, s);
1542 while (s < e) {
1543 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1544 if (l > e)
1545 l = e;
1546 if (*pde & INTEL_PTE_VALID) {
1547 spte = (pt_entry_t *)ptetokv(*pde);
1548 spte = &spte[ptenum(s)];
1549 epte = &spte[intel_btop(l-s)];
1550
1551 while (spte < epte) {
1552 if (*spte & INTEL_PTE_VALID)
1553 *spte &= ~INTEL_PTE_WRITE;
1554 spte++;
1555 }
1556 }
1557 s = l;
1558 pde++;
1559 }
1560
1561 PMAP_FLUSH_TLBS();
1562
1563 simple_unlock(&map->lock);
1564 SPLX(spl);
1565 }
1566
1567
1568
1569 /*
1570 * Insert the given physical page (p) at
1571 * the specified virtual address (v) in the
1572 * target physical map with the protection requested.
1573 *
1574 * If specified, the page will be wired down, meaning
1575 * that the related pte cannot be reclaimed.
1576 *
1577 * NB: This is the only routine which MAY NOT lazy-evaluate
1578 * or lose information. That is, this routine must actually
1579 * insert this page into the given map NOW.
1580 */
1581 void
1582 pmap_enter(
1583 register pmap_t pmap,
1584 vm_offset_t v,
1585 register vm_offset_t pa,
1586 vm_prot_t prot,
1587 unsigned int flags,
1588 boolean_t wired)
1589 {
1590 register pt_entry_t *pte;
1591 register pv_entry_t pv_h;
1592 register int i, pai;
1593 pv_entry_t pv_e;
1594 pt_entry_t template;
1595 spl_t spl;
1596 vm_offset_t old_pa;
1597
1598 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1599 current_thread()->top_act,
1600 current_thread(),
1601 pmap, v, pa);
1602
1603 assert(pa != vm_page_fictitious_addr);
1604 if (pmap_debug)
1605 printf("pmap(%x, %x)\n", v, pa);
1606 if (pmap == PMAP_NULL)
1607 return;
1608
1609 if (cpuid_family == CPUID_FAMILY_386)
1610 if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
1611 && !wired /* hack for io_wire */ ) {
1612 /*
1613 * Because the 386 ignores write protection in kernel mode,
1614 * we cannot enter a read-only kernel mapping, and must
1615 * remove an existing mapping if changing it.
1616 *
1617 * XXX should be #if'd for i386
1618 */
1619 PMAP_READ_LOCK(pmap, spl);
1620
1621 pte = pmap_pte(pmap, v);
1622 if (pte != PT_ENTRY_NULL && pte_to_pa(*pte) != 0) {
1623 /*
1624 * Invalidate the translation buffer,
1625 * then remove the mapping.
1626 */
1627 PMAP_INVALIDATE_PAGE(pmap, v);
1628 pmap_remove_range(pmap, v, pte,
1629 pte + ptes_per_vm_page);
1630 }
1631 PMAP_READ_UNLOCK(pmap, spl);
1632 return;
1633 }
1634
1635 /*
1636 * Must allocate a new pvlist entry while we're unlocked;
1637 * zalloc may cause pageout (which will lock the pmap system).
1638 * If we determine we need a pvlist entry, we will unlock
1639 * and allocate one. Then we will retry, throwing away
1640 * the allocated entry later (if we no longer need it).
1641 */
1642 pv_e = PV_ENTRY_NULL;
1643 Retry:
1644 PMAP_READ_LOCK(pmap, spl);
1645
1646 /*
1647 * Expand pmap to include this pte. Assume that
1648 * pmap is always expanded to include enough hardware
1649 * pages to map one VM page.
1650 */
1651
1652 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1653 /*
1654 * Must unlock to expand the pmap.
1655 */
1656 PMAP_READ_UNLOCK(pmap, spl);
1657
1658 pmap_expand(pmap, v);
1659
1660 PMAP_READ_LOCK(pmap, spl);
1661 }
1662 /*
1663 * Special case if the physical page is already mapped
1664 * at this address.
1665 */
1666 old_pa = pte_to_pa(*pte);
1667 if (old_pa == pa) {
1668 /*
1669 * May be changing its wired attribute or protection
1670 */
1671
1672 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1673 if (pmap != kernel_pmap)
1674 template |= INTEL_PTE_USER;
1675 if (prot & VM_PROT_WRITE)
1676 template |= INTEL_PTE_WRITE;
1677 if (wired) {
1678 template |= INTEL_PTE_WIRED;
1679 if (!iswired(*pte))
1680 pmap->stats.wired_count++;
1681 }
1682 else {
1683 if (iswired(*pte)) {
1684 assert(pmap->stats.wired_count >= 1);
1685 pmap->stats.wired_count--;
1686 }
1687 }
1688
1689 PMAP_INVALIDATE_PAGE(pmap, v);
1690
1691 i = ptes_per_vm_page;
1692 do {
1693 if (*pte & INTEL_PTE_MOD)
1694 template |= INTEL_PTE_MOD;
1695 WRITE_PTE(pte, template)
1696 pte++;
1697 pte_increment_pa(template);
1698 } while (--i > 0);
1699
1700 goto Done;
1701 }
1702
1703 /*
1704 * Outline of code from here:
1705 * 1) If va was mapped, update TLBs, remove the mapping
1706 * and remove old pvlist entry.
1707 * 2) Add pvlist entry for new mapping
1708 * 3) Enter new mapping.
1709 *
1710 * SHARING_FAULTS complicates this slightly in that it cannot
1711 * replace the mapping, but must remove it (because adding the
1712 * pvlist entry for the new mapping may remove others), and
1713 * hence always enters the new mapping at step 3)
1714 *
1715 * If the old physical page is not managed step 1) is skipped
1716 * (except for updating the TLBs), and the mapping is
1717 * overwritten at step 3). If the new physical page is not
1718 * managed, step 2) is skipped.
1719 */
1720
1721 if (old_pa != (vm_offset_t) 0) {
1722
1723 PMAP_INVALIDATE_PAGE(pmap, v);
1724
1725 #if DEBUG_PTE_PAGE
1726 if (pmap != kernel_pmap)
1727 ptep_check(get_pte_page(pte));
1728 #endif /* DEBUG_PTE_PAGE */
1729
1730 /*
1731 * Don't do anything to pages outside valid memory here.
1732 * Instead convince the code that enters a new mapping
1733 * to overwrite the old one.
1734 */
1735
1736 if (valid_page(old_pa)) {
1737
1738 pai = pa_index(old_pa);
1739 LOCK_PVH(pai);
1740
1741 assert(pmap->stats.resident_count >= 1);
1742 pmap->stats.resident_count--;
1743 if (iswired(*pte)) {
1744 assert(pmap->stats.wired_count >= 1);
1745 pmap->stats.wired_count--;
1746 }
1747 i = ptes_per_vm_page;
1748 do {
1749 pmap_phys_attributes[pai] |=
1750 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1751 WRITE_PTE(pte, 0)
1752 pte++;
1753 pte_increment_pa(template);
1754 } while (--i > 0);
1755
1756 /*
1757 * Put pte back to beginning of page since it'll be
1758 * used later to enter the new page.
1759 */
1760 pte -= ptes_per_vm_page;
1761
1762 /*
1763 * Remove the mapping from the pvlist for
1764 * this physical page.
1765 */
1766 {
1767 register pv_entry_t prev, cur;
1768
1769 pv_h = pai_to_pvh(pai);
1770 if (pv_h->pmap == PMAP_NULL) {
1771 panic("pmap_enter: null pv_list!");
1772 }
1773 if (pv_h->va == v && pv_h->pmap == pmap) {
1774 /*
1775 * Header is the pv_entry. Copy the next one
1776 * to header and free the next one (we cannot
1777 * free the header)
1778 */
1779 cur = pv_h->next;
1780 if (cur != PV_ENTRY_NULL) {
1781 *pv_h = *cur;
1782 pv_e = cur;
1783 }
1784 else {
1785 pv_h->pmap = PMAP_NULL;
1786 }
1787 }
1788 else {
1789 cur = pv_h;
1790 do {
1791 prev = cur;
1792 if ((cur = prev->next) == PV_ENTRY_NULL) {
1793 panic("pmap_enter: mapping not in pv_list!");
1794 }
1795 } while (cur->va != v || cur->pmap != pmap);
1796 prev->next = cur->next;
1797 pv_e = cur;
1798 }
1799 }
1800 UNLOCK_PVH(pai);
1801 }
1802 else {
1803
1804 /*
1805 * old_pa is not managed. Pretend it's zero so code
1806 * at Step 3) will enter new mapping (overwriting old
1807 * one). Do removal part of accounting.
1808 */
1809 old_pa = (vm_offset_t) 0;
1810 assert(pmap->stats.resident_count >= 1);
1811 pmap->stats.resident_count--;
1812 if (iswired(*pte)) {
1813 assert(pmap->stats.wired_count >= 1);
1814 pmap->stats.wired_count--;
1815 }
1816 }
1817 }
1818
1819 if (valid_page(pa)) {
1820
1821 /*
1822 * Step 2) Enter the mapping in the PV list for this
1823 * physical page.
1824 */
1825
1826 pai = pa_index(pa);
1827
1828
1829 #if SHARING_FAULTS
1830 RetryPvList:
1831 /*
1832 * We can return here from the sharing fault code below
1833 * in case we removed the only entry on the pv list and thus
1834 * must enter the new one in the list header.
1835 */
1836 #endif /* SHARING_FAULTS */
1837 LOCK_PVH(pai);
1838 pv_h = pai_to_pvh(pai);
1839
1840 if (pv_h->pmap == PMAP_NULL) {
1841 /*
1842 * No mappings yet
1843 */
1844 pv_h->va = v;
1845 pv_h->pmap = pmap;
1846 pv_h->next = PV_ENTRY_NULL;
1847 }
1848 else {
1849 #if DEBUG
1850 {
1851 /*
1852 * check that this mapping is not already there
1853 * or there is no alias for this mapping in the same map
1854 */
1855 pv_entry_t e = pv_h;
1856 while (e != PV_ENTRY_NULL) {
1857 if (e->pmap == pmap && e->va == v)
1858 panic("pmap_enter: already in pv_list");
1859 e = e->next;
1860 }
1861 }
1862 #endif /* DEBUG */
1863 #if SHARING_FAULTS
1864 {
1865 /*
1866 * do sharing faults.
1867 * if we find an entry on this pv list in the same address
1868 * space, remove it. we know there will not be more
1869 * than one.
1870 */
1871 pv_entry_t e = pv_h;
1872 pt_entry_t *opte;
1873
1874 while (e != PV_ENTRY_NULL) {
1875 if (e->pmap == pmap) {
1876 /*
1877 * Remove it, drop pv list lock first.
1878 */
1879 UNLOCK_PVH(pai);
1880
1881 opte = pmap_pte(pmap, e->va);
1882 assert(opte != PT_ENTRY_NULL);
1883 /*
1884 * Invalidate the translation buffer,
1885 * then remove the mapping.
1886 */
1887 PMAP_INVALIDATE_PAGE(pmap, e->va);
1888 pmap_remove_range(pmap, e->va, opte,
1889 opte + ptes_per_vm_page);
1890 /*
1891 * We could have removed the head entry,
1892 * in which case there may be no more
1893 * entries and we must use the pv head entry.
1894 * So go back to the top and try the entry
1895 * again.
1896 */
1897 goto RetryPvList;
1898 }
1899 e = e->next;
1900 }
1901
1902 /*
1903 * check that this mapping is not already there
1904 */
1905 e = pv_h;
1906 while (e != PV_ENTRY_NULL) {
1907 if (e->pmap == pmap)
1908 panic("pmap_enter: alias in pv_list");
1909 e = e->next;
1910 }
1911 }
1912 #endif /* SHARING_FAULTS */
1913 #if DEBUG_ALIAS
1914 {
1915 /*
1916 * check for aliases within the same address space.
1917 */
1918 pv_entry_t e = pv_h;
1919 vm_offset_t rpc = get_rpc();
1920
1921 while (e != PV_ENTRY_NULL) {
1922 if (e->pmap == pmap) {
1923 /*
1924 * log this entry in the alias ring buffer
1925 * if it's not there already.
1926 */
1927 struct pmap_alias *pma;
1928 int ii, logit;
1929
1930 logit = TRUE;
1931 for (ii = 0; ii < pmap_alias_index; ii++) {
1932 if (pmap_aliasbuf[ii].rpc == rpc) {
1933 /* found it in the log already */
1934 logit = FALSE;
1935 break;
1936 }
1937 }
1938 if (logit) {
1939 pma = &pmap_aliasbuf[pmap_alias_index];
1940 pma->pmap = pmap;
1941 pma->va = v;
1942 pma->rpc = rpc;
1943 pma->cookie = PMAP_ALIAS_COOKIE;
1944 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
1945 panic("pmap_enter: exhausted alias log");
1946 }
1947 }
1948 e = e->next;
1949 }
1950 }
1951 #endif /* DEBUG_ALIAS */
1952 /*
1953 * Add new pv_entry after header.
1954 */
1955 if (pv_e == PV_ENTRY_NULL) {
1956 PV_ALLOC(pv_e);
1957 if (pv_e == PV_ENTRY_NULL) {
1958 UNLOCK_PVH(pai);
1959 PMAP_READ_UNLOCK(pmap, spl);
1960
1961 /*
1962 * Refill from zone.
1963 */
1964 pv_e = (pv_entry_t) zalloc(pv_list_zone);
1965 goto Retry;
1966 }
1967 }
1968 pv_e->va = v;
1969 pv_e->pmap = pmap;
1970 pv_e->next = pv_h->next;
1971 pv_h->next = pv_e;
1972 /*
1973 * Remember that we used the pvlist entry.
1974 */
1975 pv_e = PV_ENTRY_NULL;
1976 }
1977 UNLOCK_PVH(pai);
1978 }
1979
1980 /*
1981 * Step 3) Enter and count the mapping.
1982 */
1983
1984 pmap->stats.resident_count++;
1985
1986 /*
1987 * Build a template to speed up entering -
1988 * only the pfn changes.
1989 */
1990 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1991 if (pmap != kernel_pmap)
1992 template |= INTEL_PTE_USER;
1993 if (prot & VM_PROT_WRITE)
1994 template |= INTEL_PTE_WRITE;
1995 if (wired) {
1996 template |= INTEL_PTE_WIRED;
1997 pmap->stats.wired_count++;
1998 }
1999 i = ptes_per_vm_page;
2000 do {
2001 WRITE_PTE(pte, template)
2002 pte++;
2003 pte_increment_pa(template);
2004 } while (--i > 0);
2005 Done:
2006 if (pv_e != PV_ENTRY_NULL) {
2007 PV_FREE(pv_e);
2008 }
2009
2010 PMAP_READ_UNLOCK(pmap, spl);
2011 }
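
/*
 * Illustrative sketch (not part of the original source): entering a single
 * wired, writable kernel mapping, much as pmap_map does for each page at
 * bootstrap (pmap_map passes wired = FALSE).  The flags argument is simply
 * passed as zero here.
 */
#if 0	/* example only */
static void
pmap_enter_example(vm_offset_t va, vm_offset_t pa)
{
	pmap_enter(kernel_pmap, va, pa,
		   VM_PROT_READ | VM_PROT_WRITE,
		   0,		/* flags */
		   TRUE);	/* wired: the pte may not be reclaimed */
}
#endif	/* example only */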
2012
2013 /*
2014 * Routine: pmap_change_wiring
2015 * Function: Change the wiring attribute for a map/virtual-address
2016 * pair.
2017 * In/out conditions:
2018 * The mapping must already exist in the pmap.
2019 */
2020 void
2021 pmap_change_wiring(
2022 register pmap_t map,
2023 vm_offset_t v,
2024 boolean_t wired)
2025 {
2026 register pt_entry_t *pte;
2027 register int i;
2028 spl_t spl;
2029
2030 #if 0
2031 /*
2032 * We must grab the pmap system lock because we may
2033 * change a pte_page queue.
2034 */
2035 PMAP_READ_LOCK(map, spl);
2036
2037 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2038 panic("pmap_change_wiring: pte missing");
2039
2040 if (wired && !iswired(*pte)) {
2041 /*
2042 * wiring down mapping
2043 */
2044 map->stats.wired_count++;
2045 i = ptes_per_vm_page;
2046 do {
2047 *pte++ |= INTEL_PTE_WIRED;
2048 } while (--i > 0);
2049 }
2050 else if (!wired && iswired(*pte)) {
2051 /*
2052 * unwiring mapping
2053 */
2054 assert(map->stats.wired_count >= 1);
2055 map->stats.wired_count--;
2056 i = ptes_per_vm_page;
2057 do {
2058 *pte++ &= ~INTEL_PTE_WIRED;
2059 } while (--i > 0);
2060 }
2061
2062 PMAP_READ_UNLOCK(map, spl);
2063
2064 #else
2065 return;
2066 #endif
2067
2068 }
2069
2070 /*
2071 * Routine: pmap_extract
2072 * Function:
2073 * Extract the physical page address associated
2074 * with the given map/virtual_address pair.
2075 */
2076
2077 vm_offset_t
2078 pmap_extract(
2079 register pmap_t pmap,
2080 vm_offset_t va)
2081 {
2082 register pt_entry_t *pte;
2083 register vm_offset_t pa;
2084 spl_t spl;
2085
2086 SPLVM(spl);
2087 simple_lock(&pmap->lock);
2088 if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
2089 pa = (vm_offset_t) 0;
2090 else if (!(*pte & INTEL_PTE_VALID))
2091 pa = (vm_offset_t) 0;
2092 else
2093 pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
2094 simple_unlock(&pmap->lock);
2095 SPLX(spl);
2096 return(pa);
2097 }
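
/*
 * Illustrative sketch (not part of the original source): pmap_extract
 * returns the physical address backing a virtual address, including the
 * byte offset within the page, or zero when nothing is mapped there.
 */
#if 0	/* example only */
static boolean_t
pmap_extract_example(pmap_t pmap, vm_offset_t va, vm_offset_t *pap)
{
	vm_offset_t	pa;

	pa = pmap_extract(pmap, va);
	if (pa == (vm_offset_t) 0)
		return FALSE;	/* va is not currently mapped */
	*pap = pa;		/* page frame | (va & INTEL_OFFMASK) */
	return TRUE;
}
#endif	/* example only */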
2098
2099 /*
2100 * Routine: pmap_expand
2101 *
2102 * Expands a pmap to be able to map the specified virtual address.
2103 *
2104 * Allocates new virtual memory for the P0 or P1 portion of the
2105 * pmap, then re-maps the physical pages that were in the old
2106 * pmap to be in the new pmap.
2107 *
2108 * Must be called with the pmap system and the pmap unlocked,
2109 * since these must be unlocked to use vm_allocate or vm_deallocate.
2110 * Thus it must be called in a loop that checks whether the map
2111 * has been expanded enough.
2112 * (We won't loop forever, since page tables aren't shrunk.)
2113 */
2114 void
2115 pmap_expand(
2116 register pmap_t map,
2117 register vm_offset_t v)
2118 {
2119 pt_entry_t *pdp;
2120 register vm_page_t m;
2121 register vm_offset_t pa;
2122 register int i;
2123 spl_t spl;
2124
2125 if (map == kernel_pmap)
2126 panic("pmap_expand");
2127
2128 /*
2129 * We cannot allocate the pmap_object in pmap_init,
2130 * because it is called before the zone package is up.
2131 * Allocate it now if it is missing.
2132 */
2133 if (pmap_object == VM_OBJECT_NULL)
2134 pmap_object = vm_object_allocate(avail_end);
2135
2136 /*
2137 * Allocate a VM page for the level 2 page table entries.
2138 */
2139 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2140 VM_PAGE_WAIT();
2141
2142 /*
2143 * Map the page to its physical address so that it
2144 * can be found later.
2145 */
2146 pa = m->phys_addr;
2147 vm_object_lock(pmap_object);
2148 vm_page_insert(m, pmap_object, pa);
2149 vm_page_lock_queues();
2150 vm_page_wire(m);
2151 inuse_ptepages_count++;
2152 vm_object_unlock(pmap_object);
2153 vm_page_unlock_queues();
2154
2155 /*
2156 * Zero the page.
2157 */
2158 memset((void *)phystokv(pa), 0, PAGE_SIZE);
2159
2160 PMAP_READ_LOCK(map, spl);
2161 /*
2162 * See if someone else expanded us first
2163 */
2164 if (pmap_pte(map, v) != PT_ENTRY_NULL) {
2165 PMAP_READ_UNLOCK(map, spl);
2166 vm_object_lock(pmap_object);
2167 vm_page_lock_queues();
2168 vm_page_free(m);
2169 inuse_ptepages_count--;
2170 vm_page_unlock_queues();
2171 vm_object_unlock(pmap_object);
2172 return;
2173 }
2174
2175 /*
2176 * Set the page directory entry for this page table.
2177 * If we have allocated more than one hardware page,
2178 * set several page directory entries.
2179 */
2180
2181 i = ptes_per_vm_page;
2182 pdp = &map->dirbase[pdenum(map, v) & ~(i-1)];
2183 do {
2184 *pdp = pa_to_pte(pa)
2185 | INTEL_PTE_VALID
2186 | INTEL_PTE_USER
2187 | INTEL_PTE_WRITE;
2188 pdp++;
2189 pa += INTEL_PGBYTES;
2190 } while (--i > 0);
2191
2192 PMAP_READ_UNLOCK(map, spl);
2193 return;
2194 }
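
/*
 * Illustrative sketch (not part of the original source): the expand-and-
 * retry loop the comment above calls for, in the same shape pmap_enter
 * uses.  The pmap locks are dropped around pmap_expand and the lookup is
 * retried until the covering page table exists.
 */
#if 0	/* example only */
static pt_entry_t *
pmap_pte_expand_example(pmap_t map, vm_offset_t v, spl_t *splp)
{
	pt_entry_t	*pte;

	PMAP_READ_LOCK(map, *splp);
	while ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(map, *splp);	/* must unlock to expand */
		pmap_expand(map, v);
		PMAP_READ_LOCK(map, *splp);	/* relock and re-check */
	}
	/* returns with the pmap read-locked; *splp holds the previous spl */
	return pte;
}
#endif	/* example only */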
2195
2196 /*
2197 * Copy the range specified by src_addr/len
2198 * from the source map to the range dst_addr/len
2199 * in the destination map.
2200 *
2201 * This routine is only advisory and need not do anything.
2202 */
2203 #if 0
2204 void
2205 pmap_copy(
2206 pmap_t dst_pmap,
2207 pmap_t src_pmap,
2208 vm_offset_t dst_addr,
2209 vm_size_t len,
2210 vm_offset_t src_addr)
2211 {
2212 #ifdef lint
2213 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2214 #endif /* lint */
2215 }
2216 #endif/* 0 */
2217
2218 int collect_ref;
2219 int collect_unref;
2220
2221 /*
2222 * Routine: pmap_collect
2223 * Function:
2224 * Garbage collects the physical map system for
2225 * pages which are no longer used.
2226 * Success need not be guaranteed -- that is, there
2227 * may well be pages which are not referenced, but
2228 * others may be collected.
2229 * Usage:
2230 * Called by the pageout daemon when pages are scarce.
2231 */
2232 void
2233 pmap_collect(
2234 pmap_t p)
2235 {
2236 register pt_entry_t *pdp, *ptp;
2237 pt_entry_t *eptp;
2238 vm_offset_t pa;
2239 int wired;
2240 spl_t spl;
2241
2242 if (p == PMAP_NULL)
2243 return;
2244
2245 if (p == kernel_pmap)
2246 return;
2247
2248 /*
2249 * Garbage collect map.
2250 */
2251 PMAP_READ_LOCK(p, spl);
2252 PMAP_FLUSH_TLBS();
2253
2254 for (pdp = p->dirbase;
2255 pdp < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)];
2256 pdp += ptes_per_vm_page)
2257 {
2258 if (*pdp & INTEL_PTE_VALID)
2259 if(*pdp & INTEL_PTE_REF) {
2260 *pdp &= ~INTEL_PTE_REF;
2261 collect_ref++;
2262 } else {
2263 collect_unref++;
2264 pa = pte_to_pa(*pdp);
2265 ptp = (pt_entry_t *)phystokv(pa);
2266 eptp = ptp + NPTES*ptes_per_vm_page;
2267
2268 /*
2269 * If the pte page has any wired mappings, we cannot
2270 * free it.
2271 */
2272 wired = 0;
2273 {
2274 register pt_entry_t *ptep;
2275 for (ptep = ptp; ptep < eptp; ptep++) {
2276 if (iswired(*ptep)) {
2277 wired = 1;
2278 break;
2279 }
2280 }
2281 }
2282 if (!wired) {
2283 /*
2284 * Remove the virtual addresses mapped by this pte page.
2285 */
2286 pmap_remove_range(p,
2287 pdetova(pdp - p->dirbase),
2288 ptp,
2289 eptp);
2290
2291 /*
2292 * Invalidate the page directory pointer.
2293 */
2294 {
2295 register int i = ptes_per_vm_page;
2296 register pt_entry_t *pdep = pdp;
2297 do {
2298 *pdep++ = 0;
2299 } while (--i > 0);
2300 }
2301
2302 PMAP_READ_UNLOCK(p, spl);
2303
2304 /*
2305 * And free the pte page itself.
2306 */
2307 {
2308 register vm_page_t m;
2309
2310 vm_object_lock(pmap_object);
2311 m = vm_page_lookup(pmap_object, pa);
2312 if (m == VM_PAGE_NULL)
2313 panic("pmap_collect: pte page not in object");
2314 vm_page_lock_queues();
2315 vm_page_free(m);
2316 inuse_ptepages_count--;
2317 vm_page_unlock_queues();
2318 vm_object_unlock(pmap_object);
2319 }
2320
2321 PMAP_READ_LOCK(p, spl);
2322 }
2323 }
2324 }
2325 PMAP_READ_UNLOCK(p, spl);
2326 return;
2327
2328 }
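/*
 * Illustration only (not part of the original file): a hypothetical sketch
 * of how the pageout daemon might drive pmap_collect() when free pages run
 * low.  The threshold names and "some_task" are assumptions, not kernel
 * symbols.
 */
#if 0	/* example */
	if (example_free_page_count < example_free_page_target)
		pmap_collect(some_task->map->pmap);
#endif	/* example */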
2329
2330 /*
2331 * Routine: pmap_kernel
2332 * Function:
2333 * Returns the physical map handle for the kernel.
2334 */
2335 #if 0
2336 pmap_t
2337 pmap_kernel(void)
2338 {
2339 return (kernel_pmap);
2340 }
2341 #endif /* 0 */
2342
2343 /*
2344 * pmap_zero_page zeros the specified (machine independent) page.
2345 * See machine/phys.c or machine/phys.s for implementation.
2346 */
2347 #if 0
2348 void
2349 pmap_zero_page(
2350 register vm_offset_t phys)
2351 {
2352 register int i;
2353
2354 assert(phys != vm_page_fictitious_addr);
2355 i = PAGE_SIZE / INTEL_PGBYTES;
2356 phys = intel_pfn(phys);
2357
2358 while (i--)
2359 zero_phys(phys++);
2360 }
2361 #endif /* 0 */
2362
2363 /*
2364 * pmap_copy_page copies the specified (machine independent) page.
2365 * See machine/phys.c or machine/phys.s for implementation.
2366 */
2367 #if 0
2368 void
2369 pmap_copy_page(
2370 vm_offset_t src,
2371 vm_offset_t dst)
2372 {
2373 int i;
2374
2375 assert(src != vm_page_fictitious_addr);
2376 assert(dst != vm_page_fictitious_addr);
2377 i = PAGE_SIZE / INTEL_PGBYTES;
2378
2379 while (i--) {
2380 copy_phys(intel_pfn(src), intel_pfn(dst));
2381 src += INTEL_PGBYTES;
2382 dst += INTEL_PGBYTES;
2383 }
2384 }
2385 #endif /* 0 */
2386
2387 /*
2388 * Routine: pmap_pageable
2389 * Function:
2390 * Make the specified pages (by pmap, offset)
2391 * pageable (or not) as requested.
2392 *
2393 * A page which is not pageable may not take
2394 * a fault; therefore, its page table entry
2395 * must remain valid for the duration.
2396 *
2397 * This routine is merely advisory; pmap_enter
2398 * will specify that these pages are to be wired
2399 * down (or not) as appropriate.
2400 */
2401 void
2402 pmap_pageable(
2403 pmap_t pmap,
2404 vm_offset_t start,
2405 vm_offset_t end,
2406 boolean_t pageable)
2407 {
2408 #ifdef lint
2409 pmap++; start++; end++; pageable++;
2410 #endif /* lint */
2411 }
2412
2413 /*
2414 * Clear specified attribute bits.
2415 */
2416 void
2417 phys_attribute_clear(
2418 vm_offset_t phys,
2419 int bits)
2420 {
2421 pv_entry_t pv_h;
2422 register pv_entry_t pv_e;
2423 register pt_entry_t *pte;
2424 int pai;
2425 register pmap_t pmap;
2426 spl_t spl;
2427
2428 assert(phys != vm_page_fictitious_addr);
2429 if (!valid_page(phys)) {
2430 /*
2431 * Not a managed page.
2432 */
2433 return;
2434 }
2435
2436 /*
2437 * Lock the pmap system first, since we will be changing
2438 * several pmaps.
2439 */
2440
2441 PMAP_WRITE_LOCK(spl);
2442
2443 pai = pa_index(phys);
2444 pv_h = pai_to_pvh(pai);
2445
2446 /*
2447 * Walk down PV list, clearing all modify or reference bits.
2448 * We do not have to lock the pv_list because we have
2449 * the entire pmap system locked.
2450 */
2451 if (pv_h->pmap != PMAP_NULL) {
2452 /*
2453 * There are some mappings.
2454 */
2455 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2456
2457 pmap = pv_e->pmap;
2458 /*
2459 * Lock the pmap to block pmap_extract and similar routines.
2460 */
2461 simple_lock(&pmap->lock);
2462
2463 {
2464 register vm_offset_t va;
2465
2466 va = pv_e->va;
2467 pte = pmap_pte(pmap, va);
2468
2469 #if 0
2470 /*
2471 * Consistency checks.
2472 */
2473 assert(*pte & INTEL_PTE_VALID);
2474 /* assert(pte_to_phys(*pte) == phys); */
2475 #endif
2476
2477 /*
2478 * Invalidate TLBs for all CPUs using this mapping.
2479 */
2480 PMAP_INVALIDATE_PAGE(pmap, va);
2481 }
2482
2483 /*
2484 * Clear modify or reference bits.
2485 */
2486 {
2487 register int i = ptes_per_vm_page;
2488 do {
2489 *pte++ &= ~bits;
2490 } while (--i > 0);
2491 }
2492 simple_unlock(&pmap->lock);
2493 }
2494 }
2495
2496 pmap_phys_attributes[pai] &= ~bits;
2497
2498 PMAP_WRITE_UNLOCK(spl);
2499 }
2500
2501 /*
2502 * Check specified attribute bits.
2503 */
2504 boolean_t
2505 phys_attribute_test(
2506 vm_offset_t phys,
2507 int bits)
2508 {
2509 pv_entry_t pv_h;
2510 register pv_entry_t pv_e;
2511 register pt_entry_t *pte;
2512 int pai;
2513 register pmap_t pmap;
2514 spl_t spl;
2515
2516 assert(phys != vm_page_fictitious_addr);
2517 if (!valid_page(phys)) {
2518 /*
2519 * Not a managed page.
2520 */
2521 return (FALSE);
2522 }
2523
2524 /*
2525 * Lock the pmap system first, since we will be checking
2526 * several pmaps.
2527 */
2528
2529 PMAP_WRITE_LOCK(spl);
2530
2531 pai = pa_index(phys);
2532 pv_h = pai_to_pvh(pai);
2533
2534 if (pmap_phys_attributes[pai] & bits) {
2535 PMAP_WRITE_UNLOCK(spl);
2536 return (TRUE);
2537 }
2538
2539 /*
2540 * Walk down PV list, checking all mappings.
2541 * We do not have to lock the pv_list because we have
2542 * the entire pmap system locked.
2543 */
2544 if (pv_h->pmap != PMAP_NULL) {
2545 /*
2546 * There are some mappings.
2547 */
2548 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2549
2550 pmap = pv_e->pmap;
2551 /*
2552 * Lock the pmap to block pmap_extract and similar routines.
2553 */
2554 simple_lock(&pmap->lock);
2555
2556 {
2557 register vm_offset_t va;
2558
2559 va = pv_e->va;
2560 pte = pmap_pte(pmap, va);
2561
2562 #if 0
2563 /*
2564 * Consistency checks.
2565 */
2566 assert(*pte & INTEL_PTE_VALID);
2567 /* assert(pte_to_phys(*pte) == phys); */
2568 #endif
2569 }
2570
2571 /*
2572 * Check modify or reference bits.
2573 */
2574 {
2575 register int i = ptes_per_vm_page;
2576
2577 do {
2578 if (*pte++ & bits) {
2579 simple_unlock(&pmap->lock);
2580 PMAP_WRITE_UNLOCK(spl);
2581 return (TRUE);
2582 }
2583 } while (--i > 0);
2584 }
2585 simple_unlock(&pmap->lock);
2586 }
2587 }
2588 PMAP_WRITE_UNLOCK(spl);
2589 return (FALSE);
2590 }
2591
2592 /*
2593 * Set specified attribute bits.
2594 */
2595 void
2596 phys_attribute_set(
2597 vm_offset_t phys,
2598 int bits)
2599 {
2600 spl_t spl;
2601
2602 assert(phys != vm_page_fictitious_addr);
2603 if (!valid_page(phys)) {
2604 /*
2605 * Not a managed page.
2606 */
2607 return;
2608 }
2609
2610 /*
2611 * Lock the pmap system and set the requested bits in
2612 * the phys attributes array. Don't need to bother with
2613 * ptes because the test routine looks here first.
2614 */
2615
2616 PMAP_WRITE_LOCK(spl);
2617 pmap_phys_attributes[pa_index(phys)] |= bits;
2618 PMAP_WRITE_UNLOCK(spl);
2619 }
2620
2621 /*
2622 * Set the modify bit on the specified physical page.
2623 */
2624
2625 void
2626 pmap_set_modify(register vm_offset_t phys)
2627 {
2628 phys_attribute_set(phys, PHYS_MODIFIED);
2629 }
2630
2631 /*
2632 * Clear the modify bits on the specified physical page.
2633 */
2634
2635 void
2636 pmap_clear_modify(
2637 register vm_offset_t phys)
2638 {
2639 phys_attribute_clear(phys, PHYS_MODIFIED);
2640 }
2641
2642 /*
2643 * pmap_is_modified:
2644 *
2645 * Return whether or not the specified physical page is modified
2646 * by any physical maps.
2647 */
2648
2649 boolean_t
2650 pmap_is_modified(
2651 register vm_offset_t phys)
2652 {
2653 return (phys_attribute_test(phys, PHYS_MODIFIED));
2654 }
2655
2656 /*
2657 * pmap_clear_reference:
2658 *
2659 * Clear the reference bit on the specified physical page.
2660 */
2661
2662 void
2663 pmap_clear_reference(
2664 vm_offset_t phys)
2665 {
2666 phys_attribute_clear(phys, PHYS_REFERENCED);
2667 }
2668
2669 /*
2670 * pmap_is_referenced:
2671 *
2672 * Return whether or not the specified physical page is referenced
2673 * by any physical maps.
2674 */
2675
2676 boolean_t
2677 pmap_is_referenced(
2678 vm_offset_t phys)
2679 {
2680 return (phys_attribute_test(phys, PHYS_REFERENCED));
2681 }
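/*
 * Illustration only (not part of the original file): a minimal sketch of
 * how a page-replacement scan might combine the two routines above to
 * approximate LRU.  The helper name is hypothetical.
 */
#if 0	/* example */
static boolean_t
example_page_recently_used(
	vm_offset_t	phys)
{
	if (pmap_is_referenced(phys)) {
		pmap_clear_reference(phys);	/* give it another pass */
		return TRUE;
	}
	return FALSE;				/* reclamation candidate */
}
#endif	/* example */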
2682
2683 /*
2684 * Set the modify bit on the specified range
2685 * of this map as requested.
2686 *
2687 * This optimization is valid only if, each time the dirty bit
2688 * in vm_page_t is tested, the modify bit in the pmap is tested as well.
2689 */
2690 void
2691 pmap_modify_pages(
2692 pmap_t map,
2693 vm_offset_t s,
2694 vm_offset_t e)
2695 {
2696 spl_t spl;
2697 register pt_entry_t *pde;
2698 register pt_entry_t *spte, *epte;
2699 vm_offset_t l;
2700
2701 if (map == PMAP_NULL)
2702 return;
2703
2704 PMAP_READ_LOCK(map, spl);
2705
2706 pde = pmap_pde(map, s);
2707 while (s && s < e) {
2708 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
2709 if (l > e)
2710 l = e;
2711 if (*pde & INTEL_PTE_VALID) {
2712 spte = (pt_entry_t *)ptetokv(*pde);
2713 if (l) {
2714 spte = &spte[ptenum(s)];
2715 epte = &spte[intel_btop(l-s)];
2716 } else {
2717 epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
2718 spte = &spte[ptenum(s)];
2719 }
2720 while (spte < epte) {
2721 if (*spte & INTEL_PTE_VALID) {
2722 *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
2723 }
2724 spte++;
2725 }
2726 }
2727 s = l;
2728 pde++;
2729 }
2730 PMAP_FLUSH_TLBS();
2731 PMAP_READ_UNLOCK(map, spl);
2732 }
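/*
 * Illustration only (not part of the original file): the consistency rule
 * the comment above depends on.  Wherever the machine-independent dirty
 * bit is tested, the pmap modify bit must be tested too, e.g. (vm_page
 * field names assumed):
 */
#if 0	/* example */
	if (m->dirty || pmap_is_modified(m->phys_addr)) {
		/* treat the page as dirty */
	}
#endif	/* example */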
2733
2734
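/*
 * Cache maintenance stubs: on i386 the hardware keeps the instruction
 * and data caches coherent, so these routines have nothing to do.
 */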
2735 void
2736 invalidate_icache(vm_offset_t addr, unsigned cnt, int phys)
2737 {
2738 return;
2739 }
2740 void
2741 flush_dcache(vm_offset_t addr, unsigned count, int phys)
2742 {
2743 return;
2744 }
2745
2746 #if NCPUS > 1
2747
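/*
 * TLB shootdown support (multiprocessor only).  pmap_wait_for_clear()
 * spins until no pmap update is in progress on the pmap this cpu is
 * using (or on the kernel pmap); the interrupt handlers below then
 * either flush the TLB or reload cr3 with the kernel page directory.
 */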
2748 inline void
2749 pmap_wait_for_clear(void)
2750 {
2751 register int my_cpu;
2752 spl_t s;
2753 register pmap_t my_pmap;
2754
2755 mp_disable_preemption();
2756 my_cpu = cpu_number();
2757
2758
2759 my_pmap = real_pmap[my_cpu];
2760
2761 if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
2762 my_pmap = kernel_pmap;
2763
2764 /*
2765 * Raise spl to splhigh (above splip) to block out pmap_extract
2766 * from IO code (which would put this cpu back in the active
2767 * set).
2768 */
2769 s = splhigh();
2770
2771 /*
2772 * Wait for any pmap updates in progress, on either user
2773 * or kernel pmap.
2774 */
2775 while (*(volatile hw_lock_t)&my_pmap->lock.interlock ||
2776 *(volatile hw_lock_t)&kernel_pmap->lock.interlock) {
2777 continue;
2778 }
2779
2780 splx(s);
2781 mp_enable_preemption();
2782 }
2783
2784 void
2785 pmap_flush_tlb_interrupt(void) {
2786 pmap_wait_for_clear();
2787
2788 flush_tlb();
2789 }
2790
2791 void
2792 pmap_reload_tlb_interrupt(void) {
2793 pmap_wait_for_clear();
2794
2795 set_cr3(kernel_pmap->pdirbase);
2796 }
2797
2798
2799 #endif /* NCPUS > 1 */
2800
2801 #if MACH_KDB
2802
2803 /* show phys page mappings and attributes */
2804
2805 extern void db_show_page(vm_offset_t pa);
2806
2807 void
2808 db_show_page(vm_offset_t pa)
2809 {
2810 pv_entry_t pv_h;
2811 int pai;
2812 char attr;
2813
2814 pai = pa_index(pa);
2815 pv_h = pai_to_pvh(pai);
2816
2817 attr = pmap_phys_attributes[pai];
2818 printf("phys page %x ", pa);
2819 if (attr & PHYS_MODIFIED)
2820 printf("modified, ");
2821 if (attr & PHYS_REFERENCED)
2822 printf("referenced, ");
2823 if (pv_h->pmap || pv_h->next)
2824 printf(" mapped at\n");
2825 else
2826 printf(" not mapped\n");
2827 for (; pv_h; pv_h = pv_h->next)
2828 if (pv_h->pmap)
2829 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
2830 }
2831
2832 #endif /* MACH_KDB */
2833
2834 #if MACH_KDB
2835 void db_kvtophys(vm_offset_t);
2836 void db_show_vaddrs(pt_entry_t *);
2837
2838 /*
2839 * print out the results of kvtophys(arg)
2840 */
2841 void
2842 db_kvtophys(
2843 vm_offset_t vaddr)
2844 {
2845 db_printf("0x%x", kvtophys(vaddr));
2846 }
2847
2848 /*
2849 * Walk the page tables.
2850 */
2851 void
2852 db_show_vaddrs(
2853 pt_entry_t *dirbase)
2854 {
2855 pt_entry_t *ptep, *pdep, tmp;
2856 int x, y, pdecnt, ptecnt;
2857
2858 if (dirbase == 0) {
2859 dirbase = kernel_pmap->dirbase;
2860 }
2861 if (dirbase == 0) {
2862 db_printf("need a dirbase...\n");
2863 return;
2864 }
2865 dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
2866
2867 db_printf("dirbase: 0x%x\n", dirbase);
2868
2869 pdecnt = ptecnt = 0;
2870 pdep = &dirbase[0];
2871 for (y = 0; y < NPDES; y++, pdep++) {
2872 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
2873 continue;
2874 }
2875 pdecnt++;
2876 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
2877 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
2878 for (x = 0; x < NPTES; x++, ptep++) {
2879 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
2880 continue;
2881 }
2882 ptecnt++;
2883 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
2884 x,
2885 *ptep,
2886 (y << 22) | (x << 12),
2887 *ptep & ~INTEL_OFFMASK);
2888 }
2889 }
2890
2891 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
2892
2893 }
2894 #endif /* MACH_KDB */
2895
2896 #include <mach_vm_debug.h>
2897 #if MACH_VM_DEBUG
2898 #include <vm/vm_debug.h>
2899
2900 int
2901 pmap_list_resident_pages(
2902 register pmap_t pmap,
2903 register vm_offset_t *listp,
2904 register int space)
2905 {
2906 return 0;
2907 }
2908 #endif /* MACH_VM_DEBUG */
2909
2910 #ifdef MACH_BSD
2911 /*
2912 * pmap_movepage (historically pmap_pagemove)
2913 *
2914 * BSD support routine to reassign virtual addresses.
2915 */
2916
2917 void
2918 pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
2919 {
2920 spl_t spl;
2921 pt_entry_t *pte, saved_pte;
2922 /* The kernel pmap is locked around each pte sample and re-install below */
2923
2924
2925 while (size > 0) {
2926 PMAP_READ_LOCK(kernel_pmap, spl);
2927 pte = pmap_pte(kernel_pmap, from);
2928 if (pte == NULL)
2929 panic("pmap_pagemove from pte NULL");
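		/*
		 * Snapshot the source pte so that its attribute bits
		 * (wired, cache mode, etc.) can be re-installed verbatim
		 * at the destination after pmap_enter() builds the new
		 * mapping.
		 */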
2930 saved_pte = *pte;
2931 PMAP_READ_UNLOCK(kernel_pmap, spl);
2932
2933 pmap_enter(kernel_pmap, to, i386_trunc_page(*pte),
2934 VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);
2935
2936 pmap_remove(kernel_pmap, from, from+PAGE_SIZE);
2937
2938 PMAP_READ_LOCK(kernel_pmap, spl);
2939 pte = pmap_pte(kernel_pmap, to);
2940 if (pte == NULL)
2941 panic("pmap_pagemove 'to' pte NULL");
2942
2943 *pte = saved_pte;
2944 PMAP_READ_UNLOCK(kernel_pmap, spl);
2945
2946 from += PAGE_SIZE;
2947 to += PAGE_SIZE;
2948 size -= PAGE_SIZE;
2949 }
2950
2951 /* Get the processors to update the TLBs */
2952 PMAP_FLUSH_TLBS();
2953
2954 }
2955
2956 kern_return_t bmapvideo(vm_offset_t *info);
2957 kern_return_t bmapvideo(vm_offset_t *info) {
2958
2959 extern struct vc_info vinfo;
2960 #ifdef NOTIMPLEMENTED
2961 (void)copyout((char *)&vinfo, (char *)info, sizeof(struct vc_info)); /* Copy out the video info */
2962 #endif
2963 return KERN_SUCCESS;
2964 }
2965
2966 kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr);
2967 kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr) {
2968
2969 #ifdef NOTIMPLEMENTED
2970 pmap_map_block(current_act()->task->map->pmap, va, pa, size, prot, attr); /* Map it in */
2971 #endif
2972 return KERN_SUCCESS;
2973 }
2974
2975 kern_return_t bmapmapr(vm_offset_t va);
2976 kern_return_t bmapmapr(vm_offset_t va) {
2977
2978 #ifdef NOTIMPLEMENTED
2979 mapping_remove(current_act()->task->map->pmap, va); /* Remove map */
2980 #endif
2981 return KERN_SUCCESS;
2982 }
2983 #endif
2984
2985 /* temporary workaround */
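/*
 * Returns FALSE if the address has no valid pte, or if the mapping is
 * both wired and non-cacheable (presumably a device mapping), which is
 * not safe to touch while writing a core file.
 */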
2986 boolean_t
2987 coredumpok(vm_map_t map, vm_offset_t va)
2988 {
2989 pt_entry_t *ptep;
2990 ptep = pmap_pte(map->pmap, va);
2991 if (0 == ptep) return FALSE;
2992 return ((*ptep & (INTEL_PTE_NCACHE|INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE|INTEL_PTE_WIRED));
2993 }