apple/xnu.git (xnu-792.6.56) - osfmk/i386/pmap.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 /*
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
29 * All Rights Reserved.
30 *
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
36 *
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 *
41 * Carnegie Mellon requests users of this software to return to
42 *
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
47 *
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
50 */
51 /*
52 */
53
54 /*
55 * File: pmap.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * (These guys wrote the Vax version)
58 *
59 * Physical Map management code for Intel i386, i486, and i860.
60 *
61 * Manages physical address maps.
62 *
63 * In addition to hardware address maps, this
64 * module is called upon to provide software-use-only
65 * maps which may or may not be stored in the same
66 * form as hardware maps. These pseudo-maps are
67 * used to store intermediate results from copy
68 * operations to and from address spaces.
69 *
70 * Since the information managed by this module is
71 * also stored by the logical address mapping module,
72 * this module may throw away valid virtual-to-physical
73 * mappings at almost any time. However, invalidations
74 * of virtual-to-physical mappings must be done as
75 * requested.
76 *
77 * In order to cope with hardware architectures which
78 * make virtual-to-physical map invalidates expensive,
79 * this module may delay invalidation or protection-reduction
80 * operations until such time as they are actually
81 * necessary. This module is given full information as
82 * to which processors are currently using which maps,
83 * and to when physical maps must be made correct.
84 */
85
86 #include <string.h>
87 #include <norma_vm.h>
88 #include <mach_kdb.h>
89 #include <mach_ldebug.h>
90
91 #include <mach/machine/vm_types.h>
92
93 #include <mach/boolean.h>
94 #include <kern/thread.h>
95 #include <kern/zalloc.h>
96
97 #include <kern/lock.h>
98 #include <kern/kalloc.h>
99 #include <kern/spl.h>
100
101 #include <vm/pmap.h>
102 #include <vm/vm_map.h>
103 #include <vm/vm_kern.h>
104 #include <mach/vm_param.h>
105 #include <mach/vm_prot.h>
106 #include <vm/vm_object.h>
107 #include <vm/vm_page.h>
108
109 #include <mach/machine/vm_param.h>
110 #include <machine/thread.h>
111
112 #include <kern/misc_protos.h> /* prototyping */
113 #include <i386/misc_protos.h>
114
115 #include <i386/cpuid.h>
116 #include <i386/cpu_data.h>
117 #include <i386/cpu_number.h>
118 #include <i386/machine_cpu.h>
119 #include <i386/mp_slave_boot.h>
120
121 #if MACH_KDB
122 #include <ddb/db_command.h>
123 #include <ddb/db_output.h>
124 #include <ddb/db_sym.h>
125 #include <ddb/db_print.h>
126 #endif /* MACH_KDB */
127
128 #include <kern/xpr.h>
129
130 #include <vm/vm_protos.h>
131
132 #include <i386/mp.h>
133
134 /*
135 * Forward declarations for internal functions.
136 */
137 void pmap_expand(
138 pmap_t map,
139 vm_offset_t v);
140
141 extern void pmap_remove_range(
142 pmap_t pmap,
143 vm_offset_t va,
144 pt_entry_t *spte,
145 pt_entry_t *epte);
146
147 void phys_attribute_clear(
148 ppnum_t phys,
149 int bits);
150
151 boolean_t phys_attribute_test(
152 ppnum_t phys,
153 int bits);
154
155 void phys_attribute_set(
156 ppnum_t phys,
157 int bits);
158
159 void pmap_growkernel(
160 vm_offset_t addr);
161
162 void pmap_set_reference(
163 ppnum_t pn);
164
165 void pmap_movepage(
166 unsigned long from,
167 unsigned long to,
168 vm_size_t size);
169
170 pt_entry_t * pmap_mapgetpte(
171 vm_map_t map,
172 vm_offset_t v);
173
174 boolean_t phys_page_exists(
175 ppnum_t pn);
176
177 #ifndef set_dirbase
178 void set_dirbase(vm_offset_t dirbase);
179 #endif /* set_dirbase */
180
181 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
182
183 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
184 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
185
186 #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL)
187 #define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
188
189 /*
190 * Private data structures.
191 */
192
193 /*
194 * For each vm_page_t, there is a list of all currently
195 * valid virtual mappings of that page. An entry is
196 * a pv_entry_t; the list is the pv_table.
197 */
198
199 typedef struct pv_entry {
200 struct pv_entry *next; /* next pv_entry */
201 pmap_t pmap; /* pmap where mapping lies */
202 vm_offset_t va; /* virtual address for mapping */
203 } *pv_entry_t;
204
205 #define PV_ENTRY_NULL ((pv_entry_t) 0)
206
207 pv_entry_t pv_head_table; /* array of entries, one per page */
208
209 /*
210 * pv_list entries are kept on a list that can only be accessed
211 * with the pmap system locked (at SPLVM, not in the cpus_active set).
212 * The list is refilled from the pv_list_zone if it becomes empty.
213 */
214 pv_entry_t pv_free_list; /* free list at SPLVM */
215 decl_simple_lock_data(,pv_free_list_lock)
216 int pv_free_count = 0;
217 #define PV_LOW_WATER_MARK 5000
218 #define PV_ALLOC_CHUNK 2000
219 thread_call_t mapping_adjust_call;
220 static thread_call_data_t mapping_adjust_call_data;
221 int mappingrecurse = 0;
222
223 #define PV_ALLOC(pv_e) { \
224 simple_lock(&pv_free_list_lock); \
225 if ((pv_e = pv_free_list) != 0) { \
226 pv_free_list = pv_e->next; \
227 pv_free_count--; \
228 if (pv_free_count < PV_LOW_WATER_MARK) \
229 if (hw_compare_and_store(0,1,&mappingrecurse)) \
230 thread_call_enter(mapping_adjust_call); \
231 } \
232 simple_unlock(&pv_free_list_lock); \
233 }
234
235 #define PV_FREE(pv_e) { \
236 simple_lock(&pv_free_list_lock); \
237 pv_e->next = pv_free_list; \
238 pv_free_list = pv_e; \
239 pv_free_count++; \
240 simple_unlock(&pv_free_list_lock); \
241 }
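
/*
 * Illustrative (hedged) example of the free-list discipline above.
 * This is a sketch, not part of the build: it only shows the
 * PV_ALLOC/PV_FREE pairing that pmap_enter() uses further down,
 * where the caller variables are assumed to exist.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pv_entry_t	pv_e;

	PV_ALLOC(pv_e);			/* may kick mapping_adjust_call */
	if (pv_e == PV_ENTRY_NULL)
		panic("example: pv_free_list exhausted");

	/* ... link pv_e onto a pv_head_table chain, or ... */

	PV_FREE(pv_e);			/* return it if it turned out unused */
#endif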
242
243 zone_t pv_list_zone; /* zone of pv_entry structures */
244
245 #ifdef PAE
246 static zone_t pdpt_zone;
247 #endif
248
249
250 /*
251 * Each entry in the pv_head_table is locked by a bit in the
252 * pv_lock_table. The lock bits are accessed by the physical
253 * address of the page they lock.
254 */
255
256 char *pv_lock_table; /* pointer to array of bits */
257 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
258
259 /*
260 * First and last physical addresses that we maintain any information
261 * for. Initialized to zero so that pmap operations done before
262 * pmap_init won't touch any non-existent structures.
263 */
264 pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
265 pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
266 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
267
268 pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0;
269
270 #define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
271 #if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */
272 static struct vm_object kptobj_object_store;
273 static vm_object_t kptobj;
274 #endif
275
276
277 /*
278 * Index into pv_head table, its lock bits, and the modify/reference
279 * bits starting at vm_first_phys.
280 */
281
282 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
283
284 #define pai_to_pvh(pai) (&pv_head_table[pai])
285 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
286 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
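
/*
 * Hedged sketch of how the macros above are used together: translate a
 * managed physical address into its pv_head_table index, take the
 * per-page lock bit, and walk the pv chain.  "some_physical_address"
 * is an assumed input; pmap_phys_attributes[] is declared just below.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_paddr_t	pa  = some_physical_address;	/* assumed input */
	int		pai = pa_index(pa);
	pv_entry_t	pv_h = pai_to_pvh(pai);

	lock_pvh_pai(pai);
	/* ... walk pv_h->next, consult pmap_phys_attributes[pai] ... */
	unlock_pvh_pai(pai);
#endif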
287
288 /*
289 * Array of physical page attributes for managed pages.
290 * One byte per physical page.
291 */
292 char *pmap_phys_attributes;
293
294 /*
295 * Physical page attributes. Copy bits from PTE definition.
296 */
297 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
298 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
299 #define PHYS_NCACHE INTEL_PTE_NCACHE
300
301 /*
302 * Amount of virtual memory mapped by one
303 * page-directory entry.
304 */
305 #define PDE_MAPPED_SIZE (pdetova(1))
306
307 /*
308 * Locking and TLB invalidation
309 */
310
311 /*
312 * Locking Protocols:
313 *
314 * There are two structures in the pmap module that need locking:
315 * the pmaps themselves, and the per-page pv_lists (which are locked
316 * by locking the pv_lock_table entry that corresponds to the pv_head
317 * for the list in question.) Most routines want to lock a pmap and
318 * then do operations in it that require pv_list locking -- however
319 * pmap_remove_all and pmap_copy_on_write operate on a physical page
320 * basis and want to do the locking in the reverse order, i.e. lock
321 * a pv_list and then go through all the pmaps referenced by that list.
322 * To protect against deadlock between these two cases, the pmap_lock
323 * is used. There are three different locking protocols as a result:
324 *
325 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
326 * the pmap.
327 *
328 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
329 * lock on the pmap_lock (shared read), then lock the pmap
330 * and finally the pv_lists as needed [i.e. pmap lock before
331 * pv_list lock.]
332 *
333 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
334 * Get a write lock on the pmap_lock (exclusive write); this
335 * also guarantees exclusive access to the pv_lists. Lock the
336 * pmaps as needed.
337 *
338 * At no time may any routine hold more than one pmap lock or more than
339 * one pv_list lock. Because interrupt level routines can allocate
340 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
341 * kernel_pmap can only be held at splhigh.
342 */
343
344 /*
345 * We raise the interrupt level to splvm, to block interprocessor
346 * interrupts during pmap operations. We must take the CPU out of
347 * the cpus_active set while interrupts are blocked.
348 */
349 #define SPLVM(spl) { \
350 spl = splhigh(); \
351 mp_disable_preemption(); \
352 i_bit_clear(cpu_number(), &cpus_active); \
353 mp_enable_preemption(); \
354 }
355
356 #define SPLX(spl) { \
357 mp_disable_preemption(); \
358 i_bit_set(cpu_number(), &cpus_active); \
359 mp_enable_preemption(); \
360 splx(spl); \
361 }
362
363 /*
364 * Lock on pmap system
365 */
366 lock_t pmap_system_lock;
367
368 #define PMAP_READ_LOCK(pmap, spl) { \
369 SPLVM(spl); \
370 lock_read(&pmap_system_lock); \
371 simple_lock(&(pmap)->lock); \
372 }
373
374 #define PMAP_WRITE_LOCK(spl) { \
375 SPLVM(spl); \
376 lock_write(&pmap_system_lock); \
377 }
378
379 #define PMAP_READ_UNLOCK(pmap, spl) { \
380 simple_unlock(&(pmap)->lock); \
381 lock_read_done(&pmap_system_lock); \
382 SPLX(spl); \
383 }
384
385 #define PMAP_WRITE_UNLOCK(spl) { \
386 lock_write_done(&pmap_system_lock); \
387 SPLX(spl); \
388 }
389
390 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
391 simple_lock(&(pmap)->lock); \
392 lock_write_to_read(&pmap_system_lock); \
393 }
394
395 #define LOCK_PVH(index) lock_pvh_pai(index)
396
397 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
398
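/*
 * Hedged sketch of locking protocol 2 from the "Locking Protocols"
 * comment above: a pmap-based operation takes the system lock shared
 * plus the pmap lock (PMAP_READ_LOCK) first, and only then the
 * per-page pv lock.  pmap and pai are assumed to be known.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_t	pmap;		/* target pmap, assumed */
	int	pai;		/* pv_head_table index, assumed */
	spl_t	spl;

	PMAP_READ_LOCK(pmap, spl);
	LOCK_PVH(pai);
	/* ... edit the pte and the pv list for this page ... */
	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);
#endif
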
399 #if USLOCK_DEBUG
400 extern int max_lock_loops;
401 extern int disableSerialOuput;
402 #define LOOP_VAR \
403 unsigned int loop_count; \
404 loop_count = disableSerialOuput ? max_lock_loops \
405 : max_lock_loops*100
406 #define LOOP_CHECK(msg, pmap) \
407 if (--loop_count == 0) { \
408 mp_disable_preemption(); \
409 kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \
410 msg, cpu_number(), pmap, cpus_active); \
411 Debugger("deadlock detection"); \
412 mp_enable_preemption(); \
413 loop_count = max_lock_loops; \
414 }
415 #else /* USLOCK_DEBUG */
416 #define LOOP_VAR
417 #define LOOP_CHECK(msg, pmap)
418 #endif /* USLOCK_DEBUG */
419
420 #define PMAP_UPDATE_TLBS(pmap, s, e) \
421 { \
422 cpu_set cpu_mask; \
423 cpu_set users; \
424 \
425 mp_disable_preemption(); \
426 cpu_mask = 1 << cpu_number(); \
427 \
428 /* Since the pmap is locked, other updates are locked */ \
429 /* out, and any pmap_activate has finished. */ \
430 \
431 /* find other cpus using the pmap */ \
432 users = (pmap)->cpus_using & ~cpu_mask; \
433 if (users) { \
434 LOOP_VAR; \
435 /* signal them, and wait for them to finish */ \
436 /* using the pmap */ \
437 signal_cpus(users, (pmap), (s), (e)); \
438 while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
439 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \
440 cpu_pause(); \
441 } \
442 } \
443 /* invalidate our own TLB if pmap is in use */ \
444 \
445 if ((pmap)->cpus_using & cpu_mask) { \
446 INVALIDATE_TLB((pmap), (s), (e)); \
447 } \
448 \
449 mp_enable_preemption(); \
450 }
451
452 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
453
454 #define INVALIDATE_TLB(m, s, e) { \
455 flush_tlb(); \
456 }
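
/*
 * Hedged sketch of the pattern used throughout this file after editing
 * page table entries: change the pte(s) while the pmap is locked, then
 * call PMAP_UPDATE_TLBS over the affected range so stale translations
 * are shot down on every cpu in pmap->cpus_using.  The pte, va and
 * pmap below are assumed inputs.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_t		pmap;	/* assumed: locked target pmap */
	pt_entry_t	*pte;	/* assumed: pte mapping va in pmap */
	vm_offset_t	va;	/* assumed: page-aligned va */

	*pte = 0;					/* drop the mapping */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);	/* invalidate it */
#endif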
457
458 /*
459 * Structures to keep track of pending TLB invalidations
460 */
461 cpu_set cpus_active;
462 cpu_set cpus_idle;
463
464 #define UPDATE_LIST_SIZE 4
465
466 struct pmap_update_item {
467 pmap_t pmap; /* pmap to invalidate */
468 vm_offset_t start; /* start address to invalidate */
469 vm_offset_t end; /* end address to invalidate */
470 };
471
472 typedef struct pmap_update_item *pmap_update_item_t;
473
474 /*
475 * List of pmap updates. If the list overflows,
476 * the last entry is changed to invalidate all.
477 */
478 struct pmap_update_list {
479 decl_simple_lock_data(,lock)
480 int count;
481 struct pmap_update_item item[UPDATE_LIST_SIZE];
482 } ;
483 typedef struct pmap_update_list *pmap_update_list_t;
484
485 extern void signal_cpus(
486 cpu_set use_list,
487 pmap_t pmap,
488 vm_offset_t start,
489 vm_offset_t end);
490
491 pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
492
493 /*
494 * Other useful macros.
495 */
496 #define current_pmap() (vm_map_pmap(current_thread()->map))
497 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
498
499 struct pmap kernel_pmap_store;
500 pmap_t kernel_pmap;
501
502 #ifdef PMAP_QUEUE
503 decl_simple_lock_data(,free_pmap_lock)
504 #endif
505
506 struct zone *pmap_zone; /* zone of pmap structures */
507
508 int pmap_debug = 0; /* flag for debugging prints */
509
510 unsigned int inuse_ptepages_count = 0; /* debugging */
511
512 /*
513 * Pmap cache. Cache is threaded through ref_count field of pmap.
514 * Max will eventually be constant -- variable for experimentation.
515 */
516 int pmap_cache_max = 32;
517 int pmap_alloc_chunk = 8;
518 pmap_t pmap_cache_list;
519 int pmap_cache_count;
520 decl_simple_lock_data(,pmap_cache_lock)
521
522 extern vm_offset_t hole_start, hole_end;
523
524 extern char end;
525
526 static int nkpt;
527
528 pt_entry_t *DMAP1, *DMAP2;
529 caddr_t DADDR1;
530 caddr_t DADDR2;
531
532 #if DEBUG_ALIAS
533 #define PMAP_ALIAS_MAX 32
534 struct pmap_alias {
535 vm_offset_t rpc;
536 pmap_t pmap;
537 vm_offset_t va;
538 int cookie;
539 #define PMAP_ALIAS_COOKIE 0xdeadbeef
540 } pmap_aliasbuf[PMAP_ALIAS_MAX];
541 int pmap_alias_index = 0;
542 extern vm_offset_t get_rpc();
543
544 #endif /* DEBUG_ALIAS */
545
546 #define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
547 #define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT])
548
549 static __inline int
550 pmap_is_current(pmap_t pmap)
551 {
552 return (pmap == kernel_pmap ||
553 (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
554 }
555
556
557 /*
558 * return address of mapped pte for vaddr va in pmap pmap.
559 */
560 pt_entry_t *
561 pmap_pte(pmap_t pmap, vm_offset_t va)
562 {
563 pd_entry_t *pde;
564 pd_entry_t newpf;
565
566 pde = pmap_pde(pmap, va);
567 if (*pde != 0) {
568 if (pmap_is_current(pmap))
569 return( vtopte(va));
570 newpf = *pde & PG_FRAME;
571 if (((*CM4) & PG_FRAME) != newpf) {
572 *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
573 invlpg((u_int)CA4);
574 }
575 return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
576 }
577 return(0);
578 }
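
/*
 * Hedged usage sketch for pmap_pte(): look up and inspect the pte that
 * maps va.  pmap_pte() returns 0 when no page table covers va, so the
 * result must be checked before dereferencing.  pmap and va are
 * assumed inputs.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_t		pmap;	/* assumed input */
	vm_offset_t	va;	/* assumed input */
	pt_entry_t	*pte;

	pte = pmap_pte(pmap, va);
	if (pte != PT_ENTRY_NULL && (*pte & INTEL_PTE_VALID)) {
		pmap_paddr_t pa = pte_to_pa(*pte);
		/* ... pa is the physical page backing va ... */
	}
#endif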
579
580 #define DEBUG_PTE_PAGE 0
581
582 #if DEBUG_PTE_PAGE
583 void
584 ptep_check(
585 ptep_t ptep)
586 {
587 register pt_entry_t *pte, *epte;
588 int ctu, ctw;
589
590 /* check the use and wired counts */
591 if (ptep == PTE_PAGE_NULL)
592 return;
593 pte = pmap_pte(ptep->pmap, ptep->va);
594 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
595 ctu = 0;
596 ctw = 0;
597 while (pte < epte) {
598 if (pte->pfn != 0) {
599 ctu++;
600 if (pte->wired)
601 ctw++;
602 }
603 pte++;
604 }
605
606 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
607 printf("use %d wired %d - actual use %d wired %d\n",
608 ptep->use_count, ptep->wired_count, ctu, ctw);
609 panic("pte count");
610 }
611 }
612 #endif /* DEBUG_PTE_PAGE */
613
614 /*
615 * Map memory at initialization. The physical addresses being
616 * mapped are not managed and are never unmapped.
617 *
618 * For now, VM is already on, we only need to map the
619 * specified memory.
620 */
621 vm_offset_t
622 pmap_map(
623 register vm_offset_t virt,
624 register vm_offset_t start_addr,
625 register vm_offset_t end_addr,
626 register vm_prot_t prot)
627 {
628 register int ps;
629
630 ps = PAGE_SIZE;
631 while (start_addr < end_addr) {
632 pmap_enter(kernel_pmap,
633 virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
634 virt += ps;
635 start_addr += ps;
636 }
637 return(virt);
638 }
639
640 /*
641 * Back-door routine for mapping kernel VM at initialization.
642 * Useful for mapping memory outside the range
643 * [vm_first_phys, vm_last_phys) (i.e., devices).
644 * Sets no-cache, A, D.
645 * Otherwise like pmap_map.
646 */
647 vm_offset_t
648 pmap_map_bd(
649 register vm_offset_t virt,
650 register vm_offset_t start_addr,
651 register vm_offset_t end_addr,
652 vm_prot_t prot)
653 {
654 register pt_entry_t template;
655 register pt_entry_t *pte;
656
657 template = pa_to_pte(start_addr)
658 | INTEL_PTE_NCACHE
659 | INTEL_PTE_REF
660 | INTEL_PTE_MOD
661 | INTEL_PTE_WIRED
662 | INTEL_PTE_VALID;
663 if (prot & VM_PROT_WRITE)
664 template |= INTEL_PTE_WRITE;
665
666 /* XXX move pmap_pte out of loop, once one pte mapped, all are */
667 while (start_addr < end_addr) {
668 pte = pmap_pte(kernel_pmap, virt);
669 if (pte == PT_ENTRY_NULL) {
670 panic("pmap_map_bd: Invalid kernel address\n");
671 }
672 WRITE_PTE_FAST(pte, template)
673 pte_increment_pa(template);
674 virt += PAGE_SIZE;
675 start_addr += PAGE_SIZE;
676 }
677
678 flush_tlb();
679 return(virt);
680 }
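
/*
 * Hedged usage sketch for pmap_map_bd(): wire a hypothetical device
 * register page into the kernel map, uncached, at the next free kernel
 * va.  The physical address is made up for illustration, and
 * virtual_avail is assumed to be valid at the point of the call.
 */
#if 0	/* illustrative sketch only -- not compiled */
	vm_offset_t	va   = virtual_avail;		/* next free kernel va, assumed */
	vm_offset_t	phys = 0xfee00000;		/* hypothetical device page */

	va = pmap_map_bd(va, phys, phys + PAGE_SIZE,
			 VM_PROT_READ | VM_PROT_WRITE);
	/* pmap_map_bd returns the va just past the new mapping */
#endif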
681
682 extern char *first_avail;
683 extern vm_offset_t virtual_avail, virtual_end;
684 extern pmap_paddr_t avail_start, avail_end;
685 extern vm_offset_t etext;
686 extern void *sectHIBB;
687 extern int sectSizeHIB;
688
689 /*
690 * Bootstrap the system enough to run with virtual memory.
691 * Map the kernel's code and data, and allocate the system page table.
692 * Called with mapping OFF. Page_size must already be set.
693 *
694 * Parameters:
695 * load_start: PA where kernel was loaded
696 * avail_start PA of first available physical page -
697 * after kernel page tables
698 * avail_end PA of last available physical page
699 * virtual_avail VA of first available page -
700 * after kernel page tables
701 * virtual_end VA of last available page -
702 * end of kernel address space
703 *
704 * &start_text start of kernel text
705 * &etext end of kernel text
706 */
707
708 void
709 pmap_bootstrap(
710 __unused vm_offset_t load_start)
711 {
712 vm_offset_t va;
713 pt_entry_t *pte;
714 int i;
715 int wpkernel, boot_arg;
716
717 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
718 * known to VM */
719
720 /*
721 * The kernel's pmap is statically allocated so we don't
722 * have to use pmap_create, which is unlikely to work
723 * correctly at this part of the boot sequence.
724 */
725
726 kernel_pmap = &kernel_pmap_store;
727 #ifdef PMAP_QUEUE
728 kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */
729 kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */
730 #endif
731 kernel_pmap->ref_count = 1;
732 kernel_pmap->pm_obj = (vm_object_t) NULL;
733 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
734 kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD;
735 #ifdef PAE
736 kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
737 kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT;
738 #endif
739
740 va = (vm_offset_t)kernel_pmap->dirbase;
741 /* setup self referential mapping(s) */
742 for (i = 0; i< NPGPTD; i++ ) {
743 pmap_paddr_t pa;
744 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
745 * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) =
746 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
747 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
748 #ifdef PAE
749 kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID;
750 #endif
751 }
752
753 nkpt = NKPT;
754
755 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
756 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
757
758 /*
759 * Reserve some special page table entries/VA space for temporary
760 * mapping of pages.
761 */
762 #define SYSMAP(c, p, v, n) \
763 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
764
765 va = virtual_avail;
766 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
767
768 /*
769 * CMAP1/CMAP2 are used for zeroing and copying pages.
770 * CMAP3 is used for ml_phys_read/write.
771 */
772 SYSMAP(caddr_t, CM1, CA1, 1)
773 * (pt_entry_t *) CM1 = 0;
774 SYSMAP(caddr_t, CM2, CA2, 1)
775 * (pt_entry_t *) CM2 = 0;
776 SYSMAP(caddr_t, CM3, CA3, 1)
777 * (pt_entry_t *) CM3 = 0;
778
779 /* used by pmap_pte */
780 SYSMAP(caddr_t, CM4, CA4, 1)
781 * (pt_entry_t *) CM4 = 0;
782
783 /* DMAP used by the debugger */
784 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
785 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
786
787
788 lock_init(&pmap_system_lock,
789 FALSE, /* NOT a sleep lock */
790 0, 0);
791
792 virtual_avail = va;
793
794 wpkernel = 1;
795 if (PE_parse_boot_arg("debug", &boot_arg)) {
796 if (boot_arg & DB_PRT) wpkernel = 0;
797 if (boot_arg & DB_NMI) wpkernel = 0;
798 }
799
800 /* remap kernel text readonly if not debugging or kprintfing */
801 if (wpkernel)
802 {
803 vm_offset_t myva;
804 pt_entry_t *ptep;
805
806 for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
807 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
808 continue;
809 ptep = pmap_pte(kernel_pmap, myva);
810 if (ptep)
811 *ptep &= ~INTEL_PTE_RW;
812 }
813 flush_tlb();
814 }
815
816 simple_lock_init(&kernel_pmap->lock, 0);
817 simple_lock_init(&pv_free_list_lock, 0);
818
819 /* invalidate user virtual addresses */
820 memset((char *)kernel_pmap->dirbase,
821 0,
822 (KPTDI) * sizeof(pd_entry_t));
823
824 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
825 VADDR(KPTDI,0), virtual_end);
826 #ifdef PAE
827 kprintf("Available physical space from 0x%llx to 0x%llx\n",
828 avail_start, avail_end);
829 printf("PAE enabled\n");
830 #else
831 kprintf("Available physical space from 0x%x to 0x%x\n",
832 avail_start, avail_end);
833 #endif
834 }
835
836 void
837 pmap_virtual_space(
838 vm_offset_t *startp,
839 vm_offset_t *endp)
840 {
841 *startp = virtual_avail;
842 *endp = virtual_end;
843 }
844
845 /*
846 * Initialize the pmap module.
847 * Called by vm_init, to initialize any structures that the pmap
848 * system needs to map virtual memory.
849 */
850 void
851 pmap_init(void)
852 {
853 register long npages;
854 vm_offset_t addr;
855 register vm_size_t s;
856 vm_offset_t vaddr;
857 ppnum_t ppn;
858
859 /*
860 * Allocate memory for the pv_head_table and its lock bits,
861 * the modify bit array, and the pte_page table.
862 */
863
864 /* zero bias all these arrays now instead of off avail_start
865 so we cover all memory */
866 npages = i386_btop(avail_end);
867 s = (vm_size_t) (sizeof(struct pv_entry) * npages
868 + pv_lock_table_size(npages)
869 + npages);
870
871 s = round_page(s);
872 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
873 panic("pmap_init");
874
875 memset((char *)addr, 0, s);
876
877 /*
878 * Allocate the structures first to preserve word-alignment.
879 */
880 pv_head_table = (pv_entry_t) addr;
881 addr = (vm_offset_t) (pv_head_table + npages);
882
883 pv_lock_table = (char *) addr;
884 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
885
886 pmap_phys_attributes = (char *) addr;
887
888 /*
889 * Create the zone of physical maps,
890 * and of the physical-to-virtual entries.
891 */
892 s = (vm_size_t) sizeof(struct pmap);
893 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
894 s = (vm_size_t) sizeof(struct pv_entry);
895 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
896 #ifdef PAE
897 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
898 s = 63;
899 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
900 #endif
901
902 /*
903 * Only now, when all of the data structures are allocated,
904 * can we set vm_first_phys and vm_last_phys. If we set them
905 * too soon, the kmem_alloc_wired above will try to use these
906 * data structures and blow up.
907 */
908
909 /* zero bias this now so we cover all memory */
910 vm_first_phys = 0;
911 vm_last_phys = avail_end;
912
913 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
914 kptobj = &kptobj_object_store;
915 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
916 kernel_pmap->pm_obj = kptobj;
917 #endif
918
919 /* create pv entries for kernel pages mapped by low level
920 startup code. these have to exist so we can pmap_remove()
921 e.g. kext pages from the middle of our addr space */
922
923 vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS;
924 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
925 pv_entry_t pv_e;
926
927 pv_e = pai_to_pvh(ppn);
928 pv_e->va = vaddr;
929 vaddr += PAGE_SIZE;
930 pv_e->pmap = kernel_pmap;
931 pv_e->next = PV_ENTRY_NULL;
932 }
933
934 pmap_initialized = TRUE;
935
936 /*
937 * Initialize pmap cache.
938 */
939 pmap_cache_list = PMAP_NULL;
940 pmap_cache_count = 0;
941 simple_lock_init(&pmap_cache_lock, 0);
942 #ifdef PMAP_QUEUE
943 simple_lock_init(&free_pmap_lock, 0);
944 #endif
945
946 }
947
948 void
949 x86_lowmem_free(void)
950 {
951 /* free lowmem pages back to the vm system. we had to defer doing this
952 until the vm system was fully up.
953 the actual pages that are released are determined by which
954 pages the memory sizing code puts into the region table */
955
956 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS,
957 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
958 }
959
960
961 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
962
963 boolean_t
964 pmap_verify_free(
965 ppnum_t pn)
966 {
967 pmap_paddr_t phys;
968 pv_entry_t pv_h;
969 int pai;
970 spl_t spl;
971 boolean_t result;
972
973 assert(pn != vm_page_fictitious_addr);
974 phys = (pmap_paddr_t)i386_ptob(pn);
975 if (!pmap_initialized)
976 return(TRUE);
977
978 if (!pmap_valid_page(pn))
979 return(FALSE);
980
981 PMAP_WRITE_LOCK(spl);
982
983 pai = pa_index(phys);
984 pv_h = pai_to_pvh(pai);
985
986 result = (pv_h->pmap == PMAP_NULL);
987 PMAP_WRITE_UNLOCK(spl);
988
989 return(result);
990 }
991
992 /*
993 * Create and return a physical map.
994 *
995 * If the size specified for the map
996 * is zero, the map is an actual physical
997 * map, and may be referenced by the
998 * hardware.
999 *
1000 * If the size specified is non-zero,
1001 * the map will be used in software only, and
1002 * is bounded by that size.
1003 */
1004 pmap_t
1005 pmap_create(
1006 vm_size_t size)
1007 {
1008 register pmap_t p;
1009 #ifdef PMAP_QUEUE
1010 register pmap_t pro;
1011 spl_t s;
1012 #endif
1013 register int i;
1014 register vm_offset_t va;
1015
1016 /*
1017 * A software use-only map doesn't even need a map.
1018 */
1019
1020 if (size != 0) {
1021 return(PMAP_NULL);
1022 }
1023
1024 p = (pmap_t) zalloc(pmap_zone);
1025 if (PMAP_NULL == p)
1026 panic("pmap_create zalloc");
1027 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1028 panic("pmap_create kmem_alloc_wired");
1029 #ifdef PAE
1030 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1031 if ((vm_offset_t)NULL == p->pm_hold) {
1032 panic("pdpt zalloc");
1033 }
1034 p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1035 p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */
1036 #endif
1037 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG))))
1038 panic("pmap_create vm_object_allocate");
1039 memcpy(p->dirbase,
1040 (void *)((unsigned int)IdlePTD | KERNBASE),
1041 NBPTD);
1042 va = (vm_offset_t)p->dirbase;
1043 p->pdirbase = (pd_entry_t *)(kvtophys(va));
1044 simple_lock_init(&p->lock, 0);
1045
1046 /* setup self referential mapping(s) */
1047 for (i = 0; i< NPGPTD; i++ ) {
1048 pmap_paddr_t pa;
1049 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1050 * (pd_entry_t *) (p->dirbase + PTDPTDI + i) =
1051 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
1052 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
1053 #ifdef PAE
1054 p->pm_pdpt[i] = pa | INTEL_PTE_VALID;
1055 #endif
1056 }
1057
1058 p->cpus_using = 0;
1059 p->stats.resident_count = 0;
1060 p->stats.wired_count = 0;
1061 p->ref_count = 1;
1062
1063 #ifdef PMAP_QUEUE
1064 /* insert new pmap at head of queue hanging off kernel_pmap */
1065 SPLVM(s);
1066 simple_lock(&free_pmap_lock);
1067 p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next;
1068 kernel_pmap->pmap_link.next = (queue_t)p;
1069
1070 pro = (pmap_t) p->pmap_link.next;
1071 p->pmap_link.prev = (queue_t)pro->pmap_link.prev;
1072 pro->pmap_link.prev = (queue_t)p;
1073
1074
1075 simple_unlock(&free_pmap_lock);
1076 SPLX(s);
1077 #endif
1078
1079 return(p);
1080 }
1081
1082 /*
1083 * Retire the given physical map from service.
1084 * Should only be called if the map contains
1085 * no valid mappings.
1086 */
1087
1088 void
1089 pmap_destroy(
1090 register pmap_t p)
1091 {
1092 register pt_entry_t *pdep;
1093 register int c;
1094 spl_t s;
1095 register vm_page_t m;
1096 #ifdef PMAP_QUEUE
1097 register pmap_t pre,pro;
1098 #endif
1099
1100 if (p == PMAP_NULL)
1101 return;
1102
1103 SPLVM(s);
1104 simple_lock(&p->lock);
1105 c = --p->ref_count;
1106 if (c == 0) {
1107 register int my_cpu;
1108
1109 mp_disable_preemption();
1110 my_cpu = cpu_number();
1111
1112 /*
1113 * If some cpu is not using the physical pmap pointer that it
1114 * is supposed to be (see set_dirbase), we might be using the
1115 * pmap that is being destroyed! Make sure we are
1116 * physically on the right pmap:
1117 */
1118 /* force pmap/cr3 update */
1119 PMAP_UPDATE_TLBS(p,
1120 VM_MIN_ADDRESS,
1121 VM_MAX_KERNEL_ADDRESS);
1122
1123 if (PMAP_REAL(my_cpu) == p) {
1124 PMAP_CPU_CLR(p, my_cpu);
1125 PMAP_REAL(my_cpu) = kernel_pmap;
1126 #ifdef PAE
1127 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
1128 #else
1129 set_cr3((unsigned int)kernel_pmap->pdirbase);
1130 #endif
1131 }
1132 mp_enable_preemption();
1133 }
1134 simple_unlock(&p->lock);
1135 SPLX(s);
1136
1137 if (c != 0) {
1138 return; /* still in use */
1139 }
1140
1141 #ifdef PMAP_QUEUE
1142 /* remove from pmap queue */
1143 SPLVM(s);
1144 simple_lock(&free_pmap_lock);
1145
1146 pre = (pmap_t)p->pmap_link.prev;
1147 pre->pmap_link.next = (queue_t)p->pmap_link.next;
1148 pro = (pmap_t)p->pmap_link.next;
1149 pro->pmap_link.prev = (queue_t)p->pmap_link.prev;
1150
1151 simple_unlock(&free_pmap_lock);
1152 SPLX(s);
1153 #endif
1154
1155 /*
1156 * Free the memory maps, then the
1157 * pmap structure.
1158 */
1159
1160 pdep = (pt_entry_t *)p->dirbase;
1161
1162 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
1163 int ind;
1164 if (*pdep & INTEL_PTE_VALID) {
1165 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1166 vm_object_lock(p->pm_obj);
1167 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1168 if (m == VM_PAGE_NULL) {
1169 panic("pmap_destroy: pte page not in object");
1170 }
1171 vm_page_lock_queues();
1172 vm_page_free(m);
1173 inuse_ptepages_count--;
1174 vm_object_unlock(p->pm_obj);
1175 vm_page_unlock_queues();
1176
1177 /*
1178 * Clear pdes, this might be headed for the cache.
1179 */
1180 *pdep++ = 0;
1181 }
1182 else {
1183 *pdep++ = 0;
1184 }
1185
1186 }
1187
1188 vm_object_deallocate(p->pm_obj);
1189 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1190 #ifdef PAE
1191 zfree(pdpt_zone, (void *)p->pm_hold);
1192 #endif
1193 zfree(pmap_zone, p);
1194 }
1195
1196 /*
1197 * Add a reference to the specified pmap.
1198 */
1199
1200 void
1201 pmap_reference(
1202 register pmap_t p)
1203 {
1204 spl_t s;
1205
1206 if (p != PMAP_NULL) {
1207 SPLVM(s);
1208 simple_lock(&p->lock);
1209 p->ref_count++;
1210 simple_unlock(&p->lock);
1211 SPLX(s);
1212 }
1213 }
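
/*
 * Hedged sketch of a user pmap's lifecycle using the routines above.
 * Size must be 0 for a real, hardware-visible map; each
 * pmap_reference() must be balanced by a pmap_destroy(), and the map
 * is only freed when ref_count drops to zero.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_t	p;

	p = pmap_create((vm_size_t) 0);		/* ref_count == 1 */
	pmap_reference(p);			/* ref_count == 2 */
	/* ... enter/remove mappings ... */
	pmap_destroy(p);			/* drops to 1, map survives */
	pmap_destroy(p);			/* drops to 0, map is freed */
#endif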
1214
1215 /*
1216 * Remove a range of hardware page-table entries.
1217 * The entries given are the first (inclusive)
1218 * and last (exclusive) entries for the VM pages.
1219 * The virtual address is the va for the first pte.
1220 *
1221 * The pmap must be locked.
1222 * If the pmap is not the kernel pmap, the range must lie
1223 * entirely within one pte-page. This is NOT checked.
1224 * Assumes that the pte-page exists.
1225 */
1226
1227 /* static */
1228 void
1229 pmap_remove_range(
1230 pmap_t pmap,
1231 vm_offset_t va,
1232 pt_entry_t *spte,
1233 pt_entry_t *epte)
1234 {
1235 register pt_entry_t *cpte;
1236 int num_removed, num_unwired;
1237 int pai;
1238 pmap_paddr_t pa;
1239
1240 #if DEBUG_PTE_PAGE
1241 if (pmap != kernel_pmap)
1242 ptep_check(get_pte_page(spte));
1243 #endif /* DEBUG_PTE_PAGE */
1244 num_removed = 0;
1245 num_unwired = 0;
1246
1247 for (cpte = spte; cpte < epte;
1248 cpte++, va += PAGE_SIZE) {
1249
1250 pa = pte_to_pa(*cpte);
1251 if (pa == 0)
1252 continue;
1253
1254 num_removed++;
1255 if (iswired(*cpte))
1256 num_unwired++;
1257
1258 if (!valid_page(i386_btop(pa))) {
1259
1260 /*
1261 * Outside range of managed physical memory.
1262 * Just remove the mappings.
1263 */
1264 register pt_entry_t *lpte = cpte;
1265
1266 *lpte = 0;
1267 continue;
1268 }
1269
1270 pai = pa_index(pa);
1271 LOCK_PVH(pai);
1272
1273 /*
1274 * Get the modify and reference bits.
1275 */
1276 {
1277 register pt_entry_t *lpte;
1278
1279 lpte = cpte;
1280 pmap_phys_attributes[pai] |=
1281 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1282 *lpte = 0;
1283
1284 }
1285
1286 /*
1287 * Remove the mapping from the pvlist for
1288 * this physical page.
1289 */
1290 {
1291 register pv_entry_t pv_h, prev, cur;
1292
1293 pv_h = pai_to_pvh(pai);
1294 if (pv_h->pmap == PMAP_NULL) {
1295 panic("pmap_remove: null pv_list!");
1296 }
1297 if (pv_h->va == va && pv_h->pmap == pmap) {
1298 /*
1299 * Header is the pv_entry. Copy the next one
1300 * to header and free the next one (we cannot
1301 * free the header)
1302 */
1303 cur = pv_h->next;
1304 if (cur != PV_ENTRY_NULL) {
1305 *pv_h = *cur;
1306 PV_FREE(cur);
1307 }
1308 else {
1309 pv_h->pmap = PMAP_NULL;
1310 }
1311 }
1312 else {
1313 cur = pv_h;
1314 do {
1315 prev = cur;
1316 if ((cur = prev->next) == PV_ENTRY_NULL) {
1317 panic("pmap-remove: mapping not in pv_list!");
1318 }
1319 } while (cur->va != va || cur->pmap != pmap);
1320 prev->next = cur->next;
1321 PV_FREE(cur);
1322 }
1323 UNLOCK_PVH(pai);
1324 }
1325 }
1326
1327 /*
1328 * Update the counts
1329 */
1330 assert(pmap->stats.resident_count >= num_removed);
1331 pmap->stats.resident_count -= num_removed;
1332 assert(pmap->stats.wired_count >= num_unwired);
1333 pmap->stats.wired_count -= num_unwired;
1334 }
1335
1336 /*
1337 * Remove phys addr if mapped in specified map
1338 *
1339 */
1340 void
1341 pmap_remove_some_phys(
1342 __unused pmap_t map,
1343 __unused ppnum_t pn)
1344 {
1345
1346 /* Implement to support working set code */
1347
1348 }
1349
1350 /*
1351 * Remove the given range of addresses
1352 * from the specified map.
1353 *
1354 * It is assumed that the start and end are properly
1355 * rounded to the hardware page size.
1356 */
1357
1358
1359 void
1360 pmap_remove(
1361 pmap_t map,
1362 addr64_t s64,
1363 addr64_t e64)
1364 {
1365 spl_t spl;
1366 register pt_entry_t *pde;
1367 register pt_entry_t *spte, *epte;
1368 vm_offset_t l;
1369 vm_offset_t s, e;
1370 vm_offset_t orig_s;
1371
1372 if (map == PMAP_NULL)
1373 return;
1374
1375 PMAP_READ_LOCK(map, spl);
1376
1377 if (value_64bit(s64) || value_64bit(e64)) {
1378 panic("pmap_remove addr overflow");
1379 }
1380
1381 orig_s = s = (vm_offset_t)low32(s64);
1382 e = (vm_offset_t)low32(e64);
1383
1384 pde = pmap_pde(map, s);
1385
1386 while (s < e) {
1387 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1388 if (l > e)
1389 l = e;
1390 if (*pde & INTEL_PTE_VALID) {
1391 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1392 spte = &spte[ptenum(s)];
1393 epte = &spte[intel_btop(l-s)];
1394 pmap_remove_range(map, s, spte, epte);
1395 }
1396 s = l;
1397 pde++;
1398 }
1399
1400 PMAP_UPDATE_TLBS(map, orig_s, e);
1401
1402 PMAP_READ_UNLOCK(map, spl);
1403 }
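
/*
 * Hedged usage sketch for pmap_remove(): tear down every mapping in a
 * va range.  Per the comment above, start and end are assumed to be
 * already rounded to the hardware page size; p, s and e are assumed
 * inputs.
 */
#if 0	/* illustrative sketch only -- not compiled */
	pmap_t		p;	/* assumed: target pmap */
	vm_offset_t	s;	/* assumed: page-aligned start va */
	vm_offset_t	e;	/* assumed: page-aligned end va */

	pmap_remove(p, (addr64_t) s, (addr64_t) e);
#endif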
1404
1405 /*
1406 * Routine: pmap_page_protect
1407 *
1408 * Function:
1409 * Lower the permission for all mappings to a given
1410 * page.
1411 */
1412 void
1413 pmap_page_protect(
1414 ppnum_t pn,
1415 vm_prot_t prot)
1416 {
1417 pv_entry_t pv_h, prev;
1418 register pv_entry_t pv_e;
1419 register pt_entry_t *pte;
1420 int pai;
1421 register pmap_t pmap;
1422 spl_t spl;
1423 boolean_t remove;
1424 pmap_paddr_t phys;
1425
1426 assert(pn != vm_page_fictitious_addr);
1427 phys = (pmap_paddr_t)i386_ptob(pn);
1428 if (!valid_page(pn)) {
1429 /*
1430 * Not a managed page.
1431 */
1432 return;
1433 }
1434
1435 /*
1436 * Determine the new protection.
1437 */
1438 switch (prot) {
1439 case VM_PROT_READ:
1440 case VM_PROT_READ|VM_PROT_EXECUTE:
1441 remove = FALSE;
1442 break;
1443 case VM_PROT_ALL:
1444 return; /* nothing to do */
1445 default:
1446 remove = TRUE;
1447 break;
1448 }
1449
1450 /*
1451 * Lock the pmap system first, since we will be changing
1452 * several pmaps.
1453 */
1454
1455 PMAP_WRITE_LOCK(spl);
1456
1457 pai = pa_index(phys);
1458 pv_h = pai_to_pvh(pai);
1459
1460 /*
1461 * Walk down PV list, changing or removing all mappings.
1462 * We do not have to lock the pv_list because we have
1463 * the entire pmap system locked.
1464 */
1465 if (pv_h->pmap != PMAP_NULL) {
1466
1467 prev = pv_e = pv_h;
1468 do {
1469 register vm_offset_t va;
1470 pmap = pv_e->pmap;
1471 /*
1472 * Lock the pmap to block pmap_extract and similar routines.
1473 */
1474 simple_lock(&pmap->lock);
1475
1476 {
1477
1478 va = pv_e->va;
1479 pte = pmap_pte(pmap, va);
1480
1481 /*
1482 * Consistency checks.
1483 */
1484 /* assert(*pte & INTEL_PTE_VALID); XXX */
1485 /* assert(pte_to_phys(*pte) == phys); */
1486
1487 }
1488
1489 /*
1490 * Remove the mapping if new protection is NONE
1491 * or if write-protecting a kernel mapping.
1492 */
1493 if (remove || pmap == kernel_pmap) {
1494 /*
1495 * Remove the mapping, collecting any modify bits.
1496 */
1497 {
1498 pmap_phys_attributes[pai] |=
1499 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1500 *pte++ = 0;
1501 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1502 }
1503
1504 assert(pmap->stats.resident_count >= 1);
1505 pmap->stats.resident_count--;
1506
1507 /*
1508 * Remove the pv_entry.
1509 */
1510 if (pv_e == pv_h) {
1511 /*
1512 * Fix up head later.
1513 */
1514 pv_h->pmap = PMAP_NULL;
1515 }
1516 else {
1517 /*
1518 * Delete this entry.
1519 */
1520 prev->next = pv_e->next;
1521 PV_FREE(pv_e);
1522 }
1523 }
1524 else {
1525 /*
1526 * Write-protect.
1527 */
1528
1529 *pte &= ~INTEL_PTE_WRITE;
1530 pte++;
1531 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1532 /*
1533 * Advance prev.
1534 */
1535 prev = pv_e;
1536 }
1537
1538 simple_unlock(&pmap->lock);
1539
1540 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1541
1542 /*
1543 * If pv_head mapping was removed, fix it up.
1544 */
1545 if (pv_h->pmap == PMAP_NULL) {
1546 pv_e = pv_h->next;
1547 if (pv_e != PV_ENTRY_NULL) {
1548 *pv_h = *pv_e;
1549 PV_FREE(pv_e);
1550 }
1551 }
1552 }
1553
1554 PMAP_WRITE_UNLOCK(spl);
1555 }
1556
1557 /*
1558 * Routine:
1559 * pmap_disconnect
1560 *
1561 * Function:
1562 * Disconnect all mappings for this page and return reference and change status
1563 * in generic format.
1564 *
1565 */
1566 unsigned int pmap_disconnect(
1567 ppnum_t pa)
1568 {
1569 pmap_page_protect(pa, 0); /* disconnect the page */
1570 return (pmap_get_refmod(pa)); /* return ref/chg status */
1571 }
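
/*
 * Hedged sketch of the two common pmap_page_protect() cases: dropping
 * write permission on every mapping of a physical page, and removing
 * every mapping outright, which is what pmap_disconnect() above does
 * before collecting the ref/mod state.  pn is an assumed managed page.
 */
#if 0	/* illustrative sketch only -- not compiled */
	ppnum_t	pn;	/* assumed: managed physical page number */

	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect all mappings */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove all mappings */
#endif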
1572
1573 /*
1574 * Set the physical protection on the
1575 * specified range of this map as requested.
1576 * Will not increase permissions.
1577 */
1578 void
1579 pmap_protect(
1580 pmap_t map,
1581 vm_offset_t s,
1582 vm_offset_t e,
1583 vm_prot_t prot)
1584 {
1585 register pt_entry_t *pde;
1586 register pt_entry_t *spte, *epte;
1587 vm_offset_t l;
1588 spl_t spl;
1589 vm_offset_t orig_s = s;
1590
1591
1592 if (map == PMAP_NULL)
1593 return;
1594
1595 /*
1596 * Determine the new protection.
1597 */
1598 switch (prot) {
1599 case VM_PROT_READ:
1600 case VM_PROT_READ|VM_PROT_EXECUTE:
1601 break;
1602 case VM_PROT_READ|VM_PROT_WRITE:
1603 case VM_PROT_ALL:
1604 return; /* nothing to do */
1605 default:
1606 pmap_remove(map, (addr64_t)s, (addr64_t)e);
1607 return;
1608 }
1609
1610 SPLVM(spl);
1611 simple_lock(&map->lock);
1612
1613 pde = pmap_pde(map, s);
1614 while (s < e) {
1615 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1616 if (l > e)
1617 l = e;
1618 if (*pde & INTEL_PTE_VALID) {
1619 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1620 spte = &spte[ptenum(s)];
1621 epte = &spte[intel_btop(l-s)];
1622
1623 while (spte < epte) {
1624 if (*spte & INTEL_PTE_VALID)
1625 *spte &= ~INTEL_PTE_WRITE;
1626 spte++;
1627 }
1628 }
1629 s = l;
1630 pde++;
1631 }
1632
1633 PMAP_UPDATE_TLBS(map, orig_s, e);
1634
1635 simple_unlock(&map->lock);
1636 SPLX(spl);
1637 }
1638
1639
1640
1641 /*
1642 * Insert the given physical page (p) at
1643 * the specified virtual address (v) in the
1644 * target physical map with the protection requested.
1645 *
1646 * If specified, the page will be wired down, meaning
1647 * that the related pte cannot be reclaimed.
1648 *
1649 * NB: This is the only routine which MAY NOT lazy-evaluate
1650 * or lose information. That is, this routine must actually
1651 * insert this page into the given map NOW.
1652 */
1653 void
1654 pmap_enter(
1655 register pmap_t pmap,
1656 vm_offset_t v,
1657 ppnum_t pn,
1658 vm_prot_t prot,
1659 unsigned int flags,
1660 boolean_t wired)
1661 {
1662 register pt_entry_t *pte;
1663 register pv_entry_t pv_h;
1664 register int pai;
1665 pv_entry_t pv_e;
1666 pt_entry_t template;
1667 spl_t spl;
1668 pmap_paddr_t old_pa;
1669 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
1670
1671 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1672 current_thread(),
1673 current_thread(),
1674 pmap, v, pn);
1675
1676 assert(pn != vm_page_fictitious_addr);
1677 if (pmap_debug)
1678 printf("pmap(%x, %x)\n", v, pn);
1679 if (pmap == PMAP_NULL)
1680 return;
1681
1682 /*
1683 * Must allocate a new pvlist entry while we're unlocked;
1684 * zalloc may cause pageout (which will lock the pmap system).
1685 * If we determine we need a pvlist entry, we will unlock
1686 * and allocate one. Then we will retry, throwing away
1687 * the allocated entry later (if we no longer need it).
1688 */
1689 pv_e = PV_ENTRY_NULL;
1690
1691 PMAP_READ_LOCK(pmap, spl);
1692
1693 /*
1694 * Expand pmap to include this pte. Assume that
1695 * pmap is always expanded to include enough hardware
1696 * pages to map one VM page.
1697 */
1698
1699 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1700 /*
1701 * Must unlock to expand the pmap.
1702 */
1703 PMAP_READ_UNLOCK(pmap, spl);
1704
1705 pmap_expand(pmap, v);
1706
1707 PMAP_READ_LOCK(pmap, spl);
1708 }
1709 /*
1710 * Special case if the physical page is already mapped
1711 * at this address.
1712 */
1713 old_pa = pte_to_pa(*pte);
1714 if (old_pa == pa) {
1715 /*
1716 * May be changing its wired attribute or protection
1717 */
1718
1719 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1720
1721 if(flags & VM_MEM_NOT_CACHEABLE) {
1722 if(!(flags & VM_MEM_GUARDED))
1723 template |= INTEL_PTE_PTA;
1724 template |= INTEL_PTE_NCACHE;
1725 }
1726
1727 if (pmap != kernel_pmap)
1728 template |= INTEL_PTE_USER;
1729 if (prot & VM_PROT_WRITE)
1730 template |= INTEL_PTE_WRITE;
1731 if (wired) {
1732 template |= INTEL_PTE_WIRED;
1733 if (!iswired(*pte))
1734 pmap->stats.wired_count++;
1735 }
1736 else {
1737 if (iswired(*pte)) {
1738 assert(pmap->stats.wired_count >= 1);
1739 pmap->stats.wired_count--;
1740 }
1741 }
1742
1743 if (*pte & INTEL_PTE_MOD)
1744 template |= INTEL_PTE_MOD;
1745 WRITE_PTE(pte, template)
1746 pte++;
1747
1748 goto Done;
1749 }
1750
1751 /*
1752 * Outline of code from here:
1753 * 1) If va was mapped, update TLBs, remove the mapping
1754 * and remove old pvlist entry.
1755 * 2) Add pvlist entry for new mapping
1756 * 3) Enter new mapping.
1757 *
1758 * SHARING_FAULTS complicates this slightly in that it cannot
1759 * replace the mapping, but must remove it (because adding the
1760 * pvlist entry for the new mapping may remove others), and
1761 * hence always enters the new mapping at step 3)
1762 *
1763 * If the old physical page is not managed step 1) is skipped
1764 * (except for updating the TLBs), and the mapping is
1765 * overwritten at step 3). If the new physical page is not
1766 * managed, step 2) is skipped.
1767 */
1768
1769 if (old_pa != (pmap_paddr_t) 0) {
1770
1771
1772 #if DEBUG_PTE_PAGE
1773 if (pmap != kernel_pmap)
1774 ptep_check(get_pte_page(pte));
1775 #endif /* DEBUG_PTE_PAGE */
1776
1777 /*
1778 * Don't do anything to pages outside valid memory here.
1779 * Instead convince the code that enters a new mapping
1780 * to overwrite the old one.
1781 */
1782
1783 if (valid_page(i386_btop(old_pa))) {
1784
1785 pai = pa_index(old_pa);
1786 LOCK_PVH(pai);
1787
1788 assert(pmap->stats.resident_count >= 1);
1789 pmap->stats.resident_count--;
1790 if (iswired(*pte)) {
1791 assert(pmap->stats.wired_count >= 1);
1792 pmap->stats.wired_count--;
1793 }
1794
1795 pmap_phys_attributes[pai] |=
1796 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1797 WRITE_PTE(pte, 0)
1798
1799 /*
1800 * Remove the mapping from the pvlist for
1801 * this physical page.
1802 */
1803 {
1804 register pv_entry_t prev, cur;
1805
1806 pv_h = pai_to_pvh(pai);
1807 if (pv_h->pmap == PMAP_NULL) {
1808 panic("pmap_enter: null pv_list!");
1809 }
1810 if (pv_h->va == v && pv_h->pmap == pmap) {
1811 /*
1812 * Header is the pv_entry. Copy the next one
1813 * to header and free the next one (we cannot
1814 * free the header)
1815 */
1816 cur = pv_h->next;
1817 if (cur != PV_ENTRY_NULL) {
1818 *pv_h = *cur;
1819 pv_e = cur;
1820 }
1821 else {
1822 pv_h->pmap = PMAP_NULL;
1823 }
1824 }
1825 else {
1826 cur = pv_h;
1827 do {
1828 prev = cur;
1829 if ((cur = prev->next) == PV_ENTRY_NULL) {
1830 panic("pmap_enter: mapping not in pv_list!");
1831 }
1832 } while (cur->va != v || cur->pmap != pmap);
1833 prev->next = cur->next;
1834 pv_e = cur;
1835 }
1836 }
1837 UNLOCK_PVH(pai);
1838 }
1839 else {
1840
1841 /*
1842 * old_pa is not managed. Pretend it's zero so code
1843 * at Step 3) will enter new mapping (overwriting old
1844 * one). Do removal part of accounting.
1845 */
1846 old_pa = (pmap_paddr_t) 0;
1847 assert(pmap->stats.resident_count >= 1);
1848 pmap->stats.resident_count--;
1849 if (iswired(*pte)) {
1850 assert(pmap->stats.wired_count >= 1);
1851 pmap->stats.wired_count--;
1852 }
1853 }
1854
1855 }
1856
1857 if (valid_page(i386_btop(pa))) {
1858
1859 /*
1860 * Step 2) Enter the mapping in the PV list for this
1861 * physical page.
1862 */
1863
1864 pai = pa_index(pa);
1865
1866
1867 #if SHARING_FAULTS
1868 RetryPvList:
1869 /*
1870 * We can return here from the sharing fault code below
1871 * in case we removed the only entry on the pv list and thus
1872 * must enter the new one in the list header.
1873 */
1874 #endif /* SHARING_FAULTS */
1875 LOCK_PVH(pai);
1876 pv_h = pai_to_pvh(pai);
1877
1878 if (pv_h->pmap == PMAP_NULL) {
1879 /*
1880 * No mappings yet
1881 */
1882 pv_h->va = v;
1883 pv_h->pmap = pmap;
1884 pv_h->next = PV_ENTRY_NULL;
1885 }
1886 else {
1887 #if DEBUG
1888 {
1889 /*
1890 * check that this mapping is not already there
1891 * or there is no alias for this mapping in the same map
1892 */
1893 pv_entry_t e = pv_h;
1894 while (e != PV_ENTRY_NULL) {
1895 if (e->pmap == pmap && e->va == v)
1896 panic("pmap_enter: already in pv_list");
1897 e = e->next;
1898 }
1899 }
1900 #endif /* DEBUG */
1901 #if SHARING_FAULTS
1902 {
1903 /*
1904 * do sharing faults.
1905 * if we find an entry on this pv list in the same address
1906 * space, remove it. we know there will not be more
1907 * than one.
1908 */
1909 pv_entry_t e = pv_h;
1910 pt_entry_t *opte;
1911
1912 while (e != PV_ENTRY_NULL) {
1913 if (e->pmap == pmap) {
1914 /*
1915 * Remove it, drop pv list lock first.
1916 */
1917 UNLOCK_PVH(pai);
1918
1919 opte = pmap_pte(pmap, e->va);
1920 assert(opte != PT_ENTRY_NULL);
1921 /*
1922 * Invalidate the translation buffer,
1923 * then remove the mapping.
1924 */
1925 pmap_remove_range(pmap, e->va, opte,
1926 opte + 1);
1927 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
1928
1929 /*
1930 * We could have removed the head entry,
1931 * so there could be no more entries
1932 * and so we would have to use the pv head entry;
1933 * go back to the top and try the entry
1934 * again.
1935 */
1936 goto RetryPvList;
1937 }
1938 e = e->next;
1939 }
1940
1941 /*
1942 * check that this mapping is not already there
1943 */
1944 e = pv_h;
1945 while (e != PV_ENTRY_NULL) {
1946 if (e->pmap == pmap)
1947 panic("pmap_enter: alias in pv_list");
1948 e = e->next;
1949 }
1950 }
1951 #endif /* SHARING_FAULTS */
1952 #if DEBUG_ALIAS
1953 {
1954 /*
1955 * check for aliases within the same address space.
1956 */
1957 pv_entry_t e = pv_h;
1958 vm_offset_t rpc = get_rpc();
1959
1960 while (e != PV_ENTRY_NULL) {
1961 if (e->pmap == pmap) {
1962 /*
1963 * log this entry in the alias ring buffer
1964 * if it's not there already.
1965 */
1966 struct pmap_alias *pma;
1967 int ii, logit;
1968
1969 logit = TRUE;
1970 for (ii = 0; ii < pmap_alias_index; ii++) {
1971 if (pmap_aliasbuf[ii].rpc == rpc) {
1972 /* found it in the log already */
1973 logit = FALSE;
1974 break;
1975 }
1976 }
1977 if (logit) {
1978 pma = &pmap_aliasbuf[pmap_alias_index];
1979 pma->pmap = pmap;
1980 pma->va = v;
1981 pma->rpc = rpc;
1982 pma->cookie = PMAP_ALIAS_COOKIE;
1983 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
1984 panic("pmap_enter: exhausted alias log");
1985 }
1986 }
1987 e = e->next;
1988 }
1989 }
1990 #endif /* DEBUG_ALIAS */
1991 /*
1992 * Add new pv_entry after header.
1993 */
1994 if (pv_e == PV_ENTRY_NULL) {
1995 PV_ALLOC(pv_e);
1996 if (pv_e == PV_ENTRY_NULL) {
1997 panic("pmap no pv_e's");
1998 }
1999 }
2000 pv_e->va = v;
2001 pv_e->pmap = pmap;
2002 pv_e->next = pv_h->next;
2003 pv_h->next = pv_e;
2004 /*
2005 * Remember that we used the pvlist entry.
2006 */
2007 pv_e = PV_ENTRY_NULL;
2008 }
2009 UNLOCK_PVH(pai);
2010 }
2011
2012 /*
2013 * Step 3) Enter and count the mapping.
2014 */
2015
2016 pmap->stats.resident_count++;
2017
2018 /*
2019 * Build a template to speed up entering -
2020 * only the pfn changes.
2021 */
2022 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2023
2024 if(flags & VM_MEM_NOT_CACHEABLE) {
2025 if(!(flags & VM_MEM_GUARDED))
2026 template |= INTEL_PTE_PTA;
2027 template |= INTEL_PTE_NCACHE;
2028 }
2029
2030 if (pmap != kernel_pmap)
2031 template |= INTEL_PTE_USER;
2032 if (prot & VM_PROT_WRITE)
2033 template |= INTEL_PTE_WRITE;
2034 if (wired) {
2035 template |= INTEL_PTE_WIRED;
2036 pmap->stats.wired_count++;
2037 }
2038
2039 WRITE_PTE(pte, template)
2040
2041 Done:
2042 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2043
2044 if (pv_e != PV_ENTRY_NULL) {
2045 PV_FREE(pv_e);
2046 }
2047
2048 PMAP_READ_UNLOCK(pmap, spl);
2049 }
2050
2051 /*
2052 * Routine: pmap_change_wiring
2053 * Function: Change the wiring attribute for a map/virtual-address
2054 * pair.
2055 * In/out conditions:
2056 * The mapping must already exist in the pmap.
2057 */
2058 void
2059 pmap_change_wiring(
2060 register pmap_t map,
2061 vm_offset_t v,
2062 boolean_t wired)
2063 {
2064 register pt_entry_t *pte;
2065 spl_t spl;
2066
2067 #if 1
2068 /*
2069 * We must grab the pmap system lock because we may
2070 * change a pte_page queue.
2071 */
2072 PMAP_READ_LOCK(map, spl);
2073
2074 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2075 panic("pmap_change_wiring: pte missing");
2076
2077 if (wired && !iswired(*pte)) {
2078 /*
2079 * wiring down mapping
2080 */
2081 map->stats.wired_count++;
2082 *pte++ |= INTEL_PTE_WIRED;
2083 }
2084 else if (!wired && iswired(*pte)) {
2085 /*
2086 * unwiring mapping
2087 */
2088 assert(map->stats.wired_count >= 1);
2089 map->stats.wired_count--;
2090 *pte++ &= ~INTEL_PTE_WIRED;
2091 }
2092
2093 PMAP_READ_UNLOCK(map, spl);
2094
2095 #else
2096 return;
2097 #endif
2098
2099 }
2100
2101 ppnum_t
2102 pmap_find_phys(pmap_t pmap, addr64_t va)
2103 {
2104 pt_entry_t *ptp;
2105 vm_offset_t a32;
2106 ppnum_t ppn;
2107
2108 if (value_64bit(va))
2109 panic("pmap_find_phys 64 bit value");
2110 a32 = (vm_offset_t) low32(va);
2111 ptp = pmap_pte(pmap, a32);
2112 if (PT_ENTRY_NULL == ptp) {
2113 ppn = 0;
2114 } else {
2115 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2116 }
2117 return ppn;
2118 }
2119
2120 /*
2121 * Routine: pmap_extract
2122 * Function:
2123 * Extract the physical page address associated
2124 * with the given map/virtual_address pair.
2125 * Changed to a shim for backwards compatibility; it will not
2126 * work on 64-bit systems. Some old drivers that we cannot
2127 * change still need this.
2128 */
2129
2130 vm_offset_t
2131 pmap_extract(
2132 register pmap_t pmap,
2133 vm_offset_t va)
2134 {
2135 ppnum_t ppn;
2136 vm_offset_t vaddr;
2137
2138 vaddr = (vm_offset_t)0;
2139 ppn = pmap_find_phys(pmap, (addr64_t)va);
2140 if (ppn) {
2141 vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK);
2142 }
2143 return (vaddr);
2144 }
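
/*
 * Hedged sketch relating the two lookup routines above: translate a
 * kernel va to a physical address.  pmap_find_phys() returns a page
 * number (0 if unmapped); pmap_extract() is the legacy 32-bit shim
 * built on top of it.  va is an assumed input.
 */
#if 0	/* illustrative sketch only -- not compiled */
	vm_offset_t	va;	/* assumed input */
	ppnum_t		pn = pmap_find_phys(kernel_pmap, (addr64_t) va);
	vm_offset_t	pa = pmap_extract(kernel_pmap, va);	/* 0 if unmapped */

	if (pn != 0) {
		/* i386_ptob(pn) | (va & INTEL_OFFMASK) equals pa */
	}
#endif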
2145
2146
2147 /*
2148 * Routine: pmap_expand
2149 *
2150 * Expands a pmap to be able to map the specified virtual address.
2151 *
2152 * Allocates new virtual memory for the P0 or P1 portion of the
2153 * pmap, then re-maps the physical pages that were in the old
2154 * pmap to be in the new pmap.
2155 *
2156 * Must be called with the pmap system and the pmap unlocked,
2157 * since these must be unlocked to use vm_allocate or vm_deallocate.
2158 * Thus it must be called in a loop that checks whether the map
2159 * has been expanded enough.
2160 * (We won't loop forever, since page tables aren't shrunk.)
2161 */
2162 void
2163 pmap_expand(
2164 register pmap_t map,
2165 register vm_offset_t v)
2166 {
2167 pt_entry_t *pdp;
2168 register vm_page_t m;
2169 register pmap_paddr_t pa;
2170 register int i;
2171 spl_t spl;
2172 ppnum_t pn;
2173
2174 if (map == kernel_pmap) {
2175 pmap_growkernel(v);
2176 return;
2177 }
2178
2179 /*
2180 * Allocate a VM page for the level 2 page table entries.
2181 */
2182 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2183 VM_PAGE_WAIT();
2184
2185 /*
2186 * put the page into the pmap's obj list so it
2187 * can be found later.
2188 */
2189 pn = m->phys_page;
2190 pa = i386_ptob(pn);
2191 i = pdenum(map, v);
2192 vm_object_lock(map->pm_obj);
2193 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2194 vm_page_lock_queues();
2195 vm_page_wire(m);
2196 inuse_ptepages_count++;
2197 vm_object_unlock(map->pm_obj);
2198 vm_page_unlock_queues();
2199
2200 /*
2201 * Zero the page.
2202 */
2203 pmap_zero_page(pn);
2204
2205 PMAP_READ_LOCK(map, spl);
2206 /*
2207 * See if someone else expanded us first
2208 */
2209 if (pmap_pte(map, v) != PT_ENTRY_NULL) {
2210 PMAP_READ_UNLOCK(map, spl);
2211 vm_object_lock(map->pm_obj);
2212 vm_page_lock_queues();
2213 vm_page_free(m);
2214 inuse_ptepages_count--;
2215 vm_page_unlock_queues();
2216 vm_object_unlock(map->pm_obj);
2217 return;
2218 }
2219
2220 /*
2221 * Set the page directory entry for this page table,
2222 * marking it valid, user-accessible and writable so that
2223 * the hardware can reach the new page table page.
2224 */
2225
2226 pdp = &map->dirbase[pdenum(map, v)];
2227 *pdp = pa_to_pte(pa)
2228 | INTEL_PTE_VALID
2229 | INTEL_PTE_USER
2230 | INTEL_PTE_WRITE;
2231
2232 PMAP_READ_UNLOCK(map, spl);
2233 return;
2234 }
2235
2236 /*
2237 * Copy the range specified by src_addr/len
2238 * from the source map to the range dst_addr/len
2239 * in the destination map.
2240 *
2241 * This routine is only advisory and need not do anything.
2242 */
2243 #if 0
2244 void
2245 pmap_copy(
2246 pmap_t dst_pmap,
2247 pmap_t src_pmap,
2248 vm_offset_t dst_addr,
2249 vm_size_t len,
2250 vm_offset_t src_addr)
2251 {
2252 #ifdef lint
2253 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2254 #endif /* lint */
2255 }
2256 #endif/* 0 */
2257
2258 /*
2259 * pmap_sync_page_data_phys(ppnum_t pa)
2260 *
2261 * Invalidates all of the instruction cache on a physical page and
2262 * pushes any dirty data from the data cache for the same physical page.
2263 * Not required on i386.
2264 */
2265 void
2266 pmap_sync_page_data_phys(__unused ppnum_t pa)
2267 {
2268 return;
2269 }
2270
2271 /*
2272 * pmap_sync_page_attributes_phys(ppnum_t pa)
2273 *
2274 * Write back and invalidate all cachelines on a physical page.
2275 */
2276 void
2277 pmap_sync_page_attributes_phys(ppnum_t pa)
2278 {
2279 cache_flush_page_phys(pa);
2280 }
2281
2282 int collect_ref;
2283 int collect_unref;
2284
2285 /*
2286 * Routine: pmap_collect
2287 * Function:
2288 * Garbage collects the physical map system for
2289 * pages which are no longer used.
2290 * Success need not be guaranteed -- that is, some
2291 * unreferenced pages may be left in place while
2292 * others are collected.
2293 * Usage:
2294 * Called by the pageout daemon when pages are scarce.
2295 */
2296 void
2297 pmap_collect(
2298 pmap_t p)
2299 {
2300 register pt_entry_t *pdp, *ptp;
2301 pt_entry_t *eptp;
2302 int wired;
2303 spl_t spl;
2304
2305 if (p == PMAP_NULL)
2306 return;
2307
2308 if (p == kernel_pmap)
2309 return;
2310
2311 /*
2312 * Garbage collect map.
2313 */
2314 PMAP_READ_LOCK(p, spl);
2315
2316 for (pdp = (pt_entry_t *)p->dirbase;
2317 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2318 pdp++)
2319 {
2320 if (*pdp & INTEL_PTE_VALID) {
2321 if(*pdp & INTEL_PTE_REF) {
2322 *pdp &= ~INTEL_PTE_REF;
2323 collect_ref++;
2324 } else {
2325 collect_unref++;
2326 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2327 eptp = ptp + NPTEPG;
2328
2329 /*
2330 * If the pte page has any wired mappings, we cannot
2331 * free it.
2332 */
2333 wired = 0;
2334 {
2335 register pt_entry_t *ptep;
2336 for (ptep = ptp; ptep < eptp; ptep++) {
2337 if (iswired(*ptep)) {
2338 wired = 1;
2339 break;
2340 }
2341 }
2342 }
2343 if (!wired) {
2344 /*
2345 * Remove the virtual addresses mapped by this pte page.
2346 */
2347 pmap_remove_range(p,
2348 pdetova(pdp - (pt_entry_t *)p->dirbase),
2349 ptp,
2350 eptp);
2351
2352 /*
2353 * Invalidate the page directory pointer.
2354 */
2355 *pdp = 0x0;
2356
2357 PMAP_READ_UNLOCK(p, spl);
2358
2359 /*
2360 * And free the pte page itself.
2361 */
2362 {
2363 register vm_page_t m;
2364
2365 vm_object_lock(p->pm_obj);
2366 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
2367 if (m == VM_PAGE_NULL)
2368 panic("pmap_collect: pte page not in object");
2369 vm_page_lock_queues();
2370 vm_page_free(m);
2371 inuse_ptepages_count--;
2372 vm_page_unlock_queues();
2373 vm_object_unlock(p->pm_obj);
2374 }
2375
2376 PMAP_READ_LOCK(p, spl);
2377 }
2378 }
2379 }
2380 }
2381 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2382 PMAP_READ_UNLOCK(p, spl);
2383 return;
2384
2385 }
2386
2387 /*
2388 * Routine: pmap_kernel
2389 * Function:
2390 * Returns the physical map handle for the kernel.
2391 */
2392 #if 0
2393 pmap_t
2394 pmap_kernel(void)
2395 {
2396 return (kernel_pmap);
2397 }
2398 #endif/* 0 */
2399
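/*
 *	Copy the contents of physical page src to physical page dst.
 */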
2400 void
2401 pmap_copy_page(
2402 ppnum_t src,
2403 ppnum_t dst)
2404 {
2405 bcopy_phys((addr64_t)i386_ptob(src),
2406 (addr64_t)i386_ptob(dst),
2407 PAGE_SIZE);
2408 }
2409
2410
2411 /*
2412 * Routine: pmap_pageable
2413 * Function:
2414 * Make the specified pages (by pmap, offset)
2415 * pageable (or not) as requested.
2416 *
2417 * A page which is not pageable may not take
2418 * a fault; therefore, its page table entry
2419 * must remain valid for the duration.
2420 *
2421 * This routine is merely advisory; pmap_enter
2422 * will specify that these pages are to be wired
2423 * down (or not) as appropriate.
2424 */
2425 void
2426 pmap_pageable(
2427 __unused pmap_t pmap,
2428 __unused vm_offset_t start_addr,
2429 __unused vm_offset_t end_addr,
2430 __unused boolean_t pageable)
2431 {
2432 #ifdef lint
2433 pmap++; start_addr++; end_addr++; pageable++;
2434 #endif /* lint */
2435 }
2436
2437 /*
2438 * Clear specified attribute bits.
2439 */
2440 void
2441 phys_attribute_clear(
2442 ppnum_t pn,
2443 int bits)
2444 {
2445 pv_entry_t pv_h;
2446 register pv_entry_t pv_e;
2447 register pt_entry_t *pte;
2448 int pai;
2449 register pmap_t pmap;
2450 spl_t spl;
2451 pmap_paddr_t phys;
2452
2453 assert(pn != vm_page_fictitious_addr);
2454 if (!valid_page(pn)) {
2455 /*
2456 * Not a managed page.
2457 */
2458 return;
2459 }
2460
2461 /*
2462 * Lock the pmap system first, since we will be changing
2463 * several pmaps.
2464 */
2465
2466 PMAP_WRITE_LOCK(spl);
2467 phys = i386_ptob(pn);
2468 pai = pa_index(phys);
2469 pv_h = pai_to_pvh(pai);
2470
2471 /*
2472 * Walk down PV list, clearing all modify or reference bits.
2473 * We do not have to lock the pv_list because we have
2474 * the entire pmap system locked.
2475 */
2476 if (pv_h->pmap != PMAP_NULL) {
2477 /*
2478 * There are some mappings.
2479 */
2480 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2481
2482 pmap = pv_e->pmap;
2483 /*
2484 * Lock the pmap to block pmap_extract and similar routines.
2485 */
2486 simple_lock(&pmap->lock);
2487
2488 {
2489 register vm_offset_t va;
2490
2491 va = pv_e->va;
2492 pte = pmap_pte(pmap, va);
2493
2494 #if 0
2495 /*
2496 * Consistency checks.
2497 */
2498 assert(*pte & INTEL_PTE_VALID);
2499 /* assert(pte_to_phys(*pte) == phys); */
2500 #endif
2501
2502 /*
2503 * Clear modify or reference bits.
2504 */
2505
2506 *pte++ &= ~bits;
2507 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
2508 }
2509 simple_unlock(&pmap->lock);
2510
2511 }
2512 }
2513
2514 pmap_phys_attributes[pai] &= ~bits;
2515
2516 PMAP_WRITE_UNLOCK(spl);
2517 }
2518
2519 /*
2520 * Check specified attribute bits.
2521 */
2522 boolean_t
2523 phys_attribute_test(
2524 ppnum_t pn,
2525 int bits)
2526 {
2527 pv_entry_t pv_h;
2528 register pv_entry_t pv_e;
2529 register pt_entry_t *pte;
2530 int pai;
2531 register pmap_t pmap;
2532 spl_t spl;
2533 pmap_paddr_t phys;
2534
2535 assert(pn != vm_page_fictitious_addr);
2536 if (!valid_page(pn)) {
2537 /*
2538 * Not a managed page.
2539 */
2540 return (FALSE);
2541 }
2542
2543 /*
2544 * Lock the pmap system first, since we will be checking
2545 * several pmaps.
2546 */
2547
2548 PMAP_WRITE_LOCK(spl);
2549 phys = i386_ptob(pn);
2550 pai = pa_index(phys);
2551 pv_h = pai_to_pvh(pai);
2552
2553 if (pmap_phys_attributes[pai] & bits) {
2554 PMAP_WRITE_UNLOCK(spl);
2555 return (TRUE);
2556 }
2557
2558 /*
2559 * Walk down PV list, checking all mappings.
2560 * We do not have to lock the pv_list because we have
2561 * the entire pmap system locked.
2562 */
2563 if (pv_h->pmap != PMAP_NULL) {
2564 /*
2565 * There are some mappings.
2566 */
2567 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2568
2569 pmap = pv_e->pmap;
2570 /*
2571 * Lock the pmap to block pmap_extract and similar routines.
2572 */
2573 simple_lock(&pmap->lock);
2574
2575 {
2576 register vm_offset_t va;
2577
2578 va = pv_e->va;
2579 pte = pmap_pte(pmap, va);
2580
2581 #if 0
2582 /*
2583 * Consistency checks.
2584 */
2585 assert(*pte & INTEL_PTE_VALID);
2586 /* assert(pte_to_phys(*pte) == phys); */
2587 #endif
2588 }
2589
2590 /*
2591 * Check modify or reference bits.
2592 */
2593 {
2594 if (*pte++ & bits) {
2595 simple_unlock(&pmap->lock);
2596 PMAP_WRITE_UNLOCK(spl);
2597 return (TRUE);
2598 }
2599 }
2600 simple_unlock(&pmap->lock);
2601 }
2602 }
2603 PMAP_WRITE_UNLOCK(spl);
2604 return (FALSE);
2605 }
2606
2607 /*
2608 * Set specified attribute bits.
2609 */
2610 void
2611 phys_attribute_set(
2612 ppnum_t pn,
2613 int bits)
2614 {
2615 int spl;
2616 pmap_paddr_t phys;
2617
2618 assert(pn != vm_page_fictitious_addr);
2619 if (!valid_page(pn)) {
2620 /*
2621 * Not a managed page.
2622 */
2623 return;
2624 }
2625
2626 /*
2627 * Lock the pmap system and set the requested bits in
2628 * the phys attributes array. Don't need to bother with
2629 * ptes because the test routine looks here first.
2630 */
2631 phys = i386_ptob(pn);
2632 PMAP_WRITE_LOCK(spl);
2633 pmap_phys_attributes[pa_index(phys)] |= bits;
2634 PMAP_WRITE_UNLOCK(spl);
2635 }
2636
2637 /*
2638 * Set the modify bit on the specified physical page.
2639 */
2640
2641 void pmap_set_modify(
2642 ppnum_t pn)
2643 {
2644 phys_attribute_set(pn, PHYS_MODIFIED);
2645 }
2646
2647 /*
2648 * Clear the modify bit on the specified physical page.
2649 */
2650
2651 void
2652 pmap_clear_modify(
2653 ppnum_t pn)
2654 {
2655 phys_attribute_clear(pn, PHYS_MODIFIED);
2656 }
2657
2658 /*
2659 * pmap_is_modified:
2660 *
2661 * Return whether or not the specified physical page is modified
2662 * by any physical maps.
2663 */
2664
2665 boolean_t
2666 pmap_is_modified(
2667 ppnum_t pn)
2668 {
2669 return (phys_attribute_test(pn, PHYS_MODIFIED));
2670 }
2671
2672 /*
2673 * pmap_clear_reference:
2674 *
2675 * Clear the reference bit on the specified physical page.
2676 */
2677
2678 void
2679 pmap_clear_reference(
2680 ppnum_t pn)
2681 {
2682 phys_attribute_clear(pn, PHYS_REFERENCED);
2683 }
2684
2685 void
2686 pmap_set_reference(ppnum_t pn)
2687 {
2688 phys_attribute_set(pn, PHYS_REFERENCED);
2689 }
2690
2691 /*
2692 * pmap_is_referenced:
2693 *
2694 * Return whether or not the specified physical page is referenced
2695 * by any physical maps.
2696 */
2697
2698 boolean_t
2699 pmap_is_referenced(
2700 ppnum_t pn)
2701 {
2702 return (phys_attribute_test(pn, PHYS_REFERENCED));
2703 }
2704
2705 /*
2706 * pmap_get_refmod(phys)
2707 * returns the referenced and modified bits of the specified
2708 * physical page.
2709 */
2710 unsigned int
2711 pmap_get_refmod(ppnum_t pa)
2712 {
2713 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
2714 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
2715 }
2716
2717 /*
2718 * pmap_clear_refmod(phys, mask)
2719 * clears the referenced and modified bits as specified by the mask
2720 * of the specified physical page.
2721 */
2722 void
2723 pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2724 {
2725 unsigned int x86Mask;
2726
2727 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2728 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2729 phys_attribute_clear(pa, x86Mask);
2730 }
2731
2732 /*
2733 * Set the modify bit on the specified range
2734 * of this map as requested.
2735 *
2736 * This optimization is only valid if, each time the dirty bit
2737 * in vm_page_t is tested, it is also tested in the pmap.
2738 */
2739 void
2740 pmap_modify_pages(
2741 pmap_t map,
2742 vm_offset_t s,
2743 vm_offset_t e)
2744 {
2745 spl_t spl;
2746 register pt_entry_t *pde;
2747 register pt_entry_t *spte, *epte;
2748 vm_offset_t l;
2749 vm_offset_t orig_s = s;
2750
2751 if (map == PMAP_NULL)
2752 return;
2753
2754 PMAP_READ_LOCK(map, spl);
2755
2756 pde = pmap_pde(map, s);
2757 while (s && s < e) {
2758 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
2759 if (l > e)
2760 l = e;
2761 if (*pde & INTEL_PTE_VALID) {
2762 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
2763 if (l) {
2764 spte = &spte[ptenum(s)];
2765 epte = &spte[intel_btop(l-s)];
2766 } else {
2767 epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
2768 spte = &spte[ptenum(s)];
2769 }
2770 while (spte < epte) {
2771 if (*spte & INTEL_PTE_VALID) {
2772 *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
2773 }
2774 spte++;
2775 }
2776 }
2777 s = l;
2778 pde++;
2779 }
2780 PMAP_UPDATE_TLBS(map, orig_s, e);
2781 PMAP_READ_UNLOCK(map, spl);
2782 }
2783
2784
2785 void
2786 invalidate_icache(__unused vm_offset_t addr,
2787 __unused unsigned cnt,
2788 __unused int phys)
2789 {
2790 return;
2791 }
2792 void
2793 flush_dcache(__unused vm_offset_t addr,
2794 __unused unsigned count,
2795 __unused int phys)
2796 {
2797 return;
2798 }
2799
2800 /*
2801 * TLB Coherence Code (TLB "shootdown" code)
2802 *
2803 * Threads that belong to the same task share the same address space and
2804 * hence share a pmap. However, they may run on distinct cpus and thus
2805 * have distinct TLBs that cache page table entries. In order to guarantee
2806 * the TLBs are consistent, whenever a pmap is changed, all threads that
2807 * are active in that pmap must have their TLB updated. To keep track of
2808 * this information, the set of cpus that are currently using a pmap is
2809 * maintained within each pmap structure (cpus_using). Pmap_activate() and
2810 * pmap_deactivate() add and remove, respectively, a cpu from this set.
2811 * Since the TLBs are not addressable over the bus, each processor must
2812 * flush its own TLB; a processor that needs to invalidate another TLB
2813 * needs to interrupt the processor that owns that TLB to signal the
2814 * update.
2815 *
2816 * Whenever a pmap is updated, the lock on that pmap is locked, and all
2817 * cpus using the pmap are signaled to invalidate. All threads that need
2818 * to activate a pmap must wait for the lock to clear to await any updates
2819 * in progress before using the pmap. They must ACQUIRE the lock to add
2820 * their cpu to the cpus_using set. An implicit assumption made
2821 * throughout the TLB code is that all kernel code that runs at or higher
2822 * than splvm blocks out update interrupts, and that such code does not
2823 * touch pageable pages.
2824 *
2825 * A shootdown interrupt serves another function besides signaling a
2826 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
2827 * waits for both the pmap lock and the kernel pmap lock to clear,
2828 * preventing user code from making implicit pmap updates while the
2829 * sending processor is performing its update. (This could happen via a
2830 * user data write reference that turns on the modify bit in the page
2831 * table). It must wait for any kernel updates that may have started
2832 * concurrently with a user pmap update because the IPC code
2833 * changes mappings.
2834 * Spinning on the VALUES of the locks is sufficient (rather than
2835 * having to acquire the locks) because any updates that occur subsequent
2836 * to finding the lock unlocked will be signaled via another interrupt.
2837 * (This assumes the interrupt is cleared before the low level interrupt code
2838 * calls pmap_update_interrupt()).
2839 *
2840 * The signaling processor must wait for any implicit updates in progress
2841 * to terminate before continuing with its update. Thus it must wait for an
2842 * acknowledgement of the interrupt from each processor for which such
2843 * references could be made. For maintaining this information, a set
2844 * cpus_active is used. A cpu is in this set if and only if it can
2845 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2846 * this set; when all such cpus are removed, it is safe to update.
2847 *
2848 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
2849 * be at least at the priority of the interprocessor interrupt
2850 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2851 * kernel update; it would spin forever in pmap_update_interrupt() trying
2852 * to acquire the user pmap lock it had already acquired. Furthermore A
2853 * must remove itself from cpus_active. Otherwise, another cpu holding
2854 * the lock (B) could be in the process of sending an update signal to A,
2855 * and thus be waiting for A to remove itself from cpus_active. If A is
2856 * spinning on the lock at that priority, this will never happen and a deadlock
2857 * will result.
2858 */
2859
2860 /*
2861 * Signal the cpus in use_list that they must flush their TLBs
2862 */
2863 void
2864 signal_cpus(
2865 cpu_set use_list,
2866 pmap_t pmap,
2867 vm_offset_t start_addr,
2868 vm_offset_t end_addr)
2869 {
2870 register int which_cpu, j;
2871 register pmap_update_list_t update_list_p;
2872
2873 while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
2874 which_cpu -= 1; /* convert to 0 origin */
2875
2876 update_list_p = cpu_update_list(which_cpu);
2877 simple_lock(&update_list_p->lock);
2878
2879 j = update_list_p->count;
2880 if (j >= UPDATE_LIST_SIZE) {
2881 /*
2882 * list overflowed. Change last item to
2883 * indicate overflow.
2884 */
2885 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2886 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2887 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2888 }
2889 else {
2890 update_list_p->item[j].pmap = pmap;
2891 update_list_p->item[j].start = start_addr;
2892 update_list_p->item[j].end = end_addr;
2893 update_list_p->count = j+1;
2894 }
2895 cpu_update_needed(which_cpu) = TRUE;
2896 simple_unlock(&update_list_p->lock);
2897
2898 /* if it's the kernel pmap, ignore cpus_idle */
2899 if (((cpus_idle & (1 << which_cpu)) == 0) ||
2900 (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
2901 {
2902 i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
2903 }
2904 use_list &= ~(1 << which_cpu);
2905 }
2906 }
2907
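/*
 * Drain this cpu's TLB update list: invalidate the listed ranges
 * for entries that refer to my_pmap or to the kernel pmap.  If a
 * listed pmap is no longer referenced (ref_count <= 0), drop it
 * and switch this cpu back to the kernel pmap instead.
 */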
2908 void
2909 process_pmap_updates(
2910 register pmap_t my_pmap)
2911 {
2912 register int my_cpu;
2913 register pmap_update_list_t update_list_p;
2914 register int j;
2915 register pmap_t pmap;
2916
2917 mp_disable_preemption();
2918 my_cpu = cpu_number();
2919 update_list_p = cpu_update_list(my_cpu);
2920 simple_lock(&update_list_p->lock);
2921
2922 for (j = 0; j < update_list_p->count; j++) {
2923 pmap = update_list_p->item[j].pmap;
2924 if (pmap == my_pmap ||
2925 pmap == kernel_pmap) {
2926
2927 if (pmap->ref_count <= 0) {
2928 PMAP_CPU_CLR(pmap, my_cpu);
2929 PMAP_REAL(my_cpu) = kernel_pmap;
2930 #ifdef PAE
2931 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
2932 #else
2933 set_cr3((unsigned int)kernel_pmap->pdirbase);
2934 #endif
2935 } else
2936 INVALIDATE_TLB(pmap,
2937 update_list_p->item[j].start,
2938 update_list_p->item[j].end);
2939 }
2940 }
2941 update_list_p->count = 0;
2942 cpu_update_needed(my_cpu) = FALSE;
2943 simple_unlock(&update_list_p->lock);
2944 mp_enable_preemption();
2945 }
2946
2947 /*
2948 * Interrupt routine for TBIA requested from other processor.
2949 * This routine can also be called from any interrupt if
2950 * the cpu was idle, since some driver interrupt routines might access
2951 * newly allocated vm. (This is the case for hd.)
2952 */
2953 void
2954 pmap_update_interrupt(void)
2955 {
2956 register int my_cpu;
2957 spl_t s;
2958 register pmap_t my_pmap;
2959
2960 mp_disable_preemption();
2961 my_cpu = cpu_number();
2962
2963 /*
2964 * Raise spl to splhigh (above splip) to block out pmap_extract
2965 * from IO code (which would put this cpu back in the active
2966 * set).
2967 */
2968 s = splhigh();
2969
2970 my_pmap = PMAP_REAL(my_cpu);
2971
2972 if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
2973 my_pmap = kernel_pmap;
2974
2975 do {
2976 LOOP_VAR;
2977
2978 /*
2979 * Indicate that we're not using either user or kernel
2980 * pmap.
2981 */
2982 i_bit_clear(my_cpu, &cpus_active);
2983
2984 /*
2985 * Wait for any pmap updates in progress, on either user
2986 * or kernel pmap.
2987 */
2988 while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
2989 *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
2990 LOOP_CHECK("pmap_update_interrupt", my_pmap);
2991 cpu_pause();
2992 }
2993
2994 process_pmap_updates(my_pmap);
2995
2996 i_bit_set(my_cpu, &cpus_active);
2997
2998 } while (cpu_update_needed(my_cpu));
2999
3000 splx(s);
3001 mp_enable_preemption();
3002 }
3003
3004 #if MACH_KDB
3005
3006 /* show phys page mappings and attributes */
3007
3008 extern void db_show_page(pmap_paddr_t pa);
3009
3010 void
3011 db_show_page(pmap_paddr_t pa)
3012 {
3013 pv_entry_t pv_h;
3014 int pai;
3015 char attr;
3016
3017 pai = pa_index(pa);
3018 pv_h = pai_to_pvh(pai);
3019
3020 attr = pmap_phys_attributes[pai];
3021 printf("phys page %x ", pa);
3022 if (attr & PHYS_MODIFIED)
3023 printf("modified, ");
3024 if (attr & PHYS_REFERENCED)
3025 printf("referenced, ");
3026 if (pv_h->pmap || pv_h->next)
3027 printf(" mapped at\n");
3028 else
3029 printf(" not mapped\n");
3030 for (; pv_h; pv_h = pv_h->next)
3031 if (pv_h->pmap)
3032 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3033 }
3034
3035 #endif /* MACH_KDB */
3036
3037 #if MACH_KDB
3038 void db_kvtophys(vm_offset_t);
3039 void db_show_vaddrs(pt_entry_t *);
3040
3041 /*
3042 * print out the results of kvtophys(arg)
3043 */
3044 void
3045 db_kvtophys(
3046 vm_offset_t vaddr)
3047 {
3048 db_printf("0x%x", kvtophys(vaddr));
3049 }
3050
3051 /*
3052 * Walk the page tables.
3053 */
3054 void
3055 db_show_vaddrs(
3056 pt_entry_t *dirbase)
3057 {
3058 pt_entry_t *ptep, *pdep, tmp;
3059 int x, y, pdecnt, ptecnt;
3060
3061 if (dirbase == 0) {
3062 dirbase = kernel_pmap->dirbase;
3063 }
3064 if (dirbase == 0) {
3065 db_printf("need a dirbase...\n");
3066 return;
3067 }
3068 dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
3069
3070 db_printf("dirbase: 0x%x\n", dirbase);
3071
3072 pdecnt = ptecnt = 0;
3073 pdep = &dirbase[0];
3074 for (y = 0; y < NPDEPG; y++, pdep++) {
3075 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3076 continue;
3077 }
3078 pdecnt++;
3079 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3080 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
3081 for (x = 0; x < NPTEPG; x++, ptep++) {
3082 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3083 continue;
3084 }
3085 ptecnt++;
3086 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3087 x,
3088 *ptep,
3089 (y << 22) | (x << 12),
3090 *ptep & ~INTEL_OFFMASK);
3091 }
3092 }
3093
3094 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3095
3096 }
3097 #endif /* MACH_KDB */
3098
3099 #include <mach_vm_debug.h>
3100 #if MACH_VM_DEBUG
3101 #include <vm/vm_debug.h>
3102
3103 int
3104 pmap_list_resident_pages(
3105 __unused pmap_t pmap,
3106 __unused vm_offset_t *listp,
3107 __unused int space)
3108 {
3109 return 0;
3110 }
3111 #endif /* MACH_VM_DEBUG */
3112
3113 #ifdef MACH_BSD
3114 /*
3115 * pmap_movepage
3116 *
3117 * BSD support routine to reassign virtual addresses.
3118 */
3119
3120 void
3121 pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
3122 {
3123 spl_t spl;
3124 pt_entry_t *pte, saved_pte;
	vm_offset_t orig_from = from, orig_to = to;
	vm_size_t orig_size = size;
3125
3126 /* Lock the kernel map */
3127 PMAP_READ_LOCK(kernel_pmap, spl);
3128
3129
3130 while (size > 0) {
3131 pte = pmap_pte(kernel_pmap, from);
3132 if (pte == NULL)
3133 panic("pmap_pagemove from pte NULL");
3134 saved_pte = *pte;
3135 PMAP_READ_UNLOCK(kernel_pmap, spl);
3136
3137 pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(saved_pte)),
3138 VM_PROT_READ|VM_PROT_WRITE, 0, saved_pte & INTEL_PTE_WIRED);
3139
3140 pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));
3141
3142 PMAP_READ_LOCK(kernel_pmap, spl);
3143 pte = pmap_pte(kernel_pmap, to);
3144 if (pte == NULL)
3145 panic("pmap_pagemove 'to' pte NULL");
3146
3147 *pte = saved_pte;
3148
3149 from += PAGE_SIZE;
3150 to += PAGE_SIZE;
3151 size -= PAGE_SIZE;
3152 }
3153
3154 /* Get the processors to update the TLBs */
3155 PMAP_UPDATE_TLBS(kernel_pmap, orig_from, orig_from + orig_size);
3156 PMAP_UPDATE_TLBS(kernel_pmap, orig_to, orig_to + orig_size);
3157
3158 PMAP_READ_UNLOCK(kernel_pmap, spl);
3159
3160 }
3161 #endif /* MACH_BSD */
3162
3163 /* temporary workaround */
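/*
 * Return TRUE if the page at va in the given map may be touched by
 * the core dump code: there must be a valid pte, and it must not be
 * both wired and cache-inhibited.
 */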
3164 boolean_t
3165 coredumpok(vm_map_t map, vm_offset_t va)
3166 {
3167 pt_entry_t *ptep;
3168
3169 ptep = pmap_pte(map->pmap, va);
3170 if (0 == ptep)
3171 return FALSE;
3172 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
3173 }
3174
3175 /*
3176 * grow the number of kernel page table entries, if needed
3177 */
3178 void
3179 pmap_growkernel(vm_offset_t addr)
3180 {
3181 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
3182 struct pmap *pmap;
3183 int s;
3184 vm_offset_t ptppaddr;
3185 ppnum_t ppn;
3186 vm_page_t nkpg;
3187 pd_entry_t newpdir = 0;
3188
3189 /*
3190 * Serialize.
3191 * Losers return to try again until the winner completes the work.
3192 */
3193 if (kptobj == 0) panic("growkernel 0");
3194 if (!vm_object_lock_try(kptobj)) {
3195 return;
3196 }
3197
3198 vm_page_lock_queues();
3199
3200 s = splhigh();
3201
3202 /*
3203 * If this is the first time through, locate the end of the
3204 * kernel page table entries and set nkpt to the current
3205 * number of kernel page table pages.
3206 */
3207
3208 if (kernel_vm_end == 0) {
3209 kernel_vm_end = KERNBASE;
3210 nkpt = 0;
3211
3212 while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3213 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3214 nkpt++;
3215 }
3216 }
3217
3218 /*
3219 * Now allocate and map the required number of page tables
3220 */
3221 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3222 while (kernel_vm_end < addr) {
3223 if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3224 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3225 continue; /* someone already filled this one */
3226 }
3227
3228 nkpg = vm_page_alloc(kptobj, nkpt);
3229 if (!nkpg)
3230 panic("pmap_growkernel: no memory to grow kernel");
3231
3232 nkpt++;
3233 vm_page_wire(nkpg);
3234 ppn = nkpg->phys_page;
3235 pmap_zero_page(ppn);
3236 ptppaddr = i386_ptob(ppn);
3237 newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
3238 INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
3239 pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;
3240
3241 simple_lock(&free_pmap_lock);
3242 for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
3243 pmap != kernel_pmap ;
3244 pmap = (struct pmap *)pmap->pmap_link.next ) {
3245 *pmap_pde(pmap, kernel_vm_end) = newpdir;
3246 }
3247 simple_unlock(&free_pmap_lock);
3248 }
3249 splx(s);
3250 vm_page_unlock_queues();
3251 vm_object_unlock(kptobj);
3252 #endif
3253 }
3254
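/*
 * Return a pointer to the pte mapping v in the given vm_map's pmap,
 * or PT_ENTRY_NULL if there is none.
 */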
3255 pt_entry_t *
3256 pmap_mapgetpte(vm_map_t map, vm_offset_t v)
3257 {
3258 return pmap_pte(map->pmap, v);
3259 }
3260
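/*
 * Return TRUE if the given page number is known to the pmap layer
 * (always TRUE before pmap initialization completes).
 */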
3261 boolean_t
3262 phys_page_exists(
3263 ppnum_t pn)
3264 {
3265 pmap_paddr_t phys;
3266
3267 assert(pn != vm_page_fictitious_addr);
3268
3269 if (!pmap_initialized)
3270 return (TRUE);
3271 phys = (pmap_paddr_t) i386_ptob(pn);
3272 if (!pmap_valid_page(pn))
3273 return (FALSE);
3274
3275 return TRUE;
3276 }
3277
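/*
 * Prime the pv_entry free list: pre-allocate a batch of pv entries
 * from the zone and release them to the free list.
 */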
3278 void
3279 mapping_free_prime(void)
3280 {
3281 int i;
3282 pv_entry_t pv_e;
3283
3284 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3285 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3286 PV_FREE(pv_e);
3287 }
3288 }
3289
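/*
 * Replenish the pv_entry free list when it falls below the low
 * water mark.  Invoked as a thread call; the first invocation also
 * sets up mapping_adjust_call for subsequent use.
 */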
3290 void
3291 mapping_adjust(void)
3292 {
3293 pv_entry_t pv_e;
3294 int i;
3295 int spl;
3296
3297 if (mapping_adjust_call == NULL) {
3298 thread_call_setup(&mapping_adjust_call_data,
3299 (thread_call_func_t) mapping_adjust,
3300 (thread_call_param_t) NULL);
3301 mapping_adjust_call = &mapping_adjust_call_data;
3302 }
3303 /* XXX rethink best way to do locking here */
3304 if (pv_free_count < PV_LOW_WATER_MARK) {
3305 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3306 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3307 SPLVM(spl);
3308 PV_FREE(pv_e);
3309 SPLX(spl);
3310 }
3311 }
3312 mappingrecurse = 0;
3313 }
3314
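/*
 * Alias the kernel commpage into the user commpage range: for each
 * of the cnt pages, copy the kernel pte, add the user and global
 * bits, and clear the write bit so the user mapping is read-only.
 */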
3315 void
3316 pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
3317 {
3318 int i;
3319 pt_entry_t *opte, *npte;
3320 pt_entry_t pte;
3321
3322 for (i = 0; i < cnt; i++) {
3323 opte = pmap_pte(kernel_pmap, kernel_commpage);
3324 if (0 == opte) panic("kernel_commpage");
3325 npte = pmap_pte(kernel_pmap, user_commpage);
3326 if (0 == npte) panic("user_commpage");
3327 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3328 pte &= ~INTEL_PTE_WRITE; // ensure read only
3329 WRITE_PTE_FAST(npte, pte);
3330 kernel_commpage += INTEL_PGBYTES;
3331 user_commpage += INTEL_PGBYTES;
3332 }
3333 }
3334
3335 static cpu_pmap_t cpu_pmap_master;
3336 static struct pmap_update_list cpu_update_list_master;
3337
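/*
 * Allocate per-cpu pmap state: the cpu_pmap_t itself, its TLB update
 * list, and the temporary copy/zero mapping windows.  The boot cpu
 * uses the statically allocated structures; other cpus allocate
 * theirs from the kernel map.
 */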
3338 struct cpu_pmap *
3339 pmap_cpu_alloc(boolean_t is_boot_cpu)
3340 {
3341 int ret;
3342 int i;
3343 cpu_pmap_t *cp;
3344 pmap_update_list_t up;
3345 vm_offset_t address;
3346 vm_map_entry_t entry;
3347
3348 if (is_boot_cpu) {
3349 cp = &cpu_pmap_master;
3350 up = &cpu_update_list_master;
3351 } else {
3352 /*
3353 * The per-cpu pmap data structure itself.
3354 */
3355 ret = kmem_alloc(kernel_map,
3356 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3357 if (ret != KERN_SUCCESS) {
3358 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3359 return NULL;
3360 }
3361 bzero((void *)cp, sizeof(cpu_pmap_t));
3362
3363 /*
3364 * The tlb flush update list.
3365 */
3366 ret = kmem_alloc(kernel_map,
3367 (vm_offset_t *) &up, sizeof(*up));
3368 if (ret != KERN_SUCCESS) {
3369 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3370 pmap_cpu_free(cp);
3371 return NULL;
3372 }
3373
3374 /*
3375 * The temporary windows used for copy/zero - see loose_ends.c
3376 */
3377 for (i = 0; i < PMAP_NWINDOWS; i++) {
3378 ret = vm_map_find_space(kernel_map,
3379 &address, PAGE_SIZE, 0, &entry);
3380 if (ret != KERN_SUCCESS) {
3381 printf("pmap_cpu_alloc() "
3382 "vm_map_find_space ret=%d\n", ret);
3383 pmap_cpu_free(cp);
3384 return NULL;
3385 }
3386 vm_map_unlock(kernel_map);
3387
3388 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
3389 cp->mapwindow[i].prv_CMAP = vtopte(address);
3390 * (int *) cp->mapwindow[i].prv_CMAP = 0;
3391
3392 kprintf("pmap_cpu_alloc() "
3393 "window=%d CADDR=0x%x CMAP=0x%x\n",
3394 i, address, vtopte(address));
3395 }
3396 }
3397
3398 /*
3399 * Set up the pmap request list
3400 */
3401 cp->update_list = up;
3402 simple_lock_init(&up->lock, 0);
3403 up->count = 0;
3404
3405 return cp;
3406 }
3407
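/*
 * Free per-cpu pmap state allocated by pmap_cpu_alloc().  The
 * statically allocated boot-cpu structures are never freed.
 */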
3408 void
3409 pmap_cpu_free(struct cpu_pmap *cp)
3410 {
3411 if (cp != NULL && cp != &cpu_pmap_master) {
3412 if (cp->update_list != NULL)
3413 kfree((void *) cp->update_list,
3414 sizeof(*cp->update_list));
3415 kfree((void *) cp, sizeof(cpu_pmap_t));
3416 }
3417 }