1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * @OSF_COPYRIGHT@
32 */
33 /*
34 * Mach Operating System
35 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
36 * All Rights Reserved.
37 *
38 * Permission to use, copy, modify and distribute this software and its
39 * documentation is hereby granted, provided that both the copyright
40 * notice and this permission notice appear in all copies of the
41 * software, derivative works or modified versions, and any portions
42 * thereof, and that both notices appear in supporting documentation.
43 *
44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
46 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 *
48 * Carnegie Mellon requests users of this software to return to
49 *
50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
51 * School of Computer Science
52 * Carnegie Mellon University
53 * Pittsburgh PA 15213-3890
54 *
55 * any improvements or extensions that they make and grant Carnegie Mellon
56 * the rights to redistribute these changes.
57 */
58 /*
59 */
60
61 /*
62 * File: pmap.c
63 * Author: Avadis Tevanian, Jr., Michael Wayne Young
64 * (These guys wrote the Vax version)
65 *
66 * Physical Map management code for Intel i386, i486, and i860.
67 *
68 * Manages physical address maps.
69 *
70 * In addition to hardware address maps, this
71 * module is called upon to provide software-use-only
72 * maps which may or may not be stored in the same
73 * form as hardware maps. These pseudo-maps are
74 * used to store intermediate results from copy
75 * operations to and from address spaces.
76 *
77 * Since the information managed by this module is
78 * also stored by the logical address mapping module,
79 * this module may throw away valid virtual-to-physical
80 * mappings at almost any time. However, invalidations
81 * of virtual-to-physical mappings must be done as
82 * requested.
83 *
84 * In order to cope with hardware architectures which
85 * make virtual-to-physical map invalidates expensive,
86 * this module may delay invalidation or reduced-protection
87 * operations until such time as they are actually
88 * necessary. This module is given full information as
89 * to which processors are currently using which maps,
90 * and to when physical maps must be made correct.
91 */
92
93 #include <string.h>
94 #include <norma_vm.h>
95 #include <mach_kdb.h>
96 #include <mach_ldebug.h>
97
98 #include <mach/machine/vm_types.h>
99
100 #include <mach/boolean.h>
101 #include <kern/thread.h>
102 #include <kern/zalloc.h>
103
104 #include <kern/lock.h>
105 #include <kern/kalloc.h>
106 #include <kern/spl.h>
107
108 #include <vm/pmap.h>
109 #include <vm/vm_map.h>
110 #include <vm/vm_kern.h>
111 #include <mach/vm_param.h>
112 #include <mach/vm_prot.h>
113 #include <vm/vm_object.h>
114 #include <vm/vm_page.h>
115
116 #include <mach/machine/vm_param.h>
117 #include <machine/thread.h>
118
119 #include <kern/misc_protos.h> /* prototyping */
120 #include <i386/misc_protos.h>
121
122 #include <i386/cpuid.h>
123 #include <i386/cpu_data.h>
124 #include <i386/cpu_number.h>
125 #include <i386/machine_cpu.h>
126 #include <i386/mp_slave_boot.h>
127
128 #if MACH_KDB
129 #include <ddb/db_command.h>
130 #include <ddb/db_output.h>
131 #include <ddb/db_sym.h>
132 #include <ddb/db_print.h>
133 #endif /* MACH_KDB */
134
135 #include <kern/xpr.h>
136
137 #include <vm/vm_protos.h>
138
139 #include <i386/mp.h>
140
141 /*
142 * Forward declarations for internal functions.
143 */
144 void pmap_expand(
145 pmap_t map,
146 vm_offset_t v);
147
148 extern void pmap_remove_range(
149 pmap_t pmap,
150 vm_offset_t va,
151 pt_entry_t *spte,
152 pt_entry_t *epte);
153
154 void phys_attribute_clear(
155 ppnum_t phys,
156 int bits);
157
158 boolean_t phys_attribute_test(
159 ppnum_t phys,
160 int bits);
161
162 void phys_attribute_set(
163 ppnum_t phys,
164 int bits);
165
166 void pmap_growkernel(
167 vm_offset_t addr);
168
169 void pmap_set_reference(
170 ppnum_t pn);
171
172 void pmap_movepage(
173 unsigned long from,
174 unsigned long to,
175 vm_size_t size);
176
177 pt_entry_t * pmap_mapgetpte(
178 vm_map_t map,
179 vm_offset_t v);
180
181 boolean_t phys_page_exists(
182 ppnum_t pn);
183
184 #ifndef set_dirbase
185 void set_dirbase(vm_offset_t dirbase);
186 #endif /* set_dirbase */
187
188 #define iswired(pte) ((pte) & INTEL_PTE_WIRED)
189
190 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
191 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
192
193 #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL)
194 #define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
195
196 /*
197 * Private data structures.
198 */
199
200 /*
201 * For each vm_page_t, there is a list of all currently
202 * valid virtual mappings of that page. An entry is
203 * a pv_entry_t; the list is the pv_table.
204 */
205
206 typedef struct pv_entry {
207 struct pv_entry *next; /* next pv_entry */
208 pmap_t pmap; /* pmap where mapping lies */
209 vm_offset_t va; /* virtual address for mapping */
210 } *pv_entry_t;
211
212 #define PV_ENTRY_NULL ((pv_entry_t) 0)
213
214 pv_entry_t pv_head_table; /* array of entries, one per page */
215
216 /*
217 * pv_list entries are kept on a list that can only be accessed
218 * with the pmap system locked (at SPLVM, not in the cpus_active set).
219 * The list is refilled from the pv_list_zone if it becomes empty.
220 */
221 pv_entry_t pv_free_list; /* free list at SPLVM */
222 decl_simple_lock_data(,pv_free_list_lock)
223 int pv_free_count = 0;
224 #define PV_LOW_WATER_MARK 5000
225 #define PV_ALLOC_CHUNK 2000
226 thread_call_t mapping_adjust_call;
227 static thread_call_data_t mapping_adjust_call_data;
228 int mappingrecurse = 0;
229
230 #define PV_ALLOC(pv_e) { \
231 simple_lock(&pv_free_list_lock); \
232 if ((pv_e = pv_free_list) != 0) { \
233 pv_free_list = pv_e->next; \
234 pv_free_count--; \
235 if (pv_free_count < PV_LOW_WATER_MARK) \
236 if (hw_compare_and_store(0,1,&mappingrecurse)) \
237 thread_call_enter(mapping_adjust_call); \
238 } \
239 simple_unlock(&pv_free_list_lock); \
240 }
241
242 #define PV_FREE(pv_e) { \
243 simple_lock(&pv_free_list_lock); \
244 pv_e->next = pv_free_list; \
245 pv_free_list = pv_e; \
246 pv_free_count++; \
247 simple_unlock(&pv_free_list_lock); \
248 }
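/*
 * Illustrative sketch (not compiled): the allocate / use / free pattern
 * pmap_enter() follows with PV_ALLOC and PV_FREE.  The free list itself
 * is refilled from pv_list_zone via the mapping_adjust thread call once
 * it drops below PV_LOW_WATER_MARK.
 */
#if 0
	pv_entry_t	pv_e;

	PV_ALLOC(pv_e);			/* pop an entry from the SPLVM free list */
	if (pv_e == PV_ENTRY_NULL)
		panic("no pv_entry available");
	/* ... link pv_e into the pv_head_table chain for the page ... */
	PV_FREE(pv_e);			/* return it if it turned out to be unneeded */
#endif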
249
250 zone_t pv_list_zone; /* zone of pv_entry structures */
251
252 #ifdef PAE
253 static zone_t pdpt_zone;
254 #endif
255
256
257 /*
258 * Each entry in the pv_head_table is locked by a bit in the
259 * pv_lock_table. The lock bits are accessed by the physical
260 * address of the page they lock.
261 */
262
263 char *pv_lock_table; /* pointer to array of bits */
264 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
265
266 /*
267 * First and last physical addresses that we maintain any information
268 * for. Initialized to zero so that pmap operations done before
269 * pmap_init won't touch any non-existent structures.
270 */
271 pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
272 pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
273 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
274
275 pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0;
276
277 #define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
278 #if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */
279 static struct vm_object kptobj_object_store;
280 static vm_object_t kptobj;
281 #endif
282
283
284 /*
285 * Index into pv_head table, its lock bits, and the modify/reference
286 * bits starting at vm_first_phys.
287 */
288
289 #define pa_index(pa) (i386_btop(pa - vm_first_phys))
290
291 #define pai_to_pvh(pai) (&pv_head_table[pai])
292 #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
293 #define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
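/*
 * Illustrative sketch (not compiled): how pv_head_table, pa_index() and
 * the per-page lock bits fit together.  The helper name and the printf
 * are hypothetical; a head entry with a null pmap means the page is
 * currently unmapped.
 */
#if 0
static void
pv_walk_sketch(pmap_paddr_t pa)
{
	int		pai  = pa_index(pa);	/* index relative to vm_first_phys */
	pv_entry_t	pv_e = pai_to_pvh(pai);

	lock_pvh_pai(pai);			/* per-page lock bit */
	if (pv_e->pmap != PMAP_NULL) {
		for (; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next)
			printf("mapped at va 0x%x in pmap 0x%x\n",
			       pv_e->va, (unsigned int) pv_e->pmap);
	}
	unlock_pvh_pai(pai);
}
#endif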
294
295 /*
296 * Array of physical page attributes for managed pages.
297 * One byte per physical page.
298 */
299 char *pmap_phys_attributes;
300
301 /*
302 * Physical page attributes. Copy bits from PTE definition.
303 */
304 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
305 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
306 #define PHYS_NCACHE INTEL_PTE_NCACHE
307
308 /*
309 * Amount of virtual memory mapped by one
310 * page-directory entry.
311 */
312 #define PDE_MAPPED_SIZE (pdetova(1))
313
314 /*
315 * Locking and TLB invalidation
316 */
317
318 /*
319 * Locking Protocols:
320 *
321 * There are two structures in the pmap module that need locking:
322 * the pmaps themselves, and the per-page pv_lists (which are locked
323 * by locking the pv_lock_table entry that corresponds to the pv_head
324 * for the list in question.) Most routines want to lock a pmap and
325 * then do operations in it that require pv_list locking -- however
326 * pmap_remove_all and pmap_copy_on_write operate on a physical page
327 * basis and want to do the locking in the reverse order, i.e. lock
328 * a pv_list and then go through all the pmaps referenced by that list.
329 * To protect against deadlock between these two cases, the pmap_lock
330 * is used. There are three different locking protocols as a result:
331 *
332 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
333 * the pmap.
334 *
335 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
336 * lock on the pmap_lock (shared read), then lock the pmap
337 * and finally the pv_lists as needed [i.e. pmap lock before
338 * pv_list lock.]
339 *
340 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
341 * Get a write lock on the pmap_lock (exclusive write); this
342 * also guarantees exclusive access to the pv_lists. Lock the
343 * pmaps as needed.
344 *
345 * At no time may any routine hold more than one pmap lock or more than
346 * one pv_list lock. Because interrupt level routines can allocate
347 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
348 * kernel_pmap can only be held at splhigh.
349 */
350
351 /*
352 * We raise the interrupt level to splvm, to block interprocessor
353 * interrupts during pmap operations. We must take the CPU out of
354 * the cpus_active set while interrupts are blocked.
355 */
356 #define SPLVM(spl) { \
357 spl = splhigh(); \
358 mp_disable_preemption(); \
359 i_bit_clear(cpu_number(), &cpus_active); \
360 mp_enable_preemption(); \
361 }
362
363 #define SPLX(spl) { \
364 mp_disable_preemption(); \
365 i_bit_set(cpu_number(), &cpus_active); \
366 mp_enable_preemption(); \
367 splx(spl); \
368 }
369
370 /*
371 * Lock on pmap system
372 */
373 lock_t pmap_system_lock;
374
375 #define PMAP_READ_LOCK(pmap, spl) { \
376 SPLVM(spl); \
377 lock_read(&pmap_system_lock); \
378 simple_lock(&(pmap)->lock); \
379 }
380
381 #define PMAP_WRITE_LOCK(spl) { \
382 SPLVM(spl); \
383 lock_write(&pmap_system_lock); \
384 }
385
386 #define PMAP_READ_UNLOCK(pmap, spl) { \
387 simple_unlock(&(pmap)->lock); \
388 lock_read_done(&pmap_system_lock); \
389 SPLX(spl); \
390 }
391
392 #define PMAP_WRITE_UNLOCK(spl) { \
393 lock_write_done(&pmap_system_lock); \
394 SPLX(spl); \
395 }
396
397 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
398 simple_lock(&(pmap)->lock); \
399 lock_write_to_read(&pmap_system_lock); \
400 }
401
402 #define LOCK_PVH(index) lock_pvh_pai(index)
403
404 #define UNLOCK_PVH(index) unlock_pvh_pai(index)
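/*
 * Illustrative sketch (not compiled) of locking protocol 2) above: a
 * pmap-based operation takes the system lock shared, then the pmap's
 * own lock, then per-page pv locks as needed, releasing in reverse
 * order.  pmap and pai are assumed to be in scope.
 */
#if 0
	spl_t	spl;

	PMAP_READ_LOCK(pmap, spl);	/* SPLVM + shared pmap_system_lock + pmap lock */
	/* ... modify ptes belonging to pmap ... */
	LOCK_PVH(pai);			/* pv list lock for one physical page */
	/* ... edit that page's pv list ... */
	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);
#endif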
405
406 #if USLOCK_DEBUG
407 extern int max_lock_loops;
408 extern int disableSerialOuput;
409 #define LOOP_VAR \
410 unsigned int loop_count; \
411 loop_count = disableSerialOuput ? max_lock_loops \
412 : max_lock_loops*100
413 #define LOOP_CHECK(msg, pmap) \
414 if (--loop_count == 0) { \
415 mp_disable_preemption(); \
416 kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \
417 msg, cpu_number(), pmap, cpus_active); \
418 Debugger("deadlock detection"); \
419 mp_enable_preemption(); \
420 loop_count = max_lock_loops; \
421 }
422 #else /* USLOCK_DEBUG */
423 #define LOOP_VAR
424 #define LOOP_CHECK(msg, pmap)
425 #endif /* USLOCK_DEBUG */
426
427 #define PMAP_UPDATE_TLBS(pmap, s, e) \
428 { \
429 cpu_set cpu_mask; \
430 cpu_set users; \
431 \
432 mp_disable_preemption(); \
433 cpu_mask = 1 << cpu_number(); \
434 \
435 /* Since the pmap is locked, other updates are locked */ \
436 /* out, and any pmap_activate has finished. */ \
437 \
438 /* find other cpus using the pmap */ \
439 users = (pmap)->cpus_using & ~cpu_mask; \
440 if (users) { \
441 LOOP_VAR; \
442 /* signal them, and wait for them to finish */ \
443 /* using the pmap */ \
444 signal_cpus(users, (pmap), (s), (e)); \
445 while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
446 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \
447 cpu_pause(); \
448 } \
449 } \
450 /* invalidate our own TLB if pmap is in use */ \
451 \
452 if ((pmap)->cpus_using & cpu_mask) { \
453 INVALIDATE_TLB((pmap), (s), (e)); \
454 } \
455 \
456 mp_enable_preemption(); \
457 }
458
459 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
460
461 #define INVALIDATE_TLB(m, s, e) { \
462 flush_tlb(); \
463 }
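/*
 * Illustrative sketch (not compiled): PTEs are changed first, while the
 * pmap is locked, and then PMAP_UPDATE_TLBS() covers the touched range
 * so other cpus using the pmap flush before the caller proceeds.  pmap
 * and va are assumed to be in scope.
 */
#if 0
	pt_entry_t *pte = pmap_pte(pmap, va);

	*pte &= ~INTEL_PTE_WRITE;		/* e.g. write-protect one page */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
#endif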
464
465 /*
466 * Structures to keep track of pending TLB invalidations
467 */
468 cpu_set cpus_active;
469 cpu_set cpus_idle;
470
471 #define UPDATE_LIST_SIZE 4
472
473 struct pmap_update_item {
474 pmap_t pmap; /* pmap to invalidate */
475 vm_offset_t start; /* start address to invalidate */
476 vm_offset_t end; /* end address to invalidate */
477 };
478
479 typedef struct pmap_update_item *pmap_update_item_t;
480
481 /*
482 * List of pmap updates. If the list overflows,
483 * the last entry is changed to invalidate all.
484 */
485 struct pmap_update_list {
486 decl_simple_lock_data(,lock)
487 int count;
488 struct pmap_update_item item[UPDATE_LIST_SIZE];
489 } ;
490 typedef struct pmap_update_list *pmap_update_list_t;
491
492 extern void signal_cpus(
493 cpu_set use_list,
494 pmap_t pmap,
495 vm_offset_t start,
496 vm_offset_t end);
497
498 pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
499
500 /*
501 * Other useful macros.
502 */
503 #define current_pmap() (vm_map_pmap(current_thread()->map))
504 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
505
506 struct pmap kernel_pmap_store;
507 pmap_t kernel_pmap;
508
509 #ifdef PMAP_QUEUE
510 decl_simple_lock_data(,free_pmap_lock)
511 #endif
512
513 struct zone *pmap_zone; /* zone of pmap structures */
514
515 int pmap_debug = 0; /* flag for debugging prints */
516
517 unsigned int inuse_ptepages_count = 0; /* debugging */
518
519 /*
520 * Pmap cache. Cache is threaded through ref_count field of pmap.
521 * Max will eventually be constant -- variable for experimentation.
522 */
523 int pmap_cache_max = 32;
524 int pmap_alloc_chunk = 8;
525 pmap_t pmap_cache_list;
526 int pmap_cache_count;
527 decl_simple_lock_data(,pmap_cache_lock)
528
529 extern vm_offset_t hole_start, hole_end;
530
531 extern char end;
532
533 static int nkpt;
534
535 pt_entry_t *DMAP1, *DMAP2;
536 caddr_t DADDR1;
537 caddr_t DADDR2;
538
539 #if DEBUG_ALIAS
540 #define PMAP_ALIAS_MAX 32
541 struct pmap_alias {
542 vm_offset_t rpc;
543 pmap_t pmap;
544 vm_offset_t va;
545 int cookie;
546 #define PMAP_ALIAS_COOKIE 0xdeadbeef
547 } pmap_aliasbuf[PMAP_ALIAS_MAX];
548 int pmap_alias_index = 0;
549 extern vm_offset_t get_rpc();
550
551 #endif /* DEBUG_ALIAS */
552
553 #define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
554 #define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT])
555
556 static __inline int
557 pmap_is_current(pmap_t pmap)
558 {
559 return (pmap == kernel_pmap ||
560 (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
561 }
562
563
564 /*
565 * return address of mapped pte for vaddr va in pmap pmap.
566 */
567 pt_entry_t *
568 pmap_pte(pmap_t pmap, vm_offset_t va)
569 {
570 pd_entry_t *pde;
571 pd_entry_t newpf;
572
573 pde = pmap_pde(pmap, va);
574 if (*pde != 0) {
575 if (pmap_is_current(pmap))
576 return( vtopte(va));
577 newpf = *pde & PG_FRAME;
578 if (((*CM4) & PG_FRAME) != newpf) {
579 *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
580 invlpg((u_int)CA4);
581 }
582 return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
583 }
584 return(0);
585 }
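/*
 * Illustrative caller-side sketch (not compiled): a null return from
 * pmap_pte() means no page table page covers va yet, which is why
 * pmap_enter() loops over pmap_expand().  map and va are assumed to be
 * in scope.
 */
#if 0
	pt_entry_t	*pte;
	ppnum_t		ppn = 0;

	while ((pte = pmap_pte(map, va)) == PT_ENTRY_NULL)
		pmap_expand(map, va);		/* allocate the missing pte page */
	if (*pte & INTEL_PTE_VALID)
		ppn = (ppnum_t) i386_btop(pte_to_pa(*pte));
#endif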
586
587 #define DEBUG_PTE_PAGE 0
588
589 #if DEBUG_PTE_PAGE
590 void
591 ptep_check(
592 ptep_t ptep)
593 {
594 register pt_entry_t *pte, *epte;
595 int ctu, ctw;
596
597 /* check the use and wired counts */
598 if (ptep == PTE_PAGE_NULL)
599 return;
600 pte = pmap_pte(ptep->pmap, ptep->va);
601 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
602 ctu = 0;
603 ctw = 0;
604 while (pte < epte) {
605 if (pte->pfn != 0) {
606 ctu++;
607 if (pte->wired)
608 ctw++;
609 }
610 pte++;
611 }
612
613 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
614 printf("use %d wired %d - actual use %d wired %d\n",
615 ptep->use_count, ptep->wired_count, ctu, ctw);
616 panic("pte count");
617 }
618 }
619 #endif /* DEBUG_PTE_PAGE */
620
621 /*
622 * Map memory at initialization. The physical addresses being
623 * mapped are not managed and are never unmapped.
624 *
625 * For now, VM is already on, we only need to map the
626 * specified memory.
627 */
628 vm_offset_t
629 pmap_map(
630 register vm_offset_t virt,
631 register vm_offset_t start_addr,
632 register vm_offset_t end_addr,
633 register vm_prot_t prot)
634 {
635 register int ps;
636
637 ps = PAGE_SIZE;
638 while (start_addr < end_addr) {
639 pmap_enter(kernel_pmap,
640 virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
641 virt += ps;
642 start_addr += ps;
643 }
644 return(virt);
645 }
646
647 /*
648 * Back-door routine for mapping kernel VM at initialization.
649 * Useful for mapping memory outside the range
650 * [vm_first_phys, vm_last_phys) (i.e., devices).
651 * Sets no-cache, A, D.
652 * Otherwise like pmap_map.
653 */
654 vm_offset_t
655 pmap_map_bd(
656 register vm_offset_t virt,
657 register vm_offset_t start_addr,
658 register vm_offset_t end_addr,
659 vm_prot_t prot)
660 {
661 register pt_entry_t template;
662 register pt_entry_t *pte;
663
664 template = pa_to_pte(start_addr)
665 | INTEL_PTE_NCACHE
666 | INTEL_PTE_REF
667 | INTEL_PTE_MOD
668 | INTEL_PTE_WIRED
669 | INTEL_PTE_VALID;
670 if (prot & VM_PROT_WRITE)
671 template |= INTEL_PTE_WRITE;
672
673 /* XXX move pmap_pte out of loop, once one pte mapped, all are */
674 while (start_addr < end_addr) {
675 pte = pmap_pte(kernel_pmap, virt);
676 if (pte == PT_ENTRY_NULL) {
677 panic("pmap_map_bd: Invalid kernel address\n");
678 }
679 WRITE_PTE_FAST(pte, template)
680 pte_increment_pa(template);
681 virt += PAGE_SIZE;
682 start_addr += PAGE_SIZE;
683 }
684
685 flush_tlb();
686 return(virt);
687 }
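/*
 * Illustrative sketch (not compiled) of a boot-time device mapping via
 * pmap_map_bd().  The physical base is a made-up example and kern_va is
 * a hypothetical, already-reserved kernel virtual address.
 */
#if 0
	vm_offset_t kern_va;			/* hypothetical reserved kernel VA */
	vm_offset_t va_next;

	va_next = pmap_map_bd(kern_va,
			      (vm_offset_t) 0xfee00000,		/* hypothetical device base */
			      (vm_offset_t) 0xfee00000 + PAGE_SIZE,
			      VM_PROT_READ | VM_PROT_WRITE);
#endif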
688
689 extern char *first_avail;
690 extern vm_offset_t virtual_avail, virtual_end;
691 extern pmap_paddr_t avail_start, avail_end;
692 extern vm_offset_t etext;
693 extern void *sectHIBB;
694 extern int sectSizeHIB;
695
696 /*
697 * Bootstrap the system enough to run with virtual memory.
698 * Map the kernel's code and data, and allocate the system page table.
699 * Called with mapping OFF. Page_size must already be set.
700 *
701 * Parameters:
702 * load_start: PA where kernel was loaded
703 * avail_start PA of first available physical page -
704 * after kernel page tables
705 * avail_end PA of last available physical page
706 * virtual_avail VA of first available page -
707 * after kernel page tables
708 * virtual_end VA of last available page -
709 * end of kernel address space
710 *
711 * &start_text start of kernel text
712 * &etext end of kernel text
713 */
714
715 void
716 pmap_bootstrap(
717 __unused vm_offset_t load_start)
718 {
719 vm_offset_t va;
720 pt_entry_t *pte;
721 int i;
722 int wpkernel, boot_arg;
723
724 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
725 * known to VM */
726
727 /*
728 * The kernel's pmap is statically allocated so we don't
729 * have to use pmap_create, which is unlikely to work
730 * correctly at this part of the boot sequence.
731 */
732
733 kernel_pmap = &kernel_pmap_store;
734 #ifdef PMAP_QUEUE
735 kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */
736 kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */
737 #endif
738 kernel_pmap->ref_count = 1;
739 kernel_pmap->pm_obj = (vm_object_t) NULL;
740 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
741 kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD;
742 #ifdef PAE
743 kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
744 kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT;
745 #endif
746
747 va = (vm_offset_t)kernel_pmap->dirbase;
748 /* setup self referential mapping(s) */
749 for (i = 0; i< NPGPTD; i++ ) {
750 pmap_paddr_t pa;
751 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
752 * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) =
753 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
754 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
755 #ifdef PAE
756 kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID;
757 #endif
758 }
759
760 nkpt = NKPT;
761
762 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
763 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
764
765 /*
766 * Reserve some special page table entries/VA space for temporary
767 * mapping of pages.
768 */
769 #define SYSMAP(c, p, v, n) \
770 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
771
772 va = virtual_avail;
773 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
774
775 /*
776 * CMAP1/CMAP2 are used for zeroing and copying pages.
777 * CMAP3 is used for ml_phys_read/write.
778 */
779 SYSMAP(caddr_t, CM1, CA1, 1)
780 * (pt_entry_t *) CM1 = 0;
781 SYSMAP(caddr_t, CM2, CA2, 1)
782 * (pt_entry_t *) CM2 = 0;
783 SYSMAP(caddr_t, CM3, CA3, 1)
784 * (pt_entry_t *) CM3 = 0;
785
786 /* used by pmap_pte */
787 SYSMAP(caddr_t, CM4, CA4, 1)
788 * (pt_entry_t *) CM4 = 0;
789
790 /* DMAP used by the debugger */
791 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
792 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
793
794
795 lock_init(&pmap_system_lock,
796 FALSE, /* NOT a sleep lock */
797 0, 0);
798
799 virtual_avail = va;
800
801 wpkernel = 1;
802 if (PE_parse_boot_arg("debug", &boot_arg)) {
803 if (boot_arg & DB_PRT) wpkernel = 0;
804 if (boot_arg & DB_NMI) wpkernel = 0;
805 }
806
807 /* remap kernel text readonly if not debugging or kprintfing */
808 if (wpkernel)
809 {
810 vm_offset_t myva;
811 pt_entry_t *ptep;
812
813 for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
814 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
815 continue;
816 ptep = pmap_pte(kernel_pmap, myva);
817 if (ptep)
818 *ptep &= ~INTEL_PTE_RW;
819 }
820 flush_tlb();
821 }
822
823 simple_lock_init(&kernel_pmap->lock, 0);
824 simple_lock_init(&pv_free_list_lock, 0);
825
826 /* invalidate user virtual addresses */
827 memset((char *)kernel_pmap->dirbase,
828 0,
829 (KPTDI) * sizeof(pd_entry_t));
830
831 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
832 VADDR(KPTDI,0), virtual_end);
833 #ifdef PAE
834 kprintf("Available physical space from 0x%llx to 0x%llx\n",
835 avail_start, avail_end);
836 printf("PAE enabled\n");
837 #else
838 kprintf("Available physical space from 0x%x to 0x%x\n",
839 avail_start, avail_end);
840 #endif
841 }
842
843 void
844 pmap_virtual_space(
845 vm_offset_t *startp,
846 vm_offset_t *endp)
847 {
848 *startp = virtual_avail;
849 *endp = virtual_end;
850 }
851
852 /*
853 * Initialize the pmap module.
854 * Called by vm_init, to initialize any structures that the pmap
855 * system needs to map virtual memory.
856 */
857 void
858 pmap_init(void)
859 {
860 register long npages;
861 vm_offset_t addr;
862 register vm_size_t s;
863 vm_offset_t vaddr;
864 ppnum_t ppn;
865
866 /*
867 * Allocate memory for the pv_head_table and its lock bits,
868 * the modify bit array, and the pte_page table.
869 */
870
871 /* zero bias all these arrays now instead of off avail_start
872 so we cover all memory */
873 npages = i386_btop(avail_end);
874 s = (vm_size_t) (sizeof(struct pv_entry) * npages
875 + pv_lock_table_size(npages)
876 + npages);
877
878 s = round_page(s);
879 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
880 panic("pmap_init");
881
882 memset((char *)addr, 0, s);
883
884 /*
885 * Allocate the structures first to preserve word-alignment.
886 */
887 pv_head_table = (pv_entry_t) addr;
888 addr = (vm_offset_t) (pv_head_table + npages);
889
890 pv_lock_table = (char *) addr;
891 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
892
893 pmap_phys_attributes = (char *) addr;
894
895 /*
896 * Create the zone of physical maps,
897 * and of the physical-to-virtual entries.
898 */
899 s = (vm_size_t) sizeof(struct pmap);
900 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
901 s = (vm_size_t) sizeof(struct pv_entry);
902 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
903 #ifdef PAE
904 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
905 s = 63; /* 32-byte pdpt (4 * 8 bytes) plus up to 31 bytes of alignment slack; see pmap_create() */
906 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
907 #endif
908
909 /*
910 * Only now, when all of the data structures are allocated,
911 * can we set vm_first_phys and vm_last_phys. If we set them
912 * too soon, the kmem_alloc_wired above will try to use these
913 * data structures and blow up.
914 */
915
916 /* zero bias this now so we cover all memory */
917 vm_first_phys = 0;
918 vm_last_phys = avail_end;
919
920 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
921 kptobj = &kptobj_object_store;
922 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
923 kernel_pmap->pm_obj = kptobj;
924 #endif
925
926 /* create pv entries for kernel pages mapped by low level
927 startup code. these have to exist so we can pmap_remove()
928 e.g. kext pages from the middle of our addr space */
929
930 vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS;
931 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
932 pv_entry_t pv_e;
933
934 pv_e = pai_to_pvh(ppn);
935 pv_e->va = vaddr;
936 vaddr += PAGE_SIZE;
937 pv_e->pmap = kernel_pmap;
938 pv_e->next = PV_ENTRY_NULL;
939 }
940
941 pmap_initialized = TRUE;
942
943 /*
944 * Initialize pmap cache.
945 */
946 pmap_cache_list = PMAP_NULL;
947 pmap_cache_count = 0;
948 simple_lock_init(&pmap_cache_lock, 0);
949 #ifdef PMAP_QUEUE
950 simple_lock_init(&free_pmap_lock, 0);
951 #endif
952
953 }
954
955 void
956 x86_lowmem_free(void)
957 {
958 /* free lowmem pages back to the vm system. we had to defer doing this
959 until the vm system was fully up.
960 the actual pages that are released are determined by which
961 pages the memory sizing code puts into the region table */
962
963 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS,
964 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
965 }
966
967
968 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
969
970 boolean_t
971 pmap_verify_free(
972 ppnum_t pn)
973 {
974 pmap_paddr_t phys;
975 pv_entry_t pv_h;
976 int pai;
977 spl_t spl;
978 boolean_t result;
979
980 assert(pn != vm_page_fictitious_addr);
981 phys = (pmap_paddr_t)i386_ptob(pn);
982 if (!pmap_initialized)
983 return(TRUE);
984
985 if (!pmap_valid_page(pn))
986 return(FALSE);
987
988 PMAP_WRITE_LOCK(spl);
989
990 pai = pa_index(phys);
991 pv_h = pai_to_pvh(pai);
992
993 result = (pv_h->pmap == PMAP_NULL);
994 PMAP_WRITE_UNLOCK(spl);
995
996 return(result);
997 }
998
999 /*
1000 * Create and return a physical map.
1001 *
1002 * If the size specified for the map
1003 * is zero, the map is an actual physical
1004 * map, and may be referenced by the
1005 * hardware.
1006 *
1007 * If the size specified is non-zero,
1008 * the map will be used in software only, and
1009 * is bounded by that size.
1010 */
1011 pmap_t
1012 pmap_create(
1013 vm_size_t size)
1014 {
1015 register pmap_t p;
1016 #ifdef PMAP_QUEUE
1017 register pmap_t pro;
1018 spl_t s;
1019 #endif
1020 register int i;
1021 register vm_offset_t va;
1022
1023 /*
1024 * A software use-only map doesn't even need a map.
1025 */
1026
1027 if (size != 0) {
1028 return(PMAP_NULL);
1029 }
1030
1031 p = (pmap_t) zalloc(pmap_zone);
1032 if (PMAP_NULL == p)
1033 panic("pmap_create zalloc");
1034 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1035 panic("pmap_create kmem_alloc_wired");
1036 #ifdef PAE
1037 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1038 if ((vm_offset_t)NULL == p->pm_hold) {
1039 panic("pdpt zalloc");
1040 }
1041 p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1042 p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */
1043 #endif
1044 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG))))
1045 panic("pmap_create vm_object_allocate");
1046 memcpy(p->dirbase,
1047 (void *)((unsigned int)IdlePTD | KERNBASE),
1048 NBPTD);
1049 va = (vm_offset_t)p->dirbase;
1050 p->pdirbase = (pd_entry_t *)(kvtophys(va));
1051 simple_lock_init(&p->lock, 0);
1052
1053 /* setup self referential mapping(s) */
1054 for (i = 0; i< NPGPTD; i++ ) {
1055 pmap_paddr_t pa;
1056 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1057 * (pd_entry_t *) (p->dirbase + PTDPTDI + i) =
1058 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
1059 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
1060 #ifdef PAE
1061 p->pm_pdpt[i] = pa | INTEL_PTE_VALID;
1062 #endif
1063 }
1064
1065 p->cpus_using = 0;
1066 p->stats.resident_count = 0;
1067 p->stats.wired_count = 0;
1068 p->ref_count = 1;
1069
1070 #ifdef PMAP_QUEUE
1071 /* insert new pmap at head of queue hanging off kernel_pmap */
1072 SPLVM(s);
1073 simple_lock(&free_pmap_lock);
1074 p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next;
1075 kernel_pmap->pmap_link.next = (queue_t)p;
1076
1077 pro = (pmap_t) p->pmap_link.next;
1078 p->pmap_link.prev = (queue_t)pro->pmap_link.prev;
1079 pro->pmap_link.prev = (queue_t)p;
1080
1081
1082 simple_unlock(&free_pmap_lock);
1083 SPLX(s);
1084 #endif
1085
1086 return(p);
1087 }
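/*
 * Illustrative sketch (not compiled) of the reference-counted lifecycle
 * these routines expect: create with size 0, add references as needed,
 * and call pmap_destroy() once per reference.
 */
#if 0
	pmap_t p;

	p = pmap_create((vm_size_t) 0);	/* non-zero size returns PMAP_NULL */
	pmap_reference(p);		/* ref_count 1 -> 2 */
	pmap_destroy(p);		/* ref_count 2 -> 1, pmap stays alive */
	pmap_destroy(p);		/* ref_count 0: page tables freed */
#endif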
1088
1089 /*
1090 * Retire the given physical map from service.
1091 * Should only be called if the map contains
1092 * no valid mappings.
1093 */
1094
1095 void
1096 pmap_destroy(
1097 register pmap_t p)
1098 {
1099 register pt_entry_t *pdep;
1100 register int c;
1101 spl_t s;
1102 register vm_page_t m;
1103 #ifdef PMAP_QUEUE
1104 register pmap_t pre,pro;
1105 #endif
1106
1107 if (p == PMAP_NULL)
1108 return;
1109
1110 SPLVM(s);
1111 simple_lock(&p->lock);
1112 c = --p->ref_count;
1113 if (c == 0) {
1114 register int my_cpu;
1115
1116 mp_disable_preemption();
1117 my_cpu = cpu_number();
1118
1119 /*
1120 * If some cpu is not using the physical pmap pointer that it
1121 * is supposed to be (see set_dirbase), we might be using the
1122 * pmap that is being destroyed! Make sure we are
1123 * physically on the right pmap:
1124 */
1125 /* force pmap/cr3 update */
1126 PMAP_UPDATE_TLBS(p,
1127 VM_MIN_ADDRESS,
1128 VM_MAX_KERNEL_ADDRESS);
1129
1130 if (PMAP_REAL(my_cpu) == p) {
1131 PMAP_CPU_CLR(p, my_cpu);
1132 PMAP_REAL(my_cpu) = kernel_pmap;
1133 #ifdef PAE
1134 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
1135 #else
1136 set_cr3((unsigned int)kernel_pmap->pdirbase);
1137 #endif
1138 }
1139 mp_enable_preemption();
1140 }
1141 simple_unlock(&p->lock);
1142 SPLX(s);
1143
1144 if (c != 0) {
1145 return; /* still in use */
1146 }
1147
1148 #ifdef PMAP_QUEUE
1149 /* remove from pmap queue */
1150 SPLVM(s);
1151 simple_lock(&free_pmap_lock);
1152
1153 pre = (pmap_t)p->pmap_link.prev;
1154 pre->pmap_link.next = (queue_t)p->pmap_link.next;
1155 pro = (pmap_t)p->pmap_link.next;
1156 pro->pmap_link.prev = (queue_t)p->pmap_link.prev;
1157
1158 simple_unlock(&free_pmap_lock);
1159 SPLX(s);
1160 #endif
1161
1162 /*
1163 * Free the memory maps, then the
1164 * pmap structure.
1165 */
1166
1167 pdep = (pt_entry_t *)p->dirbase;
1168
1169 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
1170 int ind;
1171 if (*pdep & INTEL_PTE_VALID) {
1172 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1173 vm_object_lock(p->pm_obj);
1174 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1175 if (m == VM_PAGE_NULL) {
1176 panic("pmap_destroy: pte page not in object");
1177 }
1178 vm_page_lock_queues();
1179 vm_page_free(m);
1180 inuse_ptepages_count--;
1181 vm_object_unlock(p->pm_obj);
1182 vm_page_unlock_queues();
1183
1184 /*
1185 * Clear pdes, this might be headed for the cache.
1186 */
1187 *pdep++ = 0;
1188 }
1189 else {
1190 *pdep++ = 0;
1191 }
1192
1193 }
1194
1195 vm_object_deallocate(p->pm_obj);
1196 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1197 #ifdef PAE
1198 zfree(pdpt_zone, (void *)p->pm_hold);
1199 #endif
1200 zfree(pmap_zone, p);
1201 }
1202
1203 /*
1204 * Add a reference to the specified pmap.
1205 */
1206
1207 void
1208 pmap_reference(
1209 register pmap_t p)
1210 {
1211 spl_t s;
1212
1213 if (p != PMAP_NULL) {
1214 SPLVM(s);
1215 simple_lock(&p->lock);
1216 p->ref_count++;
1217 simple_unlock(&p->lock);
1218 SPLX(s);
1219 }
1220 }
1221
1222 /*
1223 * Remove a range of hardware page-table entries.
1224 * The entries given are the first (inclusive)
1225 * and last (exclusive) entries for the VM pages.
1226 * The virtual address is the va for the first pte.
1227 *
1228 * The pmap must be locked.
1229 * If the pmap is not the kernel pmap, the range must lie
1230 * entirely within one pte-page. This is NOT checked.
1231 * Assumes that the pte-page exists.
1232 */
1233
1234 /* static */
1235 void
1236 pmap_remove_range(
1237 pmap_t pmap,
1238 vm_offset_t va,
1239 pt_entry_t *spte,
1240 pt_entry_t *epte)
1241 {
1242 register pt_entry_t *cpte;
1243 int num_removed, num_unwired;
1244 int pai;
1245 pmap_paddr_t pa;
1246
1247 #if DEBUG_PTE_PAGE
1248 if (pmap != kernel_pmap)
1249 ptep_check(get_pte_page(spte));
1250 #endif /* DEBUG_PTE_PAGE */
1251 num_removed = 0;
1252 num_unwired = 0;
1253
1254 for (cpte = spte; cpte < epte;
1255 cpte++, va += PAGE_SIZE) {
1256
1257 pa = pte_to_pa(*cpte);
1258 if (pa == 0)
1259 continue;
1260
1261 num_removed++;
1262 if (iswired(*cpte))
1263 num_unwired++;
1264
1265 if (!valid_page(i386_btop(pa))) {
1266
1267 /*
1268 * Outside range of managed physical memory.
1269 * Just remove the mappings.
1270 */
1271 register pt_entry_t *lpte = cpte;
1272
1273 *lpte = 0;
1274 continue;
1275 }
1276
1277 pai = pa_index(pa);
1278 LOCK_PVH(pai);
1279
1280 /*
1281 * Get the modify and reference bits.
1282 */
1283 {
1284 register pt_entry_t *lpte;
1285
1286 lpte = cpte;
1287 pmap_phys_attributes[pai] |=
1288 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1289 *lpte = 0;
1290
1291 }
1292
1293 /*
1294 * Remove the mapping from the pvlist for
1295 * this physical page.
1296 */
1297 {
1298 register pv_entry_t pv_h, prev, cur;
1299
1300 pv_h = pai_to_pvh(pai);
1301 if (pv_h->pmap == PMAP_NULL) {
1302 panic("pmap_remove: null pv_list!");
1303 }
1304 if (pv_h->va == va && pv_h->pmap == pmap) {
1305 /*
1306 * Header is the pv_entry. Copy the next one
1307 * to header and free the next one (we cannot
1308 * free the header)
1309 */
1310 cur = pv_h->next;
1311 if (cur != PV_ENTRY_NULL) {
1312 *pv_h = *cur;
1313 PV_FREE(cur);
1314 }
1315 else {
1316 pv_h->pmap = PMAP_NULL;
1317 }
1318 }
1319 else {
1320 cur = pv_h;
1321 do {
1322 prev = cur;
1323 if ((cur = prev->next) == PV_ENTRY_NULL) {
1324 panic("pmap-remove: mapping not in pv_list!");
1325 }
1326 } while (cur->va != va || cur->pmap != pmap);
1327 prev->next = cur->next;
1328 PV_FREE(cur);
1329 }
1330 UNLOCK_PVH(pai);
1331 }
1332 }
1333
1334 /*
1335 * Update the counts
1336 */
1337 assert(pmap->stats.resident_count >= num_removed);
1338 pmap->stats.resident_count -= num_removed;
1339 assert(pmap->stats.wired_count >= num_unwired);
1340 pmap->stats.wired_count -= num_unwired;
1341 }
1342
1343 /*
1344 * Remove phys addr if mapped in specified map
1345 *
1346 */
1347 void
1348 pmap_remove_some_phys(
1349 __unused pmap_t map,
1350 __unused ppnum_t pn)
1351 {
1352
1353 /* Implement to support working set code */
1354
1355 }
1356
1357 /*
1358 * Remove the given range of addresses
1359 * from the specified map.
1360 *
1361 * It is assumed that the start and end are properly
1362 * rounded to the hardware page size.
1363 */
1364
1365
1366 void
1367 pmap_remove(
1368 pmap_t map,
1369 addr64_t s64,
1370 addr64_t e64)
1371 {
1372 spl_t spl;
1373 register pt_entry_t *pde;
1374 register pt_entry_t *spte, *epte;
1375 vm_offset_t l;
1376 vm_offset_t s, e;
1377 vm_offset_t orig_s;
1378
1379 if (map == PMAP_NULL)
1380 return;
1381
1382 PMAP_READ_LOCK(map, spl);
1383
1384 if (value_64bit(s64) || value_64bit(e64)) {
1385 panic("pmap_remove addr overflow");
1386 }
1387
1388 orig_s = s = (vm_offset_t)low32(s64);
1389 e = (vm_offset_t)low32(e64);
1390
1391 pde = pmap_pde(map, s);
1392
1393 while (s < e) {
1394 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1395 if (l > e)
1396 l = e;
1397 if (*pde & INTEL_PTE_VALID) {
1398 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1399 spte = &spte[ptenum(s)];
1400 epte = &spte[intel_btop(l-s)];
1401 pmap_remove_range(map, s, spte, epte);
1402 }
1403 s = l;
1404 pde++;
1405 }
1406
1407 PMAP_UPDATE_TLBS(map, orig_s, e);
1408
1409 PMAP_READ_UNLOCK(map, spl);
1410 }
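/*
 * Illustrative sketch (not compiled): callers pass page-rounded, 32-bit
 * representable bounds, per the comment above; 64-bit values panic.
 * map, start and end are hypothetical names.
 */
#if 0
	addr64_t s = (addr64_t) trunc_page(start);
	addr64_t e = (addr64_t) round_page(end);

	pmap_remove(map, s, e);
#endif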
1411
1412 /*
1413 * Routine: pmap_page_protect
1414 *
1415 * Function:
1416 * Lower the permission for all mappings to a given
1417 * page.
1418 */
1419 void
1420 pmap_page_protect(
1421 ppnum_t pn,
1422 vm_prot_t prot)
1423 {
1424 pv_entry_t pv_h, prev;
1425 register pv_entry_t pv_e;
1426 register pt_entry_t *pte;
1427 int pai;
1428 register pmap_t pmap;
1429 spl_t spl;
1430 boolean_t remove;
1431 pmap_paddr_t phys;
1432
1433 assert(pn != vm_page_fictitious_addr);
1434 phys = (pmap_paddr_t)i386_ptob(pn);
1435 if (!valid_page(pn)) {
1436 /*
1437 * Not a managed page.
1438 */
1439 return;
1440 }
1441
1442 /*
1443 * Determine the new protection.
1444 */
1445 switch (prot) {
1446 case VM_PROT_READ:
1447 case VM_PROT_READ|VM_PROT_EXECUTE:
1448 remove = FALSE;
1449 break;
1450 case VM_PROT_ALL:
1451 return; /* nothing to do */
1452 default:
1453 remove = TRUE;
1454 break;
1455 }
1456
1457 /*
1458 * Lock the pmap system first, since we will be changing
1459 * several pmaps.
1460 */
1461
1462 PMAP_WRITE_LOCK(spl);
1463
1464 pai = pa_index(phys);
1465 pv_h = pai_to_pvh(pai);
1466
1467 /*
1468 * Walk down PV list, changing or removing all mappings.
1469 * We do not have to lock the pv_list because we have
1470 * the entire pmap system locked.
1471 */
1472 if (pv_h->pmap != PMAP_NULL) {
1473
1474 prev = pv_e = pv_h;
1475 do {
1476 register vm_offset_t va;
1477 pmap = pv_e->pmap;
1478 /*
1479 * Lock the pmap to block pmap_extract and similar routines.
1480 */
1481 simple_lock(&pmap->lock);
1482
1483 {
1484
1485 va = pv_e->va;
1486 pte = pmap_pte(pmap, va);
1487
1488 /*
1489 * Consistency checks.
1490 */
1491 /* assert(*pte & INTEL_PTE_VALID); XXX */
1492 /* assert(pte_to_phys(*pte) == phys); */
1493
1494 }
1495
1496 /*
1497 * Remove the mapping if new protection is NONE
1498 * or if write-protecting a kernel mapping.
1499 */
1500 if (remove || pmap == kernel_pmap) {
1501 /*
1502 * Remove the mapping, collecting any modify bits.
1503 */
1504 {
1505 pmap_phys_attributes[pai] |=
1506 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1507 *pte++ = 0;
1508 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1509 }
1510
1511 assert(pmap->stats.resident_count >= 1);
1512 pmap->stats.resident_count--;
1513
1514 /*
1515 * Remove the pv_entry.
1516 */
1517 if (pv_e == pv_h) {
1518 /*
1519 * Fix up head later.
1520 */
1521 pv_h->pmap = PMAP_NULL;
1522 }
1523 else {
1524 /*
1525 * Delete this entry.
1526 */
1527 prev->next = pv_e->next;
1528 PV_FREE(pv_e);
1529 }
1530 }
1531 else {
1532 /*
1533 * Write-protect.
1534 */
1535
1536 *pte &= ~INTEL_PTE_WRITE;
1537 pte++;
1538 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1539 /*
1540 * Advance prev.
1541 */
1542 prev = pv_e;
1543 }
1544
1545 simple_unlock(&pmap->lock);
1546
1547 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1548
1549 /*
1550 * If pv_head mapping was removed, fix it up.
1551 */
1552 if (pv_h->pmap == PMAP_NULL) {
1553 pv_e = pv_h->next;
1554 if (pv_e != PV_ENTRY_NULL) {
1555 *pv_h = *pv_e;
1556 PV_FREE(pv_e);
1557 }
1558 }
1559 }
1560
1561 PMAP_WRITE_UNLOCK(spl);
1562 }
1563
1564 /*
1565 * Routine:
1566 * pmap_disconnect
1567 *
1568 * Function:
1569 * Disconnect all mappings for this page and return reference and change status
1570 * in generic format.
1571 *
1572 */
1573 unsigned int pmap_disconnect(
1574 ppnum_t pa)
1575 {
1576 pmap_page_protect(pa, 0); /* disconnect the page */
1577 return (pmap_get_refmod(pa)); /* return ref/chg status */
1578 }
1579
1580 /*
1581 * Set the physical protection on the
1582 * specified range of this map as requested.
1583 * Will not increase permissions.
1584 */
1585 void
1586 pmap_protect(
1587 pmap_t map,
1588 vm_offset_t s,
1589 vm_offset_t e,
1590 vm_prot_t prot)
1591 {
1592 register pt_entry_t *pde;
1593 register pt_entry_t *spte, *epte;
1594 vm_offset_t l;
1595 spl_t spl;
1596 vm_offset_t orig_s = s;
1597
1598
1599 if (map == PMAP_NULL)
1600 return;
1601
1602 /*
1603 * Determine the new protection.
1604 */
1605 switch (prot) {
1606 case VM_PROT_READ:
1607 case VM_PROT_READ|VM_PROT_EXECUTE:
1608 break;
1609 case VM_PROT_READ|VM_PROT_WRITE:
1610 case VM_PROT_ALL:
1611 return; /* nothing to do */
1612 default:
1613 pmap_remove(map, (addr64_t)s, (addr64_t)e);
1614 return;
1615 }
1616
1617 SPLVM(spl);
1618 simple_lock(&map->lock);
1619
1620 pde = pmap_pde(map, s);
1621 while (s < e) {
1622 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1623 if (l > e)
1624 l = e;
1625 if (*pde & INTEL_PTE_VALID) {
1626 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1627 spte = &spte[ptenum(s)];
1628 epte = &spte[intel_btop(l-s)];
1629
1630 while (spte < epte) {
1631 if (*spte & INTEL_PTE_VALID)
1632 *spte &= ~INTEL_PTE_WRITE;
1633 spte++;
1634 }
1635 }
1636 s = l;
1637 pde++;
1638 }
1639
1640 PMAP_UPDATE_TLBS(map, orig_s, e);
1641
1642 simple_unlock(&map->lock);
1643 SPLX(spl);
1644 }
1645
1646
1647
1648 /*
1649 * Insert the given physical page (p) at
1650 * the specified virtual address (v) in the
1651 * target physical map with the protection requested.
1652 *
1653 * If specified, the page will be wired down, meaning
1654 * that the related pte cannot be reclaimed.
1655 *
1656 * NB: This is the only routine which MAY NOT lazy-evaluate
1657 * or lose information. That is, this routine must actually
1658 * insert this page into the given map NOW.
1659 */
1660 void
1661 pmap_enter(
1662 register pmap_t pmap,
1663 vm_offset_t v,
1664 ppnum_t pn,
1665 vm_prot_t prot,
1666 unsigned int flags,
1667 boolean_t wired)
1668 {
1669 register pt_entry_t *pte;
1670 register pv_entry_t pv_h;
1671 register int pai;
1672 pv_entry_t pv_e;
1673 pt_entry_t template;
1674 spl_t spl;
1675 pmap_paddr_t old_pa;
1676 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
1677
1678 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
1679 current_thread(),
1680 current_thread(),
1681 pmap, v, pn);
1682
1683 assert(pn != vm_page_fictitious_addr);
1684 if (pmap_debug)
1685 printf("pmap(%x, %x)\n", v, pn);
1686 if (pmap == PMAP_NULL)
1687 return;
1688
1689 /*
1690 * Must allocate a new pvlist entry while we're unlocked;
1691 * zalloc may cause pageout (which will lock the pmap system).
1692 * If we determine we need a pvlist entry, we will unlock
1693 * and allocate one. Then we will retry, throwing away
1694 * the allocated entry later (if we no longer need it).
1695 */
1696 pv_e = PV_ENTRY_NULL;
1697
1698 PMAP_READ_LOCK(pmap, spl);
1699
1700 /*
1701 * Expand pmap to include this pte. Assume that
1702 * pmap is always expanded to include enough hardware
1703 * pages to map one VM page.
1704 */
1705
1706 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1707 /*
1708 * Must unlock to expand the pmap.
1709 */
1710 PMAP_READ_UNLOCK(pmap, spl);
1711
1712 pmap_expand(pmap, v);
1713
1714 PMAP_READ_LOCK(pmap, spl);
1715 }
1716 /*
1717 * Special case if the physical page is already mapped
1718 * at this address.
1719 */
1720 old_pa = pte_to_pa(*pte);
1721 if (old_pa == pa) {
1722 /*
1723 * May be changing its wired attribute or protection
1724 */
1725
1726 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1727
1728 if(flags & VM_MEM_NOT_CACHEABLE) {
1729 if(!(flags & VM_MEM_GUARDED))
1730 template |= INTEL_PTE_PTA;
1731 template |= INTEL_PTE_NCACHE;
1732 }
1733
1734 if (pmap != kernel_pmap)
1735 template |= INTEL_PTE_USER;
1736 if (prot & VM_PROT_WRITE)
1737 template |= INTEL_PTE_WRITE;
1738 if (wired) {
1739 template |= INTEL_PTE_WIRED;
1740 if (!iswired(*pte))
1741 pmap->stats.wired_count++;
1742 }
1743 else {
1744 if (iswired(*pte)) {
1745 assert(pmap->stats.wired_count >= 1);
1746 pmap->stats.wired_count--;
1747 }
1748 }
1749
1750 if (*pte & INTEL_PTE_MOD)
1751 template |= INTEL_PTE_MOD;
1752 WRITE_PTE(pte, template)
1753 pte++;
1754
1755 goto Done;
1756 }
1757
1758 /*
1759 * Outline of code from here:
1760 * 1) If va was mapped, update TLBs, remove the mapping
1761 * and remove old pvlist entry.
1762 * 2) Add pvlist entry for new mapping
1763 * 3) Enter new mapping.
1764 *
1765 * SHARING_FAULTS complicates this slightly in that it cannot
1766 * replace the mapping, but must remove it (because adding the
1767 * pvlist entry for the new mapping may remove others), and
1768 * hence always enters the new mapping at step 3)
1769 *
1770 * If the old physical page is not managed step 1) is skipped
1771 * (except for updating the TLBs), and the mapping is
1772 * overwritten at step 3). If the new physical page is not
1773 * managed, step 2) is skipped.
1774 */
1775
1776 if (old_pa != (pmap_paddr_t) 0) {
1777
1778
1779 #if DEBUG_PTE_PAGE
1780 if (pmap != kernel_pmap)
1781 ptep_check(get_pte_page(pte));
1782 #endif /* DEBUG_PTE_PAGE */
1783
1784 /*
1785 * Don't do anything to pages outside valid memory here.
1786 * Instead convince the code that enters a new mapping
1787 * to overwrite the old one.
1788 */
1789
1790 if (valid_page(i386_btop(old_pa))) {
1791
1792 pai = pa_index(old_pa);
1793 LOCK_PVH(pai);
1794
1795 assert(pmap->stats.resident_count >= 1);
1796 pmap->stats.resident_count--;
1797 if (iswired(*pte)) {
1798 assert(pmap->stats.wired_count >= 1);
1799 pmap->stats.wired_count--;
1800 }
1801
1802 pmap_phys_attributes[pai] |=
1803 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1804 WRITE_PTE(pte, 0)
1805
1806 /*
1807 * Remove the mapping from the pvlist for
1808 * this physical page.
1809 */
1810 {
1811 register pv_entry_t prev, cur;
1812
1813 pv_h = pai_to_pvh(pai);
1814 if (pv_h->pmap == PMAP_NULL) {
1815 panic("pmap_enter: null pv_list!");
1816 }
1817 if (pv_h->va == v && pv_h->pmap == pmap) {
1818 /*
1819 * Header is the pv_entry. Copy the next one
1820 * to header and free the next one (we cannot
1821 * free the header)
1822 */
1823 cur = pv_h->next;
1824 if (cur != PV_ENTRY_NULL) {
1825 *pv_h = *cur;
1826 pv_e = cur;
1827 }
1828 else {
1829 pv_h->pmap = PMAP_NULL;
1830 }
1831 }
1832 else {
1833 cur = pv_h;
1834 do {
1835 prev = cur;
1836 if ((cur = prev->next) == PV_ENTRY_NULL) {
1837 panic("pmap_enter: mapping not in pv_list!");
1838 }
1839 } while (cur->va != v || cur->pmap != pmap);
1840 prev->next = cur->next;
1841 pv_e = cur;
1842 }
1843 }
1844 UNLOCK_PVH(pai);
1845 }
1846 else {
1847
1848 /*
1849 * old_pa is not managed. Pretend it's zero so code
1850 * at Step 3) will enter new mapping (overwriting old
1851 * one). Do removal part of accounting.
1852 */
1853 old_pa = (pmap_paddr_t) 0;
1854 assert(pmap->stats.resident_count >= 1);
1855 pmap->stats.resident_count--;
1856 if (iswired(*pte)) {
1857 assert(pmap->stats.wired_count >= 1);
1858 pmap->stats.wired_count--;
1859 }
1860 }
1861
1862 }
1863
1864 if (valid_page(i386_btop(pa))) {
1865
1866 /*
1867 * Step 2) Enter the mapping in the PV list for this
1868 * physical page.
1869 */
1870
1871 pai = pa_index(pa);
1872
1873
1874 #if SHARING_FAULTS
1875 RetryPvList:
1876 /*
1877 * We can return here from the sharing fault code below
1878 * in case we removed the only entry on the pv list and thus
1879 * must enter the new one in the list header.
1880 */
1881 #endif /* SHARING_FAULTS */
1882 LOCK_PVH(pai);
1883 pv_h = pai_to_pvh(pai);
1884
1885 if (pv_h->pmap == PMAP_NULL) {
1886 /*
1887 * No mappings yet
1888 */
1889 pv_h->va = v;
1890 pv_h->pmap = pmap;
1891 pv_h->next = PV_ENTRY_NULL;
1892 }
1893 else {
1894 #if DEBUG
1895 {
1896 /*
1897 * check that this mapping is not already there
1898 * or there is no alias for this mapping in the same map
1899 */
1900 pv_entry_t e = pv_h;
1901 while (e != PV_ENTRY_NULL) {
1902 if (e->pmap == pmap && e->va == v)
1903 panic("pmap_enter: already in pv_list");
1904 e = e->next;
1905 }
1906 }
1907 #endif /* DEBUG */
1908 #if SHARING_FAULTS
1909 {
1910 /*
1911 * do sharing faults.
1912 * if we find an entry on this pv list in the same address
1913 * space, remove it. we know there will not be more
1914 * than one.
1915 */
1916 pv_entry_t e = pv_h;
1917 pt_entry_t *opte;
1918
1919 while (e != PV_ENTRY_NULL) {
1920 if (e->pmap == pmap) {
1921 /*
1922 * Remove it, drop pv list lock first.
1923 */
1924 UNLOCK_PVH(pai);
1925
1926 opte = pmap_pte(pmap, e->va);
1927 assert(opte != PT_ENTRY_NULL);
1928 /*
1929 * Invalidate the translation buffer,
1930 * then remove the mapping.
1931 */
1932 pmap_remove_range(pmap, e->va, opte,
1933 opte + 1);
1934 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
1935
1936 /*
1937 * We could have removed the head entry,
1938 * so there could be no more entries
1939 * and so we have to use the pv head entry;
1940 * go back to the top and try the entry
1941 * again.
1942 */
1943 goto RetryPvList;
1944 }
1945 e = e->next;
1946 }
1947
1948 /*
1949 * check that this mapping is not already there
1950 */
1951 e = pv_h;
1952 while (e != PV_ENTRY_NULL) {
1953 if (e->pmap == pmap)
1954 panic("pmap_enter: alias in pv_list");
1955 e = e->next;
1956 }
1957 }
1958 #endif /* SHARING_FAULTS */
1959 #if DEBUG_ALIAS
1960 {
1961 /*
1962 * check for aliases within the same address space.
1963 */
1964 pv_entry_t e = pv_h;
1965 vm_offset_t rpc = get_rpc();
1966
1967 while (e != PV_ENTRY_NULL) {
1968 if (e->pmap == pmap) {
1969 /*
1970 * log this entry in the alias ring buffer
1971 * if it's not there already.
1972 */
1973 struct pmap_alias *pma;
1974 int ii, logit;
1975
1976 logit = TRUE;
1977 for (ii = 0; ii < pmap_alias_index; ii++) {
1978 if (pmap_aliasbuf[ii].rpc == rpc) {
1979 /* found it in the log already */
1980 logit = FALSE;
1981 break;
1982 }
1983 }
1984 if (logit) {
1985 pma = &pmap_aliasbuf[pmap_alias_index];
1986 pma->pmap = pmap;
1987 pma->va = v;
1988 pma->rpc = rpc;
1989 pma->cookie = PMAP_ALIAS_COOKIE;
1990 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
1991 panic("pmap_enter: exhausted alias log");
1992 }
1993 }
1994 e = e->next;
1995 }
1996 }
1997 #endif /* DEBUG_ALIAS */
1998 /*
1999 * Add new pv_entry after header.
2000 */
2001 if (pv_e == PV_ENTRY_NULL) {
2002 PV_ALLOC(pv_e);
2003 if (pv_e == PV_ENTRY_NULL) {
2004 panic("pmap no pv_e's");
2005 }
2006 }
2007 pv_e->va = v;
2008 pv_e->pmap = pmap;
2009 pv_e->next = pv_h->next;
2010 pv_h->next = pv_e;
2011 /*
2012 * Remember that we used the pvlist entry.
2013 */
2014 pv_e = PV_ENTRY_NULL;
2015 }
2016 UNLOCK_PVH(pai);
2017 }
2018
2019 /*
2020 * Step 3) Enter and count the mapping.
2021 */
2022
2023 pmap->stats.resident_count++;
2024
2025 /*
2026 * Build a template to speed up entering -
2027 * only the pfn changes.
2028 */
2029 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2030
2031 if(flags & VM_MEM_NOT_CACHEABLE) {
2032 if(!(flags & VM_MEM_GUARDED))
2033 template |= INTEL_PTE_PTA;
2034 template |= INTEL_PTE_NCACHE;
2035 }
2036
2037 if (pmap != kernel_pmap)
2038 template |= INTEL_PTE_USER;
2039 if (prot & VM_PROT_WRITE)
2040 template |= INTEL_PTE_WRITE;
2041 if (wired) {
2042 template |= INTEL_PTE_WIRED;
2043 pmap->stats.wired_count++;
2044 }
2045
2046 WRITE_PTE(pte, template)
2047
2048 Done:
2049 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2050
2051 if (pv_e != PV_ENTRY_NULL) {
2052 PV_FREE(pv_e);
2053 }
2054
2055 PMAP_READ_UNLOCK(pmap, spl);
2056 }
2057
2058 /*
2059 * Routine: pmap_change_wiring
2060 * Function: Change the wiring attribute for a map/virtual-address
2061 * pair.
2062 * In/out conditions:
2063 * The mapping must already exist in the pmap.
2064 */
2065 void
2066 pmap_change_wiring(
2067 register pmap_t map,
2068 vm_offset_t v,
2069 boolean_t wired)
2070 {
2071 register pt_entry_t *pte;
2072 spl_t spl;
2073
2074 #if 1
2075 /*
2076 * We must grab the pmap system lock because we may
2077 * change a pte_page queue.
2078 */
2079 PMAP_READ_LOCK(map, spl);
2080
2081 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2082 panic("pmap_change_wiring: pte missing");
2083
2084 if (wired && !iswired(*pte)) {
2085 /*
2086 * wiring down mapping
2087 */
2088 map->stats.wired_count++;
2089 *pte++ |= INTEL_PTE_WIRED;
2090 }
2091 else if (!wired && iswired(*pte)) {
2092 /*
2093 * unwiring mapping
2094 */
2095 assert(map->stats.wired_count >= 1);
2096 map->stats.wired_count--;
2097 *pte++ &= ~INTEL_PTE_WIRED;
2098 }
2099
2100 PMAP_READ_UNLOCK(map, spl);
2101
2102 #else
2103 return;
2104 #endif
2105
2106 }
2107
2108 ppnum_t
2109 pmap_find_phys(pmap_t pmap, addr64_t va)
2110 {
2111 pt_entry_t *ptp;
2112 vm_offset_t a32;
2113 ppnum_t ppn;
2114
2115 if (value_64bit(va))
2116 panic("pmap_find_phys 64 bit value");
2117 a32 = (vm_offset_t) low32(va);
2118 ptp = pmap_pte(pmap, a32);
2119 if (PT_ENTRY_NULL == ptp) {
2120 ppn = 0;
2121 } else {
2122 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2123 }
2124 return ppn;
2125 }
2126
2127 /*
2128 * Routine: pmap_extract
2129 * Function:
2130 * Extract the physical page address associated
2131 * with the given map/virtual_address pair.
2132 * Changed to a shim for backwards compatibility, but it will not
2133 * work for 64-bit systems. Some old drivers that we cannot
2134 * change need this.
2135 */
2136
2137 vm_offset_t
2138 pmap_extract(
2139 register pmap_t pmap,
2140 vm_offset_t va)
2141 {
2142 ppnum_t ppn;
2143 vm_offset_t vaddr;
2144
2145 vaddr = (vm_offset_t)0;
2146 ppn = pmap_find_phys(pmap, (addr64_t)va);
2147 if (ppn) {
2148 vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK);
2149 }
2150 return (vaddr);
2151 }
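/*
 * Illustrative sketch (not compiled): pmap_find_phys() is the 64-bit
 * clean interface; pmap_extract() is the 32-bit shim kept for old
 * drivers and simply rebuilds a physical address from the page number.
 * va is a hypothetical kernel virtual address.
 */
#if 0
	vm_offset_t	va;
	ppnum_t		ppn;
	vm_offset_t	pa32;

	ppn  = pmap_find_phys(kernel_pmap, (addr64_t) va);	/* 0 if unmapped */
	pa32 = pmap_extract(kernel_pmap, va);			/* phys | page offset */
#endif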
2152
2153
2154 /*
2155 * Routine: pmap_expand
2156 *
2157 * Expands a pmap to be able to map the specified virtual address.
2158 *
2159 * Allocates new virtual memory for the P0 or P1 portion of the
2160 * pmap, then re-maps the physical pages that were in the old
2161 * pmap to be in the new pmap.
2162 *
2163 * Must be called with the pmap system and the pmap unlocked,
2164 * since these must be unlocked to use vm_allocate or vm_deallocate.
2165 * Thus it must be called in a loop that checks whether the map
2166 * has been expanded enough.
2167 * (We won't loop forever, since page tables aren't shrunk.)
2168 */
2169 void
2170 pmap_expand(
2171 register pmap_t map,
2172 register vm_offset_t v)
2173 {
2174 pt_entry_t *pdp;
2175 register vm_page_t m;
2176 register pmap_paddr_t pa;
2177 register int i;
2178 spl_t spl;
2179 ppnum_t pn;
2180
2181 if (map == kernel_pmap) {
2182 pmap_growkernel(v);
2183 return;
2184 }
2185
2186 /*
2187 * Allocate a VM page for the level 2 page table entries.
2188 */
2189 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2190 VM_PAGE_WAIT();
2191
2192 /*
2193 * put the page into the pmap's obj list so it
2194 * can be found later.
2195 */
2196 pn = m->phys_page;
2197 pa = i386_ptob(pn);
2198 i = pdenum(map, v);
2199 vm_object_lock(map->pm_obj);
2200 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
2201 vm_page_lock_queues();
2202 vm_page_wire(m);
2203 inuse_ptepages_count++;
2204 vm_object_unlock(map->pm_obj);
2205 vm_page_unlock_queues();
2206
2207 /*
2208 * Zero the page.
2209 */
2210 pmap_zero_page(pn);
2211
2212 PMAP_READ_LOCK(map, spl);
2213 /*
2214 * See if someone else expanded us first
2215 */
2216 if (pmap_pte(map, v) != PT_ENTRY_NULL) {
2217 PMAP_READ_UNLOCK(map, spl);
2218 vm_object_lock(map->pm_obj);
2219 vm_page_lock_queues();
2220 vm_page_free(m);
2221 inuse_ptepages_count--;
2222 vm_page_unlock_queues();
2223 vm_object_unlock(map->pm_obj);
2224 return;
2225 }
2226
2227 /*
2228 * Set the page directory entry for this page table.
2229 * If we have allocated more than one hardware page,
2230 * set several page directory entries.
2231 */
2232
2233 pdp = &map->dirbase[pdenum(map, v)];
2234 *pdp = pa_to_pte(pa)
2235 | INTEL_PTE_VALID
2236 | INTEL_PTE_USER
2237 | INTEL_PTE_WRITE;
2238
2239 PMAP_READ_UNLOCK(map, spl);
2240 return;
2241 }
2242
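/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the retry loop a caller of pmap_expand() is expected to use,
 * per the locking comment above. expand_until_mapped_example() is a
 * hypothetical helper; pmap_enter() contains the real instance of this
 * pattern.
 */
static void
expand_until_mapped_example(
	pmap_t		map,
	vm_offset_t	v)
{
	/*
	 * Expand with the pmap unlocked, then re-check. Page tables are
	 * never shrunk, so the loop terminates once a pte for v exists.
	 */
	while (pmap_pte(map, v) == PT_ENTRY_NULL)
		pmap_expand(map, v);
}
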
2243 /*
2244 * Copy the range specified by src_addr/len
2245 * from the source map to the range dst_addr/len
2246 * in the destination map.
2247 *
2248 * This routine is only advisory and need not do anything.
2249 */
2250 #if 0
2251 void
2252 pmap_copy(
2253 pmap_t dst_pmap,
2254 pmap_t src_pmap,
2255 vm_offset_t dst_addr,
2256 vm_size_t len,
2257 vm_offset_t src_addr)
2258 {
2259 #ifdef lint
2260 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2261 #endif /* lint */
2262 }
2263 #endif/* 0 */
2264
2265 /*
2266 * pmap_sync_page_data_phys(ppnum_t pa)
2267 *
2268 * Invalidates all of the instruction cache on a physical page and
2269 * pushes any dirty data from the data cache for the same physical page
2270 * Not required in i386.
2271 */
2272 void
2273 pmap_sync_page_data_phys(__unused ppnum_t pa)
2274 {
2275 return;
2276 }
2277
2278 /*
2279 * pmap_sync_page_attributes_phys(ppnum_t pa)
2280 *
2281 * Write back and invalidate all cachelines on a physical page.
2282 */
2283 void
2284 pmap_sync_page_attributes_phys(ppnum_t pa)
2285 {
2286 cache_flush_page_phys(pa);
2287 }
2288
2289 int collect_ref;
2290 int collect_unref;
2291
2292 /*
2293 * Routine: pmap_collect
2294 * Function:
2295 * Garbage collects the physical map system for
2296 * pages which are no longer used.
2297 * Success need not be guaranteed -- that is, some
2298 * unreferenced pages may be left in place while
2299 * others are collected.
2300 * Usage:
2301 * Called by the pageout daemon when pages are scarce.
2302 */
2303 void
2304 pmap_collect(
2305 pmap_t p)
2306 {
2307 register pt_entry_t *pdp, *ptp;
2308 pt_entry_t *eptp;
2309 int wired;
2310 spl_t spl;
2311
2312 if (p == PMAP_NULL)
2313 return;
2314
2315 if (p == kernel_pmap)
2316 return;
2317
2318 /*
2319 * Garbage collect map.
2320 */
2321 PMAP_READ_LOCK(p, spl);
2322
2323 for (pdp = (pt_entry_t *)p->dirbase;
2324 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2325 pdp++)
2326 {
2327 if (*pdp & INTEL_PTE_VALID) {
2328 if(*pdp & INTEL_PTE_REF) {
2329 *pdp &= ~INTEL_PTE_REF;
2330 collect_ref++;
2331 } else {
2332 collect_unref++;
2333 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2334 eptp = ptp + NPTEPG;
2335
2336 /*
2337 * If the pte page has any wired mappings, we cannot
2338 * free it.
2339 */
2340 wired = 0;
2341 {
2342 register pt_entry_t *ptep;
2343 for (ptep = ptp; ptep < eptp; ptep++) {
2344 if (iswired(*ptep)) {
2345 wired = 1;
2346 break;
2347 }
2348 }
2349 }
2350 if (!wired) {
2351 /*
2352 * Remove the virtual addresses mapped by this pte page.
2353 */
2354 pmap_remove_range(p,
2355 pdetova(pdp - (pt_entry_t *)p->dirbase),
2356 ptp,
2357 eptp);
2358
2359 /*
2360 * Invalidate the page directory pointer.
2361 */
2362 *pdp = 0x0;
2363
2364 PMAP_READ_UNLOCK(p, spl);
2365
2366 /*
2367 * And free the pte page itself.
2368 */
2369 {
2370 register vm_page_t m;
2371
2372 vm_object_lock(p->pm_obj);
2373 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
2374 if (m == VM_PAGE_NULL)
2375 panic("pmap_collect: pte page not in object");
2376 vm_page_lock_queues();
2377 vm_page_free(m);
2378 inuse_ptepages_count--;
2379 vm_page_unlock_queues();
2380 vm_object_unlock(p->pm_obj);
2381 }
2382
2383 PMAP_READ_LOCK(p, spl);
2384 }
2385 }
2386 }
2387 }
2388 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2389 PMAP_READ_UNLOCK(p, spl);
2390 return;
2391
2392 }
2393
2394 /*
2395 * Routine: pmap_kernel
2396 * Function:
2397 * Returns the physical map handle for the kernel.
2398 */
2399 #if 0
2400 pmap_t
2401 pmap_kernel(void)
2402 {
2403 return (kernel_pmap);
2404 }
2405 #endif/* 0 */
2406
2407 void
2408 pmap_copy_page(
2409 ppnum_t src,
2410 ppnum_t dst)
2411 {
2412 bcopy_phys((addr64_t)i386_ptob(src),
2413 (addr64_t)i386_ptob(dst),
2414 PAGE_SIZE);
2415 }
2416
2417
2418 /*
2419 * Routine: pmap_pageable
2420 * Function:
2421 * Make the specified pages (by pmap, offset)
2422 * pageable (or not) as requested.
2423 *
2424 * A page which is not pageable may not take
2425 * a fault; therefore, its page table entry
2426 * must remain valid for the duration.
2427 *
2428 * This routine is merely advisory; pmap_enter
2429 * will specify that these pages are to be wired
2430 * down (or not) as appropriate.
2431 */
2432 void
2433 pmap_pageable(
2434 __unused pmap_t pmap,
2435 __unused vm_offset_t start_addr,
2436 __unused vm_offset_t end_addr,
2437 __unused boolean_t pageable)
2438 {
2439 #ifdef lint
2440 pmap++; start_addr++; end_addr++; pageable++;
2441 #endif /* lint */
2442 }
2443
2444 /*
2445 * Clear specified attribute bits.
2446 */
2447 void
2448 phys_attribute_clear(
2449 ppnum_t pn,
2450 int bits)
2451 {
2452 pv_entry_t pv_h;
2453 register pv_entry_t pv_e;
2454 register pt_entry_t *pte;
2455 int pai;
2456 register pmap_t pmap;
2457 spl_t spl;
2458 pmap_paddr_t phys;
2459
2460 assert(pn != vm_page_fictitious_addr);
2461 if (!valid_page(pn)) {
2462 /*
2463 * Not a managed page.
2464 */
2465 return;
2466 }
2467
2468 /*
2469 * Lock the pmap system first, since we will be changing
2470 * several pmaps.
2471 */
2472
2473 PMAP_WRITE_LOCK(spl);
2474 phys = i386_ptob(pn);
2475 pai = pa_index(phys);
2476 pv_h = pai_to_pvh(pai);
2477
2478 /*
2479 * Walk down PV list, clearing all modify or reference bits.
2480 * We do not have to lock the pv_list because we have
2481 * the entire pmap system locked.
2482 */
2483 if (pv_h->pmap != PMAP_NULL) {
2484 /*
2485 * There are some mappings.
2486 */
2487 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2488
2489 pmap = pv_e->pmap;
2490 /*
2491 * Lock the pmap to block pmap_extract and similar routines.
2492 */
2493 simple_lock(&pmap->lock);
2494
2495 {
2496 register vm_offset_t va;
2497
2498 va = pv_e->va;
2499 pte = pmap_pte(pmap, va);
2500
2501 #if 0
2502 /*
2503 * Consistency checks.
2504 */
2505 assert(*pte & INTEL_PTE_VALID);
2506 /* assert(pte_to_phys(*pte) == phys); */
2507 #endif
2508
2509 /*
2510 * Clear modify or reference bits.
2511 */
2512
2513 *pte++ &= ~bits;
2514 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
2515 }
2516 simple_unlock(&pmap->lock);
2517
2518 }
2519 }
2520
2521 pmap_phys_attributes[pai] &= ~bits;
2522
2523 PMAP_WRITE_UNLOCK(spl);
2524 }
2525
2526 /*
2527 * Check specified attribute bits.
2528 */
2529 boolean_t
2530 phys_attribute_test(
2531 ppnum_t pn,
2532 int bits)
2533 {
2534 pv_entry_t pv_h;
2535 register pv_entry_t pv_e;
2536 register pt_entry_t *pte;
2537 int pai;
2538 register pmap_t pmap;
2539 spl_t spl;
2540 pmap_paddr_t phys;
2541
2542 assert(pn != vm_page_fictitious_addr);
2543 if (!valid_page(pn)) {
2544 /*
2545 * Not a managed page.
2546 */
2547 return (FALSE);
2548 }
2549
2550 /*
2551 * Lock the pmap system first, since we will be checking
2552 * several pmaps.
2553 */
2554
2555 PMAP_WRITE_LOCK(spl);
2556 phys = i386_ptob(pn);
2557 pai = pa_index(phys);
2558 pv_h = pai_to_pvh(pai);
2559
2560 if (pmap_phys_attributes[pai] & bits) {
2561 PMAP_WRITE_UNLOCK(spl);
2562 return (TRUE);
2563 }
2564
2565 /*
2566 * Walk down PV list, checking all mappings.
2567 * We do not have to lock the pv_list because we have
2568 * the entire pmap system locked.
2569 */
2570 if (pv_h->pmap != PMAP_NULL) {
2571 /*
2572 * There are some mappings.
2573 */
2574 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2575
2576 pmap = pv_e->pmap;
2577 /*
2578 * Lock the pmap to block pmap_extract and similar routines.
2579 */
2580 simple_lock(&pmap->lock);
2581
2582 {
2583 register vm_offset_t va;
2584
2585 va = pv_e->va;
2586 pte = pmap_pte(pmap, va);
2587
2588 #if 0
2589 /*
2590 * Consistency checks.
2591 */
2592 assert(*pte & INTEL_PTE_VALID);
2593 /* assert(pte_to_phys(*pte) == phys); */
2594 #endif
2595 }
2596
2597 /*
2598 * Check modify or reference bits.
2599 */
2600 {
2601 if (*pte++ & bits) {
2602 simple_unlock(&pmap->lock);
2603 PMAP_WRITE_UNLOCK(spl);
2604 return (TRUE);
2605 }
2606 }
2607 simple_unlock(&pmap->lock);
2608 }
2609 }
2610 PMAP_WRITE_UNLOCK(spl);
2611 return (FALSE);
2612 }
2613
2614 /*
2615 * Set specified attribute bits.
2616 */
2617 void
2618 phys_attribute_set(
2619 ppnum_t pn,
2620 int bits)
2621 {
2622 int spl;
2623 pmap_paddr_t phys;
2624
2625 assert(pn != vm_page_fictitious_addr);
2626 if (!valid_page(pn)) {
2627 /*
2628 * Not a managed page.
2629 */
2630 return;
2631 }
2632
2633 /*
2634 * Lock the pmap system and set the requested bits in
2635 * the phys attributes array. Don't need to bother with
2636 * ptes because the test routine looks here first.
2637 */
2638 phys = i386_ptob(pn);
2639 PMAP_WRITE_LOCK(spl);
2640 pmap_phys_attributes[pa_index(phys)] |= bits;
2641 PMAP_WRITE_UNLOCK(spl);
2642 }
2643
2644 /*
2645 * Set the modify bit on the specified physical page.
2646 */
2647
2648 void pmap_set_modify(
2649 ppnum_t pn)
2650 {
2651 phys_attribute_set(pn, PHYS_MODIFIED);
2652 }
2653
2654 /*
2655 * Clear the modify bits on the specified physical page.
2656 */
2657
2658 void
2659 pmap_clear_modify(
2660 ppnum_t pn)
2661 {
2662 phys_attribute_clear(pn, PHYS_MODIFIED);
2663 }
2664
2665 /*
2666 * pmap_is_modified:
2667 *
2668 * Return whether or not the specified physical page is modified
2669 * by any physical maps.
2670 */
2671
2672 boolean_t
2673 pmap_is_modified(
2674 ppnum_t pn)
2675 {
2676 return (phys_attribute_test(pn, PHYS_MODIFIED));
2677 }
2678
2679 /*
2680 * pmap_clear_reference:
2681 *
2682 * Clear the reference bit on the specified physical page.
2683 */
2684
2685 void
2686 pmap_clear_reference(
2687 ppnum_t pn)
2688 {
2689 phys_attribute_clear(pn, PHYS_REFERENCED);
2690 }
2691
2692 void
2693 pmap_set_reference(ppnum_t pn)
2694 {
2695 phys_attribute_set(pn, PHYS_REFERENCED);
2696 }
2697
2698 /*
2699 * pmap_is_referenced:
2700 *
2701 * Return whether or not the specified physical page is referenced
2702 * by any physical maps.
2703 */
2704
2705 boolean_t
2706 pmap_is_referenced(
2707 ppnum_t pn)
2708 {
2709 return (phys_attribute_test(pn, PHYS_REFERENCED));
2710 }
2711
2712 /*
2713 * pmap_get_refmod(phys)
2714 * returns the referenced and modified bits of the specified
2715 * physical page.
2716 */
2717 unsigned int
2718 pmap_get_refmod(ppnum_t pa)
2719 {
2720 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
2721 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
2722 }
2723
2724 /*
2725 * pmap_clear_refmod(phys, mask)
2726 * clears the referenced and modified bits as specified by the mask
2727 * of the specified physical page.
2728 */
2729 void
2730 pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2731 {
2732 unsigned int x86Mask;
2733
2734 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2735 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2736 phys_attribute_clear(pa, x86Mask);
2737 }
2738
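/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): a typical use of the refmod interface above.
 * page_was_dirty_example() is a hypothetical helper that tests and then
 * clears the hardware modify state of a physical page in terms of the
 * machine-independent VM_MEM_* bits.
 */
static boolean_t
page_was_dirty_example(
	ppnum_t		pn)
{
	unsigned int	refmod;

	refmod = pmap_get_refmod(pn);	/* VM_MEM_MODIFIED and/or VM_MEM_REFERENCED */

	if (refmod & VM_MEM_MODIFIED) {
		/* clear only the modify bit; leave the reference bit intact */
		pmap_clear_refmod(pn, VM_MEM_MODIFIED);
		return (TRUE);
	}
	return (FALSE);
}
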
2739 /*
2740 * Set the modify bit on the specified range
2741 * of this map as requested.
2742 *
2743 * This optimization stands only if each time the dirty bit
2744 * in vm_page_t is tested, it is also tested in the pmap.
2745 */
2746 void
2747 pmap_modify_pages(
2748 pmap_t map,
2749 vm_offset_t s,
2750 vm_offset_t e)
2751 {
2752 spl_t spl;
2753 register pt_entry_t *pde;
2754 register pt_entry_t *spte, *epte;
2755 vm_offset_t l;
2756 vm_offset_t orig_s = s;
2757
2758 if (map == PMAP_NULL)
2759 return;
2760
2761 PMAP_READ_LOCK(map, spl);
2762
2763 pde = pmap_pde(map, s);
2764 while (s && s < e) {
2765 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
2766 if (l > e)
2767 l = e;
2768 if (*pde & INTEL_PTE_VALID) {
2769 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
2770 if (l) {
2771 spte = &spte[ptenum(s)];
2772 epte = &spte[intel_btop(l-s)];
2773 } else {
2774 epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
2775 spte = &spte[ptenum(s)];
2776 }
2777 while (spte < epte) {
2778 if (*spte & INTEL_PTE_VALID) {
2779 *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
2780 }
2781 spte++;
2782 }
2783 }
2784 s = l;
2785 pde++;
2786 }
2787 PMAP_UPDATE_TLBS(map, orig_s, e);
2788 PMAP_READ_UNLOCK(map, spl);
2789 }
2790
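/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the invariant the comment above depends on. Whenever the
 * machine-independent dirty bit is consulted, the pmap must be consulted
 * as well; page_is_dirty_example() is a hypothetical helper showing the
 * paired test (m is assumed to be a vm_page_t with a dirty bit).
 */
static boolean_t
page_is_dirty_example(
	vm_page_t	m)
{
	/* software dirty bit OR hardware modify bit gathered by the pmap */
	return (m->dirty || pmap_is_modified(m->phys_page));
}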
2791
2792 void
2793 invalidate_icache(__unused vm_offset_t addr,
2794 __unused unsigned cnt,
2795 __unused int phys)
2796 {
2797 return;
2798 }
2799 void
2800 flush_dcache(__unused vm_offset_t addr,
2801 __unused unsigned count,
2802 __unused int phys)
2803 {
2804 return;
2805 }
2806
2807 /*
2808 * TLB Coherence Code (TLB "shootdown" code)
2809 *
2810 * Threads that belong to the same task share the same address space and
2811 * hence share a pmap. However, they may run on distinct cpus and thus
2812 * have distinct TLBs that cache page table entries. In order to guarantee
2813 * the TLBs are consistent, whenever a pmap is changed, all threads that
2814 * are active in that pmap must have their TLB updated. To keep track of
2815 * this information, the set of cpus that are currently using a pmap is
2816 * maintained within each pmap structure (cpus_using). pmap_activate() and
2817 * pmap_deactivate() add and remove, respectively, a cpu from this set.
2818 * Since the TLBs are not addressable over the bus, each processor must
2819 * flush its own TLB; a processor that needs to invalidate another TLB
2820 * needs to interrupt the processor that owns that TLB to signal the
2821 * update.
2822 *
2823 * Whenever a pmap is updated, the lock on that pmap is locked, and all
2824 * cpus using the pmap are signaled to invalidate. All threads that need
2825 * to activate a pmap must wait for the lock to clear to await any updates
2826 * in progress before using the pmap. They must ACQUIRE the lock to add
2827 * their cpu to the cpus_using set. An implicit assumption made
2828 * throughout the TLB code is that all kernel code that runs at or higher
2829 * than splvm blocks out update interrupts, and that such code does not
2830 * touch pageable pages.
2831 *
2832 * A shootdown interrupt serves another function besides signaling a
2833 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
2834 * waits for both the pmap lock and the kernel pmap lock to clear,
2835 * preventing user code from making implicit pmap updates while the
2836 * sending processor is performing its update. (This could happen via a
2837 * user data write reference that turns on the modify bit in the page
2838 * table). It must wait for any kernel updates that may have started
2839 * concurrently with a user pmap update because the IPC code
2840 * changes mappings.
2841 * Spinning on the VALUES of the locks is sufficient (rather than
2842 * having to acquire the locks) because any updates that occur subsequent
2843 * to finding the lock unlocked will be signaled via another interrupt.
2844 * (This assumes the interrupt is cleared before the low level interrupt code
2845 * calls pmap_update_interrupt()).
2846 *
2847 * The signaling processor must wait for any implicit updates in progress
2848 * to terminate before continuing with its update. Thus it must wait for an
2849 * acknowledgement of the interrupt from each processor for which such
2850 * references could be made. For maintaining this information, a set
2851 * cpus_active is used. A cpu is in this set if and only if it can
2852 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2853 * this set; when all such cpus are removed, it is safe to update.
2854 *
2855 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
2856 * be at least at the priority of the interprocessor interrupt
2857 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2858 * kernel update; it would spin forever in pmap_update_interrupt() trying
2859 * to acquire the user pmap lock it had already acquired. Furthermore A
2860 * must remove itself from cpus_active. Otherwise, another cpu holding
2861 * the lock (B) could be in the process of sending an update signal to A,
2862 * and thus be waiting for A to remove itself from cpus_active. If A is
2863 * spinning on the lock at raised priority, this will never happen and a
2864 * deadlock will result.
2865 */
2866
2867 /*
2868 * Signal another CPU that it must flush its TLB
2869 */
2870 void
2871 signal_cpus(
2872 cpu_set use_list,
2873 pmap_t pmap,
2874 vm_offset_t start_addr,
2875 vm_offset_t end_addr)
2876 {
2877 register int which_cpu, j;
2878 register pmap_update_list_t update_list_p;
2879
2880 while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
2881 which_cpu -= 1; /* convert to 0 origin */
2882
2883 update_list_p = cpu_update_list(which_cpu);
2884 simple_lock(&update_list_p->lock);
2885
2886 j = update_list_p->count;
2887 if (j >= UPDATE_LIST_SIZE) {
2888 /*
2889 * list overflowed. Change last item to
2890 * indicate overflow.
2891 */
2892 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2893 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2894 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2895 }
2896 else {
2897 update_list_p->item[j].pmap = pmap;
2898 update_list_p->item[j].start = start_addr;
2899 update_list_p->item[j].end = end_addr;
2900 update_list_p->count = j+1;
2901 }
2902 cpu_update_needed(which_cpu) = TRUE;
2903 simple_unlock(&update_list_p->lock);
2904
2905 /* if it's the kernel pmap, ignore cpus_idle */
2906 if (((cpus_idle & (1 << which_cpu)) == 0) ||
2907 (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
2908 {
2909 i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
2910 }
2911 use_list &= ~(1 << which_cpu);
2912 }
2913 }
2914
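/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the shape of a shootdown from the sending side, under the
 * protocol described in the comment block above. flush_range_example()
 * is hypothetical and deliberately simplified -- the real work, including
 * the lock and cpus_active handshaking, is done by the PMAP_UPDATE_TLBS()
 * macro used throughout this file; cpus_using is the per-pmap cpu set
 * mentioned in that comment.
 */
static void
flush_range_example(
	pmap_t		pmap,
	vm_offset_t	start,
	vm_offset_t	end)
{
	/* ask every cpu currently using this pmap to invalidate the range */
	signal_cpus(pmap->cpus_using, pmap, start, end);

	/* and flush this cpu's own TLB for the same range */
	INVALIDATE_TLB(pmap, start, end);
}
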
2915 void
2916 process_pmap_updates(
2917 register pmap_t my_pmap)
2918 {
2919 register int my_cpu;
2920 register pmap_update_list_t update_list_p;
2921 register int j;
2922 register pmap_t pmap;
2923
2924 mp_disable_preemption();
2925 my_cpu = cpu_number();
2926 update_list_p = cpu_update_list(my_cpu);
2927 simple_lock(&update_list_p->lock);
2928
2929 for (j = 0; j < update_list_p->count; j++) {
2930 pmap = update_list_p->item[j].pmap;
2931 if (pmap == my_pmap ||
2932 pmap == kernel_pmap) {
2933
2934 if (pmap->ref_count <= 0) {
2935 PMAP_CPU_CLR(pmap, my_cpu);
2936 PMAP_REAL(my_cpu) = kernel_pmap;
2937 #ifdef PAE
2938 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
2939 #else
2940 set_cr3((unsigned int)kernel_pmap->pdirbase);
2941 #endif
2942 } else
2943 INVALIDATE_TLB(pmap,
2944 update_list_p->item[j].start,
2945 update_list_p->item[j].end);
2946 }
2947 }
2948 update_list_p->count = 0;
2949 cpu_update_needed(my_cpu) = FALSE;
2950 simple_unlock(&update_list_p->lock);
2951 mp_enable_preemption();
2952 }
2953
2954 /*
2955 * Interrupt routine for TBIA requested from another processor.
2956 * This routine may also be called at any interrupt time if
2957 * the cpu was idle. Some driver interrupt routines might access
2958 * newly allocated vm (this is the case for hd).
2959 */
2960 void
2961 pmap_update_interrupt(void)
2962 {
2963 register int my_cpu;
2964 spl_t s;
2965 register pmap_t my_pmap;
2966
2967 mp_disable_preemption();
2968 my_cpu = cpu_number();
2969
2970 /*
2971 * Raise spl (to splhigh, above splip) to block out pmap_extract
2972 * from IO code (which would put this cpu back in the active
2973 * set).
2974 */
2975 s = splhigh();
2976
2977 my_pmap = PMAP_REAL(my_cpu);
2978
2979 if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
2980 my_pmap = kernel_pmap;
2981
2982 do {
2983 LOOP_VAR;
2984
2985 /*
2986 * Indicate that we're not using either user or kernel
2987 * pmap.
2988 */
2989 i_bit_clear(my_cpu, &cpus_active);
2990
2991 /*
2992 * Wait for any pmap updates in progress, on either user
2993 * or kernel pmap.
2994 */
2995 while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
2996 *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
2997 LOOP_CHECK("pmap_update_interrupt", my_pmap);
2998 cpu_pause();
2999 }
3000
3001 process_pmap_updates(my_pmap);
3002
3003 i_bit_set(my_cpu, &cpus_active);
3004
3005 } while (cpu_update_needed(my_cpu));
3006
3007 splx(s);
3008 mp_enable_preemption();
3009 }
3010
3011 #if MACH_KDB
3012
3013 /* show phys page mappings and attributes */
3014
3015 extern void db_show_page(pmap_paddr_t pa);
3016
3017 void
3018 db_show_page(pmap_paddr_t pa)
3019 {
3020 pv_entry_t pv_h;
3021 int pai;
3022 char attr;
3023
3024 pai = pa_index(pa);
3025 pv_h = pai_to_pvh(pai);
3026
3027 attr = pmap_phys_attributes[pai];
3028 printf("phys page %x ", pa);
3029 if (attr & PHYS_MODIFIED)
3030 printf("modified, ");
3031 if (attr & PHYS_REFERENCED)
3032 printf("referenced, ");
3033 if (pv_h->pmap || pv_h->next)
3034 printf(" mapped at\n");
3035 else
3036 printf(" not mapped\n");
3037 for (; pv_h; pv_h = pv_h->next)
3038 if (pv_h->pmap)
3039 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3040 }
3041
3042 #endif /* MACH_KDB */
3043
3044 #if MACH_KDB
3045 void db_kvtophys(vm_offset_t);
3046 void db_show_vaddrs(pt_entry_t *);
3047
3048 /*
3049 * print out the results of kvtophys(arg)
3050 */
3051 void
3052 db_kvtophys(
3053 vm_offset_t vaddr)
3054 {
3055 db_printf("0x%x", kvtophys(vaddr));
3056 }
3057
3058 /*
3059 * Walk the page tables.
3060 */
3061 void
3062 db_show_vaddrs(
3063 pt_entry_t *dirbase)
3064 {
3065 pt_entry_t *ptep, *pdep, tmp;
3066 int x, y, pdecnt, ptecnt;
3067
3068 if (dirbase == 0) {
3069 dirbase = kernel_pmap->dirbase;
3070 }
3071 if (dirbase == 0) {
3072 db_printf("need a dirbase...\n");
3073 return;
3074 }
3075 dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
3076
3077 db_printf("dirbase: 0x%x\n", dirbase);
3078
3079 pdecnt = ptecnt = 0;
3080 pdep = &dirbase[0];
3081 for (y = 0; y < NPDEPG; y++, pdep++) {
3082 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3083 continue;
3084 }
3085 pdecnt++;
3086 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3087 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
3088 for (x = 0; x < NPTEPG; x++, ptep++) {
3089 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3090 continue;
3091 }
3092 ptecnt++;
3093 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3094 x,
3095 *ptep,
3096 (y << 22) | (x << 12),
3097 *ptep & ~INTEL_OFFMASK);
3098 }
3099 }
3100
3101 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3102
3103 }
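
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the address arithmetic db_show_vaddrs() relies on, going the
 * other way. For the non-PAE 2-level i386 layout printed above, a 32-bit
 * virtual address splits into a 10-bit directory index, a 10-bit table
 * index and a 12-bit page offset. db_va_split_example() is a hypothetical
 * name.
 */
void db_va_split_example(vm_offset_t, int *, int *, vm_offset_t *);

void
db_va_split_example(
	vm_offset_t	va,
	int		*dir_index,
	int		*tbl_index,
	vm_offset_t	*offset)
{
	*dir_index = (va >> 22) & 0x3ff;	/* selects dir[dir_index]      */
	*tbl_index = (va >> 12) & 0x3ff;	/* selects tab[tbl_index]      */
	*offset    = va & INTEL_OFFMASK;	/* byte offset within the page */
}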
3104 #endif /* MACH_KDB */
3105
3106 #include <mach_vm_debug.h>
3107 #if MACH_VM_DEBUG
3108 #include <vm/vm_debug.h>
3109
3110 int
3111 pmap_list_resident_pages(
3112 __unused pmap_t pmap,
3113 __unused vm_offset_t *listp,
3114 __unused int space)
3115 {
3116 return 0;
3117 }
3118 #endif /* MACH_VM_DEBUG */
3119
3120 #ifdef MACH_BSD
3121 /*
3122 * pmap_movepage (historically pmap_pagemove)
3123 *
3124 * BSD support routine to reassign virtual addresses.
3125 */
3126
3127 void
3128 pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
3129 {
3130 spl_t spl;
3131 pt_entry_t *pte, saved_pte;
3132 unsigned long orig_from = from, orig_to = to;
3133 /* Lock the kernel map */
3134 PMAP_READ_LOCK(kernel_pmap, spl);
3135
3136
3137 while (size > 0) {
3138 pte = pmap_pte(kernel_pmap, from);
3139 if (pte == NULL)
3140 panic("pmap_pagemove from pte NULL");
3141 saved_pte = *pte;
3142 PMAP_READ_UNLOCK(kernel_pmap, spl);
3143
3144 pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
3145 VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);
3146
3147 pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));
3148
3149 PMAP_READ_LOCK(kernel_pmap, spl);
3150 pte = pmap_pte(kernel_pmap, to);
3151 if (pte == NULL)
3152 panic("pmap_pagemove 'to' pte NULL");
3153
3154 *pte = saved_pte;
3155
3156 from += PAGE_SIZE;
3157 to += PAGE_SIZE;
3158 size -= PAGE_SIZE;
3159 }
3160
3161 /* Get the processors to update the TLBs (from, to and size were advanced above) */
3162 PMAP_UPDATE_TLBS(kernel_pmap, orig_from, from);
3163 PMAP_UPDATE_TLBS(kernel_pmap, orig_to, to);
3164
3165 PMAP_READ_UNLOCK(kernel_pmap, spl);
3166
3167 }
3168 #endif /* MACH_BSD */
3169
3170 /* temporary workaround */
3171 boolean_t
3172 coredumpok(vm_map_t map, vm_offset_t va)
3173 {
3174 pt_entry_t *ptep;
3175
3176 ptep = pmap_pte(map->pmap, va);
3177 if (0 == ptep)
3178 return FALSE;
3179 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
3180 }
3181
3182 /*
3183 * grow the number of kernel page table entries, if needed
3184 */
3185 void
3186 pmap_growkernel(vm_offset_t addr)
3187 {
3188 #if GROW_KERNEL_FUNCTION_IMPLEMENTED
3189 struct pmap *pmap;
3190 int s;
3191 vm_offset_t ptppaddr;
3192 ppnum_t ppn;
3193 vm_page_t nkpg;
3194 pd_entry_t newpdir = 0;
3195
3196 /*
3197 * Serialize.
3198 * Losers return to try again until the winner completes the work.
3199 */
3200 if (kptobj == 0) panic("growkernel 0");
3201 if (!vm_object_lock_try(kptobj)) {
3202 return;
3203 }
3204
3205 vm_page_lock_queues();
3206
3207 s = splhigh();
3208
3209 /*
3210 * If this is the first time through, locate the end of the
3211 * kernel page table entries and set nkpt to the current
3212 * number of kernel page table pages.
3213 */
3214
3215 if (kernel_vm_end == 0) {
3216 kernel_vm_end = KERNBASE;
3217 nkpt = 0;
3218
3219 while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3220 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3221 nkpt++;
3222 }
3223 }
3224
3225 /*
3226 * Now allocate and map the required number of page tables
3227 */
3228 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3229 while (kernel_vm_end < addr) {
3230 if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3231 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3232 continue; /* someone already filled this one */
3233 }
3234
3235 nkpg = vm_page_alloc(kptobj, nkpt);
3236 if (!nkpg)
3237 panic("pmap_growkernel: no memory to grow kernel");
3238
3239 nkpt++;
3240 vm_page_wire(nkpg);
3241 ppn = nkpg->phys_page;
3242 pmap_zero_page(ppn);
3243 ptppaddr = i386_ptob(ppn);
3244 newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
3245 INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
3246 pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;
3247
3248 simple_lock(&free_pmap_lock);
3249 for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
3250 pmap != kernel_pmap ;
3251 pmap = (struct pmap *)pmap->pmap_link.next ) {
3252 *pmap_pde(pmap, kernel_vm_end) = newpdir;
3253 }
3254 simple_unlock(&free_pmap_lock);
3255 }
3256 splx(s);
3257 vm_page_unlock_queues();
3258 vm_object_unlock(kptobj);
3259 #endif
3260 }
3261
3262 pt_entry_t *
3263 pmap_mapgetpte(vm_map_t map, vm_offset_t v)
3264 {
3265 return pmap_pte(map->pmap, v);
3266 }
3267
3268 boolean_t
3269 phys_page_exists(
3270 ppnum_t pn)
3271 {
3272 pmap_paddr_t phys;
3273
3274 assert(pn != vm_page_fictitious_addr);
3275
3276 if (!pmap_initialized)
3277 return (TRUE);
3278 phys = (pmap_paddr_t) i386_ptob(pn);
3279 if (!pmap_valid_page(pn))
3280 return (FALSE);
3281
3282 return TRUE;
3283 }
3284
3285 void
3286 mapping_free_prime(void)
3287 {
3288 int i;
3289 pv_entry_t pv_e;
3290
3291 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3292 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3293 PV_FREE(pv_e);
3294 }
3295 }
3296
3297 void
3298 mapping_adjust(void)
3299 {
3300 pv_entry_t pv_e;
3301 int i;
3302 int spl;
3303
3304 if (mapping_adjust_call == NULL) {
3305 thread_call_setup(&mapping_adjust_call_data,
3306 (thread_call_func_t) mapping_adjust,
3307 (thread_call_param_t) NULL);
3308 mapping_adjust_call = &mapping_adjust_call_data;
3309 }
3310 /* XXX rethink best way to do locking here */
3311 if (pv_free_count < PV_LOW_WATER_MARK) {
3312 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3313 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3314 SPLVM(spl);
3315 PV_FREE(pv_e);
3316 SPLX(spl);
3317 }
3318 }
3319 mappingrecurse = 0;
3320 }
3321
3322 void
3323 pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
3324 {
3325 int i;
3326 pt_entry_t *opte, *npte;
3327 pt_entry_t pte;
3328
3329 for (i = 0; i < cnt; i++) {
3330 opte = pmap_pte(kernel_pmap, kernel_commpage);
3331 if (0 == opte) panic("kernel_commpage");
3332 npte = pmap_pte(kernel_pmap, user_commpage);
3333 if (0 == npte) panic("user_commpage");
3334 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3335 pte &= ~INTEL_PTE_WRITE; // ensure read only
3336 WRITE_PTE_FAST(npte, pte);
3337 kernel_commpage += INTEL_PGBYTES;
3338 user_commpage += INTEL_PGBYTES;
3339 }
3340 }
3341
3342 static cpu_pmap_t cpu_pmap_master;
3343 static struct pmap_update_list cpu_update_list_master;
3344
3345 struct cpu_pmap *
3346 pmap_cpu_alloc(boolean_t is_boot_cpu)
3347 {
3348 int ret;
3349 int i;
3350 cpu_pmap_t *cp;
3351 pmap_update_list_t up;
3352 vm_offset_t address;
3353 vm_map_entry_t entry;
3354
3355 if (is_boot_cpu) {
3356 cp = &cpu_pmap_master;
3357 up = &cpu_update_list_master;
3358 } else {
3359 /*
3360 * The per-cpu pmap data structure itself.
3361 */
3362 ret = kmem_alloc(kernel_map,
3363 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3364 if (ret != KERN_SUCCESS) {
3365 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3366 return NULL;
3367 }
3368 bzero((void *)cp, sizeof(cpu_pmap_t));
3369
3370 /*
3371 * The tlb flush update list.
3372 */
3373 ret = kmem_alloc(kernel_map,
3374 (vm_offset_t *) &up, sizeof(*up));
3375 if (ret != KERN_SUCCESS) {
3376 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3377 pmap_cpu_free(cp);
3378 return NULL;
3379 }
3380
3381 /*
3382 * The temporary windows used for copy/zero - see loose_ends.c
3383 */
3384 for (i = 0; i < PMAP_NWINDOWS; i++) {
3385 ret = vm_map_find_space(kernel_map,
3386 &address, PAGE_SIZE, 0, &entry);
3387 if (ret != KERN_SUCCESS) {
3388 printf("pmap_cpu_alloc() "
3389 "vm_map_find_space ret=%d\n", ret);
3390 pmap_cpu_free(cp);
3391 return NULL;
3392 }
3393 vm_map_unlock(kernel_map);
3394
3395 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
3396 cp->mapwindow[i].prv_CMAP = vtopte(address);
3397 * (int *) cp->mapwindow[i].prv_CMAP = 0;
3398
3399 kprintf("pmap_cpu_alloc() "
3400 "window=%d CADDR=0x%x CMAP=0x%x\n",
3401 i, address, vtopte(address));
3402 }
3403 }
3404
3405 /*
3406 * Set up the pmap request list
3407 */
3408 cp->update_list = up;
3409 simple_lock_init(&up->lock, 0);
3410 up->count = 0;
3411
3412 return cp;
3413 }
3414
3415 void
3416 pmap_cpu_free(struct cpu_pmap *cp)
3417 {
3418 if (cp != NULL && cp != &cpu_pmap_master) {
3419 if (cp->update_list != NULL)
3420 kfree((void *) cp->update_list,
3421 sizeof(*cp->update_list));
3422 kfree((void *) cp, sizeof(cpu_pmap_t));
3423 }
3424 }