[apple/xnu.git] / osfmk / i386 / pmap.c  (git blame, release xnu-792.6.61)
1c79356b 1/*
91447636 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
37839358
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
37839358
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
37839358
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50/*
51 */
52
53/*
54 * File: pmap.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 * (These guys wrote the Vax version)
57 *
58 * Physical Map management code for Intel i386, i486, and i860.
59 *
60 * Manages physical address maps.
61 *
62 * In addition to hardware address maps, this
63 * module is called upon to provide software-use-only
64 * maps which may or may not be stored in the same
65 * form as hardware maps. These pseudo-maps are
66 * used to store intermediate results from copy
67 * operations to and from address spaces.
68 *
69 * Since the information managed by this module is
70 * also stored by the logical address mapping module,
71 * this module may throw away valid virtual-to-physical
72 * mappings at almost any time. However, invalidations
73 * of virtual-to-physical mappings must be done as
74 * requested.
75 *
76 * In order to cope with hardware architectures which
77 * make virtual-to-physical map invalidates expensive,
78 * this module may delay invalidate or reduced protection
79 * operations until such time as they are actually
80 * necessary. This module is given full information as
81 * to which processors are currently using which maps,
82 * and to when physical maps must be made correct.
83 */
84
1c79356b
A
85#include <string.h>
86#include <norma_vm.h>
87#include <mach_kdb.h>
88#include <mach_ldebug.h>
89
90#include <mach/machine/vm_types.h>
91
92#include <mach/boolean.h>
93#include <kern/thread.h>
94#include <kern/zalloc.h>
95
96#include <kern/lock.h>
91447636 97#include <kern/kalloc.h>
1c79356b
A
98#include <kern/spl.h>
99
100#include <vm/pmap.h>
101#include <vm/vm_map.h>
102#include <vm/vm_kern.h>
103#include <mach/vm_param.h>
104#include <mach/vm_prot.h>
105#include <vm/vm_object.h>
106#include <vm/vm_page.h>
107
108#include <mach/machine/vm_param.h>
109#include <machine/thread.h>
110
111#include <kern/misc_protos.h> /* prototyping */
112#include <i386/misc_protos.h>
113
114#include <i386/cpuid.h>
91447636 115#include <i386/cpu_data.h>
55e303ae
A
116#include <i386/cpu_number.h>
117#include <i386/machine_cpu.h>
91447636 118#include <i386/mp_slave_boot.h>
1c79356b
A
119
120#if MACH_KDB
121#include <ddb/db_command.h>
122#include <ddb/db_output.h>
123#include <ddb/db_sym.h>
124#include <ddb/db_print.h>
125#endif /* MACH_KDB */
126
127#include <kern/xpr.h>
128
91447636
A
129#include <vm/vm_protos.h>
130
131#include <i386/mp.h>
1c79356b
A
132
133/*
134 * Forward declarations for internal functions.
135 */
91447636 136void pmap_expand(
1c79356b
A
137 pmap_t map,
138 vm_offset_t v);
139
140extern void pmap_remove_range(
141 pmap_t pmap,
142 vm_offset_t va,
143 pt_entry_t *spte,
144 pt_entry_t *epte);
145
91447636
A
146void phys_attribute_clear(
147 ppnum_t phys,
1c79356b
A
148 int bits);
149
91447636
A
150boolean_t phys_attribute_test(
151 ppnum_t phys,
1c79356b
A
152 int bits);
153
91447636
A
154void phys_attribute_set(
155 ppnum_t phys,
1c79356b
A
156 int bits);
157
91447636
A
158void pmap_growkernel(
159 vm_offset_t addr);
160
161void pmap_set_reference(
162 ppnum_t pn);
163
164void pmap_movepage(
165 unsigned long from,
166 unsigned long to,
167 vm_size_t size);
168
169pt_entry_t * pmap_mapgetpte(
170 vm_map_t map,
171 vm_offset_t v);
172
173boolean_t phys_page_exists(
174 ppnum_t pn);
1c79356b
A
175
176#ifndef set_dirbase
91447636 177void set_dirbase(vm_offset_t dirbase);
1c79356b
A
178#endif /* set_dirbase */
179
1c79356b
A
180#define iswired(pte) ((pte) & INTEL_PTE_WIRED)
181
1c79356b
A
182#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
183#define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
184
55e303ae
A
185#define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL)
186#define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
187
1c79356b
A
188/*
189 * Private data structures.
190 */
191
192/*
193 * For each vm_page_t, there is a list of all currently
194 * valid virtual mappings of that page. An entry is
195 * a pv_entry_t; the list is the pv_table.
196 */
197
198typedef struct pv_entry {
199 struct pv_entry *next; /* next pv_entry */
200 pmap_t pmap; /* pmap where mapping lies */
201 vm_offset_t va; /* virtual address for mapping */
202} *pv_entry_t;
203
204#define PV_ENTRY_NULL ((pv_entry_t) 0)
205
206pv_entry_t pv_head_table; /* array of entries, one per page */
207
208/*
209 * pv_list entries are kept on a list that can only be accessed
210 * with the pmap system locked (at SPLVM, not in the cpus_active set).
211 * The list is refilled from the pv_list_zone if it becomes empty.
212 */
213pv_entry_t pv_free_list; /* free list at SPLVM */
214decl_simple_lock_data(,pv_free_list_lock)
91447636
A
215int pv_free_count = 0;
216#define PV_LOW_WATER_MARK 5000
217#define PV_ALLOC_CHUNK 2000
218thread_call_t mapping_adjust_call;
219static thread_call_data_t mapping_adjust_call_data;
220int mappingrecurse = 0;
1c79356b
A
221
222#define PV_ALLOC(pv_e) { \
223 simple_lock(&pv_free_list_lock); \
224 if ((pv_e = pv_free_list) != 0) { \
225 pv_free_list = pv_e->next; \
91447636
A
226 pv_free_count--; \
227 if (pv_free_count < PV_LOW_WATER_MARK) \
228 if (hw_compare_and_store(0,1,&mappingrecurse)) \
229 thread_call_enter(mapping_adjust_call); \
1c79356b
A
230 } \
231 simple_unlock(&pv_free_list_lock); \
232}
233
234#define PV_FREE(pv_e) { \
235 simple_lock(&pv_free_list_lock); \
236 pv_e->next = pv_free_list; \
237 pv_free_list = pv_e; \
91447636 238 pv_free_count++; \
1c79356b
A
239 simple_unlock(&pv_free_list_lock); \
240}
241
242zone_t pv_list_zone; /* zone of pv_entry structures */
243
91447636
A
244#ifdef PAE
245static zone_t pdpt_zone;
246#endif
247
248
1c79356b
A
249/*
250 * Each entry in the pv_head_table is locked by a bit in the
251 * pv_lock_table. The lock bits are accessed by the physical
252 * address of the page they lock.
253 */
254
255char *pv_lock_table; /* pointer to array of bits */
256#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
257
258/*
259 * First and last physical addresses that we maintain any information
260 * for. Initialized to zero so that pmap operations done before
261 * pmap_init won't touch any non-existent structures.
262 */
91447636
A
263pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0;
264pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0;
1c79356b
A
265boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
266
91447636
A
267pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0;
268
269#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1
270#if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */
271static struct vm_object kptobj_object_store;
272static vm_object_t kptobj;
273#endif
274
275
1c79356b
A
276/*
277 * Index into pv_head table, its lock bits, and the modify/reference
278 * bits starting at vm_first_phys.
279 */
280
91447636 281#define pa_index(pa) (i386_btop(pa - vm_first_phys))
1c79356b
A
282
283#define pai_to_pvh(pai) (&pv_head_table[pai])
284#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
285#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
286
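/*
 * Editor's note -- illustrative sketch only, not part of the original
 * source.  It shows how pa_index()/pai_to_pvh() above and pv_head_table
 * are meant to be combined to visit every virtual mapping of a managed
 * physical page.  It assumes the LOCK_PVH/UNLOCK_PVH macros defined
 * further down in this file, and that the caller already obeys the
 * "Locking Protocols" comment below.
 */
#if 0	/* sketch only */
static int
count_mappings_example(pmap_paddr_t pa)
{
	int		pai, n = 0;
	pv_entry_t	pv_h, pv_e;

	pai = pa_index(pa);			/* index of this physical page */
	LOCK_PVH(pai);				/* per-page lock bit */
	pv_h = pai_to_pvh(pai);
	if (pv_h->pmap != PMAP_NULL) {
		/* head entry is in-line; the rest are chained off it */
		for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next)
			n++;			/* one entry per (pmap, va) mapping */
	}
	UNLOCK_PVH(pai);
	return n;
}
#endif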
287/*
 288 * Array of physical page attributes for managed pages.
289 * One byte per physical page.
290 */
291char *pmap_phys_attributes;
292
293/*
294 * Physical page attributes. Copy bits from PTE definition.
295 */
296#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
297#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
55e303ae 298#define PHYS_NCACHE INTEL_PTE_NCACHE
1c79356b
A
299
300/*
301 * Amount of virtual memory mapped by one
302 * page-directory entry.
303 */
304#define PDE_MAPPED_SIZE (pdetova(1))
305
1c79356b
A
306/*
307 * Locking and TLB invalidation
308 */
309
310/*
311 * Locking Protocols:
312 *
313 * There are two structures in the pmap module that need locking:
314 * the pmaps themselves, and the per-page pv_lists (which are locked
315 * by locking the pv_lock_table entry that corresponds to the pv_head
316 * for the list in question.) Most routines want to lock a pmap and
317 * then do operations in it that require pv_list locking -- however
318 * pmap_remove_all and pmap_copy_on_write operate on a physical page
319 * basis and want to do the locking in the reverse order, i.e. lock
320 * a pv_list and then go through all the pmaps referenced by that list.
321 * To protect against deadlock between these two cases, the pmap_lock
322 * is used. There are three different locking protocols as a result:
323 *
324 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
325 * the pmap.
326 *
327 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
328 * lock on the pmap_lock (shared read), then lock the pmap
329 * and finally the pv_lists as needed [i.e. pmap lock before
330 * pv_list lock.]
331 *
332 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
333 * Get a write lock on the pmap_lock (exclusive write); this
 334 * also guarantees exclusive access to the pv_lists. Lock the
335 * pmaps as needed.
336 *
337 * At no time may any routine hold more than one pmap lock or more than
338 * one pv_list lock. Because interrupt level routines can allocate
339 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
340 * kernel_pmap can only be held at splhigh.
341 */
342
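/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * A minimal rendering of locking protocol 2) above: a pmap-based
 * operation takes the pmap_system_lock for read plus the pmap's own
 * lock (both via PMAP_READ_LOCK, defined just below), and only then the
 * per-page pv lock for any page it touches, releasing in reverse order.
 */
#if 0	/* sketch only */
static void
protocol2_example(pmap_t pmap, pmap_paddr_t pa)
{
	spl_t	spl;
	int	pai;

	PMAP_READ_LOCK(pmap, spl);	/* system lock (read) + pmap lock */

	pai = pa_index(pa);
	LOCK_PVH(pai);			/* pv_list lock taken last */
	/* ... manipulate the pte and the pv list for pa here ... */
	UNLOCK_PVH(pai);

	PMAP_READ_UNLOCK(pmap, spl);	/* drop in reverse order */
}
#endif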
1c79356b 343/*
55e303ae 344 * We raise the interrupt level to splvm, to block interprocessor
1c79356b
A
345 * interrupts during pmap operations. We must take the CPU out of
346 * the cpus_active set while interrupts are blocked.
347 */
348#define SPLVM(spl) { \
349 spl = splhigh(); \
350 mp_disable_preemption(); \
351 i_bit_clear(cpu_number(), &cpus_active); \
352 mp_enable_preemption(); \
353}
354
355#define SPLX(spl) { \
356 mp_disable_preemption(); \
357 i_bit_set(cpu_number(), &cpus_active); \
358 mp_enable_preemption(); \
359 splx(spl); \
360}
361
362/*
363 * Lock on pmap system
364 */
365lock_t pmap_system_lock;
366
367#define PMAP_READ_LOCK(pmap, spl) { \
368 SPLVM(spl); \
369 lock_read(&pmap_system_lock); \
370 simple_lock(&(pmap)->lock); \
371}
372
373#define PMAP_WRITE_LOCK(spl) { \
374 SPLVM(spl); \
375 lock_write(&pmap_system_lock); \
376}
377
378#define PMAP_READ_UNLOCK(pmap, spl) { \
379 simple_unlock(&(pmap)->lock); \
380 lock_read_done(&pmap_system_lock); \
381 SPLX(spl); \
382}
383
384#define PMAP_WRITE_UNLOCK(spl) { \
385 lock_write_done(&pmap_system_lock); \
386 SPLX(spl); \
387}
388
389#define PMAP_WRITE_TO_READ_LOCK(pmap) { \
390 simple_lock(&(pmap)->lock); \
391 lock_write_to_read(&pmap_system_lock); \
392}
393
394#define LOCK_PVH(index) lock_pvh_pai(index)
395
396#define UNLOCK_PVH(index) unlock_pvh_pai(index)
397
55e303ae
A
398#if USLOCK_DEBUG
399extern int max_lock_loops;
91447636
A
400extern int disableSerialOuput;
401#define LOOP_VAR \
402 unsigned int loop_count; \
403 loop_count = disableSerialOuput ? max_lock_loops \
404 : max_lock_loops*100
55e303ae 405#define LOOP_CHECK(msg, pmap) \
91447636 406 if (--loop_count == 0) { \
55e303ae 407 mp_disable_preemption(); \
91447636 408 kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \
55e303ae
A
409 msg, cpu_number(), pmap, cpus_active); \
410 Debugger("deadlock detection"); \
411 mp_enable_preemption(); \
91447636 412 loop_count = max_lock_loops; \
55e303ae
A
413 }
414#else /* USLOCK_DEBUG */
415#define LOOP_VAR
416#define LOOP_CHECK(msg, pmap)
417#endif /* USLOCK_DEBUG */
1c79356b 418
55e303ae
A
419#define PMAP_UPDATE_TLBS(pmap, s, e) \
420{ \
421 cpu_set cpu_mask; \
422 cpu_set users; \
423 \
424 mp_disable_preemption(); \
425 cpu_mask = 1 << cpu_number(); \
426 \
427 /* Since the pmap is locked, other updates are locked */ \
428 /* out, and any pmap_activate has finished. */ \
429 \
430 /* find other cpus using the pmap */ \
431 users = (pmap)->cpus_using & ~cpu_mask; \
432 if (users) { \
433 LOOP_VAR; \
434 /* signal them, and wait for them to finish */ \
435 /* using the pmap */ \
436 signal_cpus(users, (pmap), (s), (e)); \
437 while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \
438 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \
439 cpu_pause(); \
440 } \
441 } \
442 /* invalidate our own TLB if pmap is in use */ \
443 \
444 if ((pmap)->cpus_using & cpu_mask) { \
445 INVALIDATE_TLB((pmap), (s), (e)); \
446 } \
447 \
448 mp_enable_preemption(); \
1c79356b
A
449}
450
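/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * Typical use of PMAP_UPDATE_TLBS() above: change the relevant pte(s)
 * while the pmap is locked, then invalidate the range so that every cpu
 * with the pmap active drops the stale translation.
 */
#if 0	/* sketch only */
static void
update_tlbs_example(pmap_t pmap, pt_entry_t *pte,
		    vm_offset_t start, vm_offset_t end)
{
	/* pte points at an entry inside [start, end); pmap is locked */
	*pte &= ~INTEL_PTE_WRITE;		/* e.g. write-protect a page */

	/* flush the local TLB and signal every other cpu using this pmap */
	PMAP_UPDATE_TLBS(pmap, start, end);
}
#endif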
1c79356b
A
451#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
452
55e303ae
A
453#define INVALIDATE_TLB(m, s, e) { \
454 flush_tlb(); \
455}
456
1c79356b
A
457/*
458 * Structures to keep track of pending TLB invalidations
459 */
460cpu_set cpus_active;
461cpu_set cpus_idle;
1c79356b 462
55e303ae
A
463#define UPDATE_LIST_SIZE 4
464
465struct pmap_update_item {
466 pmap_t pmap; /* pmap to invalidate */
467 vm_offset_t start; /* start address to invalidate */
468 vm_offset_t end; /* end address to invalidate */
469};
470
471typedef struct pmap_update_item *pmap_update_item_t;
472
473/*
474 * List of pmap updates. If the list overflows,
475 * the last entry is changed to invalidate all.
476 */
477struct pmap_update_list {
478 decl_simple_lock_data(,lock)
479 int count;
480 struct pmap_update_item item[UPDATE_LIST_SIZE];
481} ;
482typedef struct pmap_update_list *pmap_update_list_t;
483
55e303ae
A
484extern void signal_cpus(
485 cpu_set use_list,
486 pmap_t pmap,
487 vm_offset_t start,
488 vm_offset_t end);
1c79356b 489
91447636 490pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
1c79356b
A
491
492/*
493 * Other useful macros.
494 */
91447636 495#define current_pmap() (vm_map_pmap(current_thread()->map))
1c79356b
A
496#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
497
498struct pmap kernel_pmap_store;
499pmap_t kernel_pmap;
500
91447636
A
501#ifdef PMAP_QUEUE
502decl_simple_lock_data(,free_pmap_lock)
503#endif
504
1c79356b
A
505struct zone *pmap_zone; /* zone of pmap structures */
506
507int pmap_debug = 0; /* flag for debugging prints */
91447636 508
1c79356b
A
509unsigned int inuse_ptepages_count = 0; /* debugging */
510
511/*
512 * Pmap cache. Cache is threaded through ref_count field of pmap.
513 * Max will eventually be constant -- variable for experimentation.
514 */
515int pmap_cache_max = 32;
516int pmap_alloc_chunk = 8;
517pmap_t pmap_cache_list;
518int pmap_cache_count;
519decl_simple_lock_data(,pmap_cache_lock)
520
521extern vm_offset_t hole_start, hole_end;
522
523extern char end;
524
91447636
A
525static int nkpt;
526
527pt_entry_t *DMAP1, *DMAP2;
528caddr_t DADDR1;
529caddr_t DADDR2;
1c79356b
A
530
531#if DEBUG_ALIAS
532#define PMAP_ALIAS_MAX 32
533struct pmap_alias {
534 vm_offset_t rpc;
535 pmap_t pmap;
536 vm_offset_t va;
537 int cookie;
538#define PMAP_ALIAS_COOKIE 0xdeadbeef
539} pmap_aliasbuf[PMAP_ALIAS_MAX];
540int pmap_alias_index = 0;
541extern vm_offset_t get_rpc();
542
543#endif /* DEBUG_ALIAS */
544
91447636
A
545#define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))
546#define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT])
1c79356b 547
91447636
A
548static __inline int
549pmap_is_current(pmap_t pmap)
1c79356b 550{
91447636
A
551 return (pmap == kernel_pmap ||
552 (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
1c79356b
A
553}
554
1c79356b 555
91447636
A
556/*
557 * return address of mapped pte for vaddr va in pmap pmap.
558 */
559pt_entry_t *
560pmap_pte(pmap_t pmap, vm_offset_t va)
561{
562 pd_entry_t *pde;
563 pd_entry_t newpf;
564
565 pde = pmap_pde(pmap, va);
566 if (*pde != 0) {
567 if (pmap_is_current(pmap))
568 return( vtopte(va));
569 newpf = *pde & PG_FRAME;
570 if (((*CM4) & PG_FRAME) != newpf) {
571 *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID;
572 invlpg((u_int)CA4);
573 }
574 return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1));
575 }
576 return(0);
577}
578
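/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * pmap_pte() above returns a pointer to the pte mapping `va', using the
 * recursive mapping when `pmap' is current and the CM4/CA4 window
 * otherwise.  A caller can use it to inspect a mapping, for example:
 */
#if 0	/* sketch only */
static boolean_t
va_is_writable_example(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte = pmap_pte(pmap, va);

	if (pte == PT_ENTRY_NULL || !(*pte & INTEL_PTE_VALID))
		return FALSE;			/* not mapped */
	return (*pte & INTEL_PTE_WRITE) ? TRUE : FALSE;
}
#endif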
1c79356b
A
579#define DEBUG_PTE_PAGE 0
580
581#if DEBUG_PTE_PAGE
582void
583ptep_check(
584 ptep_t ptep)
585{
586 register pt_entry_t *pte, *epte;
587 int ctu, ctw;
588
589 /* check the use and wired counts */
590 if (ptep == PTE_PAGE_NULL)
591 return;
592 pte = pmap_pte(ptep->pmap, ptep->va);
593 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
594 ctu = 0;
595 ctw = 0;
596 while (pte < epte) {
597 if (pte->pfn != 0) {
598 ctu++;
599 if (pte->wired)
600 ctw++;
601 }
91447636 602 pte++;
1c79356b
A
603 }
604
605 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
606 printf("use %d wired %d - actual use %d wired %d\n",
607 ptep->use_count, ptep->wired_count, ctu, ctw);
608 panic("pte count");
609 }
610}
611#endif /* DEBUG_PTE_PAGE */
612
613/*
614 * Map memory at initialization. The physical addresses being
615 * mapped are not managed and are never unmapped.
616 *
617 * For now, VM is already on, we only need to map the
618 * specified memory.
619 */
620vm_offset_t
621pmap_map(
622 register vm_offset_t virt,
91447636
A
623 register vm_offset_t start_addr,
624 register vm_offset_t end_addr,
1c79356b
A
625 register vm_prot_t prot)
626{
627 register int ps;
628
629 ps = PAGE_SIZE;
91447636
A
630 while (start_addr < end_addr) {
631 pmap_enter(kernel_pmap,
632 virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE);
1c79356b 633 virt += ps;
91447636 634 start_addr += ps;
1c79356b
A
635 }
636 return(virt);
637}
638
639/*
640 * Back-door routine for mapping kernel VM at initialization.
 641 * Useful for mapping memory outside the range
 642 * [vm_first_phys, vm_last_phys) (i.e., devices).
 643 * Sets no-cache, A, D (accessed and dirty bits).
 644 * Otherwise like pmap_map.
645 */
646vm_offset_t
647pmap_map_bd(
648 register vm_offset_t virt,
91447636
A
649 register vm_offset_t start_addr,
650 register vm_offset_t end_addr,
1c79356b
A
651 vm_prot_t prot)
652{
653 register pt_entry_t template;
654 register pt_entry_t *pte;
655
91447636 656 template = pa_to_pte(start_addr)
1c79356b
A
657 | INTEL_PTE_NCACHE
658 | INTEL_PTE_REF
659 | INTEL_PTE_MOD
660 | INTEL_PTE_WIRED
661 | INTEL_PTE_VALID;
662 if (prot & VM_PROT_WRITE)
663 template |= INTEL_PTE_WRITE;
664
91447636
A
665 /* XXX move pmap_pte out of loop, once one pte mapped, all are */
666 while (start_addr < end_addr) {
1c79356b 667 pte = pmap_pte(kernel_pmap, virt);
91447636 668 if (pte == PT_ENTRY_NULL) {
1c79356b 669 panic("pmap_map_bd: Invalid kernel address\n");
91447636 670 }
1c79356b
A
671 WRITE_PTE_FAST(pte, template)
672 pte_increment_pa(template);
673 virt += PAGE_SIZE;
91447636 674 start_addr += PAGE_SIZE;
1c79356b
A
675 }
676
55e303ae 677 flush_tlb();
1c79356b
A
678 return(virt);
679}
680
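/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * Example use of pmap_map_bd() above during early bring-up: wire a
 * device register page into the kernel map, uncached.  The physical
 * address below is hypothetical; pmap_map_bd() returns the first
 * virtual address after the new mapping.
 */
#if 0	/* sketch only */
static vm_offset_t
map_device_example(vm_offset_t vfree)
{
	vm_offset_t dev_phys = 0xfee00000;	/* hypothetical device base */
	vm_offset_t dev_size = PAGE_SIZE;

	return pmap_map_bd(vfree, dev_phys, dev_phys + dev_size,
			   VM_PROT_READ | VM_PROT_WRITE);
}
#endif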
1c79356b
A
681extern char *first_avail;
682extern vm_offset_t virtual_avail, virtual_end;
91447636
A
683extern pmap_paddr_t avail_start, avail_end;
684extern vm_offset_t etext;
685extern void *sectHIBB;
686extern int sectSizeHIB;
1c79356b
A
687
688/*
689 * Bootstrap the system enough to run with virtual memory.
690 * Map the kernel's code and data, and allocate the system page table.
691 * Called with mapping OFF. Page_size must already be set.
692 *
693 * Parameters:
694 * load_start: PA where kernel was loaded
695 * avail_start PA of first available physical page -
696 * after kernel page tables
697 * avail_end PA of last available physical page
698 * virtual_avail VA of first available page -
699 * after kernel page tables
700 * virtual_end VA of last available page -
701 * end of kernel address space
702 *
703 * &start_text start of kernel text
704 * &etext end of kernel text
705 */
706
707void
708pmap_bootstrap(
91447636 709 __unused vm_offset_t load_start)
1c79356b 710{
91447636
A
711 vm_offset_t va;
712 pt_entry_t *pte;
713 int i;
714 int wpkernel, boot_arg;
1c79356b 715
91447636
A
716 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
717 * known to VM */
1c79356b
A
718
719 /*
720 * The kernel's pmap is statically allocated so we don't
721 * have to use pmap_create, which is unlikely to work
722 * correctly at this part of the boot sequence.
723 */
724
725 kernel_pmap = &kernel_pmap_store;
91447636
A
726#ifdef PMAP_QUEUE
727 kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */
728 kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */
729#endif
730 kernel_pmap->ref_count = 1;
731 kernel_pmap->pm_obj = (vm_object_t) NULL;
732 kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
733 kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD;
734#ifdef PAE
735 kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
736 kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT;
737#endif
1c79356b 738
91447636
A
739 va = (vm_offset_t)kernel_pmap->dirbase;
740 /* setup self referential mapping(s) */
741 for (i = 0; i< NPGPTD; i++ ) {
742 pmap_paddr_t pa;
743 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
744 * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) =
745 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
746 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
747#ifdef PAE
748 kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID;
749#endif
750 }
1c79356b 751
91447636 752 nkpt = NKPT;
1c79356b 753
91447636
A
754 virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
755 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
1c79356b
A
756
757 /*
91447636
A
758 * Reserve some special page table entries/VA space for temporary
759 * mapping of pages.
1c79356b 760 */
91447636
A
761#define SYSMAP(c, p, v, n) \
762 v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n);
763
764 va = virtual_avail;
765 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
1c79356b
A
766
767 /*
91447636
A
768 * CMAP1/CMAP2 are used for zeroing and copying pages.
769 * CMAP3 is used for ml_phys_read/write.
1c79356b 770 */
91447636
A
771 SYSMAP(caddr_t, CM1, CA1, 1)
772 * (pt_entry_t *) CM1 = 0;
773 SYSMAP(caddr_t, CM2, CA2, 1)
774 * (pt_entry_t *) CM2 = 0;
775 SYSMAP(caddr_t, CM3, CA3, 1)
776 * (pt_entry_t *) CM3 = 0;
1c79356b 777
91447636
A
778 /* used by pmap_pte */
779 SYSMAP(caddr_t, CM4, CA4, 1)
780 * (pt_entry_t *) CM4 = 0;
1c79356b 781
91447636
A
 782 /* DMAP entries used by the debugger */
783 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
784 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
1c79356b 785
1c79356b 786
91447636
A
787 lock_init(&pmap_system_lock,
788 FALSE, /* NOT a sleep lock */
789 0, 0);
1c79356b 790
91447636 791 virtual_avail = va;
1c79356b 792
91447636
A
793 wpkernel = 1;
794 if (PE_parse_boot_arg("debug", &boot_arg)) {
795 if (boot_arg & DB_PRT) wpkernel = 0;
796 if (boot_arg & DB_NMI) wpkernel = 0;
1c79356b
A
797 }
798
91447636
A
799 /* remap kernel text readonly if not debugging or kprintfing */
800 if (wpkernel)
801 {
802 vm_offset_t myva;
803 pt_entry_t *ptep;
804
805 for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) {
806 if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB))
807 continue;
808 ptep = pmap_pte(kernel_pmap, myva);
809 if (ptep)
810 *ptep &= ~INTEL_PTE_RW;
811 }
812 flush_tlb();
813 }
1c79356b 814
91447636
A
815 simple_lock_init(&kernel_pmap->lock, 0);
816 simple_lock_init(&pv_free_list_lock, 0);
1c79356b 817
91447636
A
818 /* invalidate user virtual addresses */
819 memset((char *)kernel_pmap->dirbase,
1c79356b 820 0,
91447636 821 (KPTDI) * sizeof(pd_entry_t));
1c79356b 822
91447636
A
823 kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
824 VADDR(KPTDI,0), virtual_end);
825#ifdef PAE
826 kprintf("Available physical space from 0x%llx to 0x%llx\n",
1c79356b 827 avail_start, avail_end);
91447636
A
828 printf("PAE enabled\n");
829#else
830 kprintf("Available physical space from 0x%x to 0x%x\n",
831 avail_start, avail_end);
832#endif
1c79356b
A
833}
834
835void
836pmap_virtual_space(
837 vm_offset_t *startp,
838 vm_offset_t *endp)
839{
840 *startp = virtual_avail;
841 *endp = virtual_end;
842}
843
844/*
845 * Initialize the pmap module.
846 * Called by vm_init, to initialize any structures that the pmap
847 * system needs to map virtual memory.
848 */
849void
850pmap_init(void)
851{
852 register long npages;
853 vm_offset_t addr;
854 register vm_size_t s;
91447636
A
855 vm_offset_t vaddr;
856 ppnum_t ppn;
1c79356b
A
857
858 /*
859 * Allocate memory for the pv_head_table and its lock bits,
860 * the modify bit array, and the pte_page table.
861 */
862
91447636
A
863 /* zero bias all these arrays now instead of off avail_start
864 so we cover all memory */
865 npages = i386_btop(avail_end);
1c79356b
A
866 s = (vm_size_t) (sizeof(struct pv_entry) * npages
867 + pv_lock_table_size(npages)
868 + npages);
869
870 s = round_page(s);
871 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
872 panic("pmap_init");
873
874 memset((char *)addr, 0, s);
875
876 /*
877 * Allocate the structures first to preserve word-alignment.
878 */
879 pv_head_table = (pv_entry_t) addr;
880 addr = (vm_offset_t) (pv_head_table + npages);
881
882 pv_lock_table = (char *) addr;
883 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
884
885 pmap_phys_attributes = (char *) addr;
886
887 /*
888 * Create the zone of physical maps,
889 * and of the physical-to-virtual entries.
890 */
891 s = (vm_size_t) sizeof(struct pmap);
892 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
893 s = (vm_size_t) sizeof(struct pv_entry);
894 pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
91447636
A
895#ifdef PAE
896 // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD);
897 s = 63;
898 pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
899#endif
55e303ae 900
1c79356b
A
901 /*
902 * Only now, when all of the data structures are allocated,
903 * can we set vm_first_phys and vm_last_phys. If we set them
904 * too soon, the kmem_alloc_wired above will try to use these
905 * data structures and blow up.
906 */
907
91447636
A
908 /* zero bias this now so we cover all memory */
909 vm_first_phys = 0;
1c79356b 910 vm_last_phys = avail_end;
91447636
A
911
912#if GROW_KERNEL_FUNCTION_IMPLEMENTED
913 kptobj = &kptobj_object_store;
914 _vm_object_allocate((vm_object_size_t)NKPDE, kptobj);
915 kernel_pmap->pm_obj = kptobj;
916#endif
917
918 /* create pv entries for kernel pages mapped by low level
919 startup code. these have to exist so we can pmap_remove()
920 e.g. kext pages from the middle of our addr space */
921
922 vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS;
923 for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
924 pv_entry_t pv_e;
925
926 pv_e = pai_to_pvh(ppn);
927 pv_e->va = vaddr;
928 vaddr += PAGE_SIZE;
929 pv_e->pmap = kernel_pmap;
930 pv_e->next = PV_ENTRY_NULL;
931 }
932
1c79356b
A
933 pmap_initialized = TRUE;
934
935 /*
 936 * Initialize pmap cache.
937 */
938 pmap_cache_list = PMAP_NULL;
939 pmap_cache_count = 0;
91447636
A
940 simple_lock_init(&pmap_cache_lock, 0);
941#ifdef PMAP_QUEUE
942 simple_lock_init(&free_pmap_lock, 0);
943#endif
944
1c79356b
A
945}
946
91447636
A
947void
948x86_lowmem_free(void)
949{
950 /* free lowmem pages back to the vm system. we had to defer doing this
951 until the vm system was fully up.
952 the actual pages that are released are determined by which
953 pages the memory sizing code puts into the region table */
1c79356b 954
91447636
A
955 ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS,
956 (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base));
957}
1c79356b
A
958
959
960#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
961
962boolean_t
963pmap_verify_free(
55e303ae 964 ppnum_t pn)
1c79356b 965{
91447636 966 pmap_paddr_t phys;
1c79356b
A
967 pv_entry_t pv_h;
968 int pai;
969 spl_t spl;
970 boolean_t result;
971
55e303ae 972 assert(pn != vm_page_fictitious_addr);
91447636 973 phys = (pmap_paddr_t)i386_ptob(pn);
1c79356b
A
974 if (!pmap_initialized)
975 return(TRUE);
976
91447636 977 if (!pmap_valid_page(pn))
1c79356b
A
978 return(FALSE);
979
980 PMAP_WRITE_LOCK(spl);
981
982 pai = pa_index(phys);
983 pv_h = pai_to_pvh(pai);
984
985 result = (pv_h->pmap == PMAP_NULL);
986 PMAP_WRITE_UNLOCK(spl);
987
988 return(result);
989}
990
991/*
992 * Create and return a physical map.
993 *
994 * If the size specified for the map
995 * is zero, the map is an actual physical
996 * map, and may be referenced by the
997 * hardware.
998 *
999 * If the size specified is non-zero,
1000 * the map will be used in software only, and
1001 * is bounded by that size.
1002 */
1003pmap_t
1004pmap_create(
1005 vm_size_t size)
1006{
91447636
A
1007 register pmap_t p;
1008#ifdef PMAP_QUEUE
1009 register pmap_t pro;
1010 spl_t s;
1011#endif
1012 register int i;
1013 register vm_offset_t va;
1c79356b
A
1014
1015 /*
1016 * A software use-only map doesn't even need a map.
1017 */
1018
1019 if (size != 0) {
1020 return(PMAP_NULL);
1021 }
1022
91447636
A
1023 p = (pmap_t) zalloc(pmap_zone);
1024 if (PMAP_NULL == p)
1025 panic("pmap_create zalloc");
1026 if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
1027 panic("pmap_create kmem_alloc_wired");
1028#ifdef PAE
1029 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
1030 if ((vm_offset_t)NULL == p->pm_hold) {
1031 panic("pdpt zalloc");
1032 }
1033 p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
1034 p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */
1035#endif
1036 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG))))
1037 panic("pmap_create vm_object_allocate");
1038 memcpy(p->dirbase,
1039 (void *)((unsigned int)IdlePTD | KERNBASE),
1040 NBPTD);
1041 va = (vm_offset_t)p->dirbase;
1042 p->pdirbase = (pd_entry_t *)(kvtophys(va));
1043 simple_lock_init(&p->lock, 0);
1044
1045 /* setup self referential mapping(s) */
1046 for (i = 0; i< NPGPTD; i++ ) {
1047 pmap_paddr_t pa;
1048 pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i));
1049 * (pd_entry_t *) (p->dirbase + PTDPTDI + i) =
1050 (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
1051 INTEL_PTE_MOD | INTEL_PTE_WIRED ;
1052#ifdef PAE
1053 p->pm_pdpt[i] = pa | INTEL_PTE_VALID;
1c79356b 1054#endif
1c79356b
A
1055 }
1056
91447636 1057 p->cpus_using = 0;
1c79356b
A
1058 p->stats.resident_count = 0;
1059 p->stats.wired_count = 0;
1c79356b 1060 p->ref_count = 1;
91447636
A
1061
1062#ifdef PMAP_QUEUE
1063 /* insert new pmap at head of queue hanging off kernel_pmap */
1064 SPLVM(s);
1065 simple_lock(&free_pmap_lock);
1066 p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next;
1067 kernel_pmap->pmap_link.next = (queue_t)p;
1068
1069 pro = (pmap_t) p->pmap_link.next;
1070 p->pmap_link.prev = (queue_t)pro->pmap_link.prev;
1071 pro->pmap_link.prev = (queue_t)p;
1072
1073
1074 simple_unlock(&free_pmap_lock);
1075 SPLX(s);
1076#endif
1c79356b
A
1077
1078 return(p);
1079}
1080
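/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * Life cycle of a hardware pmap as implemented by pmap_create() above
 * and pmap_reference()/pmap_destroy() below: size must be 0 to get a
 * real, hardware-loadable map, and each added reference must be
 * balanced by a pmap_destroy().
 */
#if 0	/* sketch only */
static void
pmap_lifecycle_example(void)
{
	pmap_t p = pmap_create((vm_size_t) 0);	/* hardware pmap */

	if (p == PMAP_NULL)
		return;

	pmap_reference(p);	/* ref_count: 1 -> 2 */
	pmap_destroy(p);	/* ref_count: 2 -> 1, map survives */
	pmap_destroy(p);	/* ref_count: 1 -> 0, resources freed */
}
#endif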
1081/*
1082 * Retire the given physical map from service.
1083 * Should only be called if the map contains
1084 * no valid mappings.
1085 */
1086
1087void
1088pmap_destroy(
1089 register pmap_t p)
1090{
1091 register pt_entry_t *pdep;
1c79356b
A
1092 register int c;
1093 spl_t s;
1094 register vm_page_t m;
91447636
A
1095#ifdef PMAP_QUEUE
1096 register pmap_t pre,pro;
1097#endif
1c79356b
A
1098
1099 if (p == PMAP_NULL)
1100 return;
1101
1102 SPLVM(s);
1103 simple_lock(&p->lock);
1104 c = --p->ref_count;
1105 if (c == 0) {
1106 register int my_cpu;
1107
1108 mp_disable_preemption();
1109 my_cpu = cpu_number();
1110
1111 /*
1112 * If some cpu is not using the physical pmap pointer that it
1113 * is supposed to be (see set_dirbase), we might be using the
1114 * pmap that is being destroyed! Make sure we are
1115 * physically on the right pmap:
1116 */
55e303ae
A
1117 /* force pmap/cr3 update */
1118 PMAP_UPDATE_TLBS(p,
1119 VM_MIN_ADDRESS,
1120 VM_MAX_KERNEL_ADDRESS);
1c79356b 1121
91447636 1122 if (PMAP_REAL(my_cpu) == p) {
1c79356b 1123 PMAP_CPU_CLR(p, my_cpu);
91447636
A
1124 PMAP_REAL(my_cpu) = kernel_pmap;
1125#ifdef PAE
1126 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
1127#else
1128 set_cr3((unsigned int)kernel_pmap->pdirbase);
1129#endif
1c79356b
A
1130 }
1131 mp_enable_preemption();
1132 }
1133 simple_unlock(&p->lock);
1134 SPLX(s);
1135
1136 if (c != 0) {
1137 return; /* still in use */
1138 }
1139
91447636
A
1140#ifdef PMAP_QUEUE
1141 /* remove from pmap queue */
1142 SPLVM(s);
1143 simple_lock(&free_pmap_lock);
1144
1145 pre = (pmap_t)p->pmap_link.prev;
1146 pre->pmap_link.next = (queue_t)p->pmap_link.next;
1147 pro = (pmap_t)p->pmap_link.next;
1148 pro->pmap_link.prev = (queue_t)p->pmap_link.prev;
1149
1150 simple_unlock(&free_pmap_lock);
1151 SPLX(s);
1152#endif
1153
1c79356b
A
1154 /*
1155 * Free the memory maps, then the
1156 * pmap structure.
1157 */
91447636
A
1158
1159 pdep = (pt_entry_t *)p->dirbase;
1160
1161 while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) {
1162 int ind;
1c79356b 1163 if (*pdep & INTEL_PTE_VALID) {
91447636
A
1164 ind = pdep - (pt_entry_t *)&p->dirbase[0];
1165 vm_object_lock(p->pm_obj);
1166 m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind);
1167 if (m == VM_PAGE_NULL) {
1c79356b 1168 panic("pmap_destroy: pte page not in object");
91447636 1169 }
1c79356b
A
1170 vm_page_lock_queues();
1171 vm_page_free(m);
1172 inuse_ptepages_count--;
91447636 1173 vm_object_unlock(p->pm_obj);
1c79356b
A
1174 vm_page_unlock_queues();
1175
1176 /*
1177 * Clear pdes, this might be headed for the cache.
1178 */
91447636 1179 *pdep++ = 0;
1c79356b
A
1180 }
1181 else {
91447636 1182 *pdep++ = 0;
1c79356b
A
1183 }
1184
1185 }
55e303ae 1186
91447636
A
1187 vm_object_deallocate(p->pm_obj);
1188 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
1189#ifdef PAE
1190 zfree(pdpt_zone, (void *)p->pm_hold);
1191#endif
1192 zfree(pmap_zone, p);
1c79356b
A
1193}
1194
1195/*
1196 * Add a reference to the specified pmap.
1197 */
1198
1199void
1200pmap_reference(
1201 register pmap_t p)
1202{
1203 spl_t s;
1204
1205 if (p != PMAP_NULL) {
1206 SPLVM(s);
1207 simple_lock(&p->lock);
1208 p->ref_count++;
1209 simple_unlock(&p->lock);
1210 SPLX(s);
1211 }
1212}
1213
1214/*
1215 * Remove a range of hardware page-table entries.
1216 * The entries given are the first (inclusive)
1217 * and last (exclusive) entries for the VM pages.
1218 * The virtual address is the va for the first pte.
1219 *
1220 * The pmap must be locked.
1221 * If the pmap is not the kernel pmap, the range must lie
1222 * entirely within one pte-page. This is NOT checked.
1223 * Assumes that the pte-page exists.
1224 */
1225
1226/* static */
1227void
1228pmap_remove_range(
1229 pmap_t pmap,
1230 vm_offset_t va,
1231 pt_entry_t *spte,
1232 pt_entry_t *epte)
1233{
1234 register pt_entry_t *cpte;
1235 int num_removed, num_unwired;
1236 int pai;
91447636 1237 pmap_paddr_t pa;
1c79356b
A
1238
1239#if DEBUG_PTE_PAGE
1240 if (pmap != kernel_pmap)
1241 ptep_check(get_pte_page(spte));
1242#endif /* DEBUG_PTE_PAGE */
1243 num_removed = 0;
1244 num_unwired = 0;
1245
1246 for (cpte = spte; cpte < epte;
91447636 1247 cpte++, va += PAGE_SIZE) {
1c79356b
A
1248
1249 pa = pte_to_pa(*cpte);
1250 if (pa == 0)
1251 continue;
1252
1253 num_removed++;
1254 if (iswired(*cpte))
1255 num_unwired++;
1256
91447636 1257 if (!valid_page(i386_btop(pa))) {
1c79356b
A
1258
1259 /*
1260 * Outside range of managed physical memory.
1261 * Just remove the mappings.
1262 */
1c79356b 1263 register pt_entry_t *lpte = cpte;
91447636
A
1264
1265 *lpte = 0;
1c79356b
A
1266 continue;
1267 }
1268
1269 pai = pa_index(pa);
1270 LOCK_PVH(pai);
1271
1272 /*
1273 * Get the modify and reference bits.
1274 */
1275 {
1c79356b
A
1276 register pt_entry_t *lpte;
1277
1c79356b 1278 lpte = cpte;
1c79356b
A
1279 pmap_phys_attributes[pai] |=
1280 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1281 *lpte = 0;
91447636 1282
1c79356b
A
1283 }
1284
1285 /*
1286 * Remove the mapping from the pvlist for
1287 * this physical page.
1288 */
1289 {
1290 register pv_entry_t pv_h, prev, cur;
1291
1292 pv_h = pai_to_pvh(pai);
1293 if (pv_h->pmap == PMAP_NULL) {
1294 panic("pmap_remove: null pv_list!");
1295 }
1296 if (pv_h->va == va && pv_h->pmap == pmap) {
1297 /*
1298 * Header is the pv_entry. Copy the next one
1299 * to header and free the next one (we cannot
1300 * free the header)
1301 */
1302 cur = pv_h->next;
1303 if (cur != PV_ENTRY_NULL) {
1304 *pv_h = *cur;
1305 PV_FREE(cur);
1306 }
1307 else {
1308 pv_h->pmap = PMAP_NULL;
1309 }
1310 }
1311 else {
1312 cur = pv_h;
1313 do {
1314 prev = cur;
1315 if ((cur = prev->next) == PV_ENTRY_NULL) {
55e303ae 1316 panic("pmap-remove: mapping not in pv_list!");
1c79356b
A
1317 }
1318 } while (cur->va != va || cur->pmap != pmap);
1319 prev->next = cur->next;
1320 PV_FREE(cur);
1321 }
1322 UNLOCK_PVH(pai);
1323 }
1324 }
1325
1326 /*
1327 * Update the counts
1328 */
1329 assert(pmap->stats.resident_count >= num_removed);
1330 pmap->stats.resident_count -= num_removed;
1331 assert(pmap->stats.wired_count >= num_unwired);
1332 pmap->stats.wired_count -= num_unwired;
1333}
1334
0b4e3aa0
A
1335/*
1336 * Remove phys addr if mapped in specified map
1337 *
1338 */
1339void
1340pmap_remove_some_phys(
91447636
A
1341 __unused pmap_t map,
1342 __unused ppnum_t pn)
0b4e3aa0
A
1343{
1344
1345/* Implement to support working set code */
1346
1347}
1348
1c79356b
A
1349/*
1350 * Remove the given range of addresses
1351 * from the specified map.
1352 *
1353 * It is assumed that the start and end are properly
1354 * rounded to the hardware page size.
1355 */
1356
55e303ae 1357
1c79356b
A
1358void
1359pmap_remove(
1360 pmap_t map,
55e303ae
A
1361 addr64_t s64,
1362 addr64_t e64)
1c79356b
A
1363{
1364 spl_t spl;
1365 register pt_entry_t *pde;
1366 register pt_entry_t *spte, *epte;
1367 vm_offset_t l;
55e303ae 1368 vm_offset_t s, e;
91447636 1369 vm_offset_t orig_s;
1c79356b
A
1370
1371 if (map == PMAP_NULL)
1372 return;
1373
1374 PMAP_READ_LOCK(map, spl);
1375
55e303ae
A
1376 if (value_64bit(s64) || value_64bit(e64)) {
1377 panic("pmap_remove addr overflow");
1378 }
1379
91447636 1380 orig_s = s = (vm_offset_t)low32(s64);
55e303ae
A
1381 e = (vm_offset_t)low32(e64);
1382
1c79356b
A
1383 pde = pmap_pde(map, s);
1384
1385 while (s < e) {
1386 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1387 if (l > e)
1388 l = e;
1389 if (*pde & INTEL_PTE_VALID) {
91447636 1390 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1c79356b
A
1391 spte = &spte[ptenum(s)];
1392 epte = &spte[intel_btop(l-s)];
1393 pmap_remove_range(map, s, spte, epte);
1394 }
1395 s = l;
1396 pde++;
1397 }
1398
91447636
A
1399 PMAP_UPDATE_TLBS(map, orig_s, e);
1400
1c79356b
A
1401 PMAP_READ_UNLOCK(map, spl);
1402}
1403
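/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * pmap_remove() above takes page-aligned 64-bit addresses (which must
 * fit in 32 bits on this pmap), tears down every mapping in the range,
 * and flushes the TLBs of all cpus using the map.
 */
#if 0	/* sketch only */
static void
remove_range_example(pmap_t map, vm_offset_t va)
{
	/* drop the single page mapped at va, if any */
	pmap_remove(map, (addr64_t) va, (addr64_t) (va + PAGE_SIZE));
}
#endif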
1404/*
1405 * Routine: pmap_page_protect
1406 *
1407 * Function:
1408 * Lower the permission for all mappings to a given
1409 * page.
1410 */
1411void
1412pmap_page_protect(
55e303ae 1413 ppnum_t pn,
1c79356b
A
1414 vm_prot_t prot)
1415{
1416 pv_entry_t pv_h, prev;
1417 register pv_entry_t pv_e;
1418 register pt_entry_t *pte;
1419 int pai;
1420 register pmap_t pmap;
1421 spl_t spl;
1422 boolean_t remove;
91447636 1423 pmap_paddr_t phys;
1c79356b 1424
55e303ae 1425 assert(pn != vm_page_fictitious_addr);
91447636
A
1426 phys = (pmap_paddr_t)i386_ptob(pn);
1427 if (!valid_page(pn)) {
1c79356b
A
1428 /*
1429 * Not a managed page.
1430 */
1431 return;
1432 }
1433
1434 /*
1435 * Determine the new protection.
1436 */
1437 switch (prot) {
1438 case VM_PROT_READ:
1439 case VM_PROT_READ|VM_PROT_EXECUTE:
1440 remove = FALSE;
1441 break;
1442 case VM_PROT_ALL:
1443 return; /* nothing to do */
1444 default:
1445 remove = TRUE;
1446 break;
1447 }
1448
1449 /*
1450 * Lock the pmap system first, since we will be changing
1451 * several pmaps.
1452 */
1453
1454 PMAP_WRITE_LOCK(spl);
1455
1456 pai = pa_index(phys);
1457 pv_h = pai_to_pvh(pai);
1458
1459 /*
1460 * Walk down PV list, changing or removing all mappings.
1461 * We do not have to lock the pv_list because we have
1462 * the entire pmap system locked.
1463 */
1464 if (pv_h->pmap != PMAP_NULL) {
1465
1466 prev = pv_e = pv_h;
1467 do {
91447636 1468 register vm_offset_t va;
1c79356b
A
1469 pmap = pv_e->pmap;
1470 /*
1471 * Lock the pmap to block pmap_extract and similar routines.
1472 */
1473 simple_lock(&pmap->lock);
1474
1475 {
1c79356b
A
1476
1477 va = pv_e->va;
1478 pte = pmap_pte(pmap, va);
1479
1480 /*
1481 * Consistency checks.
1482 */
1483 /* assert(*pte & INTEL_PTE_VALID); XXX */
1484 /* assert(pte_to_phys(*pte) == phys); */
1485
1c79356b
A
1486 }
1487
1488 /*
1489 * Remove the mapping if new protection is NONE
1490 * or if write-protecting a kernel mapping.
1491 */
1492 if (remove || pmap == kernel_pmap) {
1493 /*
1494 * Remove the mapping, collecting any modify bits.
1495 */
1c79356b 1496 {
1c79356b
A
1497 pmap_phys_attributes[pai] |=
1498 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1499 *pte++ = 0;
91447636 1500 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b
A
1501 }
1502
1503 assert(pmap->stats.resident_count >= 1);
1504 pmap->stats.resident_count--;
1505
1506 /*
1507 * Remove the pv_entry.
1508 */
1509 if (pv_e == pv_h) {
1510 /*
1511 * Fix up head later.
1512 */
1513 pv_h->pmap = PMAP_NULL;
1514 }
1515 else {
1516 /*
1517 * Delete this entry.
1518 */
1519 prev->next = pv_e->next;
1520 PV_FREE(pv_e);
1521 }
1522 }
1523 else {
1524 /*
1525 * Write-protect.
1526 */
1c79356b 1527
1c79356b
A
1528 *pte &= ~INTEL_PTE_WRITE;
1529 pte++;
91447636 1530 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b
A
1531 /*
1532 * Advance prev.
1533 */
1534 prev = pv_e;
1535 }
1536
1537 simple_unlock(&pmap->lock);
1538
1539 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1540
1541 /*
1542 * If pv_head mapping was removed, fix it up.
1543 */
1544 if (pv_h->pmap == PMAP_NULL) {
1545 pv_e = pv_h->next;
1546 if (pv_e != PV_ENTRY_NULL) {
1547 *pv_h = *pv_e;
1548 PV_FREE(pv_e);
1549 }
1550 }
1551 }
1552
1553 PMAP_WRITE_UNLOCK(spl);
1554}
1555
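/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * The two interesting cases of pmap_page_protect() above: downgrading
 * every mapping of a physical page to read-only, and removing all
 * mappings outright (which is also what pmap_disconnect() below does
 * before collecting the ref/mod state).
 */
#if 0	/* sketch only */
static void
page_protect_example(ppnum_t pn)
{
	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect everywhere */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove every mapping */
}
#endif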
91447636
A
1556/*
1557 * Routine:
1558 * pmap_disconnect
1559 *
1560 * Function:
1561 * Disconnect all mappings for this page and return reference and change status
1562 * in generic format.
1563 *
1564 */
1565unsigned int pmap_disconnect(
1566 ppnum_t pa)
1567{
1568 pmap_page_protect(pa, 0); /* disconnect the page */
1569 return (pmap_get_refmod(pa)); /* return ref/chg status */
1570}
1571
1c79356b
A
1572/*
1573 * Set the physical protection on the
1574 * specified range of this map as requested.
1575 * Will not increase permissions.
1576 */
1577void
1578pmap_protect(
1579 pmap_t map,
1580 vm_offset_t s,
1581 vm_offset_t e,
1582 vm_prot_t prot)
1583{
1584 register pt_entry_t *pde;
1585 register pt_entry_t *spte, *epte;
1586 vm_offset_t l;
1587 spl_t spl;
91447636 1588 vm_offset_t orig_s = s;
1c79356b
A
1589
1590
1591 if (map == PMAP_NULL)
1592 return;
1593
1594 /*
1595 * Determine the new protection.
1596 */
1597 switch (prot) {
1598 case VM_PROT_READ:
1599 case VM_PROT_READ|VM_PROT_EXECUTE:
1600 break;
1601 case VM_PROT_READ|VM_PROT_WRITE:
1602 case VM_PROT_ALL:
1603 return; /* nothing to do */
1604 default:
55e303ae 1605 pmap_remove(map, (addr64_t)s, (addr64_t)e);
1c79356b
A
1606 return;
1607 }
1608
1c79356b
A
1609 SPLVM(spl);
1610 simple_lock(&map->lock);
1611
1c79356b
A
1612 pde = pmap_pde(map, s);
1613 while (s < e) {
1614 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1615 if (l > e)
1616 l = e;
1617 if (*pde & INTEL_PTE_VALID) {
91447636 1618 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1c79356b
A
1619 spte = &spte[ptenum(s)];
1620 epte = &spte[intel_btop(l-s)];
1621
1622 while (spte < epte) {
1623 if (*spte & INTEL_PTE_VALID)
1624 *spte &= ~INTEL_PTE_WRITE;
1625 spte++;
1626 }
1627 }
1628 s = l;
1629 pde++;
1630 }
1631
91447636
A
1632 PMAP_UPDATE_TLBS(map, orig_s, e);
1633
1c79356b
A
1634 simple_unlock(&map->lock);
1635 SPLX(spl);
1636}
1637
1638
1639
1640/*
1641 * Insert the given physical page (p) at
1642 * the specified virtual address (v) in the
1643 * target physical map with the protection requested.
1644 *
1645 * If specified, the page will be wired down, meaning
1646 * that the related pte cannot be reclaimed.
1647 *
1648 * NB: This is the only routine which MAY NOT lazy-evaluate
1649 * or lose information. That is, this routine must actually
1650 * insert this page into the given map NOW.
1651 */
1652void
1653pmap_enter(
1654 register pmap_t pmap,
1655 vm_offset_t v,
55e303ae 1656 ppnum_t pn,
1c79356b 1657 vm_prot_t prot,
9bccf70c 1658 unsigned int flags,
1c79356b
A
1659 boolean_t wired)
1660{
1661 register pt_entry_t *pte;
1662 register pv_entry_t pv_h;
91447636 1663 register int pai;
1c79356b
A
1664 pv_entry_t pv_e;
1665 pt_entry_t template;
1666 spl_t spl;
91447636
A
1667 pmap_paddr_t old_pa;
1668 pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn);
1c79356b
A
1669
1670 XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
91447636 1671 current_thread(),
1c79356b 1672 current_thread(),
55e303ae 1673 pmap, v, pn);
1c79356b 1674
55e303ae 1675 assert(pn != vm_page_fictitious_addr);
1c79356b 1676 if (pmap_debug)
55e303ae 1677 printf("pmap(%x, %x)\n", v, pn);
1c79356b
A
1678 if (pmap == PMAP_NULL)
1679 return;
1680
1c79356b
A
1681 /*
1682 * Must allocate a new pvlist entry while we're unlocked;
1683 * zalloc may cause pageout (which will lock the pmap system).
1684 * If we determine we need a pvlist entry, we will unlock
 1685 * and allocate one. Then we will retry, throwing away
1686 * the allocated entry later (if we no longer need it).
1687 */
1688 pv_e = PV_ENTRY_NULL;
91447636 1689
1c79356b
A
1690 PMAP_READ_LOCK(pmap, spl);
1691
1692 /*
1693 * Expand pmap to include this pte. Assume that
1694 * pmap is always expanded to include enough hardware
1695 * pages to map one VM page.
1696 */
1697
1698 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1699 /*
1700 * Must unlock to expand the pmap.
1701 */
1702 PMAP_READ_UNLOCK(pmap, spl);
1703
1704 pmap_expand(pmap, v);
1705
1706 PMAP_READ_LOCK(pmap, spl);
1707 }
1708 /*
1709 * Special case if the physical page is already mapped
1710 * at this address.
1711 */
1712 old_pa = pte_to_pa(*pte);
1713 if (old_pa == pa) {
1714 /*
1715 * May be changing its wired attribute or protection
1716 */
55e303ae 1717
1c79356b 1718 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae
A
1719
1720 if(flags & VM_MEM_NOT_CACHEABLE) {
1721 if(!(flags & VM_MEM_GUARDED))
1722 template |= INTEL_PTE_PTA;
1723 template |= INTEL_PTE_NCACHE;
1724 }
1725
1c79356b
A
1726 if (pmap != kernel_pmap)
1727 template |= INTEL_PTE_USER;
1728 if (prot & VM_PROT_WRITE)
1729 template |= INTEL_PTE_WRITE;
1730 if (wired) {
1731 template |= INTEL_PTE_WIRED;
1732 if (!iswired(*pte))
1733 pmap->stats.wired_count++;
1734 }
1735 else {
1736 if (iswired(*pte)) {
1737 assert(pmap->stats.wired_count >= 1);
1738 pmap->stats.wired_count--;
1739 }
1740 }
1741
1c79356b
A
1742 if (*pte & INTEL_PTE_MOD)
1743 template |= INTEL_PTE_MOD;
1744 WRITE_PTE(pte, template)
91447636 1745 pte++;
1c79356b
A
1746
1747 goto Done;
1748 }
1749
1750 /*
1751 * Outline of code from here:
1752 * 1) If va was mapped, update TLBs, remove the mapping
1753 * and remove old pvlist entry.
1754 * 2) Add pvlist entry for new mapping
1755 * 3) Enter new mapping.
1756 *
1757 * SHARING_FAULTS complicates this slightly in that it cannot
1758 * replace the mapping, but must remove it (because adding the
1759 * pvlist entry for the new mapping may remove others), and
1760 * hence always enters the new mapping at step 3)
1761 *
1762 * If the old physical page is not managed step 1) is skipped
1763 * (except for updating the TLBs), and the mapping is
1764 * overwritten at step 3). If the new physical page is not
1765 * managed, step 2) is skipped.
1766 */
1767
91447636 1768 if (old_pa != (pmap_paddr_t) 0) {
1c79356b 1769
1c79356b
A
1770
1771#if DEBUG_PTE_PAGE
1772 if (pmap != kernel_pmap)
1773 ptep_check(get_pte_page(pte));
1774#endif /* DEBUG_PTE_PAGE */
1775
1776 /*
1777 * Don't do anything to pages outside valid memory here.
1778 * Instead convince the code that enters a new mapping
1779 * to overwrite the old one.
1780 */
1781
91447636 1782 if (valid_page(i386_btop(old_pa))) {
1c79356b
A
1783
1784 pai = pa_index(old_pa);
1785 LOCK_PVH(pai);
1786
1787 assert(pmap->stats.resident_count >= 1);
1788 pmap->stats.resident_count--;
1789 if (iswired(*pte)) {
1790 assert(pmap->stats.wired_count >= 1);
1791 pmap->stats.wired_count--;
1792 }
91447636 1793
1c79356b
A
1794 pmap_phys_attributes[pai] |=
1795 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1796 WRITE_PTE(pte, 0)
1c79356b
A
1797
1798 /*
1799 * Remove the mapping from the pvlist for
1800 * this physical page.
1801 */
1802 {
1803 register pv_entry_t prev, cur;
1804
1805 pv_h = pai_to_pvh(pai);
1806 if (pv_h->pmap == PMAP_NULL) {
1807 panic("pmap_enter: null pv_list!");
1808 }
1809 if (pv_h->va == v && pv_h->pmap == pmap) {
1810 /*
1811 * Header is the pv_entry. Copy the next one
1812 * to header and free the next one (we cannot
1813 * free the header)
1814 */
1815 cur = pv_h->next;
1816 if (cur != PV_ENTRY_NULL) {
1817 *pv_h = *cur;
1818 pv_e = cur;
1819 }
1820 else {
1821 pv_h->pmap = PMAP_NULL;
1822 }
1823 }
1824 else {
1825 cur = pv_h;
1826 do {
1827 prev = cur;
1828 if ((cur = prev->next) == PV_ENTRY_NULL) {
1829 panic("pmap_enter: mapping not in pv_list!");
1830 }
1831 } while (cur->va != v || cur->pmap != pmap);
1832 prev->next = cur->next;
1833 pv_e = cur;
1834 }
1835 }
1836 UNLOCK_PVH(pai);
1837 }
1838 else {
1839
1840 /*
1841 * old_pa is not managed. Pretend it's zero so code
1842 * at Step 3) will enter new mapping (overwriting old
1843 * one). Do removal part of accounting.
1844 */
91447636 1845 old_pa = (pmap_paddr_t) 0;
1c79356b
A
1846 assert(pmap->stats.resident_count >= 1);
1847 pmap->stats.resident_count--;
1848 if (iswired(*pte)) {
1849 assert(pmap->stats.wired_count >= 1);
1850 pmap->stats.wired_count--;
1851 }
1852 }
91447636 1853
1c79356b
A
1854 }
1855
91447636 1856 if (valid_page(i386_btop(pa))) {
1c79356b
A
1857
1858 /*
1859 * Step 2) Enter the mapping in the PV list for this
1860 * physical page.
1861 */
1862
1863 pai = pa_index(pa);
1864
1865
1866#if SHARING_FAULTS
1867RetryPvList:
1868 /*
1869 * We can return here from the sharing fault code below
1870 * in case we removed the only entry on the pv list and thus
1871 * must enter the new one in the list header.
1872 */
1873#endif /* SHARING_FAULTS */
1874 LOCK_PVH(pai);
1875 pv_h = pai_to_pvh(pai);
1876
1877 if (pv_h->pmap == PMAP_NULL) {
1878 /*
1879 * No mappings yet
1880 */
1881 pv_h->va = v;
1882 pv_h->pmap = pmap;
1883 pv_h->next = PV_ENTRY_NULL;
1884 }
1885 else {
1886#if DEBUG
1887 {
1888 /*
1889 * check that this mapping is not already there
1890 * or there is no alias for this mapping in the same map
1891 */
1892 pv_entry_t e = pv_h;
1893 while (e != PV_ENTRY_NULL) {
1894 if (e->pmap == pmap && e->va == v)
1895 panic("pmap_enter: already in pv_list");
1896 e = e->next;
1897 }
1898 }
1899#endif /* DEBUG */
1900#if SHARING_FAULTS
1901 {
1902 /*
1903 * do sharing faults.
1904 * if we find an entry on this pv list in the same address
1905 * space, remove it. we know there will not be more
1906 * than one.
1907 */
1908 pv_entry_t e = pv_h;
1909 pt_entry_t *opte;
1910
1911 while (e != PV_ENTRY_NULL) {
1912 if (e->pmap == pmap) {
1913 /*
1914 * Remove it, drop pv list lock first.
1915 */
1916 UNLOCK_PVH(pai);
1917
1918 opte = pmap_pte(pmap, e->va);
1919 assert(opte != PT_ENTRY_NULL);
1920 /*
1921 * Invalidate the translation buffer,
1922 * then remove the mapping.
1923 */
1c79356b 1924 pmap_remove_range(pmap, e->va, opte,
91447636
A
1925 opte + 1);
1926 PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
1927
1c79356b
A
1928 /*
 1929 * We could have removed the head entry,
1930 * so there could be no more entries
1931 * and so we have to use the pv head entry.
1932 * so, go back to the top and try the entry
1933 * again.
1934 */
1935 goto RetryPvList;
1936 }
1937 e = e->next;
1938 }
1939
1940 /*
1941 * check that this mapping is not already there
1942 */
1943 e = pv_h;
1944 while (e != PV_ENTRY_NULL) {
1945 if (e->pmap == pmap)
1946 panic("pmap_enter: alias in pv_list");
1947 e = e->next;
1948 }
1949 }
1950#endif /* SHARING_FAULTS */
1951#if DEBUG_ALIAS
1952 {
1953 /*
1954 * check for aliases within the same address space.
1955 */
1956 pv_entry_t e = pv_h;
1957 vm_offset_t rpc = get_rpc();
1958
1959 while (e != PV_ENTRY_NULL) {
1960 if (e->pmap == pmap) {
1961 /*
1962 * log this entry in the alias ring buffer
1963 * if it's not there already.
1964 */
1965 struct pmap_alias *pma;
1966 int ii, logit;
1967
1968 logit = TRUE;
1969 for (ii = 0; ii < pmap_alias_index; ii++) {
1970 if (pmap_aliasbuf[ii].rpc == rpc) {
1971 /* found it in the log already */
1972 logit = FALSE;
1973 break;
1974 }
1975 }
1976 if (logit) {
1977 pma = &pmap_aliasbuf[pmap_alias_index];
1978 pma->pmap = pmap;
1979 pma->va = v;
1980 pma->rpc = rpc;
1981 pma->cookie = PMAP_ALIAS_COOKIE;
1982 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
1983 panic("pmap_enter: exhausted alias log");
1984 }
1985 }
1986 e = e->next;
1987 }
1988 }
1989#endif /* DEBUG_ALIAS */
1990 /*
1991 * Add new pv_entry after header.
1992 */
1993 if (pv_e == PV_ENTRY_NULL) {
1994 PV_ALLOC(pv_e);
1995 if (pv_e == PV_ENTRY_NULL) {
91447636 1996 panic("pmap no pv_e's");
1c79356b
A
1997 }
1998 }
1999 pv_e->va = v;
2000 pv_e->pmap = pmap;
2001 pv_e->next = pv_h->next;
2002 pv_h->next = pv_e;
2003 /*
2004 * Remember that we used the pvlist entry.
2005 */
2006 pv_e = PV_ENTRY_NULL;
2007 }
2008 UNLOCK_PVH(pai);
2009 }
2010
2011 /*
2012 * Step 3) Enter and count the mapping.
2013 */
2014
2015 pmap->stats.resident_count++;
2016
2017 /*
2018 * Build a template to speed up entering -
2019 * only the pfn changes.
2020 */
2021 template = pa_to_pte(pa) | INTEL_PTE_VALID;
55e303ae
A
2022
2023 if(flags & VM_MEM_NOT_CACHEABLE) {
2024 if(!(flags & VM_MEM_GUARDED))
2025 template |= INTEL_PTE_PTA;
2026 template |= INTEL_PTE_NCACHE;
2027 }
2028
1c79356b
A
2029 if (pmap != kernel_pmap)
2030 template |= INTEL_PTE_USER;
2031 if (prot & VM_PROT_WRITE)
2032 template |= INTEL_PTE_WRITE;
2033 if (wired) {
2034 template |= INTEL_PTE_WIRED;
2035 pmap->stats.wired_count++;
2036 }
91447636
A
2037
2038 WRITE_PTE(pte, template)
2039
1c79356b 2040Done:
91447636
A
2041 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2042
1c79356b
A
2043 if (pv_e != PV_ENTRY_NULL) {
2044 PV_FREE(pv_e);
2045 }
2046
2047 PMAP_READ_UNLOCK(pmap, spl);
2048}
2049
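/*
 * Editor's note -- illustrative sketch only, not in the original source.
 * A representative call to pmap_enter() above: establish a wired,
 * uncached, writable kernel mapping for physical page `pn' at virtual
 * address `va'.  pmap_enter() expands the page tables itself if the
 * pde for `va' is not yet present.
 */
#if 0	/* sketch only */
static void
enter_example(vm_offset_t va, ppnum_t pn)
{
	pmap_enter(kernel_pmap, va, pn,
		   VM_PROT_READ | VM_PROT_WRITE,
		   VM_MEM_NOT_CACHEABLE,	/* flags */
		   TRUE);			/* wired */
}
#endif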
2050/*
2051 * Routine: pmap_change_wiring
2052 * Function: Change the wiring attribute for a map/virtual-address
2053 * pair.
2054 * In/out conditions:
2055 * The mapping must already exist in the pmap.
2056 */
2057void
2058pmap_change_wiring(
2059 register pmap_t map,
2060 vm_offset_t v,
2061 boolean_t wired)
2062{
2063 register pt_entry_t *pte;
1c79356b
A
2064 spl_t spl;
2065
55e303ae 2066#if 1
1c79356b
A
2067 /*
2068 * We must grab the pmap system lock because we may
2069 * change a pte_page queue.
2070 */
2071 PMAP_READ_LOCK(map, spl);
2072
2073 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2074 panic("pmap_change_wiring: pte missing");
2075
2076 if (wired && !iswired(*pte)) {
2077 /*
2078 * wiring down mapping
2079 */
2080 map->stats.wired_count++;
91447636 2081 *pte++ |= INTEL_PTE_WIRED;
1c79356b
A
2082 }
2083 else if (!wired && iswired(*pte)) {
2084 /*
2085 * unwiring mapping
2086 */
2087 assert(map->stats.wired_count >= 1);
2088 map->stats.wired_count--;
91447636 2089 *pte++ &= ~INTEL_PTE_WIRED;
1c79356b
A
2090 }
2091
2092 PMAP_READ_UNLOCK(map, spl);
9bccf70c
A
2093
2094#else
2095 return;
2096#endif
2097
1c79356b
A
2098}
2099
91447636 2100ppnum_t
55e303ae
A
2101pmap_find_phys(pmap_t pmap, addr64_t va)
2102{
91447636
A
2103 pt_entry_t *ptp;
2104 vm_offset_t a32;
2105 ppnum_t ppn;
2106
2107 if (value_64bit(va))
2108 panic("pmap_find_phys 64 bit value");
2109 a32 = (vm_offset_t) low32(va);
2110 ptp = pmap_pte(pmap, a32);
2111 if (PT_ENTRY_NULL == ptp) {
2112 ppn = 0;
2113 } else {
2114 ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp));
2115 }
2116 return ppn;
55e303ae
A
2117}
2118
1c79356b
A
2119/*
2120 * Routine: pmap_extract
2121 * Function:
2122 * Extract the physical page address associated
2123 * with the given map/virtual_address pair.
91447636
A
 2124 * Changed to a shim for backward compatibility; it will not
 2125 * work on 64-bit systems. Some old drivers that we cannot
 2126 * change still need this.
1c79356b
A
2127 */
2128
2129vm_offset_t
2130pmap_extract(
2131 register pmap_t pmap,
2132 vm_offset_t va)
2133{
91447636
A
2134 ppnum_t ppn;
2135 vm_offset_t vaddr;
2136
2137 vaddr = (vm_offset_t)0;
2138 ppn = pmap_find_phys(pmap, (addr64_t)va);
2139 if (ppn) {
2140 vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK);
2141 }
2142 return (vaddr);
1c79356b
A
2143}
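/*
 * Illustrative sketch (not from pmap.c): the address arithmetic pmap_extract
 * performs once pmap_find_phys has returned a page number.  Assumes 4K pages;
 * ex_ptob/ex_btop and EX_OFFMASK are stand-ins for i386_ptob/i386_btop and
 * INTEL_OFFMASK.
 */
#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_OFFMASK    ((1u << EX_PAGE_SHIFT) - 1)    /* 0xfff */

static unsigned int ex_ptob(unsigned int pn) { return pn << EX_PAGE_SHIFT; }
static unsigned int ex_btop(unsigned int pa) { return pa >> EX_PAGE_SHIFT; }

int
main(void)
{
	unsigned int va  = 0x08049abc;   /* some mapped virtual address */
	unsigned int ppn = 0x00012;      /* page number a lookup might return */

	/* Physical address = page base + offset within the page. */
	unsigned int pa = ex_ptob(ppn) | (va & EX_OFFMASK);

	printf("ppn 0x%x + offset 0x%x -> pa 0x%x (btop(pa)=0x%x)\n",
	       ppn, va & EX_OFFMASK, pa, ex_btop(pa));
	return 0;
}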
2144
91447636 2145
1c79356b
A
2146/*
2147 * Routine: pmap_expand
2148 *
2149 * Expands a pmap to be able to map the specified virtual address.
2150 *
2151 * Allocates new virtual memory for the P0 or P1 portion of the
2152 * pmap, then re-maps the physical pages that were in the old
2153 * pmap to be in the new pmap.
2154 *
2155 * Must be called with the pmap system and the pmap unlocked,
2156 * since these must be unlocked to use vm_allocate or vm_deallocate.
2157 * Thus it must be called in a loop that checks whether the map
2158 * has been expanded enough.
2159 * (We won't loop forever, since page tables aren't shrunk.)
2160 */
2161void
2162pmap_expand(
2163 register pmap_t map,
2164 register vm_offset_t v)
2165{
2166 pt_entry_t *pdp;
2167 register vm_page_t m;
91447636 2168 register pmap_paddr_t pa;
1c79356b
A
2169 register int i;
2170 spl_t spl;
55e303ae 2171 ppnum_t pn;
1c79356b 2172
91447636
A
2173 if (map == kernel_pmap) {
2174 pmap_growkernel(v);
2175 return;
2176 }
1c79356b
A
2177
2178 /*
2179 * Allocate a VM page for the level 2 page table entries.
2180 */
2181 while ((m = vm_page_grab()) == VM_PAGE_NULL)
2182 VM_PAGE_WAIT();
2183
2184 /*
91447636 2185 * put the page into the pmap's obj list so it
1c79356b
A
2186 * can be found later.
2187 */
55e303ae
A
2188 pn = m->phys_page;
2189 pa = i386_ptob(pn);
91447636
A
2190 i = pdenum(map, v);
2191 vm_object_lock(map->pm_obj);
2192 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
1c79356b
A
2193 vm_page_lock_queues();
2194 vm_page_wire(m);
2195 inuse_ptepages_count++;
91447636 2196 vm_object_unlock(map->pm_obj);
1c79356b
A
2197 vm_page_unlock_queues();
2198
2199 /*
2200 * Zero the page.
2201 */
91447636 2202 pmap_zero_page(pn);
1c79356b
A
2203
2204 PMAP_READ_LOCK(map, spl);
2205 /*
2206 * See if someone else expanded us first
2207 */
2208 if (pmap_pte(map, v) != PT_ENTRY_NULL) {
2209 PMAP_READ_UNLOCK(map, spl);
91447636 2210 vm_object_lock(map->pm_obj);
1c79356b
A
2211 vm_page_lock_queues();
2212 vm_page_free(m);
2213 inuse_ptepages_count--;
2214 vm_page_unlock_queues();
91447636 2215 vm_object_unlock(map->pm_obj);
1c79356b
A
2216 return;
2217 }
2218
2219 /*
2220 * Set the page directory entry for this page table.
2221 * If we have allocated more than one hardware page,
2222 * set several page directory entries.
2223 */
2224
91447636 2225 pdp = &map->dirbase[pdenum(map, v)];
1c79356b
A
2226 *pdp = pa_to_pte(pa)
2227 | INTEL_PTE_VALID
2228 | INTEL_PTE_USER
2229 | INTEL_PTE_WRITE;
1c79356b
A
2230
2231 PMAP_READ_UNLOCK(map, spl);
2232 return;
2233}
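/*
 * Illustrative sketch (not from pmap.c): the calling pattern the pmap_expand
 * comment describes -- expand with everything unlocked, then re-check, and
 * loop until the page table for the address exists.  ex_lookup_pte() and
 * ex_expand() are simplified stand-ins for pmap_pte()/pmap_expand(); the toy
 * pmap below is an assumption for the example.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct ex_pmap { bool l2_present; };   /* toy pmap: one level-2 table flag */

static int *
ex_lookup_pte(struct ex_pmap *p, unsigned long va)
{
	static int dummy_pte;
	(void)va;
	return p->l2_present ? &dummy_pte : NULL;   /* NULL == needs expansion */
}

static void
ex_expand(struct ex_pmap *p, unsigned long va)
{
	(void)va;
	p->l2_present = true;           /* allocate + install the page table */
}

/* Caller loop: page tables are never shrunk, so this terminates. */
static int *
ex_get_pte(struct ex_pmap *p, unsigned long va)
{
	int *pte;

	while ((pte = ex_lookup_pte(p, va)) == NULL)
		ex_expand(p, va);
	return pte;
}

int
main(void)
{
	struct ex_pmap pm = { false };

	printf("pte at %p\n", (void *)ex_get_pte(&pm, 0x1000));
	return 0;
}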
2234
2235/*
2236 * Copy the range specified by src_addr/len
2237 * from the source map to the range dst_addr/len
2238 * in the destination map.
2239 *
2240 * This routine is only advisory and need not do anything.
2241 */
2242#if 0
2243void
2244pmap_copy(
2245 pmap_t dst_pmap,
2246 pmap_t src_pmap,
2247 vm_offset_t dst_addr,
2248 vm_size_t len,
2249 vm_offset_t src_addr)
2250{
2251#ifdef lint
2252 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2253#endif /* lint */
2254}
2255#endif/* 0 */
2256
55e303ae 2257/*
91447636 2258 * pmap_sync_page_data_phys(ppnum_t pa)
55e303ae
A
2259 *
2260 * Invalidates all of the instruction cache on a physical page and
 2261 * pushes any dirty data from the data cache for the same physical page.
91447636 2262 * Not required on i386.
55e303ae 2263 */
91447636
A
2264void
2265pmap_sync_page_data_phys(__unused ppnum_t pa)
55e303ae 2266{
55e303ae
A
2267 return;
2268}
2269
91447636
A
2270/*
2271 * pmap_sync_page_attributes_phys(ppnum_t pa)
2272 *
2273 * Write back and invalidate all cachelines on a physical page.
2274 */
2275void
2276pmap_sync_page_attributes_phys(ppnum_t pa)
2277{
2278 cache_flush_page_phys(pa);
2279}
2280
1c79356b
A
2281int collect_ref;
2282int collect_unref;
2283
2284/*
2285 * Routine: pmap_collect
2286 * Function:
2287 * Garbage collects the physical map system for
2288 * pages which are no longer used.
 2289 * Success need not be guaranteed -- that is, some
 2290 * pages which are no longer referenced may be left
 2291 * in place while others are collected.
2292 * Usage:
2293 * Called by the pageout daemon when pages are scarce.
2294 */
2295void
2296pmap_collect(
2297 pmap_t p)
2298{
2299 register pt_entry_t *pdp, *ptp;
2300 pt_entry_t *eptp;
1c79356b
A
2301 int wired;
2302 spl_t spl;
2303
2304 if (p == PMAP_NULL)
2305 return;
2306
2307 if (p == kernel_pmap)
2308 return;
2309
2310 /*
2311 * Garbage collect map.
2312 */
2313 PMAP_READ_LOCK(p, spl);
1c79356b 2314
91447636
A
2315 for (pdp = (pt_entry_t *)p->dirbase;
2316 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
2317 pdp++)
1c79356b 2318 {
91447636 2319 if (*pdp & INTEL_PTE_VALID) {
1c79356b
A
2320 if(*pdp & INTEL_PTE_REF) {
2321 *pdp &= ~INTEL_PTE_REF;
2322 collect_ref++;
2323 } else {
2324 collect_unref++;
91447636
A
2325 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
2326 eptp = ptp + NPTEPG;
1c79356b
A
2327
2328 /*
2329 * If the pte page has any wired mappings, we cannot
2330 * free it.
2331 */
2332 wired = 0;
2333 {
2334 register pt_entry_t *ptep;
2335 for (ptep = ptp; ptep < eptp; ptep++) {
2336 if (iswired(*ptep)) {
2337 wired = 1;
2338 break;
2339 }
2340 }
2341 }
2342 if (!wired) {
2343 /*
2344 * Remove the virtual addresses mapped by this pte page.
2345 */
2346 pmap_remove_range(p,
91447636 2347 pdetova(pdp - (pt_entry_t *)p->dirbase),
1c79356b
A
2348 ptp,
2349 eptp);
2350
2351 /*
2352 * Invalidate the page directory pointer.
2353 */
91447636
A
2354 *pdp = 0x0;
2355
1c79356b
A
2356 PMAP_READ_UNLOCK(p, spl);
2357
2358 /*
2359 * And free the pte page itself.
2360 */
2361 {
2362 register vm_page_t m;
2363
91447636
A
2364 vm_object_lock(p->pm_obj);
2365 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1c79356b
A
2366 if (m == VM_PAGE_NULL)
2367 panic("pmap_collect: pte page not in object");
2368 vm_page_lock_queues();
2369 vm_page_free(m);
2370 inuse_ptepages_count--;
2371 vm_page_unlock_queues();
91447636 2372 vm_object_unlock(p->pm_obj);
1c79356b
A
2373 }
2374
2375 PMAP_READ_LOCK(p, spl);
2376 }
91447636
A
2377 }
2378 }
1c79356b 2379 }
91447636 2380 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
1c79356b
A
2381 PMAP_READ_UNLOCK(p, spl);
2382 return;
2383
2384}
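/*
 * Illustrative sketch (not from pmap.c): the reference-bit "second chance"
 * test pmap_collect applies to each page directory entry -- clear the
 * referenced bit on first sight, and treat an entry whose bit is still clear
 * on a later pass as a candidate for freeing its page table.  The EX_* bit
 * values and names are assumptions for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PDE_VALID 0x01u
#define EX_PDE_REF   0x20u     /* hardware sets this on access */

/* Returns true when the entry is a candidate for collection. */
static bool
ex_collect_candidate(uint32_t *pde)
{
	if (!(*pde & EX_PDE_VALID))
		return false;
	if (*pde & EX_PDE_REF) {
		*pde &= ~EX_PDE_REF;   /* give it another chance */
		return false;
	}
	return true;                   /* unreferenced since the last pass */
}

int
main(void)
{
	uint32_t pde = EX_PDE_VALID | EX_PDE_REF;

	printf("pass 1: candidate=%d\n", ex_collect_candidate(&pde)); /* 0 */
	printf("pass 2: candidate=%d\n", ex_collect_candidate(&pde)); /* 1 */
	return 0;
}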
2385
2386/*
2387 * Routine: pmap_kernel
2388 * Function:
2389 * Returns the physical map handle for the kernel.
2390 */
2391#if 0
2392pmap_t
2393pmap_kernel(void)
2394{
2395 return (kernel_pmap);
2396}
2397#endif/* 0 */
2398
1c79356b 2399void
91447636
A
2400pmap_copy_page(src, dst)
2401 ppnum_t src;
2402 ppnum_t dst;
1c79356b 2403{
91447636
A
2404 bcopy_phys((addr64_t)i386_ptob(src),
2405 (addr64_t)i386_ptob(dst),
2406 PAGE_SIZE);
1c79356b 2407}
1c79356b 2408
1c79356b
A
2409
2410/*
2411 * Routine: pmap_pageable
2412 * Function:
2413 * Make the specified pages (by pmap, offset)
2414 * pageable (or not) as requested.
2415 *
2416 * A page which is not pageable may not take
2417 * a fault; therefore, its page table entry
2418 * must remain valid for the duration.
2419 *
2420 * This routine is merely advisory; pmap_enter
2421 * will specify that these pages are to be wired
2422 * down (or not) as appropriate.
2423 */
2424void
2425pmap_pageable(
91447636
A
2426 __unused pmap_t pmap,
2427 __unused vm_offset_t start_addr,
2428 __unused vm_offset_t end_addr,
2429 __unused boolean_t pageable)
1c79356b
A
2430{
2431#ifdef lint
91447636 2432 pmap++; start_addr++; end_addr++; pageable++;
1c79356b
A
2433#endif /* lint */
2434}
2435
2436/*
2437 * Clear specified attribute bits.
2438 */
2439void
2440phys_attribute_clear(
91447636 2441 ppnum_t pn,
1c79356b
A
2442 int bits)
2443{
2444 pv_entry_t pv_h;
2445 register pv_entry_t pv_e;
2446 register pt_entry_t *pte;
2447 int pai;
2448 register pmap_t pmap;
2449 spl_t spl;
91447636 2450 pmap_paddr_t phys;
1c79356b 2451
91447636
A
2452 assert(pn != vm_page_fictitious_addr);
2453 if (!valid_page(pn)) {
1c79356b
A
2454 /*
2455 * Not a managed page.
2456 */
2457 return;
2458 }
2459
2460 /*
2461 * Lock the pmap system first, since we will be changing
2462 * several pmaps.
2463 */
2464
2465 PMAP_WRITE_LOCK(spl);
91447636 2466 phys = i386_ptob(pn);
1c79356b
A
2467 pai = pa_index(phys);
2468 pv_h = pai_to_pvh(pai);
2469
2470 /*
2471 * Walk down PV list, clearing all modify or reference bits.
2472 * We do not have to lock the pv_list because we have
2473 * the entire pmap system locked.
2474 */
2475 if (pv_h->pmap != PMAP_NULL) {
2476 /*
2477 * There are some mappings.
2478 */
2479 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2480
2481 pmap = pv_e->pmap;
2482 /*
2483 * Lock the pmap to block pmap_extract and similar routines.
2484 */
2485 simple_lock(&pmap->lock);
2486
2487 {
2488 register vm_offset_t va;
2489
2490 va = pv_e->va;
2491 pte = pmap_pte(pmap, va);
2492
2493#if 0
2494 /*
2495 * Consistency checks.
2496 */
2497 assert(*pte & INTEL_PTE_VALID);
2498 /* assert(pte_to_phys(*pte) == phys); */
2499#endif
2500
1c79356b
A
2501 /*
2502 * Clear modify or reference bits.
2503 */
91447636 2504
1c79356b 2505 *pte++ &= ~bits;
91447636 2506 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1c79356b
A
2507 }
2508 simple_unlock(&pmap->lock);
91447636 2509
1c79356b
A
2510 }
2511 }
2512
2513 pmap_phys_attributes[pai] &= ~bits;
2514
2515 PMAP_WRITE_UNLOCK(spl);
2516}
2517
2518/*
2519 * Check specified attribute bits.
2520 */
2521boolean_t
2522phys_attribute_test(
91447636 2523 ppnum_t pn,
1c79356b
A
2524 int bits)
2525{
2526 pv_entry_t pv_h;
2527 register pv_entry_t pv_e;
2528 register pt_entry_t *pte;
2529 int pai;
2530 register pmap_t pmap;
2531 spl_t spl;
91447636 2532 pmap_paddr_t phys;
1c79356b 2533
91447636
A
2534 assert(pn != vm_page_fictitious_addr);
2535 if (!valid_page(pn)) {
1c79356b
A
2536 /*
2537 * Not a managed page.
2538 */
2539 return (FALSE);
2540 }
2541
2542 /*
2543 * Lock the pmap system first, since we will be checking
2544 * several pmaps.
2545 */
2546
2547 PMAP_WRITE_LOCK(spl);
91447636 2548 phys = i386_ptob(pn);
1c79356b
A
2549 pai = pa_index(phys);
2550 pv_h = pai_to_pvh(pai);
2551
2552 if (pmap_phys_attributes[pai] & bits) {
2553 PMAP_WRITE_UNLOCK(spl);
2554 return (TRUE);
2555 }
2556
2557 /*
2558 * Walk down PV list, checking all mappings.
2559 * We do not have to lock the pv_list because we have
2560 * the entire pmap system locked.
2561 */
2562 if (pv_h->pmap != PMAP_NULL) {
2563 /*
2564 * There are some mappings.
2565 */
2566 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2567
2568 pmap = pv_e->pmap;
2569 /*
2570 * Lock the pmap to block pmap_extract and similar routines.
2571 */
2572 simple_lock(&pmap->lock);
2573
2574 {
2575 register vm_offset_t va;
2576
2577 va = pv_e->va;
2578 pte = pmap_pte(pmap, va);
2579
2580#if 0
2581 /*
2582 * Consistency checks.
2583 */
2584 assert(*pte & INTEL_PTE_VALID);
2585 /* assert(pte_to_phys(*pte) == phys); */
2586#endif
2587 }
2588
2589 /*
2590 * Check modify or reference bits.
2591 */
2592 {
1c79356b
A
2593 if (*pte++ & bits) {
2594 simple_unlock(&pmap->lock);
2595 PMAP_WRITE_UNLOCK(spl);
2596 return (TRUE);
2597 }
1c79356b
A
2598 }
2599 simple_unlock(&pmap->lock);
2600 }
2601 }
2602 PMAP_WRITE_UNLOCK(spl);
2603 return (FALSE);
2604}
2605
2606/*
2607 * Set specified attribute bits.
2608 */
2609void
2610phys_attribute_set(
91447636 2611 ppnum_t pn,
1c79356b
A
2612 int bits)
2613{
2614 int spl;
91447636 2615 pmap_paddr_t phys;
1c79356b 2616
91447636
A
2617 assert(pn != vm_page_fictitious_addr);
2618 if (!valid_page(pn)) {
1c79356b
A
2619 /*
2620 * Not a managed page.
2621 */
2622 return;
2623 }
2624
2625 /*
2626 * Lock the pmap system and set the requested bits in
2627 * the phys attributes array. Don't need to bother with
2628 * ptes because the test routine looks here first.
2629 */
91447636 2630 phys = i386_ptob(pn);
1c79356b
A
2631 PMAP_WRITE_LOCK(spl);
2632 pmap_phys_attributes[pa_index(phys)] |= bits;
2633 PMAP_WRITE_UNLOCK(spl);
2634}
2635
2636/*
2637 * Set the modify bit on the specified physical page.
2638 */
2639
2640void pmap_set_modify(
55e303ae 2641 ppnum_t pn)
1c79356b 2642{
91447636 2643 phys_attribute_set(pn, PHYS_MODIFIED);
1c79356b
A
2644}
2645
2646/*
2647 * Clear the modify bits on the specified physical page.
2648 */
2649
2650void
2651pmap_clear_modify(
55e303ae 2652 ppnum_t pn)
1c79356b 2653{
91447636 2654 phys_attribute_clear(pn, PHYS_MODIFIED);
1c79356b
A
2655}
2656
2657/*
2658 * pmap_is_modified:
2659 *
2660 * Return whether or not the specified physical page is modified
2661 * by any physical maps.
2662 */
2663
2664boolean_t
2665pmap_is_modified(
55e303ae 2666 ppnum_t pn)
1c79356b 2667{
91447636 2668 return (phys_attribute_test(pn, PHYS_MODIFIED));
1c79356b
A
2669}
2670
2671/*
2672 * pmap_clear_reference:
2673 *
2674 * Clear the reference bit on the specified physical page.
2675 */
2676
2677void
2678pmap_clear_reference(
55e303ae 2679 ppnum_t pn)
1c79356b 2680{
91447636
A
2681 phys_attribute_clear(pn, PHYS_REFERENCED);
2682}
2683
2684void
2685pmap_set_reference(ppnum_t pn)
2686{
2687 phys_attribute_set(pn, PHYS_REFERENCED);
1c79356b
A
2688}
2689
2690/*
2691 * pmap_is_referenced:
2692 *
2693 * Return whether or not the specified physical page is referenced
2694 * by any physical maps.
2695 */
2696
2697boolean_t
2698pmap_is_referenced(
55e303ae 2699 ppnum_t pn)
1c79356b 2700{
91447636
A
2701 return (phys_attribute_test(pn, PHYS_REFERENCED));
2702}
2703
2704/*
2705 * pmap_get_refmod(phys)
2706 * returns the referenced and modified bits of the specified
2707 * physical page.
2708 */
2709unsigned int
2710pmap_get_refmod(ppnum_t pa)
2711{
2712 return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0)
2713 | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0));
2714}
2715
2716/*
2717 * pmap_clear_refmod(phys, mask)
2718 * clears the referenced and modified bits as specified by the mask
2719 * of the specified physical page.
2720 */
2721void
2722pmap_clear_refmod(ppnum_t pa, unsigned int mask)
2723{
2724 unsigned int x86Mask;
2725
2726 x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
2727 | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
2728 phys_attribute_clear(pa, x86Mask);
1c79356b
A
2729}
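/*
 * Illustrative sketch (not from pmap.c): the flag-translation idiom used by
 * pmap_get_refmod/pmap_clear_refmod above -- converting between the
 * machine-independent VM_MEM_* bits and the machine-dependent PHYS_* bits.
 * The EX_* values are assumptions for the example, not the kernel's
 * definitions.
 */
#include <stdio.h>

#define EX_VM_MEM_MODIFIED   0x1u
#define EX_VM_MEM_REFERENCED 0x2u
#define EX_PHYS_MODIFIED     0x8u
#define EX_PHYS_REFERENCED   0x4u

static unsigned int
ex_vm_to_phys(unsigned int vm_mask)
{
	return ((vm_mask & EX_VM_MEM_MODIFIED)   ? EX_PHYS_MODIFIED   : 0)
	     | ((vm_mask & EX_VM_MEM_REFERENCED) ? EX_PHYS_REFERENCED : 0);
}

static unsigned int
ex_phys_to_vm(unsigned int phys_mask)
{
	return ((phys_mask & EX_PHYS_MODIFIED)   ? EX_VM_MEM_MODIFIED   : 0)
	     | ((phys_mask & EX_PHYS_REFERENCED) ? EX_VM_MEM_REFERENCED : 0);
}

int
main(void)
{
	unsigned int vm = EX_VM_MEM_MODIFIED | EX_VM_MEM_REFERENCED;

	printf("vm 0x%x -> phys 0x%x -> vm 0x%x\n",
	       vm, ex_vm_to_phys(vm), ex_phys_to_vm(ex_vm_to_phys(vm)));
	return 0;
}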
2730
2731/*
2732 * Set the modify bit on the specified range
2733 * of this map as requested.
2734 *
2735 * This optimization stands only if each time the dirty bit
2736 * in vm_page_t is tested, it is also tested in the pmap.
2737 */
2738void
2739pmap_modify_pages(
2740 pmap_t map,
2741 vm_offset_t s,
2742 vm_offset_t e)
2743{
2744 spl_t spl;
2745 register pt_entry_t *pde;
2746 register pt_entry_t *spte, *epte;
2747 vm_offset_t l;
91447636 2748 vm_offset_t orig_s = s;
1c79356b
A
2749
2750 if (map == PMAP_NULL)
2751 return;
2752
2753 PMAP_READ_LOCK(map, spl);
2754
2755 pde = pmap_pde(map, s);
2756 while (s && s < e) {
2757 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
2758 if (l > e)
2759 l = e;
2760 if (*pde & INTEL_PTE_VALID) {
91447636 2761 spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1)));
1c79356b
A
2762 if (l) {
2763 spte = &spte[ptenum(s)];
2764 epte = &spte[intel_btop(l-s)];
2765 } else {
2766 epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
2767 spte = &spte[ptenum(s)];
2768 }
2769 while (spte < epte) {
2770 if (*spte & INTEL_PTE_VALID) {
2771 *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
2772 }
2773 spte++;
2774 }
2775 }
2776 s = l;
2777 pde++;
2778 }
91447636 2779 PMAP_UPDATE_TLBS(map, orig_s, e);
1c79356b
A
2780 PMAP_READ_UNLOCK(map, spl);
2781}
2782
2783
2784void
91447636
A
2785invalidate_icache(__unused vm_offset_t addr,
2786 __unused unsigned cnt,
2787 __unused int phys)
1c79356b
A
2788{
2789 return;
2790}
2791void
91447636
A
2792flush_dcache(__unused vm_offset_t addr,
2793 __unused unsigned count,
2794 __unused int phys)
1c79356b
A
2795{
2796 return;
2797}
2798
55e303ae
A
2799/*
2800* TLB Coherence Code (TLB "shootdown" code)
2801*
2802* Threads that belong to the same task share the same address space and
2803* hence share a pmap. However, they may run on distinct cpus and thus
2804* have distinct TLBs that cache page table entries. In order to guarantee
2805* the TLBs are consistent, whenever a pmap is changed, all threads that
2806* are active in that pmap must have their TLB updated. To keep track of
2807* this information, the set of cpus that are currently using a pmap is
2808* maintained within each pmap structure (cpus_using). Pmap_activate() and
2809* pmap_deactivate() add and remove, respectively, a cpu from this set.
2810* Since the TLBs are not addressable over the bus, each processor must
2811* flush its own TLB; a processor that needs to invalidate another TLB
2812* needs to interrupt the processor that owns that TLB to signal the
2813* update.
2814*
2815* Whenever a pmap is updated, the lock on that pmap is locked, and all
2816* cpus using the pmap are signaled to invalidate. All threads that need
2817* to activate a pmap must wait for the lock to clear to await any updates
2818* in progress before using the pmap. They must ACQUIRE the lock to add
2819* their cpu to the cpus_using set. An implicit assumption made
2820* throughout the TLB code is that all kernel code that runs at or higher
2821* than splvm blocks out update interrupts, and that such code does not
2822* touch pageable pages.
2823*
2824* A shootdown interrupt serves another function besides signaling a
2825* processor to invalidate. The interrupt routine (pmap_update_interrupt)
2826* waits for both the pmap lock (and the kernel pmap lock) to clear,
2827* preventing user code from making implicit pmap updates while the
2828* sending processor is performing its update. (This could happen via a
2829* user data write reference that turns on the modify bit in the page
2830* table). It must wait for any kernel updates that may have started
2831* concurrently with a user pmap update because the IPC code
2832* changes mappings.
2833* Spinning on the VALUES of the locks is sufficient (rather than
2834* having to acquire the locks) because any updates that occur subsequent
2835* to finding the lock unlocked will be signaled via another interrupt.
2836* (This assumes the interrupt is cleared before the low level interrupt code
2837* calls pmap_update_interrupt()).
2838*
2839* The signaling processor must wait for any implicit updates in progress
2840* to terminate before continuing with its update. Thus it must wait for an
2841* acknowledgement of the interrupt from each processor for which such
2842* references could be made. For maintaining this information, a set
2843* cpus_active is used. A cpu is in this set if and only if it can
2844* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2845* this set; when all such cpus are removed, it is safe to update.
2846*
2847* Before attempting to acquire the update lock on a pmap, a cpu (A) must
2848* be at least at the priority of the interprocessor interrupt
2849* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2850* kernel update; it would spin forever in pmap_update_interrupt() trying
2851* to acquire the user pmap lock it had already acquired. Furthermore A
2852* must remove itself from cpus_active. Otherwise, another cpu holding
2853* the lock (B) could be in the process of sending an update signal to A,
2854* and thus be waiting for A to remove itself from cpus_active. If A is
2855* spinning on the lock at that priority, this will never happen and a deadlock
2856* will result.
2857*/
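/*
 * Illustrative sketch (not from pmap.c): the cpus_using/cpus_active bitmask
 * bookkeeping the comment above describes.  A bit per cpu records who uses a
 * pmap; the sender walks the set with ffs() just as signal_cpus() does,
 * clearing each bit after signalling.  ex_signal_cpu() is a stand-in for the
 * interprocessor-interrupt send.
 */
#include <stdio.h>
#include <strings.h>            /* ffs() */

typedef unsigned long ex_cpu_set;

static void
ex_signal_cpu(int cpu)
{
	printf("send TLB-flush IPI to cpu %d\n", cpu);
}

static void
ex_signal_cpus(ex_cpu_set use_list)
{
	int which_cpu;

	while ((which_cpu = ffs((int)use_list)) != 0) {
		which_cpu -= 1;                   /* ffs() is 1-origin */
		ex_signal_cpu(which_cpu);
		use_list &= ~(1ul << which_cpu);  /* done with this cpu */
	}
}

int
main(void)
{
	ex_cpu_set cpus_using = (1ul << 0) | (1ul << 2) | (1ul << 3);

	ex_signal_cpus(cpus_using);
	return 0;
}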
2858
2859/*
2860 * Signal another CPU that it must flush its TLB
2861 */
2862void
2863signal_cpus(
2864 cpu_set use_list,
2865 pmap_t pmap,
91447636
A
2866 vm_offset_t start_addr,
2867 vm_offset_t end_addr)
55e303ae
A
2868{
2869 register int which_cpu, j;
2870 register pmap_update_list_t update_list_p;
2871
2872 while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
2873 which_cpu -= 1; /* convert to 0 origin */
2874
91447636 2875 update_list_p = cpu_update_list(which_cpu);
55e303ae
A
2876 simple_lock(&update_list_p->lock);
2877
2878 j = update_list_p->count;
2879 if (j >= UPDATE_LIST_SIZE) {
2880 /*
2881 * list overflowed. Change last item to
2882 * indicate overflow.
2883 */
2884 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2885 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2886 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2887 }
2888 else {
2889 update_list_p->item[j].pmap = pmap;
91447636
A
2890 update_list_p->item[j].start = start_addr;
2891 update_list_p->item[j].end = end_addr;
55e303ae
A
2892 update_list_p->count = j+1;
2893 }
91447636 2894 cpu_update_needed(which_cpu) = TRUE;
55e303ae
A
2895 simple_unlock(&update_list_p->lock);
2896
 2897 /* if it's the kernel pmap, ignore cpus_idle */
2898 if (((cpus_idle & (1 << which_cpu)) == 0) ||
91447636 2899 (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap)
55e303ae
A
2900 {
2901 i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
2902 }
2903 use_list &= ~(1 << which_cpu);
2904 }
2905}
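/*
 * Illustrative sketch (not from pmap.c): the bounded per-cpu update list used
 * by signal_cpus() above.  When the list would overflow, the last slot is
 * widened to "flush everything" so no invalidation is ever lost.  The size,
 * types, and range constants are assumptions for the example.
 */
#include <stdio.h>

#define EX_UPDATE_LIST_SIZE 4
#define EX_FLUSH_ALL_START  0x0ul
#define EX_FLUSH_ALL_END    (~0x0ul)

struct ex_update { unsigned long start, end; };

struct ex_update_list {
	int              count;
	struct ex_update item[EX_UPDATE_LIST_SIZE];
};

static void
ex_post_update(struct ex_update_list *ul, unsigned long s, unsigned long e)
{
	if (ul->count >= EX_UPDATE_LIST_SIZE) {
		/* Overflow: degrade the last entry to a full flush. */
		ul->item[EX_UPDATE_LIST_SIZE - 1].start = EX_FLUSH_ALL_START;
		ul->item[EX_UPDATE_LIST_SIZE - 1].end   = EX_FLUSH_ALL_END;
	} else {
		ul->item[ul->count].start = s;
		ul->item[ul->count].end   = e;
		ul->count++;
	}
}

int
main(void)
{
	struct ex_update_list ul = { 0 };
	unsigned long va;

	for (va = 0x1000; va <= 0x6000; va += 0x1000)
		ex_post_update(&ul, va, va + 0x1000);

	printf("entries used: %d, last range: 0x%lx-0x%lx\n",
	       ul.count, ul.item[ul.count - 1].start, ul.item[ul.count - 1].end);
	return 0;
}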
1c79356b 2906
55e303ae
A
2907void
2908process_pmap_updates(
2909 register pmap_t my_pmap)
1c79356b
A
2910{
2911 register int my_cpu;
55e303ae
A
2912 register pmap_update_list_t update_list_p;
2913 register int j;
2914 register pmap_t pmap;
1c79356b
A
2915
2916 mp_disable_preemption();
2917 my_cpu = cpu_number();
91447636 2918 update_list_p = cpu_update_list(my_cpu);
55e303ae 2919 simple_lock(&update_list_p->lock);
1c79356b 2920
55e303ae
A
2921 for (j = 0; j < update_list_p->count; j++) {
2922 pmap = update_list_p->item[j].pmap;
2923 if (pmap == my_pmap ||
2924 pmap == kernel_pmap) {
1c79356b 2925
55e303ae
A
2926 if (pmap->ref_count <= 0) {
2927 PMAP_CPU_CLR(pmap, my_cpu);
91447636
A
2928 PMAP_REAL(my_cpu) = kernel_pmap;
2929#ifdef PAE
2930 set_cr3((unsigned int)kernel_pmap->pm_ppdpt);
2931#else
2932 set_cr3((unsigned int)kernel_pmap->pdirbase);
2933#endif
55e303ae
A
2934 } else
2935 INVALIDATE_TLB(pmap,
2936 update_list_p->item[j].start,
2937 update_list_p->item[j].end);
2938 }
2939 }
2940 update_list_p->count = 0;
91447636 2941 cpu_update_needed(my_cpu) = FALSE;
55e303ae
A
2942 simple_unlock(&update_list_p->lock);
2943 mp_enable_preemption();
2944}
2945
2946/*
 2947 * Interrupt routine for TBIA (TLB invalidate-all) requested from another processor.
 2948 * This routine can also be called at any interrupt time if
2949 * the cpu was idle. Some driver interrupt routines might access
2950 * newly allocated vm. (This is the case for hd)
2951 */
2952void
2953pmap_update_interrupt(void)
2954{
2955 register int my_cpu;
2956 spl_t s;
2957 register pmap_t my_pmap;
2958
2959 mp_disable_preemption();
2960 my_cpu = cpu_number();
1c79356b
A
2961
2962 /*
55e303ae 2963 * Raise spl to splvm (above splip) to block out pmap_extract
1c79356b
A
2964 * from IO code (which would put this cpu back in the active
2965 * set).
2966 */
2967 s = splhigh();
55e303ae 2968
91447636 2969 my_pmap = PMAP_REAL(my_cpu);
1c79356b 2970
55e303ae
A
2971 if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
2972 my_pmap = kernel_pmap;
1c79356b 2973
55e303ae
A
2974 do {
2975 LOOP_VAR;
1c79356b 2976
55e303ae
A
2977 /*
2978 * Indicate that we're not using either user or kernel
2979 * pmap.
2980 */
2981 i_bit_clear(my_cpu, &cpus_active);
1c79356b 2982
55e303ae
A
2983 /*
2984 * Wait for any pmap updates in progress, on either user
2985 * or kernel pmap.
2986 */
91447636
A
2987 while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) ||
2988 *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) {
55e303ae
A
2989 LOOP_CHECK("pmap_update_interrupt", my_pmap);
2990 cpu_pause();
2991 }
1c79356b 2992
55e303ae 2993 process_pmap_updates(my_pmap);
1c79356b 2994
55e303ae 2995 i_bit_set(my_cpu, &cpus_active);
1c79356b 2996
91447636 2997 } while (cpu_update_needed(my_cpu));
1c79356b 2998
55e303ae
A
2999 splx(s);
3000 mp_enable_preemption();
3001}
1c79356b
A
3002
3003#if MACH_KDB
3004
3005/* show phys page mappings and attributes */
3006
91447636 3007extern void db_show_page(pmap_paddr_t pa);
1c79356b
A
3008
3009void
91447636 3010db_show_page(pmap_paddr_t pa)
1c79356b
A
3011{
3012 pv_entry_t pv_h;
3013 int pai;
3014 char attr;
3015
3016 pai = pa_index(pa);
3017 pv_h = pai_to_pvh(pai);
3018
3019 attr = pmap_phys_attributes[pai];
3020 printf("phys page %x ", pa);
3021 if (attr & PHYS_MODIFIED)
3022 printf("modified, ");
3023 if (attr & PHYS_REFERENCED)
3024 printf("referenced, ");
3025 if (pv_h->pmap || pv_h->next)
3026 printf(" mapped at\n");
3027 else
3028 printf(" not mapped\n");
3029 for (; pv_h; pv_h = pv_h->next)
3030 if (pv_h->pmap)
3031 printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
3032}
3033
3034#endif /* MACH_KDB */
3035
3036#if MACH_KDB
3037void db_kvtophys(vm_offset_t);
3038void db_show_vaddrs(pt_entry_t *);
3039
3040/*
3041 * print out the results of kvtophys(arg)
3042 */
3043void
3044db_kvtophys(
3045 vm_offset_t vaddr)
3046{
3047 db_printf("0x%x", kvtophys(vaddr));
3048}
3049
3050/*
 3051 * Walk the page tables.
3052 */
3053void
3054db_show_vaddrs(
3055 pt_entry_t *dirbase)
3056{
3057 pt_entry_t *ptep, *pdep, tmp;
3058 int x, y, pdecnt, ptecnt;
3059
3060 if (dirbase == 0) {
3061 dirbase = kernel_pmap->dirbase;
3062 }
3063 if (dirbase == 0) {
3064 db_printf("need a dirbase...\n");
3065 return;
3066 }
3067 dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
3068
3069 db_printf("dirbase: 0x%x\n", dirbase);
3070
3071 pdecnt = ptecnt = 0;
3072 pdep = &dirbase[0];
91447636 3073 for (y = 0; y < NPDEPG; y++, pdep++) {
1c79356b
A
3074 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
3075 continue;
3076 }
3077 pdecnt++;
3078 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
3079 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
91447636 3080 for (x = 0; x < NPTEPG; x++, ptep++) {
1c79356b
A
3081 if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
3082 continue;
3083 }
3084 ptecnt++;
3085 db_printf(" tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
3086 x,
3087 *ptep,
3088 (y << 22) | (x << 12),
3089 *ptep & ~INTEL_OFFMASK);
3090 }
3091 }
3092
3093 db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
3094
3095}
3096#endif /* MACH_KDB */
3097
3098#include <mach_vm_debug.h>
3099#if MACH_VM_DEBUG
3100#include <vm/vm_debug.h>
3101
3102int
3103pmap_list_resident_pages(
91447636
A
3104 __unused pmap_t pmap,
3105 __unused vm_offset_t *listp,
3106 __unused int space)
1c79356b
A
3107{
3108 return 0;
3109}
3110#endif /* MACH_VM_DEBUG */
3111
3112#ifdef MACH_BSD
3113/*
3114 * pmap_pagemove
3115 *
3116 * BSD support routine to reassign virtual addresses.
3117 */
3118
3119void
3120pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
3121{
3122 spl_t spl;
3123 pt_entry_t *pte, saved_pte;
55e303ae 3124
1c79356b 3125 /* Lock the kernel map */
55e303ae 3126 PMAP_READ_LOCK(kernel_pmap, spl);
1c79356b
A
3127
3128
3129 while (size > 0) {
1c79356b
A
3130 pte = pmap_pte(kernel_pmap, from);
3131 if (pte == NULL)
3132 panic("pmap_pagemove from pte NULL");
3133 saved_pte = *pte;
3134 PMAP_READ_UNLOCK(kernel_pmap, spl);
3135
55e303ae 3136 pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)),
9bccf70c 3137 VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED);
1c79356b 3138
55e303ae 3139 pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));
1c79356b
A
3140
3141 PMAP_READ_LOCK(kernel_pmap, spl);
3142 pte = pmap_pte(kernel_pmap, to);
3143 if (pte == NULL)
3144 panic("pmap_pagemove 'to' pte NULL");
3145
3146 *pte = saved_pte;
1c79356b
A
3147
3148 from += PAGE_SIZE;
3149 to += PAGE_SIZE;
3150 size -= PAGE_SIZE;
3151 }
3152
3153 /* Get the processors to update the TLBs */
55e303ae
A
3154 PMAP_UPDATE_TLBS(kernel_pmap, from, from+size);
3155 PMAP_UPDATE_TLBS(kernel_pmap, to, to+size);
3156
3157 PMAP_READ_UNLOCK(kernel_pmap, spl);
1c79356b
A
3158
3159}
91447636 3160#endif /* MACH_BSD */
1c79356b 3161
91447636
A
3162/* temporary workaround */
3163boolean_t
3164coredumpok(vm_map_t map, vm_offset_t va)
3165{
3166 pt_entry_t *ptep;
1c79356b 3167
91447636
A
3168 ptep = pmap_pte(map->pmap, va);
3169 if (0 == ptep)
3170 return FALSE;
3171 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
1c79356b
A
3172}
3173
91447636
A
3174/*
3175 * grow the number of kernel page table entries, if needed
3176 */
3177void
3178pmap_growkernel(vm_offset_t addr)
3179{
3180#if GROW_KERNEL_FUNCTION_IMPLEMENTED
3181 struct pmap *pmap;
3182 int s;
3183 vm_offset_t ptppaddr;
3184 ppnum_t ppn;
3185 vm_page_t nkpg;
3186 pd_entry_t newpdir = 0;
3187
3188 /*
3189 * Serialize.
3190 * Losers return to try again until the winner completes the work.
3191 */
3192 if (kptobj == 0) panic("growkernel 0");
3193 if (!vm_object_lock_try(kptobj)) {
3194 return;
3195 }
3196
3197 vm_page_lock_queues();
3198
3199 s = splhigh();
3200
3201 /*
3202 * If this is the first time thru, locate the end of the
3203 * kernel page table entries and set nkpt to the current
3204 * number of kernel page table pages
3205 */
3206
3207 if (kernel_vm_end == 0) {
3208 kernel_vm_end = KERNBASE;
3209 nkpt = 0;
3210
3211 while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3212 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3213 nkpt++;
3214 }
3215 }
3216
3217 /*
3218 * Now allocate and map the required number of page tables
3219 */
3220 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3221 while (kernel_vm_end < addr) {
3222 if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) {
3223 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
3224 continue; /* someone already filled this one */
3225 }
3226
3227 nkpg = vm_page_alloc(kptobj, nkpt);
3228 if (!nkpg)
3229 panic("pmap_growkernel: no memory to grow kernel");
3230
3231 nkpt++;
3232 vm_page_wire(nkpg);
3233 ppn = nkpg->phys_page;
3234 pmap_zero_page(ppn);
3235 ptppaddr = i386_ptob(ppn);
3236 newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID |
3237 INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD);
3238 pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir;
3239
3240 simple_lock(&free_pmap_lock);
3241 for (pmap = (struct pmap *)kernel_pmap->pmap_link.next;
3242 pmap != kernel_pmap ;
3243 pmap = (struct pmap *)pmap->pmap_link.next ) {
3244 *pmap_pde(pmap, kernel_vm_end) = newpdir;
3245 }
3246 simple_unlock(&free_pmap_lock);
3247 }
3248 splx(s);
3249 vm_page_unlock_queues();
3250 vm_object_unlock(kptobj);
1c79356b 3251#endif
1c79356b
A
3252}
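/*
 * Illustrative sketch (not from pmap.c): the "winner does the work, losers
 * back off and retry" serialization that pmap_growkernel gets from
 * vm_object_lock_try() on kptobj.  The try-lock here is a plain flag used
 * for illustration; in the kernel it is the kptobj object lock.
 */
#include <stdbool.h>
#include <stdio.h>

static bool ex_lock_held;

static bool
ex_lock_try(void)               /* succeeds only if nobody holds the lock */
{
	if (ex_lock_held)
		return false;
	ex_lock_held = true;
	return true;
}

static void
ex_unlock(void)
{
	ex_lock_held = false;
}

static void
ex_grow(const char *who)
{
	if (!ex_lock_try()) {
		/* Loser: return; the caller re-checks and retries if needed. */
		printf("%s: someone else is growing, retry later\n", who);
		return;
	}
	printf("%s: doing the growth work\n", who);
	ex_unlock();
}

int
main(void)
{
	ex_lock_held = true;        /* simulate another cpu mid-growth */
	ex_grow("cpu1");            /* loser path */
	ex_lock_held = false;
	ex_grow("cpu0");            /* winner path */
	return 0;
}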
3253
91447636
A
3254pt_entry_t *
3255pmap_mapgetpte(vm_map_t map, vm_offset_t v)
3256{
3257 return pmap_pte(map->pmap, v);
1c79356b 3258}
1c79356b 3259
9bccf70c 3260boolean_t
91447636
A
3261phys_page_exists(
3262 ppnum_t pn)
9bccf70c 3263{
91447636
A
3264 pmap_paddr_t phys;
3265
3266 assert(pn != vm_page_fictitious_addr);
3267
3268 if (!pmap_initialized)
3269 return (TRUE);
3270 phys = (pmap_paddr_t) i386_ptob(pn);
3271 if (!pmap_valid_page(pn))
3272 return (FALSE);
3273
3274 return TRUE;
3275}
3276
3277void
3278mapping_free_prime()
3279{
3280 int i;
3281 pv_entry_t pv_e;
3282
3283 for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) {
3284 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3285 PV_FREE(pv_e);
3286 }
3287}
3288
3289void
3290mapping_adjust()
3291{
3292 pv_entry_t pv_e;
3293 int i;
3294 int spl;
3295
3296 if (mapping_adjust_call == NULL) {
3297 thread_call_setup(&mapping_adjust_call_data,
3298 (thread_call_func_t) mapping_adjust,
3299 (thread_call_param_t) NULL);
3300 mapping_adjust_call = &mapping_adjust_call_data;
3301 }
3302 /* XXX rethink best way to do locking here */
3303 if (pv_free_count < PV_LOW_WATER_MARK) {
3304 for (i = 0; i < PV_ALLOC_CHUNK; i++) {
3305 pv_e = (pv_entry_t) zalloc(pv_list_zone);
3306 SPLVM(spl);
3307 PV_FREE(pv_e);
3308 SPLX(spl);
3309 }
3310 }
3311 mappingrecurse = 0;
3312}
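/*
 * Illustrative sketch (not from pmap.c): the low-water-mark replenishment
 * that mapping_adjust() performs on the pv_entry free list -- when the free
 * count drops below a threshold, allocate a fixed chunk and push it back onto
 * the free list.  The constants, the list type, and malloc() as the allocator
 * are assumptions for the example.
 */
#include <stdio.h>
#include <stdlib.h>

#define EX_LOW_WATER_MARK 10
#define EX_ALLOC_CHUNK    25

struct ex_pv { struct ex_pv *next; };

static struct ex_pv *ex_free_list;
static int           ex_free_count;

static void
ex_pv_free(struct ex_pv *pv)
{
	pv->next = ex_free_list;
	ex_free_list = pv;
	ex_free_count++;
}

static void
ex_adjust(void)
{
	int i;

	if (ex_free_count >= EX_LOW_WATER_MARK)
		return;
	for (i = 0; i < EX_ALLOC_CHUNK; i++) {
		struct ex_pv *pv = malloc(sizeof(*pv));
		if (pv != NULL)
			ex_pv_free(pv);
	}
}

int
main(void)
{
	ex_adjust();
	printf("free entries after adjust: %d\n", ex_free_count);
	return 0;
}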
3313
3314void
3315pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
3316{
3317 int i;
3318 pt_entry_t *opte, *npte;
3319 pt_entry_t pte;
3320
3321 for (i = 0; i < cnt; i++) {
3322 opte = pmap_pte(kernel_pmap, kernel_commpage);
3323 if (0 == opte) panic("kernel_commpage");
3324 npte = pmap_pte(kernel_pmap, user_commpage);
3325 if (0 == npte) panic("user_commpage");
3326 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
3327 pte &= ~INTEL_PTE_WRITE; // ensure read only
3328 WRITE_PTE_FAST(npte, pte);
3329 kernel_commpage += INTEL_PGBYTES;
3330 user_commpage += INTEL_PGBYTES;
3331 }
3332}
3333
3334static cpu_pmap_t cpu_pmap_master;
3335static struct pmap_update_list cpu_update_list_master;
3336
3337struct cpu_pmap *
3338pmap_cpu_alloc(boolean_t is_boot_cpu)
3339{
3340 int ret;
3341 int i;
3342 cpu_pmap_t *cp;
3343 pmap_update_list_t up;
3344 vm_offset_t address;
3345 vm_map_entry_t entry;
3346
3347 if (is_boot_cpu) {
3348 cp = &cpu_pmap_master;
3349 up = &cpu_update_list_master;
3350 } else {
3351 /*
3352 * The per-cpu pmap data structure itself.
3353 */
3354 ret = kmem_alloc(kernel_map,
3355 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
3356 if (ret != KERN_SUCCESS) {
3357 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3358 return NULL;
3359 }
3360 bzero((void *)cp, sizeof(cpu_pmap_t));
3361
3362 /*
3363 * The tlb flush update list.
3364 */
3365 ret = kmem_alloc(kernel_map,
3366 (vm_offset_t *) &up, sizeof(*up));
3367 if (ret != KERN_SUCCESS) {
3368 printf("pmap_cpu_alloc() failed ret=%d\n", ret);
3369 pmap_cpu_free(cp);
3370 return NULL;
3371 }
3372
3373 /*
3374 * The temporary windows used for copy/zero - see loose_ends.c
3375 */
3376 for (i = 0; i < PMAP_NWINDOWS; i++) {
3377 ret = vm_map_find_space(kernel_map,
3378 &address, PAGE_SIZE, 0, &entry);
3379 if (ret != KERN_SUCCESS) {
3380 printf("pmap_cpu_alloc() "
3381 "vm_map_find_space ret=%d\n", ret);
3382 pmap_cpu_free(cp);
3383 return NULL;
3384 }
3385 vm_map_unlock(kernel_map);
3386
3387 cp->mapwindow[i].prv_CADDR = (caddr_t) address;
3388 cp->mapwindow[i].prv_CMAP = vtopte(address);
3389 * (int *) cp->mapwindow[i].prv_CMAP = 0;
3390
3391 kprintf("pmap_cpu_alloc() "
3392 "window=%d CADDR=0x%x CMAP=0x%x\n",
3393 i, address, vtopte(address));
3394 }
3395 }
3396
3397 /*
3398 * Set up the pmap request list
3399 */
3400 cp->update_list = up;
3401 simple_lock_init(&up->lock, 0);
3402 up->count = 0;
3403
3404 return cp;
3405}
3406
3407void
3408pmap_cpu_free(struct cpu_pmap *cp)
3409{
3410 if (cp != NULL && cp != &cpu_pmap_master) {
3411 if (cp->update_list != NULL)
3412 kfree((void *) cp->update_list,
3413 sizeof(*cp->update_list));
3414 kfree((void *) cp, sizeof(cpu_pmap_t));
3415 }
9bccf70c 3416}