[apple/xnu.git] / osfmk / x86_64 / pmap.c (xnu-1699.26.8)
b0d623f7 1/*
6d2010ae 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 * File: pmap.c
61 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * (These guys wrote the Vax version)
63 *
64 * Physical Map management code for Intel i386, i486, and i860.
65 *
66 * Manages physical address maps.
67 *
68 * In addition to hardware address maps, this
69 * module is called upon to provide software-use-only
70 * maps which may or may not be stored in the same
71 * form as hardware maps. These pseudo-maps are
72 * used to store intermediate results from copy
73 * operations to and from address spaces.
74 *
75 * Since the information managed by this module is
76 * also stored by the logical address mapping module,
77 * this module may throw away valid virtual-to-physical
78 * mappings at almost any time. However, invalidations
79 * of virtual-to-physical mappings must be done as
80 * requested.
81 *
82 * In order to cope with hardware architectures which
83 * make virtual-to-physical map invalidates expensive,
 84 * this module may delay invalidation or protection-reduction
85 * operations until such time as they are actually
86 * necessary. This module is given full information as
87 * to which processors are currently using which maps,
88 * and to when physical maps must be made correct.
89 */
90
91#include <string.h>
b0d623f7
A
92#include <mach_kdb.h>
93#include <mach_ldebug.h>
94
95#include <libkern/OSAtomic.h>
96
97#include <mach/machine/vm_types.h>
98
99#include <mach/boolean.h>
100#include <kern/thread.h>
101#include <kern/zalloc.h>
102#include <kern/queue.h>
6d2010ae 103#include <kern/mach_param.h>
b0d623f7
A
104
105#include <kern/lock.h>
106#include <kern/kalloc.h>
107#include <kern/spl.h>
108
109#include <vm/pmap.h>
110#include <vm/vm_map.h>
111#include <vm/vm_kern.h>
112#include <mach/vm_param.h>
113#include <mach/vm_prot.h>
114#include <vm/vm_object.h>
115#include <vm/vm_page.h>
116
117#include <mach/machine/vm_param.h>
118#include <machine/thread.h>
119
120#include <kern/misc_protos.h> /* prototyping */
121#include <i386/misc_protos.h>
6d2010ae 122#include <i386/i386_lowmem.h>
b0d623f7
A
123#include <x86_64/lowglobals.h>
124
125#include <i386/cpuid.h>
126#include <i386/cpu_data.h>
127#include <i386/cpu_number.h>
128#include <i386/machine_cpu.h>
129#include <i386/seg.h>
130#include <i386/serial_io.h>
131#include <i386/cpu_capabilities.h>
132#include <i386/machine_routines.h>
133#include <i386/proc_reg.h>
134#include <i386/tsc.h>
135#include <i386/pmap_internal.h>
6d2010ae 136#include <i386/pmap_pcid.h>
b0d623f7
A
137
138#if MACH_KDB
139#include <ddb/db_command.h>
140#include <ddb/db_output.h>
141#include <ddb/db_sym.h>
142#include <ddb/db_print.h>
143#endif /* MACH_KDB */
144
145#include <vm/vm_protos.h>
146
147#include <i386/mp.h>
148#include <i386/mp_desc.h>
149
150
b0d623f7
A
151#ifdef IWANTTODEBUG
152#undef DEBUG
153#define DEBUG 1
154#define POSTCODE_DELAY 1
155#include <i386/postcode.h>
156#endif /* IWANTTODEBUG */
157
6d2010ae
A
158#ifdef PMAP_DEBUG
159#define DBG(x...) kprintf("DBG: " x)
b0d623f7
A
160#else
161#define DBG(x...)
162#endif
6d2010ae
A
163/* Compile time assert to ensure adjacency/alignment of per-CPU data fields used
164 * in the trampolines for kernel/user boundary TLB coherency.
b0d623f7 165 */
6d2010ae
A
166char pmap_cpu_data_assert[(((offsetof(cpu_data_t, cpu_tlb_invalid) - offsetof(cpu_data_t, cpu_active_cr3)) == 8) && (offsetof(cpu_data_t, cpu_active_cr3) % 64 == 0)) ? 1 : -1];
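/*
 * Editorial note (not part of the original source): the declaration above is
 * the classic negative-array-size trick -- if the condition is false the
 * array size becomes -1 and compilation fails.  A minimal, generic sketch of
 * the same idiom, with a hypothetical macro name:
 */
#if 0	/* illustrative sketch only */
#define PMAP_STATIC_ASSERT(expr, tag) \
	typedef char pmap_static_assert_ ## tag[(expr) ? 1 : -1]

PMAP_STATIC_ASSERT(sizeof(pt_entry_t) == 8, pte_is_64bit);
#endif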
167boolean_t pmap_trace = FALSE;
b0d623f7 168
6d2010ae 169boolean_t no_shared_cr3 = DEBUG; /* TRUE for DEBUG by default */
b0d623f7
A
170
171int nx_enabled = 1; /* enable no-execute protection */
172int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */
173int allow_stack_exec = 0; /* No apps may execute from the stack by default */
174
 175 const boolean_t cpu_64bit = TRUE; /* But of course! */
176
b0d623f7
A
177uint64_t max_preemption_latency_tsc = 0;
178
b0d623f7
A
179pv_hashed_entry_t *pv_hash_table; /* hash lists */
180
181uint32_t npvhash = 0;
182
b0d623f7
A
183pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
184pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
185decl_simple_lock_data(,pv_hashed_free_list_lock)
186decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
187decl_simple_lock_data(,pv_hash_table_lock)
188
b0d623f7
A
189zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */
190
b0d623f7
A
191/*
192 * First and last physical addresses that we maintain any information
193 * for. Initialized to zero so that pmap operations done before
194 * pmap_init won't touch any non-existent structures.
195 */
196boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
197
198static struct vm_object kptobj_object_store;
199static struct vm_object kpml4obj_object_store;
200static struct vm_object kpdptobj_object_store;
201
202/*
6d2010ae 203 * Array of physical page attributes for managed pages.
b0d623f7
A
204 * One byte per physical page.
205 */
206char *pmap_phys_attributes;
207unsigned int last_managed_page = 0;
6d2010ae
A
208
209/*
210 * Amount of virtual memory mapped by one
211 * page-directory entry.
212 */
213
b0d623f7
A
214uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
215
b0d623f7
A
216unsigned pmap_memory_region_count;
217unsigned pmap_memory_region_current;
218
219pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
220
221/*
222 * Other useful macros.
223 */
224#define current_pmap() (vm_map_pmap(current_thread()->map))
225
226struct pmap kernel_pmap_store;
227pmap_t kernel_pmap;
228
229pd_entry_t high_shared_pde;
230pd_entry_t commpage64_pde;
231
232struct zone *pmap_zone; /* zone of pmap structures */
233
6d2010ae
A
234struct zone *pmap_anchor_zone;
235int pmap_debug = 0; /* flag for debugging prints */
236
b0d623f7 237unsigned int inuse_ptepages_count = 0;
6d2010ae
A
238long long alloc_ptepages_count __attribute__((aligned(8))) = 0; /* aligned for atomic access */
239unsigned int bootstrap_wired_pages = 0;
240int pt_fake_zone_index = -1;
b0d623f7 241
6d2010ae 242extern long NMIPI_acks;
b0d623f7 243
6d2010ae
A
244boolean_t kernel_text_ps_4K = TRUE;
245boolean_t wpkernel = TRUE;
b0d623f7
A
246
247extern char end;
248
249static int nkpt;
250
251pt_entry_t *DMAP1, *DMAP2;
252caddr_t DADDR1;
253caddr_t DADDR2;
b0d623f7
A
254
255/*
6d2010ae
A
256 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
257 * properly deals with the anchor.
258 * must be called with the hash locked, does not unlock it
b0d623f7 259 */
b0d623f7 260
b0d623f7
A
261
262/*
263 * Map memory at initialization. The physical addresses being
264 * mapped are not managed and are never unmapped.
265 *
 266 * For now, VM is already on; we only need to map the
267 * specified memory.
268 */
269vm_offset_t
270pmap_map(
271 vm_offset_t virt,
272 vm_map_offset_t start_addr,
273 vm_map_offset_t end_addr,
274 vm_prot_t prot,
275 unsigned int flags)
276{
277 int ps;
278
279 ps = PAGE_SIZE;
280 while (start_addr < end_addr) {
281 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
282 (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
283 virt += ps;
284 start_addr += ps;
285 }
286 return(virt);
287}
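/*
 * Illustrative usage sketch (not part of the original source): wiring an
 * unmanaged physical range into the kernel map at early boot.  The helper
 * name and its arguments are hypothetical.
 */
#if 0
static vm_offset_t
pmap_map_example(vm_offset_t virt, vm_map_offset_t phys_start,
		 vm_map_offset_t phys_end)
{
	/* Map [phys_start, phys_end) read/write with default cacheability;
	 * the return value is the virtual address just past the range. */
	return pmap_map(virt, phys_start, phys_end,
			VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT);
}
#endif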
288
b0d623f7
A
289extern char *first_avail;
290extern vm_offset_t virtual_avail, virtual_end;
291extern pmap_paddr_t avail_start, avail_end;
292extern vm_offset_t sHIB;
293extern vm_offset_t eHIB;
294extern vm_offset_t stext;
295extern vm_offset_t etext;
296extern vm_offset_t sdata;
297
6d2010ae
A
298extern void *KPTphys;
299
b0d623f7
A
300void
301pmap_cpu_init(void)
302{
303 /*
304 * Here early in the life of a processor (from cpu_mode_init()).
6d2010ae 305 * Ensure global page feature is disabled at this point.
b0d623f7 306 */
6d2010ae 307
b0d623f7
A
308 set_cr4(get_cr4() &~ CR4_PGE);
309
310 /*
311 * Initialize the per-cpu, TLB-related fields.
312 */
313 current_cpu_datap()->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
314 current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
315 current_cpu_datap()->cpu_tlb_invalid = FALSE;
6d2010ae
A
316 current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
317 pmap_pcid_configure();
b0d623f7
A
318}
319
320
321
322/*
323 * Bootstrap the system enough to run with virtual memory.
324 * Map the kernel's code and data, and allocate the system page table.
325 * Called with mapping OFF. Page_size must already be set.
326 */
327
328void
329pmap_bootstrap(
330 __unused vm_offset_t load_start,
331 __unused boolean_t IA32e)
332{
333#if NCOPY_WINDOWS > 0
334 vm_offset_t va;
335 int i;
336#endif
b0d623f7
A
337 assert(IA32e);
338
339 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address
340 * known to VM */
341 /*
342 * The kernel's pmap is statically allocated so we don't
343 * have to use pmap_create, which is unlikely to work
344 * correctly at this part of the boot sequence.
345 */
346
347 kernel_pmap = &kernel_pmap_store;
348 kernel_pmap->ref_count = 1;
349 kernel_pmap->nx_enabled = FALSE;
350 kernel_pmap->pm_task_map = TASK_MAP_64BIT;
351 kernel_pmap->pm_obj = (vm_object_t) NULL;
352 kernel_pmap->dirbase = (pd_entry_t *)((uintptr_t)IdlePTD);
353 kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT);
354 kernel_pmap->pm_pml4 = IdlePML4;
355 kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
6d2010ae 356 pmap_pcid_initialize_kernel(kernel_pmap);
b0d623f7 357
6d2010ae 358
b0d623f7
A
359
360 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
361
362 nkpt = NKPT;
363 OSAddAtomic(NKPT, &inuse_ptepages_count);
6d2010ae
A
364 OSAddAtomic64(NKPT, &alloc_ptepages_count);
365 bootstrap_wired_pages = NKPT;
b0d623f7
A
366
367 virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail;
368 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
369
370#if NCOPY_WINDOWS > 0
371 /*
372 * Reserve some special page table entries/VA space for temporary
373 * mapping of pages.
374 */
375#define SYSMAP(c, p, v, n) \
376 v = (c)va; va += ((n)*INTEL_PGBYTES);
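/*
 * Editorial note (not in the original source): an illustrative expansion.
 *   SYSMAP(caddr_t, DMAP1, DADDR1, 1)
 * becomes
 *   DADDR1 = (caddr_t)va; va += ((1) * INTEL_PGBYTES);
 * i.e. this x86_64 variant of the macro ignores the PTE-pointer argument and
 * only carves virtual addresses out of 'va'.
 */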
377
378 va = virtual_avail;
379
380 for (i=0; i<PMAP_NWINDOWS; i++) {
381#if 1
382 kprintf("trying to do SYSMAP idx %d %p\n", i,
383 current_cpu_datap());
384 kprintf("cpu_pmap %p\n", current_cpu_datap()->cpu_pmap);
385 kprintf("mapwindow %p\n", current_cpu_datap()->cpu_pmap->mapwindow);
386 kprintf("two stuff %p %p\n",
387 (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
388 (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR));
389#endif
390 SYSMAP(caddr_t,
391 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
392 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
393 1);
394 current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP =
395 &(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP_store);
396 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
397 }
398
399 /* DMAP user for debugger */
400 SYSMAP(caddr_t, DMAP1, DADDR1, 1);
401 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */
402
403 virtual_avail = va;
404#endif
405
406 if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
407 if (0 != ((npvhash + 1) & npvhash)) {
408 kprintf("invalid hash %d, must be ((2^N)-1), "
409 "using default %d\n", npvhash, NPVHASH);
410 npvhash = NPVHASH;
411 }
412 } else {
413 npvhash = NPVHASH;
414 }
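	/*
	 * Editorial note (not in the original source): the test above accepts
	 * only values of the form (2^N)-1, the only values whose increment has
	 * no bits in common with the value itself.  For example,
	 * npvhash = 4095 (0xfff): 4096 & 4095 == 0, accepted;
	 * npvhash = 4000 (0xfa0): 4001 & 4000 != 0, rejected, NPVHASH used.
	 */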
415
b0d623f7
A
416 simple_lock_init(&kernel_pmap->lock, 0);
417 simple_lock_init(&pv_hashed_free_list_lock, 0);
418 simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
 419 simple_lock_init(&pv_hash_table_lock, 0);
420
421 pmap_cpu_init();
422
6d2010ae
A
423 if (pmap_pcid_ncpus)
424 printf("PMAP: PCID enabled\n");
425
7ddcb079 426
6d2010ae
A
427 boot_args *args = (boot_args *)PE_state.bootArgs;
428 if (args->efiMode == kBootArgsEfiMode32) {
429 printf("EFI32: kernel virtual space limited to 4GB\n");
430 virtual_end = VM_MAX_KERNEL_ADDRESS_EFI32;
431 }
b0d623f7
A
432 kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n",
433 (long)KERNEL_BASE, (long)virtual_end);
434 kprintf("Available physical space from 0x%llx to 0x%llx\n",
435 avail_start, avail_end);
436
437 /*
438 * The -no_shared_cr3 boot-arg is a debugging feature (set by default
439 * in the DEBUG kernel) to force the kernel to switch to its own map
440 * (and cr3) when control is in kernelspace. The kernel's map does not
441 * include (i.e. share) userspace so wild references will cause
442 * a panic. Only copyin and copyout are exempt from this.
443 */
444 (void) PE_parse_boot_argn("-no_shared_cr3",
445 &no_shared_cr3, sizeof (no_shared_cr3));
446 if (no_shared_cr3)
447 kprintf("Kernel not sharing user map\n");
448
449#ifdef PMAP_TRACES
450 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
451 kprintf("Kernel traces for pmap operations enabled\n");
452 }
453#endif /* PMAP_TRACES */
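	/*
	 * Editorial note (not in the original source): both debug switches
	 * above are ordinary boot-args; on a development machine they would
	 * typically be set with something like (hypothetical invocation):
	 *     nvram boot-args="-no_shared_cr3 -pmap_trace"
	 */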
454}
455
456void
457pmap_virtual_space(
458 vm_offset_t *startp,
459 vm_offset_t *endp)
460{
461 *startp = virtual_avail;
462 *endp = virtual_end;
463}
464
465/*
466 * Initialize the pmap module.
467 * Called by vm_init, to initialize any structures that the pmap
468 * system needs to map virtual memory.
469 */
470void
471pmap_init(void)
472{
473 long npages;
474 vm_offset_t addr;
060df5ea 475 vm_size_t s, vsize;
b0d623f7
A
476 vm_map_offset_t vaddr;
477 ppnum_t ppn;
478
479
480 kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store;
481 _vm_object_allocate((vm_object_size_t)NPML4PGS, &kpml4obj_object_store);
482
483 kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store;
484 _vm_object_allocate((vm_object_size_t)NPDPTPGS, &kpdptobj_object_store);
485
486 kernel_pmap->pm_obj = &kptobj_object_store;
487 _vm_object_allocate((vm_object_size_t)NPDEPGS, &kptobj_object_store);
488
489 /*
490 * Allocate memory for the pv_head_table and its lock bits,
491 * the modify bit array, and the pte_page table.
492 */
493
494 /*
 495 * Zero-bias all these arrays now, instead of basing them at avail_start,
 496 * so that we cover all memory.
497 */
498
499 npages = i386_btop(avail_end);
500 s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
501 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
502 + pv_lock_table_size(npages)
503 + pv_hash_lock_table_size((npvhash+1))
504 + npages);
505
506 s = round_page(s);
507 if (kernel_memory_allocate(kernel_map, &addr, s, 0,
508 KMA_KOBJECT | KMA_PERMANENT)
509 != KERN_SUCCESS)
510 panic("pmap_init");
511
512 memset((char *)addr, 0, s);
513
060df5ea
A
514 vaddr = addr;
515 vsize = s;
516
b0d623f7
A
517#if PV_DEBUG
518 if (0 == npvhash) panic("npvhash not initialized");
519#endif
520
521 /*
522 * Allocate the structures first to preserve word-alignment.
523 */
524 pv_head_table = (pv_rooted_entry_t) addr;
525 addr = (vm_offset_t) (pv_head_table + npages);
526
527 pv_hash_table = (pv_hashed_entry_t *)addr;
528 addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));
529
530 pv_lock_table = (char *) addr;
531 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
532
533 pv_hash_lock_table = (char *) addr;
534 addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));
535
536 pmap_phys_attributes = (char *) addr;
537
538 ppnum_t last_pn = i386_btop(avail_end);
539 unsigned int i;
540 pmap_memory_region_t *pmptr = pmap_memory_regions;
541 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
542 if (pmptr->type != kEfiConventionalMemory)
543 continue;
544 unsigned int pn;
545 for (pn = pmptr->base; pn <= pmptr->end; pn++) {
546 if (pn < last_pn) {
547 pmap_phys_attributes[pn] |= PHYS_MANAGED;
060df5ea 548
b0d623f7
A
549 if (pn > last_managed_page)
550 last_managed_page = pn;
060df5ea 551
7ddcb079 552 if (pn >= lowest_hi && pn <= highest_hi)
060df5ea 553 pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
b0d623f7
A
554 }
555 }
556 }
060df5ea
A
557 while (vsize) {
558 ppn = pmap_find_phys(kernel_pmap, vaddr);
b0d623f7 559
060df5ea
A
560 pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;
561
562 vaddr += PAGE_SIZE;
563 vsize -= PAGE_SIZE;
564 }
b0d623f7
A
565 /*
566 * Create the zone of physical maps,
567 * and of the physical-to-virtual entries.
568 */
569 s = (vm_size_t) sizeof(struct pmap);
570 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
060df5ea
A
571 zone_change(pmap_zone, Z_NOENCRYPT, TRUE);
572
6d2010ae
A
573 pmap_anchor_zone = zinit(PAGE_SIZE, task_max, PAGE_SIZE, "pagetable anchors");
574 zone_change(pmap_anchor_zone, Z_NOENCRYPT, TRUE);
575
576#if ZONE_DEBUG
577 /* The anchor is required to be page aligned. Zone debugging adds
578 * padding which may violate that requirement. Disable it
579 * to avoid assumptions.
580 */
581 zone_debug_disable(pmap_anchor_zone);
582#endif
583
b0d623f7 584 s = (vm_size_t) sizeof(struct pv_hashed_entry);
6d2010ae
A
585 pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
586 4096 * 3 /* LCM x86_64*/, "pv_list");
060df5ea 587 zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);
b0d623f7
A
588
 589 /* Create pv entries for kernel pages mapped by low-level
 590 startup code. These must exist so that we can pmap_remove(),
 591 for example, kext pages from the middle of our address space. */
592
593 vaddr = (vm_map_offset_t) VM_MIN_KERNEL_ADDRESS;
6d2010ae 594 for (ppn = VM_MIN_KERNEL_PAGE; ppn < i386_btop(avail_start); ppn++) {
b0d623f7
A
595 pv_rooted_entry_t pv_e;
596
597 pv_e = pai_to_pvh(ppn);
598 pv_e->va = vaddr;
599 vaddr += PAGE_SIZE;
600 pv_e->pmap = kernel_pmap;
601 queue_init(&pv_e->qlink);
602 }
603 pmap_initialized = TRUE;
604
b0d623f7
A
605 max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
606
607 /*
608 * Ensure the kernel's PML4 entry exists for the basement
609 * before this is shared with any user.
610 */
611 pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT);
612}
613
6d2010ae
A
614/*
615 * Called once VM is fully initialized so that we can release unused
616 * sections of low memory to the general pool.
617 * Also complete the set-up of identity-mapped sections of the kernel:
618 * 1) write-protect kernel text
619 * 2) map kernel text using large pages if possible
620 * 3) read and write-protect page zero (for K32)
621 * 4) map the global page at the appropriate virtual address.
622 *
623 * Use of large pages
624 * ------------------
625 * To effectively map and write-protect all kernel text pages, the text
626 * must be 2M-aligned at the base, and the data section above must also be
627 * 2M-aligned. That is, there's padding below and above. This is achieved
628 * through linker directives. Large pages are used only if this alignment
 629 * exists (and is not overridden by the -kernel_text_ps_4K boot-arg). The
630 * memory layout is:
631 *
632 * : :
633 * | __DATA |
634 * sdata: ================== 2Meg
635 * | |
636 * | zero-padding |
637 * | |
638 * etext: ------------------
639 * | |
640 * : :
641 * | |
642 * | __TEXT |
643 * | |
644 * : :
645 * | |
646 * stext: ================== 2Meg
647 * | |
648 * | zero-padding |
649 * | |
650 * eHIB: ------------------
651 * | __HIB |
652 * : :
653 *
654 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
655 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
656 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
657 * The now unused level-1 PTE pages are also freed.
658 */
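/*
 * Editorial note (not in the original source): "2M-aligned" below is tested
 * as (addr & I386_LPGMASK) == 0, i.e. the low 21 bits are zero.  For example,
 * 0xffffff8000600000 is 2MB-aligned (0x600000 = 3 * 0x200000), while
 * 0xffffff8000601000 is not.
 */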
659extern uint32_t pmap_reserved_ranges;
660void
661pmap_lowmem_finalize(void)
662{
663 spl_t spl;
664 int i;
665
666 /* Check the kernel is linked at the expected base address */
667 if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
668 I386_KERNEL_IMAGE_BASE_PAGE)
669 panic("pmap_lowmem_finalize() unexpected kernel base address");
670
671 /*
672 * Update wired memory statistics for early boot pages
673 */
674 PMAP_ZINFO_PALLOC(bootstrap_wired_pages * PAGE_SIZE);
675
676 /*
677 * Free all pages in pmap regions below the base:
678 * rdar://6332712
679 * We can't free all the pages to VM that EFI reports available.
680 * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
681 * There's also a size miscalculation here: pend is one page less
682 * than it should be but this is not fixed to be backwards
683 * compatible.
684 * Due to this current EFI limitation, we take only the first
685 * entry in the memory region table. However, the loop is retained
686 * (with the intended termination criteria commented out) in the
687 * hope that some day we can free all low-memory ranges.
688 */
689 for (i = 0;
690// pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
691 i < 1 && (pmap_reserved_ranges == 0);
692 i++) {
693 vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
694 vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
695// vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1);
696
697 DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
698 (void *) ml_static_ptovirt(pbase),
699 (void *) (pend - pbase), i);
700 ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
701 }
702
703 /*
704 * If text and data are both 2MB-aligned,
705 * we can map text with large-pages,
706 * unless the -kernel_text_ps_4K boot-arg overrides.
707 */
708 if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
709 kprintf("Kernel text is 2MB aligned");
710 kernel_text_ps_4K = FALSE;
711 if (PE_parse_boot_argn("-kernel_text_ps_4K",
712 &kernel_text_ps_4K,
713 sizeof (kernel_text_ps_4K)))
714 kprintf(" but will be mapped with 4K pages\n");
715 else
716 kprintf(" and will be mapped with 2M pages\n");
717 }
718
719 (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
720 if (wpkernel)
721 kprintf("Kernel text %p-%p to be write-protected\n",
722 (void *) stext, (void *) etext);
723
724 spl = splhigh();
725
726 /*
727 * Scan over text if mappings are to be changed:
728 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
 729 * - Change to large-pages if possible and not overridden.
730 */
731 if (kernel_text_ps_4K && wpkernel) {
732 vm_offset_t myva;
733 for (myva = stext; myva < etext; myva += PAGE_SIZE) {
734 pt_entry_t *ptep;
735
736 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
737 if (ptep)
738 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
739 }
740 }
741
742 if (!kernel_text_ps_4K) {
743 vm_offset_t myva;
744
745 /*
746 * Release zero-filled page padding used for 2M-alignment.
747 */
748 DBG("ml_static_mfree(%p,%p) for padding below text\n",
749 (void *) eHIB, (void *) (stext - eHIB));
750 ml_static_mfree(eHIB, stext - eHIB);
751 DBG("ml_static_mfree(%p,%p) for padding above text\n",
752 (void *) etext, (void *) (sdata - etext));
753 ml_static_mfree(etext, sdata - etext);
754
755 /*
756 * Coalesce text pages into large pages.
757 */
758 for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
759 pt_entry_t *ptep;
760 vm_offset_t pte_phys;
761 pt_entry_t *pdep;
762 pt_entry_t pde;
763
764 pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
765 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
766 DBG("myva: %p pdep: %p ptep: %p\n",
767 (void *) myva, (void *) pdep, (void *) ptep);
768 if ((*ptep & INTEL_PTE_VALID) == 0)
769 continue;
770 pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
771 pde = *pdep & PTMASK; /* page attributes from pde */
772 pde |= INTEL_PTE_PS; /* make it a 2M entry */
773 pde |= pte_phys; /* take page frame from pte */
774
775 if (wpkernel)
776 pde &= ~INTEL_PTE_RW;
777 DBG("pmap_store_pte(%p,0x%llx)\n",
778 (void *)pdep, pde);
779 pmap_store_pte(pdep, pde);
780
781 /*
782 * Free the now-unused level-1 pte.
783 * Note: ptep is a virtual address to the pte in the
784 * recursive map. We can't use this address to free
785 * the page. Instead we need to compute its address
786 * in the Idle PTEs in "low memory".
787 */
788 vm_offset_t vm_ptep = (vm_offset_t) KPTphys
789 + (pte_phys >> PTPGSHIFT);
790 DBG("ml_static_mfree(%p,0x%x) for pte\n",
791 (void *) vm_ptep, PAGE_SIZE);
792 ml_static_mfree(vm_ptep, PAGE_SIZE);
793 }
794
795 /* Change variable read by sysctl machdep.pmap */
796 pmap_kernel_text_ps = I386_LPGBYTES;
797 }
798
799 /* map lowmem global page into fixed addr */
800 pt_entry_t *pte = NULL;
801 if (0 == (pte = pmap_pte(kernel_pmap,
802 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
803 panic("lowmem pte");
804 /* make sure it is defined on page boundary */
805 assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
806 pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
807 | INTEL_PTE_REF
808 | INTEL_PTE_MOD
809 | INTEL_PTE_WIRED
810 | INTEL_PTE_VALID
811 | INTEL_PTE_RW);
812 splx(spl);
813 if (pmap_pcid_ncpus)
814 tlb_flush_global();
815 else
816 flush_tlb_raw();
817}
b0d623f7
A
818
819/*
 820 * This function is only used for debugging from the VM layer.
821 */
822boolean_t
823pmap_verify_free(
824 ppnum_t pn)
825{
826 pv_rooted_entry_t pv_h;
827 int pai;
828 boolean_t result;
829
830 assert(pn != vm_page_fictitious_addr);
831
832 if (!pmap_initialized)
833 return(TRUE);
834
835 if (pn == vm_page_guard_addr)
836 return TRUE;
837
838 pai = ppn_to_pai(pn);
839 if (!IS_MANAGED_PAGE(pai))
840 return(FALSE);
841 pv_h = pai_to_pvh(pn);
842 result = (pv_h->pmap == PMAP_NULL);
843 return(result);
844}
845
846boolean_t
847pmap_is_empty(
848 pmap_t pmap,
849 vm_map_offset_t va_start,
850 vm_map_offset_t va_end)
851{
852 vm_map_offset_t offset;
853 ppnum_t phys_page;
854
855 if (pmap == PMAP_NULL) {
856 return TRUE;
857 }
858
859 /*
860 * Check the resident page count
861 * - if it's zero, the pmap is completely empty.
862 * This short-circuit test prevents a virtual address scan which is
863 * painfully slow for 64-bit spaces.
 864 * This assumes the resident count is accurate; the debug kernel
 865 * ought to verify this, perhaps by a page-table walk.
866 */
867 if (pmap->stats.resident_count == 0)
868 return TRUE;
869
870 for (offset = va_start;
871 offset < va_end;
872 offset += PAGE_SIZE_64) {
873 phys_page = pmap_find_phys(pmap, offset);
874 if (phys_page) {
875 kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
876 "page %d at 0x%llx\n",
877 pmap, va_start, va_end, phys_page, offset);
878 return FALSE;
879 }
880 }
881
882 return TRUE;
883}
884
885
886/*
887 * Create and return a physical map.
888 *
889 * If the size specified for the map
890 * is zero, the map is an actual physical
891 * map, and may be referenced by the
892 * hardware.
893 *
894 * If the size specified is non-zero,
895 * the map will be used in software only, and
896 * is bounded by that size.
897 */
898pmap_t
899pmap_create(
900 vm_map_size_t sz,
901 boolean_t is_64bit)
902{
903 pmap_t p;
904 vm_size_t size;
905 pml4_entry_t *pml4;
906 pml4_entry_t *kpml4;
907
908 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
909 (uint32_t) (sz>>32), (uint32_t) sz, is_64bit, 0, 0);
910
911 size = (vm_size_t) sz;
912
913 /*
914 * A software use-only map doesn't even need a map.
915 */
916
917 if (size != 0) {
918 return(PMAP_NULL);
919 }
920
921 p = (pmap_t) zalloc(pmap_zone);
922 if (PMAP_NULL == p)
923 panic("pmap_create zalloc");
6d2010ae
A
924 /* Zero all fields */
925 bzero(p, sizeof(*p));
b0d623f7
A
926 /* init counts now since we'll be bumping some */
927 simple_lock_init(&p->lock, 0);
928 p->stats.resident_count = 0;
929 p->stats.resident_max = 0;
930 p->stats.wired_count = 0;
931 p->ref_count = 1;
932 p->nx_enabled = 1;
933 p->pm_shared = FALSE;
934
 935 p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
6d2010ae
A
936 if (pmap_pcid_ncpus)
937 pmap_pcid_initialize(p);
938 p->pm_pml4 = zalloc(pmap_anchor_zone);
b0d623f7 939
6d2010ae 940 pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);
b0d623f7 941
6d2010ae 942 memset((char *)p->pm_pml4, 0, PAGE_SIZE);
b0d623f7 943
6d2010ae 944 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
b0d623f7
A
945
946 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
947
948 p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS));
949 if (NULL == p->pm_obj_pml4)
950 panic("pmap_create pdpt obj");
951
952 p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS));
953 if (NULL == p->pm_obj_pdpt)
954 panic("pmap_create pdpt obj");
955
956 p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS));
957 if (NULL == p->pm_obj)
958 panic("pmap_create pte obj");
959
6d2010ae 960 /* All pmaps share the kernel's pml4 */
b0d623f7
A
961 pml4 = pmap64_pml4(p, 0ULL);
962 kpml4 = kernel_pmap->pm_pml4;
963 pml4[KERNEL_PML4_INDEX] = kpml4[KERNEL_PML4_INDEX];
964 pml4[KERNEL_KEXTS_INDEX] = kpml4[KERNEL_KEXTS_INDEX];
965 pml4[KERNEL_PHYSMAP_INDEX] = kpml4[KERNEL_PHYSMAP_INDEX];
966
 967 PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
968 p, is_64bit, 0, 0, 0);
969
970 return(p);
971}
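/*
 * Illustrative lifecycle sketch (not part of the original source): how the VM
 * layer typically pairs pmap_create() with pmap_destroy().  The helper name
 * is hypothetical.
 */
#if 0
static void
pmap_lifecycle_example(void)
{
	pmap_t pm = pmap_create(0, TRUE);	/* size 0 => real, 64-bit hardware map */

	assert(pm != PMAP_NULL);		/* NULL is returned only for size != 0 */

	/* ... populate with pmap_enter(), attach to a vm_map ... */

	pmap_destroy(pm);	/* drops the reference taken by pmap_create() */
}
#endif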
972
973/*
974 * Retire the given physical map from service.
975 * Should only be called if the map contains
976 * no valid mappings.
977 */
978
979void
6d2010ae 980pmap_destroy(pmap_t p)
b0d623f7 981{
6d2010ae 982 int c;
b0d623f7
A
983
984 if (p == PMAP_NULL)
985 return;
986
987 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
988 p, 0, 0, 0, 0);
989
990 PMAP_LOCK(p);
991
992 c = --p->ref_count;
993
6d2010ae
A
994 pmap_assert((current_thread() && (current_thread()->map)) ? (current_thread()->map->pmap != p) : TRUE);
995
b0d623f7
A
996 if (c == 0) {
997 /*
998 * If some cpu is not using the physical pmap pointer that it
999 * is supposed to be (see set_dirbase), we might be using the
1000 * pmap that is being destroyed! Make sure we are
1001 * physically on the right pmap:
1002 */
1003 PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL);
ebb1b9f4
A
1004 if (pmap_pcid_ncpus)
1005 pmap_destroy_pcid_sync(p);
b0d623f7 1006 }
ebb1b9f4 1007
b0d623f7
A
1008 PMAP_UNLOCK(p);
1009
1010 if (c != 0) {
1011 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1012 p, 1, 0, 0, 0);
6d2010ae 1013 pmap_assert(p == kernel_pmap);
b0d623f7
A
1014 return; /* still in use */
1015 }
1016
1017 /*
1018 * Free the memory maps, then the
1019 * pmap structure.
1020 */
1021 int inuse_ptepages = 0;
1022
6d2010ae 1023 zfree(pmap_anchor_zone, p->pm_pml4);
b0d623f7
A
1024
1025 inuse_ptepages += p->pm_obj_pml4->resident_page_count;
1026 vm_object_deallocate(p->pm_obj_pml4);
1027
1028 inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
1029 vm_object_deallocate(p->pm_obj_pdpt);
1030
1031 inuse_ptepages += p->pm_obj->resident_page_count;
1032 vm_object_deallocate(p->pm_obj);
1033
1034 OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count);
6d2010ae 1035 PMAP_ZINFO_PFREE(inuse_ptepages * PAGE_SIZE);
b0d623f7
A
1036
1037 zfree(pmap_zone, p);
1038
1039 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
1040 0, 0, 0, 0, 0);
1041}
1042
1043/*
1044 * Add a reference to the specified pmap.
1045 */
1046
1047void
1048pmap_reference(pmap_t p)
1049{
1050 if (p != PMAP_NULL) {
1051 PMAP_LOCK(p);
1052 p->ref_count++;
 1053 PMAP_UNLOCK(p);
1054 }
1055}
1056
b0d623f7
A
1057/*
1058 * Remove phys addr if mapped in specified map
1059 *
1060 */
1061void
1062pmap_remove_some_phys(
1063 __unused pmap_t map,
1064 __unused ppnum_t pn)
1065{
1066
1067/* Implement to support working set code */
1068
1069}
1070
b0d623f7 1071
b0d623f7
A
1072/*
1073 * Set the physical protection on the
1074 * specified range of this map as requested.
1075 * Will not increase permissions.
1076 */
1077void
1078pmap_protect(
1079 pmap_t map,
1080 vm_map_offset_t sva,
1081 vm_map_offset_t eva,
1082 vm_prot_t prot)
1083{
1084 pt_entry_t *pde;
1085 pt_entry_t *spte, *epte;
1086 vm_map_offset_t lva;
1087 vm_map_offset_t orig_sva;
1088 boolean_t set_NX;
1089 int num_found = 0;
1090
1091 pmap_intr_assert();
1092
1093 if (map == PMAP_NULL)
1094 return;
1095
1096 if (prot == VM_PROT_NONE) {
1097 pmap_remove(map, sva, eva);
1098 return;
1099 }
1100 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
1101 map,
1102 (uint32_t) (sva >> 32), (uint32_t) sva,
1103 (uint32_t) (eva >> 32), (uint32_t) eva);
1104
1105 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled)
1106 set_NX = FALSE;
1107 else
1108 set_NX = TRUE;
1109
1110 PMAP_LOCK(map);
1111
1112 orig_sva = sva;
1113 while (sva < eva) {
1114 lva = (sva + pde_mapped_size) & ~(pde_mapped_size - 1);
1115 if (lva > eva)
1116 lva = eva;
1117 pde = pmap_pde(map, sva);
1118 if (pde && (*pde & INTEL_PTE_VALID)) {
1119 if (*pde & INTEL_PTE_PS) {
1120 /* superpage */
1121 spte = pde;
1122 epte = spte+1; /* excluded */
1123 } else {
1124 spte = pmap_pte(map, (sva & ~(pde_mapped_size - 1)));
1125 spte = &spte[ptenum(sva)];
1126 epte = &spte[intel_btop(lva - sva)];
1127 }
1128
1129 for (; spte < epte; spte++) {
1130 if (!(*spte & INTEL_PTE_VALID))
1131 continue;
1132
1133 if (prot & VM_PROT_WRITE)
1134 pmap_update_pte(spte, *spte,
1135 *spte | INTEL_PTE_WRITE);
1136 else
1137 pmap_update_pte(spte, *spte,
1138 *spte & ~INTEL_PTE_WRITE);
1139
1140 if (set_NX)
1141 pmap_update_pte(spte, *spte,
1142 *spte | INTEL_PTE_NX);
1143 else
1144 pmap_update_pte(spte, *spte,
1145 *spte & ~INTEL_PTE_NX);
1146
1147 num_found++;
1148 }
1149 }
1150 sva = lva;
1151 }
1152 if (num_found)
1153 PMAP_UPDATE_TLBS(map, orig_sva, eva);
1154
1155 PMAP_UNLOCK(map);
1156
1157 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
1158 0, 0, 0, 0, 0);
1159
1160}
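/*
 * Illustrative usage sketch (not part of the original source): downgrading a
 * user range to read-only.  'user_pmap', 'start' and 'end' are hypothetical.
 */
#if 0
	pmap_protect(user_pmap, start, end, VM_PROT_READ);
	/* VM_PROT_NONE would instead remove the mappings via pmap_remove(). */
#endif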
1161
1162/* Map a (possibly) autogenned block */
1163void
1164pmap_map_block(
1165 pmap_t pmap,
1166 addr64_t va,
1167 ppnum_t pa,
1168 uint32_t size,
1169 vm_prot_t prot,
1170 int attr,
1171 __unused unsigned int flags)
1172{
1173 uint32_t page;
1174 int cur_page_size;
1175
1176 if (attr & VM_MEM_SUPERPAGE)
1177 cur_page_size = SUPERPAGE_SIZE;
1178 else
1179 cur_page_size = PAGE_SIZE;
1180
1181 for (page = 0; page < size; page+=cur_page_size/PAGE_SIZE) {
1182 pmap_enter(pmap, va, pa, prot, attr, TRUE);
1183 va += cur_page_size;
1184 pa+=cur_page_size/PAGE_SIZE;
1185 }
1186}
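/*
 * Illustrative usage sketch (not part of the original source): 'va', 'pn' and
 * 'npages' are hypothetical.  Note that 'size' is expressed in 4K pages even
 * when VM_MEM_SUPERPAGE in 'attr' makes the loop above step in 2MB units.
 */
#if 0
	pmap_map_block(kernel_pmap, va, pn, npages,
		       VM_PROT_READ | VM_PROT_WRITE, 0 /* attr */, 0);
#endif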
1187
b0d623f7
A
1188
1189void
1190pmap_expand_pml4(
1191 pmap_t map,
1192 vm_map_offset_t vaddr)
1193{
1194 vm_page_t m;
1195 pmap_paddr_t pa;
1196 uint64_t i;
1197 ppnum_t pn;
1198 pml4_entry_t *pml4p;
1199
1200 DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);
1201
1202 /*
1203 * Allocate a VM page for the pml4 page
1204 */
1205 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1206 VM_PAGE_WAIT();
1207
1208 /*
1209 * put the page into the pmap's obj list so it
1210 * can be found later.
1211 */
1212 pn = m->phys_page;
1213 pa = i386_ptob(pn);
1214 i = pml4idx(map, vaddr);
1215
1216 /*
1217 * Zero the page.
1218 */
1219 pmap_zero_page(pn);
1220
1221 vm_page_lockspin_queues();
1222 vm_page_wire(m);
1223 vm_page_unlock_queues();
1224
1225 OSAddAtomic(1, &inuse_ptepages_count);
6d2010ae
A
1226 OSAddAtomic64(1, &alloc_ptepages_count);
1227 PMAP_ZINFO_PALLOC(PAGE_SIZE);
b0d623f7
A
1228
 1229 /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1230 vm_object_lock(map->pm_obj_pml4);
1231
1232 PMAP_LOCK(map);
1233 /*
1234 * See if someone else expanded us first
1235 */
1236 if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
1237 PMAP_UNLOCK(map);
1238 vm_object_unlock(map->pm_obj_pml4);
1239
1240 VM_PAGE_FREE(m);
1241
1242 OSAddAtomic(-1, &inuse_ptepages_count);
6d2010ae 1243 PMAP_ZINFO_PFREE(PAGE_SIZE);
b0d623f7
A
1244 return;
1245 }
1246
1247#if 0 /* DEBUG */
1248 if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
1249 panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1250 map, map->pm_obj_pml4, vaddr, i);
1251 }
1252#endif
1253 vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
1254 vm_object_unlock(map->pm_obj_pml4);
1255
1256 /*
1257 * Set the page directory entry for this page table.
1258 */
1259 pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
1260
1261 pmap_store_pte(pml4p, pa_to_pte(pa)
1262 | INTEL_PTE_VALID
1263 | INTEL_PTE_USER
1264 | INTEL_PTE_WRITE);
1265
1266 PMAP_UNLOCK(map);
1267
1268 return;
1269}
1270
1271void
1272pmap_expand_pdpt(
1273 pmap_t map,
1274 vm_map_offset_t vaddr)
1275{
1276 vm_page_t m;
1277 pmap_paddr_t pa;
1278 uint64_t i;
1279 ppnum_t pn;
1280 pdpt_entry_t *pdptp;
1281
1282 DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);
1283
1284 while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
1285 pmap_expand_pml4(map, vaddr);
1286 }
1287
1288 /*
1289 * Allocate a VM page for the pdpt page
1290 */
1291 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1292 VM_PAGE_WAIT();
1293
1294 /*
1295 * put the page into the pmap's obj list so it
1296 * can be found later.
1297 */
1298 pn = m->phys_page;
1299 pa = i386_ptob(pn);
1300 i = pdptidx(map, vaddr);
1301
1302 /*
1303 * Zero the page.
1304 */
1305 pmap_zero_page(pn);
1306
1307 vm_page_lockspin_queues();
1308 vm_page_wire(m);
1309 vm_page_unlock_queues();
1310
1311 OSAddAtomic(1, &inuse_ptepages_count);
6d2010ae
A
1312 OSAddAtomic64(1, &alloc_ptepages_count);
1313 PMAP_ZINFO_PALLOC(PAGE_SIZE);
b0d623f7
A
1314
 1315 /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1316 vm_object_lock(map->pm_obj_pdpt);
1317
1318 PMAP_LOCK(map);
1319 /*
1320 * See if someone else expanded us first
1321 */
1322 if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
1323 PMAP_UNLOCK(map);
1324 vm_object_unlock(map->pm_obj_pdpt);
1325
1326 VM_PAGE_FREE(m);
1327
1328 OSAddAtomic(-1, &inuse_ptepages_count);
6d2010ae 1329 PMAP_ZINFO_PFREE(PAGE_SIZE);
b0d623f7
A
1330 return;
1331 }
1332
1333#if 0 /* DEBUG */
1334 if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
1335 panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
1336 map, map->pm_obj_pdpt, vaddr, i);
1337 }
1338#endif
1339 vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
1340 vm_object_unlock(map->pm_obj_pdpt);
1341
1342 /*
1343 * Set the page directory entry for this page table.
1344 */
1345 pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
1346
1347 pmap_store_pte(pdptp, pa_to_pte(pa)
1348 | INTEL_PTE_VALID
1349 | INTEL_PTE_USER
1350 | INTEL_PTE_WRITE);
1351
1352 PMAP_UNLOCK(map);
1353
1354 return;
1355
1356}
1357
1358
1359
1360/*
1361 * Routine: pmap_expand
1362 *
1363 * Expands a pmap to be able to map the specified virtual address.
1364 *
1365 * Allocates new virtual memory for the P0 or P1 portion of the
1366 * pmap, then re-maps the physical pages that were in the old
1367 * pmap to be in the new pmap.
1368 *
1369 * Must be called with the pmap system and the pmap unlocked,
1370 * since these must be unlocked to use vm_allocate or vm_deallocate.
1371 * Thus it must be called in a loop that checks whether the map
1372 * has been expanded enough.
1373 * (We won't loop forever, since page tables aren't shrunk.)
1374 */
1375void
1376pmap_expand(
1377 pmap_t map,
1378 vm_map_offset_t vaddr)
1379{
1380 pt_entry_t *pdp;
1381 register vm_page_t m;
1382 register pmap_paddr_t pa;
1383 uint64_t i;
1384 ppnum_t pn;
1385
1386
1387 /*
1388 * For the kernel, the virtual address must be in or above the basement
 1389 * which is for kexts and is in the 512GB immediately below the kernel.
1390 * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
1391 */
1392 if (map == kernel_pmap &&
1393 !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))
1394 panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);
1395
1396
1397 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
1398 /* need room for another pde entry */
1399 pmap_expand_pdpt(map, vaddr);
1400 }
1401
1402 /*
1403 * Allocate a VM page for the pde entries.
1404 */
1405 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1406 VM_PAGE_WAIT();
1407
1408 /*
1409 * put the page into the pmap's obj list so it
1410 * can be found later.
1411 */
1412 pn = m->phys_page;
1413 pa = i386_ptob(pn);
1414 i = pdeidx(map, vaddr);
1415
1416 /*
1417 * Zero the page.
1418 */
1419 pmap_zero_page(pn);
1420
1421 vm_page_lockspin_queues();
1422 vm_page_wire(m);
1423 vm_page_unlock_queues();
1424
1425 OSAddAtomic(1, &inuse_ptepages_count);
6d2010ae
A
1426 OSAddAtomic64(1, &alloc_ptepages_count);
1427 PMAP_ZINFO_PALLOC(PAGE_SIZE);
b0d623f7
A
1428
 1429 /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
1430 vm_object_lock(map->pm_obj);
1431
1432 PMAP_LOCK(map);
1433
1434 /*
1435 * See if someone else expanded us first
1436 */
1437 if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
1438 PMAP_UNLOCK(map);
1439 vm_object_unlock(map->pm_obj);
1440
1441 VM_PAGE_FREE(m);
1442
1443 OSAddAtomic(-1, &inuse_ptepages_count);
6d2010ae 1444 PMAP_ZINFO_PFREE(PAGE_SIZE);
b0d623f7
A
1445 return;
1446 }
1447
1448#if 0 /* DEBUG */
1449 if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
1450 panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
1451 map, map->pm_obj, vaddr, i);
1452 }
1453#endif
1454 vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
1455 vm_object_unlock(map->pm_obj);
1456
1457 /*
1458 * Set the page directory entry for this page table.
1459 */
1460 pdp = pmap_pde(map, vaddr);
1461 pmap_store_pte(pdp, pa_to_pte(pa)
1462 | INTEL_PTE_VALID
1463 | INTEL_PTE_USER
1464 | INTEL_PTE_WRITE);
1465
1466 PMAP_UNLOCK(map);
1467
1468 return;
1469}
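/*
 * Illustrative sketch (not part of the original source) of the "call in a
 * loop" convention documented above pmap_expand(); callers such as
 * pmap_enter() follow this pattern.  'map' and 'vaddr' are hypothetical.
 */
#if 0
	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL)
		pmap_expand(map, vaddr);	/* may block; page tables are never shrunk */
#endif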
1470
1471/* On K64 machines with more than 32GB of memory, pmap_steal_memory
1472 * will allocate past the 1GB of pre-expanded virtual kernel area. This
1473 * function allocates all the page tables using memory from the same pool
1474 * that pmap_steal_memory uses, rather than calling vm_page_grab (which
1475 * isn't available yet). */
1476void
6d2010ae
A
1477pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
1478{
b0d623f7
A
1479 ppnum_t pn;
1480 pt_entry_t *pte;
1481
1482 PMAP_LOCK(pmap);
1483
1484 if(pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
0b4c1975 1485 if (!pmap_next_page_hi(&pn))
b0d623f7
A
1486 panic("pmap_pre_expand");
1487
1488 pmap_zero_page(pn);
1489
1490 pte = pmap64_pml4(pmap, vaddr);
1491
1492 pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
1493 | INTEL_PTE_VALID
1494 | INTEL_PTE_USER
1495 | INTEL_PTE_WRITE);
1496 }
1497
1498 if(pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
0b4c1975 1499 if (!pmap_next_page_hi(&pn))
b0d623f7
A
1500 panic("pmap_pre_expand");
1501
1502 pmap_zero_page(pn);
1503
1504 pte = pmap64_pdpt(pmap, vaddr);
1505
1506 pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
1507 | INTEL_PTE_VALID
1508 | INTEL_PTE_USER
1509 | INTEL_PTE_WRITE);
1510 }
1511
1512 if(pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
0b4c1975 1513 if (!pmap_next_page_hi(&pn))
b0d623f7
A
1514 panic("pmap_pre_expand");
1515
1516 pmap_zero_page(pn);
1517
1518 pte = pmap64_pde(pmap, vaddr);
1519
1520 pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
1521 | INTEL_PTE_VALID
1522 | INTEL_PTE_USER
1523 | INTEL_PTE_WRITE);
1524 }
1525
1526 PMAP_UNLOCK(pmap);
1527}
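/*
 * Illustrative usage sketch (not part of the original source): pre-populating
 * the kernel page-table hierarchy for a large window before vm_page_grab()
 * is usable.  'base' and 'top' are hypothetical; each call covers the 2MB of
 * virtual space mapped by one page table.
 */
#if 0
	vm_map_offset_t cur;

	for (cur = base; cur < top; cur += I386_LPGBYTES)
		pmap_pre_expand(kernel_pmap, cur);
#endif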
1528
1529/*
1530 * pmap_sync_page_data_phys(ppnum_t pa)
1531 *
1532 * Invalidates all of the instruction cache on a physical page and
1533 * pushes any dirty data from the data cache for the same physical page
1534 * Not required in i386.
1535 */
1536void
1537pmap_sync_page_data_phys(__unused ppnum_t pa)
1538{
1539 return;
1540}
1541
1542/*
1543 * pmap_sync_page_attributes_phys(ppnum_t pa)
1544 *
1545 * Write back and invalidate all cachelines on a physical page.
1546 */
1547void
1548pmap_sync_page_attributes_phys(ppnum_t pa)
1549{
1550 cache_flush_page_phys(pa);
1551}
1552
1553
1554
1555#ifdef CURRENTLY_UNUSED_AND_UNTESTED
1556
1557int collect_ref;
1558int collect_unref;
1559
1560/*
1561 * Routine: pmap_collect
1562 * Function:
1563 * Garbage collects the physical map system for
1564 * pages which are no longer used.
1565 * Success need not be guaranteed -- that is, there
1566 * may well be pages which are not referenced, but
1567 * others may be collected.
1568 * Usage:
1569 * Called by the pageout daemon when pages are scarce.
1570 */
1571void
1572pmap_collect(
1573 pmap_t p)
1574{
1575 register pt_entry_t *pdp, *ptp;
1576 pt_entry_t *eptp;
1577 int wired;
1578
1579 if (p == PMAP_NULL)
1580 return;
1581
1582 if (p == kernel_pmap)
1583 return;
1584
1585 /*
1586 * Garbage collect map.
1587 */
1588 PMAP_LOCK(p);
1589
1590 for (pdp = (pt_entry_t *)p->dirbase;
1591 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
1592 pdp++)
1593 {
1594 if (*pdp & INTEL_PTE_VALID) {
1595 if(*pdp & INTEL_PTE_REF) {
1596 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
1597 collect_ref++;
1598 } else {
1599 collect_unref++;
1600 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
1601 eptp = ptp + NPTEPG;
1602
1603 /*
1604 * If the pte page has any wired mappings, we cannot
1605 * free it.
1606 */
1607 wired = 0;
1608 {
1609 register pt_entry_t *ptep;
1610 for (ptep = ptp; ptep < eptp; ptep++) {
1611 if (iswired(*ptep)) {
1612 wired = 1;
1613 break;
1614 }
1615 }
1616 }
1617 if (!wired) {
1618 /*
1619 * Remove the virtual addresses mapped by this pte page.
1620 */
1621 pmap_remove_range(p,
1622 pdetova(pdp - (pt_entry_t *)p->dirbase),
1623 ptp,
1624 eptp);
1625
1626 /*
1627 * Invalidate the page directory pointer.
1628 */
1629 pmap_store_pte(pdp, 0x0);
1630
1631 PMAP_UNLOCK(p);
1632
1633 /*
1634 * And free the pte page itself.
1635 */
1636 {
1637 register vm_page_t m;
1638
1639 vm_object_lock(p->pm_obj);
1640
1641 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
1642 if (m == VM_PAGE_NULL)
1643 panic("pmap_collect: pte page not in object");
1644
6d2010ae
A
1645 vm_object_unlock(p->pm_obj);
1646
b0d623f7
A
1647 VM_PAGE_FREE(m);
1648
1649 OSAddAtomic(-1, &inuse_ptepages_count);
6d2010ae 1650 PMAP_ZINFO_PFREE(PAGE_SIZE);
b0d623f7
A
1651 }
1652
1653 PMAP_LOCK(p);
1654 }
1655 }
1656 }
1657 }
1658
1659 PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
1660 PMAP_UNLOCK(p);
1661 return;
1662
1663}
1664#endif
1665
1666
1667void
1668pmap_copy_page(ppnum_t src, ppnum_t dst)
1669{
1670 bcopy_phys((addr64_t)i386_ptob(src),
1671 (addr64_t)i386_ptob(dst),
1672 PAGE_SIZE);
1673}
1674
1675
1676/*
1677 * Routine: pmap_pageable
1678 * Function:
1679 * Make the specified pages (by pmap, offset)
1680 * pageable (or not) as requested.
1681 *
1682 * A page which is not pageable may not take
1683 * a fault; therefore, its page table entry
1684 * must remain valid for the duration.
1685 *
1686 * This routine is merely advisory; pmap_enter
1687 * will specify that these pages are to be wired
1688 * down (or not) as appropriate.
1689 */
1690void
1691pmap_pageable(
1692 __unused pmap_t pmap,
1693 __unused vm_map_offset_t start_addr,
1694 __unused vm_map_offset_t end_addr,
1695 __unused boolean_t pageable)
1696{
1697#ifdef lint
1698 pmap++; start_addr++; end_addr++; pageable++;
1699#endif /* lint */
1700}
1701
b0d623f7
A
1702
1703void
1704invalidate_icache(__unused vm_offset_t addr,
1705 __unused unsigned cnt,
1706 __unused int phys)
1707{
1708 return;
1709}
1710
1711void
1712flush_dcache(__unused vm_offset_t addr,
1713 __unused unsigned count,
1714 __unused int phys)
1715{
1716 return;
1717}
1718
1719#if CONFIG_DTRACE
1720/*
1721 * Constrain DTrace copyin/copyout actions
1722 */
1723extern kern_return_t dtrace_copyio_preflight(addr64_t);
1724extern kern_return_t dtrace_copyio_postflight(addr64_t);
1725
1726kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
1727{
1728 thread_t thread = current_thread();
6d2010ae 1729 uint64_t ccr3;
b0d623f7
A
1730
1731 if (current_map() == kernel_map)
1732 return KERN_FAILURE;
6d2010ae
A
1733 else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE))
1734 return KERN_FAILURE;
1735 else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3))
b0d623f7
A
1736 return KERN_FAILURE;
1737 else if (thread->machine.specFlags & CopyIOActive)
1738 return KERN_FAILURE;
1739 else
1740 return KERN_SUCCESS;
1741}
1742
1743kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
1744{
1745 return KERN_SUCCESS;
1746}
1747#endif /* CONFIG_DTRACE */
1748
1749#include <mach_vm_debug.h>
1750#if MACH_VM_DEBUG
1751#include <vm/vm_debug.h>
1752
1753int
1754pmap_list_resident_pages(
1755 __unused pmap_t pmap,
1756 __unused vm_offset_t *listp,
1757 __unused int space)
1758{
1759 return 0;
1760}
1761#endif /* MACH_VM_DEBUG */
1762
1763
1764
1765/* temporary workaround */
1766boolean_t
1767coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
1768{
1769#if 0
1770 pt_entry_t *ptep;
1771
1772 ptep = pmap_pte(map->pmap, va);
1773 if (0 == ptep)
1774 return FALSE;
1775 return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
1776#else
1777 return TRUE;
1778#endif
1779}
1780
1781
1782boolean_t
1783phys_page_exists(ppnum_t pn)
1784{
1785 assert(pn != vm_page_fictitious_addr);
1786
1787 if (!pmap_initialized)
1788 return TRUE;
1789
1790 if (pn == vm_page_guard_addr)
1791 return FALSE;
1792
1793 if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
1794 return FALSE;
1795
1796 return TRUE;
1797}
1798
6d2010ae
A
1799
1800
b0d623f7
A
1801void
1802pmap_switch(pmap_t tpmap)
1803{
1804 spl_t s;
1805
1806 s = splhigh(); /* Make sure interruptions are disabled */
1807 set_dirbase(tpmap, current_thread());
1808 splx(s);
1809}
1810
1811
1812/*
1813 * disable no-execute capability on
1814 * the specified pmap
1815 */
1816void
1817pmap_disable_NX(pmap_t pmap)
1818{
1819 pmap->nx_enabled = 0;
1820}
1821
6d2010ae
A
1822void
1823pt_fake_zone_init(int zone_index)
1824{
1825 pt_fake_zone_index = zone_index;
1826}
1827
b0d623f7
A
1828void
1829pt_fake_zone_info(
1830 int *count,
1831 vm_size_t *cur_size,
1832 vm_size_t *max_size,
1833 vm_size_t *elem_size,
1834 vm_size_t *alloc_size,
6d2010ae 1835 uint64_t *sum_size,
b0d623f7 1836 int *collectable,
6d2010ae
A
1837 int *exhaustable,
1838 int *caller_acct)
b0d623f7
A
1839{
1840 *count = inuse_ptepages_count;
1841 *cur_size = PAGE_SIZE * inuse_ptepages_count;
1842 *max_size = PAGE_SIZE * (inuse_ptepages_count +
1843 vm_page_inactive_count +
1844 vm_page_active_count +
1845 vm_page_free_count);
1846 *elem_size = PAGE_SIZE;
1847 *alloc_size = PAGE_SIZE;
6d2010ae 1848 *sum_size = alloc_ptepages_count * PAGE_SIZE;
b0d623f7
A
1849
1850 *collectable = 1;
1851 *exhaustable = 0;
6d2010ae 1852 *caller_acct = 1;
b0d623f7
A
1853}
1854
1855static inline void
1856pmap_cpuset_NMIPI(cpu_set cpu_mask) {
1857 unsigned int cpu, cpu_bit;
1858 uint64_t deadline;
1859
1860 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1861 if (cpu_mask & cpu_bit)
1862 cpu_NMI_interrupt(cpu);
1863 }
1864 deadline = mach_absolute_time() + (LockTimeOut);
1865 while (mach_absolute_time() < deadline)
1866 cpu_pause();
1867}
1868
1869/*
1870 * Called with pmap locked, we:
1871 * - scan through per-cpu data to see which other cpus need to flush
1872 * - send an IPI to each non-idle cpu to be flushed
1873 * - wait for all to signal back that they are inactive or we see that
1874 * they are at a safe point (idle).
1875 * - flush the local tlb if active for this pmap
1876 * - return ... the caller will unlock the pmap
1877 */
6d2010ae 1878
b0d623f7 1879void
6d2010ae 1880pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv)
b0d623f7
A
1881{
1882 unsigned int cpu;
1883 unsigned int cpu_bit;
1884 cpu_set cpus_to_signal;
1885 unsigned int my_cpu = cpu_number();
1886 pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
1887 boolean_t flush_self = FALSE;
1888 uint64_t deadline;
6d2010ae 1889 boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
b0d623f7
A
1890
1891 assert((processor_avail_count < 2) ||
1892 (ml_get_interrupts_enabled() && get_preemption_level() != 0));
1893
1894 /*
1895 * Scan other cpus for matching active or task CR3.
1896 * For idle cpus (with no active map) we mark them invalid but
1897 * don't signal -- they'll check as they go busy.
1898 */
1899 cpus_to_signal = 0;
6d2010ae
A
1900
1901 if (pmap_pcid_ncpus) {
1902 pmap_pcid_invalidate_all_cpus(pmap);
1903 __asm__ volatile("mfence":::"memory");
1904 }
1905
b0d623f7
A
1906 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1907 if (!cpu_datap(cpu)->cpu_running)
1908 continue;
1909 uint64_t cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
1910 uint64_t cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);
1911
1912 if ((pmap_cr3 == cpu_task_cr3) ||
1913 (pmap_cr3 == cpu_active_cr3) ||
6d2010ae 1914 (pmap_is_shared)) {
b0d623f7
A
1915 if (cpu == my_cpu) {
1916 flush_self = TRUE;
1917 continue;
1918 }
6d2010ae
A
1919 if (pmap_pcid_ncpus && pmap_is_shared)
1920 cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
1921 else
1922 cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
1923 __asm__ volatile("mfence":::"memory");
b0d623f7
A
1924
1925 /*
1926 * We don't need to signal processors which will flush
1927 * lazily at the idle state or kernel boundary.
1928 * For example, if we're invalidating the kernel pmap,
1929 * processors currently in userspace don't need to flush
1930 * their TLBs until the next time they enter the kernel.
1931 * Alterations to the address space of a task active
1932 * on a remote processor result in a signal, to
1933 * account for copy operations. (There may be room
1934 * for optimization in such cases).
1935 * The order of the loads below with respect
1936 * to the store to the "cpu_tlb_invalid" field above
1937 * is important--hence the barrier.
1938 */
1939 if (CPU_CR3_IS_ACTIVE(cpu) &&
1940 (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
1941 pmap->pm_shared ||
1942 (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
1943 cpus_to_signal |= cpu_bit;
1944 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
1945 }
1946 }
1947 }
1948
6d2010ae
A
1949 PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
1950 pmap, cpus_to_signal, flush_self, startv, endv);
b0d623f7
A
1951
1952 /*
1953 * Flush local tlb if required.
1954 * Do this now to overlap with other processors responding.
1955 */
6d2010ae
A
1956 if (flush_self) {
1957 if (pmap_pcid_ncpus) {
1958 pmap_pcid_validate_cpu(pmap, my_cpu);
1959 if (pmap_is_shared)
1960 tlb_flush_global();
1961 else
1962 flush_tlb_raw();
1963 }
1964 else
1965 flush_tlb_raw();
1966 }
b0d623f7
A
1967
1968 if (cpus_to_signal) {
1969 cpu_set cpus_to_respond = cpus_to_signal;
1970
1971 deadline = mach_absolute_time() + LockTimeOut;
1972 /*
1973 * Wait for those other cpus to acknowledge
1974 */
1975 while (cpus_to_respond != 0) {
060df5ea 1976 long orig_acks = 0;
b0d623f7
A
1977
1978 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
6d2010ae
A
1979 /* Consider checking local/global invalidity
1980 * as appropriate in the PCID case.
1981 */
b0d623f7
A
1982 if ((cpus_to_respond & cpu_bit) != 0) {
1983 if (!cpu_datap(cpu)->cpu_running ||
1984 cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
1985 !CPU_CR3_IS_ACTIVE(cpu)) {
1986 cpus_to_respond &= ~cpu_bit;
1987 }
1988 cpu_pause();
1989 }
1990 if (cpus_to_respond == 0)
1991 break;
1992 }
6d2010ae 1993 if (cpus_to_respond && (mach_absolute_time() > deadline)) {
060df5ea
A
1994 if (machine_timeout_suspended())
1995 continue;
1996 pmap_tlb_flush_timeout = TRUE;
1997 orig_acks = NMIPI_acks;
1998 pmap_cpuset_NMIPI(cpus_to_respond);
1999
2000 panic("TLB invalidation IPI timeout: "
2001 "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
2002 cpus_to_respond, orig_acks, NMIPI_acks);
2003 }
b0d623f7
A
2004 }
2005 }
2006
6d2010ae
A
2007 PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
2008 pmap, cpus_to_signal, startv, endv, 0);
b0d623f7
A
2009}
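/*
 * Illustrative sketch (not part of the original source) of the locking
 * convention described in the block comment above pmap_flush_tlbs(): callers
 * mutate PTEs with the pmap locked and flush before unlocking, usually via
 * the PMAP_UPDATE_TLBS() wrapper (as pmap_protect() does above).  'map',
 * 'start_va' and 'end_va' are hypothetical.
 */
#if 0
	PMAP_LOCK(map);
	/* ... modify page-table entries covering [start_va, end_va) ... */
	PMAP_UPDATE_TLBS(map, start_va, end_va);	/* ends up in pmap_flush_tlbs() */
	PMAP_UNLOCK(map);
#endif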
2010
2011void
2012process_pmap_updates(void)
2013{
6d2010ae
A
2014 int ccpu = cpu_number();
2015 pmap_assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
2016 if (pmap_pcid_ncpus) {
2017 pmap_pcid_validate_current();
2018 if (cpu_datap(ccpu)->cpu_tlb_invalid_global) {
2019 cpu_datap(ccpu)->cpu_tlb_invalid = FALSE;
2020 tlb_flush_global();
2021 }
2022 else {
2023 cpu_datap(ccpu)->cpu_tlb_invalid_local = FALSE;
2024 flush_tlb_raw();
2025 }
2026 }
2027 else {
2028 current_cpu_datap()->cpu_tlb_invalid = FALSE;
2029 flush_tlb_raw();
2030 }
b0d623f7 2031
b0d623f7
A
2032 __asm__ volatile("mfence");
2033}
2034
2035void
2036pmap_update_interrupt(void)
2037{
2038 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
2039 0, 0, 0, 0, 0);
2040
2041 process_pmap_updates();
2042
2043 PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
2044 0, 0, 0, 0, 0);
2045}