/*
 * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#include <platforms.h>
#include <mach_kdb.h>

#include <mach/i386/vm_param.h>

#include <string.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/machine.h>
#include <mach/time_value.h>
#include <kern/spl.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/misc_protos.h>
#include <kern/cpu_data.h>
#include <kern/processor.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
#include <i386/ipl.h>
#include <i386/cpuid.h>
#include <mach/thread_status.h>
#include <pexpert/i386/efi.h>
#include <i386/i386_lowmem.h>
#include <i386/lowglobals.h>

#include <mach-o/loader.h>
#include <libkern/kernel_mach_header.h>

#if DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#define PRINT_PMAP_MEMORY_TABLE
#else
#define DBG(x...)
#endif

vm_size_t	mem_size = 0;
vm_offset_t	first_avail = 0;	/* first after page tables */

uint64_t	max_mem;	/* Size of physical memory (bytes), adjusted by maxmem */
uint64_t	mem_actual;
uint64_t	sane_size = 0;	/* Memory size to use for defaults calculations */

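/*
 * Default size, in bytes, of the reserve of physical pages kept below
 * max_valid_dma_address for devices that require low memory; it is
 * scaled up on larger memory configurations in i386_vm_init() below.
 */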
#define MAXLORESERVE	(32 * 1024 * 1024)

ppnum_t		max_ppnum = 0;
ppnum_t		lowest_lo = 0;
ppnum_t		lowest_hi = 0;
ppnum_t		highest_hi = 0;

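/*
 * Size, in bytes, of the BSD mbuf cluster reserve; the out-parameter is
 * set TRUE when that size was explicitly overridden (it is consulted
 * below when sizing the low-memory page reserve).
 */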
extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);

pmap_paddr_t	avail_start, avail_end;
vm_offset_t	virtual_avail, virtual_end;
static pmap_paddr_t	avail_remaining;
vm_offset_t	static_memory_end = 0;

vm_offset_t	sHIB, eHIB, stext, etext, sdata, edata, end;

boolean_t	kernel_text_ps_4K = TRUE;
boolean_t	wpkernel = TRUE;

extern void	*KPTphys;

/*
 * _mh_execute_header is the mach_header for the currently executing kernel
 */
void *sectTEXTB; unsigned long sectSizeTEXT;
void *sectDATAB; unsigned long sectSizeDATA;
void *sectOBJCB; unsigned long sectSizeOBJC;
void *sectLINKB; unsigned long sectSizeLINK;
void *sectPRELINKB; unsigned long sectSizePRELINK;
void *sectHIBB; unsigned long sectSizeHIB;
void *sectINITPTB; unsigned long sectSizeINITPT;

extern uint64_t firmware_Conventional_bytes;
extern uint64_t firmware_RuntimeServices_bytes;
extern uint64_t firmware_ACPIReclaim_bytes;
extern uint64_t firmware_ACPINVS_bytes;
extern uint64_t firmware_PalCode_bytes;
extern uint64_t firmware_Reserved_bytes;
extern uint64_t firmware_Unusable_bytes;
extern uint64_t firmware_other_bytes;
uint64_t firmware_MMIO_bytes;

/*
 * Basic VM initialization.
 */
void
i386_vm_init(uint64_t	maxmem,
	     boolean_t	IA32e,
	     boot_args	*args)
{
	pmap_memory_region_t *pmptr;
	pmap_memory_region_t *prev_pmptr;
	EfiMemoryRange *mptr;
	unsigned int mcount;
	unsigned int msize;
	ppnum_t fap;
	unsigned int i;
	unsigned int safeboot;
	ppnum_t maxpg = 0;
	uint32_t pmap_type;
	uint32_t maxdmaaddr;

	/*
	 * Now retrieve addresses for end, edata, and etext
	 * from Mach-O headers.
	 */

	sectTEXTB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__TEXT", &sectSizeTEXT);
	sectDATAB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__DATA", &sectSizeDATA);
	sectOBJCB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__OBJC", &sectSizeOBJC);
	sectLINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
	sectHIBB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__HIB", &sectSizeHIB);
	sectINITPTB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__INITPT", &sectSizeINITPT);
	sectPRELINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);

	sHIB  = (vm_offset_t) sectHIBB;
	eHIB  = (vm_offset_t) sectHIBB + sectSizeHIB;
	/* Zero-padded from eHIB to stext if text is 2M-aligned */
	stext = (vm_offset_t) sectTEXTB;
	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
	/* Zero-padded from etext to sdata if text is 2M-aligned */
	sdata = (vm_offset_t) sectDATAB;
	edata = (vm_offset_t) sectDATAB + sectSizeDATA;

#if DEBUG
	kprintf("sectTEXTB = %p\n", sectTEXTB);
	kprintf("sectDATAB = %p\n", sectDATAB);
	kprintf("sectOBJCB = %p\n", sectOBJCB);
	kprintf("sectLINKB = %p\n", sectLINKB);
	kprintf("sectHIBB = %p\n", sectHIBB);
	kprintf("sectPRELINKB = %p\n", sectPRELINKB);
	kprintf("eHIB = %p\n", (void *) eHIB);
	kprintf("stext = %p\n", (void *) stext);
	kprintf("etext = %p\n", (void *) etext);
	kprintf("sdata = %p\n", (void *) sdata);
	kprintf("edata = %p\n", (void *) edata);
#endif

	vm_set_page_size();

	/*
	 * Compute the memory size.
	 */

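	/*
	 * In safe-boot mode (-x), or when vm_himemory_mode is 1, clamp
	 * usable physical memory to the first 4GB: 1 << (32 - I386_PGSHIFT)
	 * is the number of 4K pages in 4GB.
	 */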
	if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
		maxpg = 1 << (32 - I386_PGSHIFT);
	}
	avail_remaining = 0;
	avail_end = 0;
	pmptr = pmap_memory_regions;
	prev_pmptr = 0;
	pmap_memory_region_count = pmap_memory_region_current = 0;
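	/* First physical page available for the VM system, as a page number */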
	fap = (ppnum_t) i386_btop(first_avail);

	mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap);
	if (args->MemoryMapDescriptorSize == 0)
		panic("Invalid memory map descriptor size");
	msize = args->MemoryMapDescriptorSize;
	mcount = args->MemoryMapSize / msize;
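	/*
	 * Note: the descriptor size reported by the firmware may be larger
	 * than sizeof(EfiMemoryRange), so the map is walked below in strides
	 * of msize bytes rather than indexed as an array.
	 */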

#define FOURGIG 0x0000000100000000ULL
#define ONEGIG  0x0000000040000000ULL

	for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
		ppnum_t base, top;
		uint64_t region_bytes = 0;

		if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
			kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
			break;
		}
		base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
		top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
		region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
		pmap_type = mptr->Type;

		switch (mptr->Type) {
		case kEfiLoaderCode:
		case kEfiLoaderData:
		case kEfiBootServicesCode:
		case kEfiBootServicesData:
		case kEfiConventionalMemory:
			/*
			 * Consolidate usable memory types into one.
			 */
			pmap_type = kEfiConventionalMemory;
			sane_size += region_bytes;
			firmware_Conventional_bytes += region_bytes;
			break;
			/*
			 * sane_size should reflect the total amount of physical
			 * RAM in the system, not just the amount that is
			 * available for the OS to use.
			 * FIXME: Consider deriving this value from SMBIOS tables
			 * rather than reverse engineering the memory map.
			 * Alternatively, see
			 * <rdar://problem/4642773> Memory map should
			 * describe all memory
			 * Firmware on some systems guarantees that the memory
			 * map is complete via the "RomReservedMemoryTracked"
			 * feature field--consult that where possible to
			 * avoid the "round up to 128M" workaround below.
			 */

		case kEfiRuntimeServicesCode:
		case kEfiRuntimeServicesData:
			firmware_RuntimeServices_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIReclaimMemory:
			firmware_ACPIReclaim_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIMemoryNVS:
			firmware_ACPINVS_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiPalCode:
			firmware_PalCode_bytes += region_bytes;
			sane_size += region_bytes;
			break;

		case kEfiReservedMemoryType:
			firmware_Reserved_bytes += region_bytes;
			break;
		case kEfiUnusableMemory:
			firmware_Unusable_bytes += region_bytes;
			break;
		case kEfiMemoryMappedIO:
		case kEfiMemoryMappedIOPortSpace:
			firmware_MMIO_bytes += region_bytes;
			break;
		default:
			firmware_other_bytes += region_bytes;
			break;
		}

		kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
			i, mptr->Type, pmap_type, base, top);

		if (maxpg) {
			if (base >= maxpg)
				break;
			top = (top > maxpg) ? maxpg : top;
		}

		/*
		 * handle each region
		 */
		if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
		    pmap_type != kEfiConventionalMemory) {
			prev_pmptr = 0;
			continue;
		} else {
			/*
			 * Usable memory region
			 */
			if (top < I386_LOWMEM_RESERVED) {
				prev_pmptr = 0;
				continue;
			}
			if (top < fap) {
				/*
				 * Entire range is below first_avail:
				 * salvage some low memory pages.
				 * We use some very low memory at startup;
				 * mark it as already allocated here.
				 */
				if (base >= I386_LOWMEM_RESERVED)
					pmptr->base = base;
				else
					pmptr->base = I386_LOWMEM_RESERVED;
				/*
				 * mark as already mapped
				 */
				pmptr->alloc = pmptr->end = top;
				pmptr->type = pmap_type;
			}
			else if ((base < fap) && (top > fap)) {
				/*
				 * Range spans first_avail:
				 * put memory below first_avail in the table
				 * but mark it as already allocated.
				 */
				pmptr->base = base;
				pmptr->alloc = pmptr->end = (fap - 1);
				pmptr->type = pmap_type;
				/*
				 * we bump these here inline so the accounting
				 * below works correctly
				 */
				pmptr++;
				pmap_memory_region_count++;
				pmptr->alloc = pmptr->base = fap;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}
			else {
				/*
				 * entire range usable
				 */
				pmptr->alloc = pmptr->base = base;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}

			if (i386_ptob(pmptr->end) > avail_end)
				avail_end = i386_ptob(pmptr->end);

			avail_remaining += (pmptr->end - pmptr->base);

			/*
			 * Consolidate contiguous memory regions, if possible
			 */
			if (prev_pmptr &&
			    pmptr->type == prev_pmptr->type &&
			    pmptr->base == pmptr->alloc &&
			    pmptr->base == (prev_pmptr->end + 1)) {
				prev_pmptr->end = pmptr->end;
			} else {
				pmap_memory_region_count++;
				prev_pmptr = pmptr;
				pmptr++;
			}
		}
	}

#ifdef PRINT_PMAP_MEMORY_TABLE
	{
	unsigned int j;
	pmap_memory_region_t *p = pmap_memory_regions;
	addr64_t region_start, region_end;
	addr64_t efi_start, efi_end;
	for (j = 0; j < pmap_memory_region_count; j++, p++) {
		kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n",
			j, p->type,
			(addr64_t) p->base << I386_PGSHIFT,
			(addr64_t) p->alloc << I386_PGSHIFT,
			(addr64_t) p->end << I386_PGSHIFT);
		region_start = (addr64_t) p->base << I386_PGSHIFT;
		region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
		mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap);
		for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
			if (mptr->Type != kEfiLoaderCode &&
			    mptr->Type != kEfiLoaderData &&
			    mptr->Type != kEfiBootServicesCode &&
			    mptr->Type != kEfiBootServicesData &&
			    mptr->Type != kEfiConventionalMemory) {
				efi_start = (addr64_t)mptr->PhysicalStart;
				efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
				if ((efi_start >= region_start && efi_start <= region_end) ||
				    (efi_end >= region_start && efi_end <= region_end)) {
					kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
				}
			}
		}
	}
	}
#endif

	avail_start = first_avail;
	mem_actual = sane_size;

	/*
	 * For the user-visible memory size, round up to 128 MB to account
	 * for the various stolen memory not reported by EFI.
	 */

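	/*
	 * The rounding works on 128 MB boundaries; for example, a reported
	 * 2040 MB (0x7f800000) rounds up to 2048 MB (0x80000000).
	 */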
	sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));

	/*
	 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64),
	 * unless overridden by the maxmem= boot-arg
	 * -- which arrives as a non-zero maxmem argument to this function.
	 */
	if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
		maxmem = KERNEL_MAXMEM;
		printf("Physical memory %lld bytes capped at %dGB\n",
			sane_size, (uint32_t) (KERNEL_MAXMEM/GB));
	}

	/*
	 * if the user set maxmem, reduce memory sizes
	 */
	if ((maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
		ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
		ppnum_t highest_pn = 0;
		ppnum_t cur_alloc = 0;
		uint64_t pages_to_use;
		unsigned cur_region = 0;

		sane_size = maxmem;

		if (avail_remaining > discarded_pages)
			avail_remaining -= discarded_pages;
		else
			avail_remaining = 0;

		pages_to_use = avail_remaining;

		while (cur_region < pmap_memory_region_count && pages_to_use) {
			for (cur_alloc = pmap_memory_regions[cur_region].alloc;
			     cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use;
			     cur_alloc++) {
				if (cur_alloc > highest_pn)
					highest_pn = cur_alloc;
				pages_to_use--;
			}
			if (pages_to_use == 0)
				pmap_memory_regions[cur_region].end = cur_alloc;

			cur_region++;
		}
		pmap_memory_region_count = cur_region;

		avail_end = i386_ptob(highest_pn + 1);
	}

	/*
	 * mem_size is only a 32-bit container... follow the PPC route
	 * and pin it to a 2 GB maximum.
	 */
	if (sane_size > (FOURGIG >> 1))
		mem_size = (vm_size_t)(FOURGIG >> 1);
	else
		mem_size = (vm_size_t)sane_size;
	max_mem = sane_size;

	kprintf("Physical memory %llu MB\n", sane_size/MB);

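	/*
	 * Pages below 2GB are regarded as "low" pages for devices that
	 * cannot address the full physical range; this ceiling may be
	 * lowered by the max_valid_dma_addr boot-arg handled below.
	 */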
	max_valid_low_ppnum = (2 * GB) / PAGE_SIZE;

	if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) {
		max_valid_dma_address = (uint64_t)4 * (uint64_t)GB;
	} else {
		max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;

		if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum)
			max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE);
	}
	if (avail_end >= max_valid_dma_address) {
		uint32_t  maxloreserve;
		uint32_t  mbuf_reserve = 0;
		boolean_t mbuf_override = FALSE;

		if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) {

			if (sane_size >= (ONEGIG * 15))
				maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4;
			else if (sane_size >= (ONEGIG * 7))
				maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2;
			else
				maxloreserve = MAXLORESERVE / PAGE_SIZE;

			mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE;
		} else
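			/* the boot-arg value is given in MB; convert to pages */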
			maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE;

		if (maxloreserve) {
			vm_lopage_free_limit = maxloreserve;

			if (mbuf_override == TRUE) {
				vm_lopage_free_limit += mbuf_reserve;
				vm_lopage_lowater = 0;
			} else
				vm_lopage_lowater = vm_lopage_free_limit / 16;

			vm_lopage_refill = TRUE;
			vm_lopage_needed = TRUE;
		}
	}
	/*
	 * Initialize kernel physical map.
	 * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
	 */
	pmap_bootstrap(0, IA32e);
}


unsigned int
pmap_free_pages(void)
{
	return (unsigned int)avail_remaining;
}

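/*
 * Hand out the next available physical page, preferring the
 * highest-numbered regions first so that these takers are steered
 * away from low memory.
 */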
boolean_t
pmap_next_page_hi(
	ppnum_t *pn)
{
	pmap_memory_region_t *region;
	int n;

	if (avail_remaining) {
		for (n = pmap_memory_region_count - 1; n >= 0; n--) {
			region = &pmap_memory_regions[n];

			if (region->alloc != region->end) {
				*pn = region->alloc++;
				avail_remaining--;

				if (*pn > max_ppnum)
					max_ppnum = *pn;

				if (lowest_lo == 0 || *pn < lowest_lo)
					lowest_lo = *pn;

				if (lowest_hi == 0 || *pn < lowest_hi)
					lowest_hi = *pn;

				if (*pn > highest_hi)
					highest_hi = *pn;

				return TRUE;
			}
		}
	}
	return FALSE;
}


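/*
 * Hand out the next available physical page, scanning regions from the
 * lowest-numbered upward.
 */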
boolean_t
pmap_next_page(
	ppnum_t *pn)
{
	if (avail_remaining) {
		while (pmap_memory_region_current < pmap_memory_region_count) {
			if (pmap_memory_regions[pmap_memory_region_current].alloc ==
			    pmap_memory_regions[pmap_memory_region_current].end) {
				pmap_memory_region_current++;
				continue;
			}
			*pn = pmap_memory_regions[pmap_memory_region_current].alloc++;
			avail_remaining--;

			if (*pn > max_ppnum)
				max_ppnum = *pn;

			if (lowest_lo == 0 || *pn < lowest_lo)
				lowest_lo = *pn;

			return TRUE;
		}
	}
	return FALSE;
}


boolean_t
pmap_valid_page(
	ppnum_t pn)
{
	unsigned int i;
	pmap_memory_region_t *pmptr = pmap_memory_regions;

	for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
		if ((pn >= pmptr->base) && (pn <= pmptr->end))
			return TRUE;
	}
	return FALSE;
}

/*
 * Called once VM is fully initialized so that we can release unused
 * sections of low memory to the general pool.
 * Also complete the set-up of identity-mapped sections of the kernel:
 *	1) write-protect kernel text
 *	2) map kernel text using large pages if possible
 *	3) read and write-protect page zero (for K32)
 *	4) map the global page at the appropriate virtual address.
 *
 * Use of large pages
 * ------------------
 * To effectively map and write-protect all kernel text pages, the text
 * must be 2M-aligned at the base, and the data section above must also be
 * 2M-aligned. That is, there's padding below and above. This is achieved
 * through linker directives. Large pages are used only if this alignment
 * exists (and is not overridden by the -kernel_text_page_4K boot-arg). The
 * memory layout is:
 *
 *                 :                :
 *                 |     __DATA     |
 *        sdata:   ==================  2Meg
 *                 |                |
 *                 |  zero-padding  |
 *                 |                |
 *        etext:   ------------------
 *                 |                |
 *                 :                :
 *                 |                |
 *                 |     __TEXT     |
 *                 |                |
 *                 :                :
 *                 |                |
 *        stext:   ==================  2Meg
 *                 |                |
 *                 |  zero-padding  |
 *                 |                |
 *        eHIB:    ------------------
 *                 |     __HIB      |
 *                 :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
void
pmap_lowmem_finalize(void)
{
	spl_t	spl;
	int	i;

	/* Check that the kernel is linked at the expected base address */
	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
	    I386_KERNEL_IMAGE_BASE_PAGE)
		panic("pmap_lowmem_finalize() unexpected kernel base address");

	/*
	 * Free all pages in pmap regions below the base:
	 * rdar://6332712
	 *	We can't free all the pages to VM that EFI reports available.
	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
	 *	There's also a size miscalculation here: pend is one page less
	 *	than it should be but this is not fixed to be backwards
	 *	compatible.
	 * Due to this current EFI limitation, we take only the first
	 * entry in the memory region table. However, the loop is retained
	 * (with the intended termination criteria commented out) in the
	 * hope that some day we can free all low-memory ranges.
	 */
	for (i = 0;
//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
	     i < 1;
	     i++) {
		vm_offset_t	pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
		vm_offset_t	pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
//		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);

		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
		    (void *) ml_static_ptovirt(pbase),
		    (void *) (pend - pbase), i);
		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
	}

	/*
	 * If text and data are both 2MB-aligned,
	 * we can map text with large-pages,
	 * unless the -kernel_text_ps_4K boot-arg overrides.
	 */
	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
		kprintf("Kernel text is 2MB aligned");
		kernel_text_ps_4K = FALSE;
		if (PE_parse_boot_argn("-kernel_text_ps_4K",
				       &kernel_text_ps_4K,
				       sizeof (kernel_text_ps_4K)))
			kprintf(" but will be mapped with 4K pages\n");
		else
			kprintf(" and will be mapped with 2M pages\n");
	}

	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
	if (wpkernel)
		kprintf("Kernel text %p-%p to be write-protected\n",
			(void *) stext, (void *) etext);

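	/* Block interrupts on this CPU while kernel mappings are rewritten */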
	spl = splhigh();

	/*
	 * Scan over text if mappings are to be changed:
	 * - Remap kernel text read-only unless the "wpkernel" boot-arg is 0
	 * - Change to large-pages if possible and not overridden.
	 */
	if (kernel_text_ps_4K && wpkernel) {
		vm_offset_t	myva;

		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
			pt_entry_t	*ptep;

			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep)
				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
		}
	}

	if (!kernel_text_ps_4K) {
		vm_offset_t	myva;

		/*
		 * Release zero-filled page padding used for 2M-alignment.
		 */
		DBG("ml_static_mfree(%p,%p) for padding below text\n",
		    (void *) eHIB, (void *) (stext - eHIB));
		ml_static_mfree(eHIB, stext - eHIB);
		DBG("ml_static_mfree(%p,%p) for padding above text\n",
		    (void *) etext, (void *) (sdata - etext));
		ml_static_mfree(etext, sdata - etext);

		/*
		 * Coalesce text pages into large pages.
		 */
		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
			pt_entry_t	*ptep;
			vm_offset_t	pte_phys;
			pt_entry_t	*pdep;
			pt_entry_t	pde;

			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			DBG("myva: %p pdep: %p ptep: %p\n",
			    (void *) myva, (void *) pdep, (void *) ptep);
			if ((*ptep & INTEL_PTE_VALID) == 0)
				continue;
			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
			pde = *pdep & PTMASK;	/* page attributes from pde */
			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
			pde |= pte_phys;	/* take page frame from pte */

			if (wpkernel)
				pde &= ~INTEL_PTE_RW;
			DBG("pmap_store_pte(%p,0x%llx)\n",
			    (void *) pdep, pde);
			pmap_store_pte(pdep, pde);

			/*
			 * Free the now-unused level-1 pte.
			 * Note: ptep is a virtual address to the pte in the
			 * recursive map. We can't use this address to free
			 * the page. Instead we need to compute its address
			 * in the Idle PTEs in "low memory".
			 */
			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
						+ (pte_phys >> PTPGSHIFT);
			DBG("ml_static_mfree(%p,0x%x) for pte\n",
			    (void *) vm_ptep, PAGE_SIZE);
			ml_static_mfree(vm_ptep, PAGE_SIZE);
		}

		/* Change variable read by sysctl machdep.pmap */
		pmap_kernel_text_ps = I386_LPGBYTES;
	}

#if defined(__i386__)
	/* no matter what, kernel page zero is not accessible */
	pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
#endif

	/* map lowmem global page into fixed addr */
	pt_entry_t *pte = NULL;
	if (0 == (pte = pmap_pte(kernel_pmap,
				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
		panic("lowmem pte");
	/* make sure it is defined on page boundary */
	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
				| INTEL_PTE_REF
				| INTEL_PTE_MOD
				| INTEL_PTE_WIRED
				| INTEL_PTE_VALID
				| INTEL_PTE_RW);
	splx(spl);
	flush_tlb();
}
829