/*
 * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */


#include <mach/i386/vm_param.h>

#include <string.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/machine.h>
#include <mach/time_value.h>
#include <kern/spl.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/misc_protos.h>
#include <kern/cpu_data.h>
#include <kern/processor.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
#include <i386/cpuid.h>
#include <mach/thread_status.h>
#include <pexpert/i386/efi.h>
#include <i386/i386_lowmem.h>
#include <x86_64/lowglobals.h>
#include <i386/pal_routines.h>

#include <mach-o/loader.h>
#include <libkern/kernel_mach_header.h>


vm_size_t mem_size = 0;
pmap_paddr_t first_avail = 0; /* first after page tables */

uint64_t max_mem;       /* Size of physical memory (bytes), adjusted by maxmem */
uint64_t mem_actual;
uint64_t sane_size = 0; /* Memory size for defaults calculations */

/*
 * KASLR parameters
 */
ppnum_t vm_kernel_base_page;
vm_offset_t vm_kernel_base;
vm_offset_t vm_kernel_top;
vm_offset_t vm_kernel_stext;
vm_offset_t vm_kernel_etext;
vm_offset_t vm_kernel_slide;
vm_offset_t vm_kernel_slid_base;
vm_offset_t vm_kernel_slid_top;
vm_offset_t vm_hib_base;
vm_offset_t vm_kext_base = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
vm_offset_t vm_kext_top = VM_MIN_KERNEL_ADDRESS;

vm_offset_t vm_prelink_stext;
vm_offset_t vm_prelink_etext;
vm_offset_t vm_prelink_sinfo;
vm_offset_t vm_prelink_einfo;
vm_offset_t vm_slinkedit;
vm_offset_t vm_elinkedit;

vm_offset_t vm_kernel_builtinkmod_text;
vm_offset_t vm_kernel_builtinkmod_text_end;

#define MAXLORESERVE (32 * 1024 * 1024)

ppnum_t max_ppnum = 0;
ppnum_t lowest_lo = 0;
ppnum_t lowest_hi = 0;
ppnum_t highest_hi = 0;

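/*
 * Bookkeeping for physical ranges carrying the EFI_MEMORY_KERN_RESERVED
 * attribute.  Pages in those ranges are handed out only by
 * pmap_next_page_reserved() below, keeping them away from devices.
 */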
enum {PMAP_MAX_RESERVED_RANGES = 32};
uint32_t pmap_reserved_pages_allocated = 0;
uint32_t pmap_reserved_range_indices[PMAP_MAX_RESERVED_RANGES];
uint32_t pmap_last_reserved_range_index = 0;
uint32_t pmap_reserved_ranges = 0;

extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);

pmap_paddr_t avail_start, avail_end;
vm_offset_t virtual_avail, virtual_end;
static pmap_paddr_t avail_remaining;
vm_offset_t static_memory_end = 0;
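/*
 * avail_remaining counts the physical pages not yet handed to the VM;
 * the pmap_next_page*() allocators below decrement it as pages go out.
 */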

vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end, sconst, econst;

/*
 * _mh_execute_header is the mach_header for the currently executing kernel
 */
vm_offset_t segTEXTB; unsigned long segSizeTEXT;
vm_offset_t segDATAB; unsigned long segSizeDATA;
vm_offset_t segLINKB; unsigned long segSizeLINK;
vm_offset_t segPRELINKTEXTB; unsigned long segSizePRELINKTEXT;
vm_offset_t segPRELINKINFOB; unsigned long segSizePRELINKINFO;
vm_offset_t segHIBB; unsigned long segSizeHIB;
unsigned long segSizeConst;

static kernel_segment_command_t *segTEXT, *segDATA;
static kernel_section_t *cursectTEXT, *lastsectTEXT;
static kernel_segment_command_t *segCONST;

extern uint64_t firmware_Conventional_bytes;
extern uint64_t firmware_RuntimeServices_bytes;
extern uint64_t firmware_ACPIReclaim_bytes;
extern uint64_t firmware_ACPINVS_bytes;
extern uint64_t firmware_PalCode_bytes;
extern uint64_t firmware_Reserved_bytes;
extern uint64_t firmware_Unusable_bytes;
extern uint64_t firmware_other_bytes;
uint64_t firmware_MMIO_bytes;

/*
 * Linker magic to establish the highest address in the kernel.
 */
extern void *last_kernel_symbol;

boolean_t memmap = FALSE;
#if DEBUG || DEVELOPMENT
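/*
 * Debug helper: dump each pmap region alongside the raw EFI map and
 * flag any pmap region that overlaps a non-conventional EFI range
 * (runtime services, MMIO, reserved, and so on).
 */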
static void
kprint_memmap(vm_offset_t maddr, unsigned int msize, unsigned int mcount)
{
    unsigned int i;
    unsigned int j;
    pmap_memory_region_t *p = pmap_memory_regions;
    EfiMemoryRange *mptr;
    addr64_t region_start, region_end;
    addr64_t efi_start, efi_end;

    for (j = 0; j < pmap_memory_region_count; j++, p++) {
        kprintf("pmap region %d type %d base 0x%llx alloc_up 0x%llx alloc_down 0x%llx top 0x%llx\n",
            j, p->type,
            (addr64_t) p->base << I386_PGSHIFT,
            (addr64_t) p->alloc_up << I386_PGSHIFT,
            (addr64_t) p->alloc_down << I386_PGSHIFT,
            (addr64_t) p->end << I386_PGSHIFT);
        region_start = (addr64_t) p->base << I386_PGSHIFT;
        region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
        mptr = (EfiMemoryRange *) maddr;
        for (i = 0;
            i < mcount;
            i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
            if (mptr->Type != kEfiLoaderCode &&
                mptr->Type != kEfiLoaderData &&
                mptr->Type != kEfiBootServicesCode &&
                mptr->Type != kEfiBootServicesData &&
                mptr->Type != kEfiConventionalMemory) {
                efi_start = (addr64_t)mptr->PhysicalStart;
                efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
                if ((efi_start >= region_start && efi_start <= region_end) ||
                    (efi_end >= region_start && efi_end <= region_end)) {
                    kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
                }
            }
        }
    }
}
#define DPRINTF(x...) do { if (memmap) kprintf(x); } while (0)

#else

static void
kprint_memmap(vm_offset_t maddr, unsigned int msize, unsigned int mcount)
{
#pragma unused(maddr, msize, mcount)
}

#define DPRINTF(x...)
#endif /* DEBUG || DEVELOPMENT */

/*
 * Basic VM initialization.
 */
void
i386_vm_init(uint64_t maxmem,
    boolean_t IA32e,
    boot_args *args)
{
    pmap_memory_region_t *pmptr;
    pmap_memory_region_t *prev_pmptr;
    EfiMemoryRange *mptr;
    unsigned int mcount;
    unsigned int msize;
    vm_offset_t maddr;
    ppnum_t fap;
    unsigned int i;
    ppnum_t maxpg = 0;
    uint32_t pmap_type;
    uint32_t maxloreserve;
    uint32_t maxdmaaddr;
    uint32_t mbuf_reserve = 0;
    boolean_t mbuf_override = FALSE;
    boolean_t coalescing_permitted;
    vm_kernel_base_page = i386_btop(args->kaddr);
    vm_offset_t base_address;
    vm_offset_t static_base_address;

    PE_parse_boot_argn("memmap", &memmap, sizeof(memmap));

    /*
     * Establish the KASLR parameters.
     */
    static_base_address = ml_static_ptovirt(KERNEL_BASE_OFFSET);
    base_address = ml_static_ptovirt(args->kaddr);
    vm_kernel_slide = base_address - static_base_address;
    if (args->kslide) {
        kprintf("KASLR slide: 0x%016lx dynamic\n", vm_kernel_slide);
        if (vm_kernel_slide != ((vm_offset_t)args->kslide)) {
            panic("Kernel base inconsistent with slide - rebased?");
        }
    } else {
        /* No slide relative to on-disk symbols */
        kprintf("KASLR slide: 0x%016lx static and ignored\n",
            vm_kernel_slide);
        vm_kernel_slide = 0;
    }

    /*
     * Zero out local relocations to avoid confusing kxld.
     * TODO: might be better to move this code to OSKext::initialize
     */
    if (_mh_execute_header.flags & MH_PIE) {
        struct load_command *loadcmd;
        uint32_t cmd;

        loadcmd = (struct load_command *)((uintptr_t)&_mh_execute_header +
            sizeof(_mh_execute_header));

        for (cmd = 0; cmd < _mh_execute_header.ncmds; cmd++) {
            if (loadcmd->cmd == LC_DYSYMTAB) {
                struct dysymtab_command *dysymtab;

                dysymtab = (struct dysymtab_command *)loadcmd;
                dysymtab->nlocrel = 0;
                dysymtab->locreloff = 0;
                kprintf("Hiding local relocations\n");
                break;
            }
            loadcmd = (struct load_command *)((uintptr_t)loadcmd + loadcmd->cmdsize);
        }
    }

    /*
     * Now retrieve addresses for end, edata, and etext
     * from Mach-O headers.
     */
    segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__TEXT", &segSizeTEXT);
    segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__DATA", &segSizeDATA);
    segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__LINKEDIT", &segSizeLINK);
    segHIBB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__HIB", &segSizeHIB);
    segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__PRELINK_TEXT", &segSizePRELINKTEXT);
    segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
        "__PRELINK_INFO", &segSizePRELINKINFO);
    segTEXT = getsegbynamefromheader(&_mh_execute_header,
        "__TEXT");
    segDATA = getsegbynamefromheader(&_mh_execute_header,
        "__DATA");
    segCONST = getsegbynamefromheader(&_mh_execute_header,
        "__CONST");
    cursectTEXT = lastsectTEXT = firstsect(segTEXT);
    /* Discover the last TEXT section within the TEXT segment */
    while ((cursectTEXT = nextsect(segTEXT, cursectTEXT)) != NULL) {
        lastsectTEXT = cursectTEXT;
    }

    sHIB = segHIBB;
    eHIB = segHIBB + segSizeHIB;
    vm_hib_base = sHIB;
    /* Zero-padded from eHIB to stext if text is 2M-aligned */
    stext = segTEXTB;
    lowGlo.lgStext = stext;
    etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size);
    /* Zero-padded from etext to sdata if text is 2M-aligned */
    sdata = segDATAB;
    edata = segDATAB + segSizeDATA;

    sconst = segCONST->vmaddr;
    segSizeConst = segCONST->vmsize;
    econst = sconst + segSizeConst;

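    /*
     * __CONST is expected to begin and end on page boundaries,
     * presumably so its protections can be managed independently of
     * the neighboring segments.
     */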
    assert(((sconst | econst) & PAGE_MASK) == 0);

    DPRINTF("segTEXTB = %p\n", (void *) segTEXTB);
    DPRINTF("segDATAB = %p\n", (void *) segDATAB);
    DPRINTF("segLINKB = %p\n", (void *) segLINKB);
    DPRINTF("segHIBB = %p\n", (void *) segHIBB);
    DPRINTF("segPRELINKTEXTB = %p\n", (void *) segPRELINKTEXTB);
    DPRINTF("segPRELINKINFOB = %p\n", (void *) segPRELINKINFOB);
    DPRINTF("sHIB = %p\n", (void *) sHIB);
    DPRINTF("eHIB = %p\n", (void *) eHIB);
    DPRINTF("stext = %p\n", (void *) stext);
    DPRINTF("etext = %p\n", (void *) etext);
    DPRINTF("sdata = %p\n", (void *) sdata);
    DPRINTF("edata = %p\n", (void *) edata);
    DPRINTF("sconst = %p\n", (void *) sconst);
    DPRINTF("econst = %p\n", (void *) econst);
    DPRINTF("kernel_top = %p\n", (void *) &last_kernel_symbol);

    vm_kernel_base = sHIB;
    vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
    vm_kernel_stext = stext;
    vm_kernel_etext = etext;
    vm_prelink_stext = segPRELINKTEXTB;
    vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
    vm_prelink_sinfo = segPRELINKINFOB;
    vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
    vm_slinkedit = segLINKB;
    vm_elinkedit = segLINKB + segSizeLINK;
    vm_kernel_slid_base = vm_kext_base + vm_kernel_slide;
    vm_kernel_slid_top = vm_prelink_einfo;

    vm_set_page_size();

    /*
     * Compute the memory size.
     */

    avail_remaining = 0;
    avail_end = 0;
    pmptr = pmap_memory_regions;
    prev_pmptr = 0;
    pmap_memory_region_count = pmap_memory_region_current = 0;
    fap = (ppnum_t) i386_btop(first_avail);

    maddr = ml_static_ptovirt((vm_offset_t)args->MemoryMap);
    mptr = (EfiMemoryRange *)maddr;
    if (args->MemoryMapDescriptorSize == 0) {
        panic("Invalid memory map descriptor size");
    }
    msize = args->MemoryMapDescriptorSize;
    mcount = args->MemoryMapSize / msize;

#define FOURGIG 0x0000000100000000ULL
#define ONEGIG  0x0000000040000000ULL

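    /*
     * Walk the EFI memory map handed up by the booter and build
     * pmap_memory_regions[] from it.  Each region keeps two cursors:
     * alloc_up grows from the bottom and alloc_down shrinks from the
     * top; the region is exhausted once alloc_up > alloc_down.
     */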
    for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
        ppnum_t base, top;
        uint64_t region_bytes = 0;

        if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
            kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
            break;
        }
        base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
        top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);

        if (base == 0) {
            /*
             * Avoid having to deal with the edge case of the
             * very first possible physical page and the roll-over
             * to -1; just ignore that page.
             */
            kprintf("WARNING: ignoring first page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top);
            base++;
        }
        if (top + 1 == 0) {
            /*
             * Avoid having to deal with the edge case of the
             * very last possible physical page and the roll-over
             * to 0; just ignore that page.
             */
            kprintf("WARNING: ignoring last page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top);
            top--;
        }
        if (top < base) {
            /*
             * That was the only page in that region, so
             * ignore the whole region.
             */
            continue;
        }

#if MR_RSV_TEST
        static uint32_t nmr = 0;
        if ((base > 0x20000) && (nmr++ < 4)) {
            mptr->Attribute |= EFI_MEMORY_KERN_RESERVED;
        }
#endif
        region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
        pmap_type = mptr->Type;

        switch (mptr->Type) {
        case kEfiLoaderCode:
        case kEfiLoaderData:
        case kEfiBootServicesCode:
        case kEfiBootServicesData:
        case kEfiConventionalMemory:
            /*
             * Consolidate usable memory types into one.
             */
            pmap_type = kEfiConventionalMemory;
            sane_size += region_bytes;
            firmware_Conventional_bytes += region_bytes;
            break;
        /*
         * sane_size should reflect the total amount of physical
         * RAM in the system, not just the amount that is
         * available for the OS to use.
         * We now get this value from SMBIOS tables
         * rather than reverse engineering the memory map.
         * But the legacy computation of "sane_size" is kept
         * for diagnostic information.
         */

        case kEfiRuntimeServicesCode:
        case kEfiRuntimeServicesData:
            firmware_RuntimeServices_bytes += region_bytes;
            sane_size += region_bytes;
            break;
        case kEfiACPIReclaimMemory:
            firmware_ACPIReclaim_bytes += region_bytes;
            sane_size += region_bytes;
            break;
        case kEfiACPIMemoryNVS:
            firmware_ACPINVS_bytes += region_bytes;
            sane_size += region_bytes;
            break;
        case kEfiPalCode:
            firmware_PalCode_bytes += region_bytes;
            sane_size += region_bytes;
            break;

        case kEfiReservedMemoryType:
            firmware_Reserved_bytes += region_bytes;
            break;
        case kEfiUnusableMemory:
            firmware_Unusable_bytes += region_bytes;
            break;
        case kEfiMemoryMappedIO:
        case kEfiMemoryMappedIOPortSpace:
            firmware_MMIO_bytes += region_bytes;
            break;
        default:
            firmware_other_bytes += region_bytes;
            break;
        }

        DPRINTF("EFI region %d: type %u/%d, base 0x%x, top 0x%x %s\n",
            i, mptr->Type, pmap_type, base, top,
            (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)? "RESERVED" :
            (mptr->Attribute & EFI_MEMORY_RUNTIME)? "RUNTIME" : "");

        if (maxpg) {
            if (base >= maxpg) {
                break;
            }
            top = (top > maxpg) ? maxpg : top;
        }

        /*
         * handle each region
         */
        if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
            pmap_type != kEfiConventionalMemory) {
            prev_pmptr = 0;
            continue;
        } else {
            /*
             * Usable memory region
             */
            if (top < I386_LOWMEM_RESERVED ||
                !pal_is_usable_memory(base, top)) {
                prev_pmptr = 0;
                continue;
            }
            /*
             * A range may be marked with the
             * EFI_MEMORY_KERN_RESERVED attribute
             * on some systems, to indicate that the range
             * must not be made available to devices.
             */

            if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
                if (++pmap_reserved_ranges > PMAP_MAX_RESERVED_RANGES) {
                    panic("Too many reserved ranges %u\n", pmap_reserved_ranges);
                }
            }

            if (top < fap) {
                /*
                 * entire range below first_avail:
                 * salvage some low memory pages.
                 * We use some very low memory at startup,
                 * so mark it as already allocated here.
                 */
                if (base >= I386_LOWMEM_RESERVED) {
                    pmptr->base = base;
                } else {
                    pmptr->base = I386_LOWMEM_RESERVED;
                }

                pmptr->end = top;


                if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
                    (top < vm_kernel_base_page)) {
                    pmptr->alloc_up = pmptr->base;
                    pmptr->alloc_down = pmptr->end;
                    pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                } else {
                    /*
                     * mark as already mapped
                     */
                    pmptr->alloc_up = top + 1;
                    pmptr->alloc_down = top;
                }
                pmptr->type = pmap_type;
                pmptr->attribute = mptr->Attribute;
            } else if ((base < fap) && (top > fap)) {
                /*
                 * spans first_avail:
                 * put the memory below first_avail in the table,
                 * but mark it as already allocated
                 */
                pmptr->base = base;
                pmptr->end = (fap - 1);
                pmptr->alloc_up = pmptr->end + 1;
                pmptr->alloc_down = pmptr->end;
                pmptr->type = pmap_type;
                pmptr->attribute = mptr->Attribute;
                /*
                 * we bump these here inline so the accounting
                 * below works correctly
                 */
                pmptr++;
                pmap_memory_region_count++;

                pmptr->alloc_up = pmptr->base = fap;
                pmptr->type = pmap_type;
                pmptr->attribute = mptr->Attribute;
                pmptr->alloc_down = pmptr->end = top;

                if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
                    pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                }
            } else {
                /*
                 * entire range usable
                 */
                pmptr->alloc_up = pmptr->base = base;
                pmptr->type = pmap_type;
                pmptr->attribute = mptr->Attribute;
                pmptr->alloc_down = pmptr->end = top;
                if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
                    pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
                }
            }

            if (i386_ptob(pmptr->end) > avail_end) {
                avail_end = i386_ptob(pmptr->end);
            }

            avail_remaining += (pmptr->end - pmptr->base);
            coalescing_permitted = (prev_pmptr &&
                (pmptr->attribute == prev_pmptr->attribute) &&
                ((pmptr->attribute & EFI_MEMORY_KERN_RESERVED) == 0));
            /*
             * Consolidate contiguous memory regions, if possible
             */
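            /*
             * Two entries merge only when they have the same type and
             * attributes, neither is kernel-reserved, no pages have yet
             * been allocated from the bottom of the new entry or the top
             * of the previous one, and the new entry starts at the page
             * immediately after the previous one ends.
             */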
            if (prev_pmptr &&
                (pmptr->type == prev_pmptr->type) &&
                (coalescing_permitted) &&
                (pmptr->base == pmptr->alloc_up) &&
                (prev_pmptr->end == prev_pmptr->alloc_down) &&
                (pmptr->base == (prev_pmptr->end + 1))) {
                prev_pmptr->end = pmptr->end;
                prev_pmptr->alloc_down = pmptr->alloc_down;
            } else {
                pmap_memory_region_count++;
                prev_pmptr = pmptr;
                pmptr++;
            }
        }
    }

    if (memmap) {
        kprint_memmap(maddr, msize, mcount);
    }

    avail_start = first_avail;
    mem_actual = args->PhysicalMemorySize;

    /*
     * For user visible memory size, round up to 128 Mb,
     * accounting for the various stolen memory not reported by EFI.
     * This is maintained for historical comparison purposes, but
     * we now use the memory size reported by EFI/Booter.
     */
    sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));
    if (sane_size != mem_actual) {
        printf("mem_actual: 0x%llx\n legacy sane_size: 0x%llx\n",
            mem_actual, sane_size);
    }
    sane_size = mem_actual;

    /*
     * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 96GB for K64),
     * unless overridden by the maxmem= boot-arg, which arrives here
     * as a non-zero maxmem argument to this function.
     */
    if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
        maxmem = KERNEL_MAXMEM;
        printf("Physical memory %lld bytes capped at %dGB\n",
            sane_size, (uint32_t) (KERNEL_MAXMEM / GB));
    }

    /*
     * if the user set maxmem, reduce memory sizes
     */
    if ((maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
        ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
        ppnum_t highest_pn = 0;
        ppnum_t cur_end = 0;
        uint64_t pages_to_use;
        unsigned cur_region = 0;

        sane_size = maxmem;

        if (avail_remaining > discarded_pages) {
            avail_remaining -= discarded_pages;
        } else {
            avail_remaining = 0;
        }

        pages_to_use = avail_remaining;

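        /*
         * Walk the regions from the bottom, keeping pages_to_use pages:
         * the region that exhausts the budget is truncated there, and
         * any regions above it are dropped from the table.
         */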
        while (cur_region < pmap_memory_region_count && pages_to_use) {
            for (cur_end = pmap_memory_regions[cur_region].base;
                cur_end < pmap_memory_regions[cur_region].end && pages_to_use;
                cur_end++) {
                if (cur_end > highest_pn) {
                    highest_pn = cur_end;
                }
                pages_to_use--;
            }
            if (pages_to_use == 0) {
                pmap_memory_regions[cur_region].end = cur_end;
                pmap_memory_regions[cur_region].alloc_down = cur_end;
            }

            cur_region++;
        }
        pmap_memory_region_count = cur_region;

        avail_end = i386_ptob(highest_pn + 1);
    }

    /*
     * mem_size is only a 32 bit container... follow the PPC route
     * and pin it to a 2 Gbyte maximum
     */
    if (sane_size > (FOURGIG >> 1)) {
        mem_size = (vm_size_t)(FOURGIG >> 1);
    } else {
        mem_size = (vm_size_t)sane_size;
    }
    max_mem = sane_size;

    kprintf("Physical memory %llu MB\n", sane_size / MB);

    max_valid_low_ppnum = (2 * GB) / PAGE_SIZE;

    if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof(maxdmaaddr))) {
        max_valid_dma_address = (uint64_t)4 * (uint64_t)GB;
    } else {
        max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;

        if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum) {
            max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE);
        }
    }
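    /*
     * Size a reserve of "low" pages (below max_valid_dma_address) for
     * devices that cannot DMA above that limit; the reserve scales with
     * physical memory, and the mbuf cluster pool may add to it.
     */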
    if (avail_end >= max_valid_dma_address) {
        if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof(maxloreserve))) {
            if (sane_size >= (ONEGIG * 15)) {
                maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4;
            } else if (sane_size >= (ONEGIG * 7)) {
                maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2;
            } else {
                maxloreserve = MAXLORESERVE / PAGE_SIZE;
            }

#if SOCKETS
            mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE;
#endif
        } else {
            maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE;
        }

        if (maxloreserve) {
            vm_lopage_free_limit = maxloreserve;

            if (mbuf_override == TRUE) {
                vm_lopage_free_limit += mbuf_reserve;
                vm_lopage_lowater = 0;
            } else {
                vm_lopage_lowater = vm_lopage_free_limit / 16;
            }

            vm_lopage_refill = TRUE;
            vm_lopage_needed = TRUE;
        }
    }

    /*
     * Initialize kernel physical map.
     * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
     */
    kprintf("avail_remaining = 0x%lx\n", (unsigned long)avail_remaining);
    pmap_bootstrap(0, IA32e);
}


unsigned int
pmap_free_pages(void)
{
    return (unsigned int)avail_remaining;
}


boolean_t pmap_next_page_reserved(ppnum_t *);

/*
 * Pick a page from a "kernel private" reserved range; works around
 * errata on some hardware.
 */
boolean_t
pmap_next_page_reserved(ppnum_t *pn)
{
    if (pmap_reserved_ranges) {
        uint32_t n;
        pmap_memory_region_t *region;
        for (n = 0; n < pmap_last_reserved_range_index; n++) {
            uint32_t reserved_index = pmap_reserved_range_indices[n];
            region = &pmap_memory_regions[reserved_index];
            if (region->alloc_up <= region->alloc_down) {
                *pn = region->alloc_up++;
                avail_remaining--;

                if (*pn > max_ppnum) {
                    max_ppnum = *pn;
                }

                if (lowest_lo == 0 || *pn < lowest_lo) {
                    lowest_lo = *pn;
                }

                pmap_reserved_pages_allocated++;
#if DEBUG
                if (region->alloc_up > region->alloc_down) {
                    kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute);
                }
#endif
                return TRUE;
            }
        }
    }
    return FALSE;
}


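/*
 * Prefer a page from a kernel-reserved range; otherwise hand out the
 * highest available page, walking the regions from the top down so
 * that low pages are preserved.
 */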
boolean_t
pmap_next_page_hi(
    ppnum_t *pn)
{
    pmap_memory_region_t *region;
    int n;

    if (pmap_next_page_reserved(pn)) {
        return TRUE;
    }

    if (avail_remaining) {
        for (n = pmap_memory_region_count - 1; n >= 0; n--) {
            region = &pmap_memory_regions[n];

            if (region->alloc_down >= region->alloc_up) {
                *pn = region->alloc_down--;
                avail_remaining--;

                if (*pn > max_ppnum) {
                    max_ppnum = *pn;
                }

                if (lowest_lo == 0 || *pn < lowest_lo) {
                    lowest_lo = *pn;
                }

                if (lowest_hi == 0 || *pn < lowest_hi) {
                    lowest_hi = *pn;
                }

                if (*pn > highest_hi) {
                    highest_hi = *pn;
                }

                return TRUE;
            }
        }
    }
    return FALSE;
}


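/*
 * Default bottom-up allocator: hand out pages from the current region's
 * alloc_up cursor, advancing to the next region once alloc_up passes
 * alloc_down.
 */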
boolean_t
pmap_next_page(
    ppnum_t *pn)
{
    if (avail_remaining) {
        while (pmap_memory_region_current < pmap_memory_region_count) {
            if (pmap_memory_regions[pmap_memory_region_current].alloc_up >
                pmap_memory_regions[pmap_memory_region_current].alloc_down) {
                pmap_memory_region_current++;
                continue;
            }
            *pn = pmap_memory_regions[pmap_memory_region_current].alloc_up++;
            avail_remaining--;

            if (*pn > max_ppnum) {
                max_ppnum = *pn;
            }

            if (lowest_lo == 0 || *pn < lowest_lo) {
                lowest_lo = *pn;
            }

            return TRUE;
        }
    }
    return FALSE;
}


boolean_t
pmap_valid_page(
    ppnum_t pn)
{
    unsigned int i;
    pmap_memory_region_t *pmptr = pmap_memory_regions;

    for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
        if ((pn >= pmptr->base) && (pn <= pmptr->end)) {
            return TRUE;
        }
    }
    return FALSE;
}