apple/xnu (xnu-6153.101.6) - osfmk/i386/i386_vm_init.c
1 /*
2 * Copyright (c) 2003-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57
58 #include <mach/i386/vm_param.h>
59
60 #include <string.h>
61 #include <mach/vm_param.h>
62 #include <mach/vm_prot.h>
63 #include <mach/machine.h>
64 #include <mach/time_value.h>
65 #include <kern/spl.h>
66 #include <kern/assert.h>
67 #include <kern/debug.h>
68 #include <kern/misc_protos.h>
69 #include <kern/cpu_data.h>
70 #include <kern/processor.h>
71 #include <vm/vm_page.h>
72 #include <vm/pmap.h>
73 #include <vm/vm_kern.h>
74 #include <i386/pmap.h>
75 #include <i386/misc_protos.h>
76 #include <i386/cpuid.h>
77 #include <mach/thread_status.h>
78 #include <pexpert/i386/efi.h>
79 #include <i386/i386_lowmem.h>
80 #include <i386/misc_protos.h>
81 #include <x86_64/lowglobals.h>
82 #include <i386/pal_routines.h>
83
84 #include <mach-o/loader.h>
85 #include <libkern/kernel_mach_header.h>
86
87
88 vm_size_t mem_size = 0;
89 pmap_paddr_t first_avail = 0; /* first after page tables */
90
91 uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem */
92 uint64_t mem_actual;
93 uint64_t sane_size = 0; /* Memory size for defaults calculations */
94
95 /*
96 * KASLR parameters
97 */
98 ppnum_t vm_kernel_base_page;
99 vm_offset_t vm_kernel_base;
100 vm_offset_t vm_kernel_top;
101 vm_offset_t vm_kernel_stext;
102 vm_offset_t vm_kernel_etext;
103 vm_offset_t vm_kernel_slide;
104 vm_offset_t vm_kernel_slid_base;
105 vm_offset_t vm_kernel_slid_top;
106 vm_offset_t vm_hib_base;
107 vm_offset_t vm_kext_base = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
108 vm_offset_t vm_kext_top = VM_MIN_KERNEL_ADDRESS;
109
110 vm_offset_t vm_prelink_stext;
111 vm_offset_t vm_prelink_etext;
112 vm_offset_t vm_prelink_sinfo;
113 vm_offset_t vm_prelink_einfo;
114 vm_offset_t vm_slinkedit;
115 vm_offset_t vm_elinkedit;
116
117 vm_offset_t vm_kernel_builtinkmod_text;
118 vm_offset_t vm_kernel_builtinkmod_text_end;
119
120 #define MAXLORESERVE (32 * 1024 * 1024)
121
122 ppnum_t max_ppnum = 0;
123
124 /*
125 * pmap_high_used* are the highest range of physical memory used for kernel
126 * internals (page tables, vm_pages) via pmap_steal_memory() that don't
127 * need to be encrypted in hibernation images. There can be one gap in
128 * the middle of this due to fragmentation when using a mix of small
129 * and large pages. In that case, the fragment lives between the high
130 * and middle ranges.
131 */
132 ppnum_t pmap_high_used_top = 0;
133 ppnum_t pmap_high_used_bottom = 0;
134 ppnum_t pmap_middle_used_top = 0;
135 ppnum_t pmap_middle_used_bottom = 0;
136
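/*
 * Bookkeeping for physical ranges that the firmware marks with the
 * EFI_MEMORY_KERN_RESERVED attribute; see pmap_next_page_reserved().
 */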
137 enum {PMAP_MAX_RESERVED_RANGES = 32};
138 uint32_t pmap_reserved_pages_allocated = 0;
139 uint32_t pmap_reserved_range_indices[PMAP_MAX_RESERVED_RANGES];
140 uint32_t pmap_last_reserved_range_index = 0;
141 uint32_t pmap_reserved_ranges = 0;
142
143 extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
144
145 pmap_paddr_t avail_start, avail_end;
146 vm_offset_t virtual_avail, virtual_end;
147 static pmap_paddr_t avail_remaining;
148 vm_offset_t static_memory_end = 0;
149
150 vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end, sconst, econst;
151
152 /*
153 * _mh_execute_header is the mach_header for the currently executing kernel
154 */
155 vm_offset_t segTEXTB; unsigned long segSizeTEXT;
156 vm_offset_t segDATAB; unsigned long segSizeDATA;
157 vm_offset_t segLINKB; unsigned long segSizeLINK;
158 vm_offset_t segPRELINKTEXTB; unsigned long segSizePRELINKTEXT;
159 vm_offset_t segPRELINKINFOB; unsigned long segSizePRELINKINFO;
160 vm_offset_t segHIBB; unsigned long segSizeHIB;
161 unsigned long segSizeConst;
162
163 static kernel_segment_command_t *segTEXT, *segDATA;
164 static kernel_section_t *cursectTEXT, *lastsectTEXT;
165 static kernel_segment_command_t *segCONST;
166
167 extern uint64_t firmware_Conventional_bytes;
168 extern uint64_t firmware_RuntimeServices_bytes;
169 extern uint64_t firmware_ACPIReclaim_bytes;
170 extern uint64_t firmware_ACPINVS_bytes;
171 extern uint64_t firmware_PalCode_bytes;
172 extern uint64_t firmware_Reserved_bytes;
173 extern uint64_t firmware_Unusable_bytes;
174 extern uint64_t firmware_other_bytes;
175 uint64_t firmware_MMIO_bytes;
176
177 /*
178 * Linker magic to establish the highest address in the kernel.
179 */
180 extern void *last_kernel_symbol;
181
182 #define LG_PPNUM_PAGES (I386_LPGBYTES >> PAGE_SHIFT)
183 #define LG_PPNUM_MASK (I386_LPGMASK >> PAGE_SHIFT)
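/*
 * With 2 MB large pages (I386_LPGBYTES) over 4 KB base pages, these
 * work out to LG_PPNUM_PAGES == 512 and LG_PPNUM_MASK == 0x1ff.
 */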
184
185 /* reset a region so that no large page fragment pages exist */
186 #define RESET_FRAG(r) (((r)->alloc_frag_up = 1), ((r)->alloc_frag_down = 0))
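/*
 * A fragment range is treated as empty whenever alloc_frag_up is
 * greater than alloc_frag_down, which is exactly the state the
 * 1/0 assignment above establishes.
 */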
187
188 boolean_t memmap = FALSE;
189 #if DEBUG || DEVELOPMENT
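/*
 * On DEBUG/DEVELOPMENT kernels, dump every pmap memory region and warn
 * when one overlaps a non-conventional EFI range; only called when the
 * "memmap" boot-arg is set.
 */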
190 static void
191 kprint_memmap(vm_offset_t maddr, unsigned int msize, unsigned int mcount)
192 {
193 unsigned int i;
194 unsigned int j;
195 pmap_memory_region_t *p = pmap_memory_regions;
196 EfiMemoryRange *mptr;
197 addr64_t region_start, region_end;
198 addr64_t efi_start, efi_end;
199
200 for (j = 0; j < pmap_memory_region_count; j++, p++) {
201 kprintf("pmap region %d type %d base 0x%llx alloc_up 0x%llx alloc_down 0x%llx"
202 " alloc_frag_up 0x%llx alloc_frag_down 0x%llx top 0x%llx\n",
203 j, p->type,
204 (addr64_t) p->base << I386_PGSHIFT,
205 (addr64_t) p->alloc_up << I386_PGSHIFT,
206 (addr64_t) p->alloc_down << I386_PGSHIFT,
207 (addr64_t) p->alloc_frag_up << I386_PGSHIFT,
208 (addr64_t) p->alloc_frag_down << I386_PGSHIFT,
209 (addr64_t) p->end << I386_PGSHIFT);
210 region_start = (addr64_t) p->base << I386_PGSHIFT;
211 region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
212 mptr = (EfiMemoryRange *) maddr;
213 for (i = 0;
214 i < mcount;
215 i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
216 if (mptr->Type != kEfiLoaderCode &&
217 mptr->Type != kEfiLoaderData &&
218 mptr->Type != kEfiBootServicesCode &&
219 mptr->Type != kEfiBootServicesData &&
220 mptr->Type != kEfiConventionalMemory) {
221 efi_start = (addr64_t)mptr->PhysicalStart;
222 efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
223 if ((efi_start >= region_start && efi_start <= region_end) ||
224 (efi_end >= region_start && efi_end <= region_end)) {
225 kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
226 }
227 }
228 }
229 }
230 }
231 #define DPRINTF(x...) do { if (memmap) kprintf(x); } while (0)
232
233 #else
234
235 static void
236 kprint_memmap(vm_offset_t maddr, unsigned int msize, unsigned int mcount)
237 {
238 #pragma unused(maddr, msize, mcount)
239 }
240
241 #define DPRINTF(x...)
242 #endif /* DEBUG || DEVELOPMENT */
243
244 /*
245 * Basic VM initialization.
246 */
247 void
248 i386_vm_init(uint64_t maxmem,
249 boolean_t IA32e,
250 boot_args *args)
251 {
252 pmap_memory_region_t *pmptr;
253 pmap_memory_region_t *prev_pmptr;
254 EfiMemoryRange *mptr;
255 unsigned int mcount;
256 unsigned int msize;
257 vm_offset_t maddr;
258 ppnum_t fap;
259 unsigned int i;
260 ppnum_t maxpg = 0;
261 uint32_t pmap_type;
262 uint32_t maxloreserve;
263 uint32_t maxdmaaddr;
264 uint32_t mbuf_reserve = 0;
265 boolean_t mbuf_override = FALSE;
266 boolean_t coalescing_permitted;
267 vm_kernel_base_page = i386_btop(args->kaddr);
268 vm_offset_t base_address;
269 vm_offset_t static_base_address;
270
271 PE_parse_boot_argn("memmap", &memmap, sizeof(memmap));
272
273 /*
274 * Establish the KASLR parameters.
275 */
276 static_base_address = ml_static_ptovirt(KERNEL_BASE_OFFSET);
277 base_address = ml_static_ptovirt(args->kaddr);
278 vm_kernel_slide = base_address - static_base_address;
279 if (args->kslide) {
280 kprintf("KASLR slide: 0x%016lx dynamic\n", vm_kernel_slide);
281 if (vm_kernel_slide != ((vm_offset_t)args->kslide)) {
282 panic("Kernel base inconsistent with slide - rebased?");
283 }
284 } else {
285 /* No slide relative to on-disk symbols */
286 kprintf("KASLR slide: 0x%016lx static and ignored\n",
287 vm_kernel_slide);
288 vm_kernel_slide = 0;
289 }
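/*
 * The slide is simply the difference between where the booter loaded
 * the kernel (args->kaddr) and the static link-time base
 * (KERNEL_BASE_OFFSET), both converted to kernel virtual addresses.
 */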
290
291 /*
292 * Zero out local relocations to avoid confusing kxld.
293 * TODO: might be better to move this code to OSKext::initialize
294 */
295 if (_mh_execute_header.flags & MH_PIE) {
296 struct load_command *loadcmd;
297 uint32_t cmd;
298
299 loadcmd = (struct load_command *)((uintptr_t)&_mh_execute_header +
300 sizeof(_mh_execute_header));
301
302 for (cmd = 0; cmd < _mh_execute_header.ncmds; cmd++) {
303 if (loadcmd->cmd == LC_DYSYMTAB) {
304 struct dysymtab_command *dysymtab;
305
306 dysymtab = (struct dysymtab_command *)loadcmd;
307 dysymtab->nlocrel = 0;
308 dysymtab->locreloff = 0;
309 kprintf("Hiding local relocations\n");
310 break;
311 }
312 loadcmd = (struct load_command *)((uintptr_t)loadcmd + loadcmd->cmdsize);
313 }
314 }
315
316 /*
317 * Now retrieve addresses for end, edata, and etext
318 * from the Mach-O headers.
319 */
320 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
321 "__TEXT", &segSizeTEXT);
322 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
323 "__DATA", &segSizeDATA);
324 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
325 "__LINKEDIT", &segSizeLINK);
326 segHIBB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
327 "__HIB", &segSizeHIB);
328 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
329 "__PRELINK_TEXT", &segSizePRELINKTEXT);
330 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
331 "__PRELINK_INFO", &segSizePRELINKINFO);
332 segTEXT = getsegbynamefromheader(&_mh_execute_header,
333 "__TEXT");
334 segDATA = getsegbynamefromheader(&_mh_execute_header,
335 "__DATA");
336 segCONST = getsegbynamefromheader(&_mh_execute_header,
337 "__DATA_CONST");
338 cursectTEXT = lastsectTEXT = firstsect(segTEXT);
339 /* Discover the last TEXT section within the TEXT segment */
340 while ((cursectTEXT = nextsect(segTEXT, cursectTEXT)) != NULL) {
341 lastsectTEXT = cursectTEXT;
342 }
343
344 sHIB = segHIBB;
345 eHIB = segHIBB + segSizeHIB;
346 vm_hib_base = sHIB;
347 /* Zero-padded from eHIB to stext if text is 2M-aligned */
348 stext = segTEXTB;
349 lowGlo.lgStext = stext;
350 etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size);
351 /* Zero-padded from etext to sdata if text is 2M-aligned */
352 sdata = segDATAB;
353 edata = segDATAB + segSizeDATA;
354
355 sconst = segCONST->vmaddr;
356 segSizeConst = segCONST->vmsize;
357 econst = sconst + segSizeConst;
358
359 assert(((sconst | econst) & PAGE_MASK) == 0);
360
361 DPRINTF("segTEXTB = %p\n", (void *) segTEXTB);
362 DPRINTF("segDATAB = %p\n", (void *) segDATAB);
363 DPRINTF("segLINKB = %p\n", (void *) segLINKB);
364 DPRINTF("segHIBB = %p\n", (void *) segHIBB);
365 DPRINTF("segPRELINKTEXTB = %p\n", (void *) segPRELINKTEXTB);
366 DPRINTF("segPRELINKINFOB = %p\n", (void *) segPRELINKINFOB);
367 DPRINTF("sHIB = %p\n", (void *) sHIB);
368 DPRINTF("eHIB = %p\n", (void *) eHIB);
369 DPRINTF("stext = %p\n", (void *) stext);
370 DPRINTF("etext = %p\n", (void *) etext);
371 DPRINTF("sdata = %p\n", (void *) sdata);
372 DPRINTF("edata = %p\n", (void *) edata);
373 DPRINTF("sconst = %p\n", (void *) sconst);
374 DPRINTF("econst = %p\n", (void *) econst);
375 DPRINTF("kernel_top = %p\n", (void *) &last_kernel_symbol);
376
377 vm_kernel_base = sHIB;
378 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
379 vm_kernel_stext = stext;
380 vm_kernel_etext = etext;
381 vm_prelink_stext = segPRELINKTEXTB;
382 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
383 vm_prelink_sinfo = segPRELINKINFOB;
384 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
385 vm_slinkedit = segLINKB;
386 vm_elinkedit = segLINKB + segSizeLINK;
387 vm_kernel_slid_base = vm_kext_base + vm_kernel_slide;
388 vm_kernel_slid_top = vm_prelink_einfo;
389
390 vm_set_page_size();
391
392 /*
393 * Compute the memory size.
394 */
395
396 avail_remaining = 0;
397 avail_end = 0;
398 pmptr = pmap_memory_regions;
399 prev_pmptr = 0;
400 pmap_memory_region_count = pmap_memory_region_current = 0;
401 fap = (ppnum_t) i386_btop(first_avail);
402
403 maddr = ml_static_ptovirt((vm_offset_t)args->MemoryMap);
404 mptr = (EfiMemoryRange *)maddr;
405 if (args->MemoryMapDescriptorSize == 0) {
406 panic("Invalid memory map descriptor size");
407 }
408 msize = args->MemoryMapDescriptorSize;
409 mcount = args->MemoryMapSize / msize;
410
411 #define FOURGIG 0x0000000100000000ULL
412 #define ONEGIG 0x0000000040000000ULL
413
414 for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
415 ppnum_t base, top;
416 uint64_t region_bytes = 0;
417
418 if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
419 kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
420 break;
421 }
422 base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
423 top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
424
425 if (base == 0) {
426 /*
427 * Avoid having to deal with the edge case of the
428 * very first possible physical page and the roll-over
429 * to -1; just ignore that page.
430 */
431 kprintf("WARNING: ignoring first page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top);
432 base++;
433 }
434 if (top + 1 == 0) {
435 /*
436 * Avoid having to deal with the edge case of the
437 * very last possible physical page and the roll-over
438 * to 0; just ignore that page.
439 */
440 kprintf("WARNING: ignoring last page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top);
441 top--;
442 }
443 if (top < base) {
444 /*
445 * That was the only page in that region, so
446 * ignore the whole region.
447 */
448 continue;
449 }
450
451 #if MR_RSV_TEST
452 static uint32_t nmr = 0;
453 if ((base > 0x20000) && (nmr++ < 4)) {
454 mptr->Attribute |= EFI_MEMORY_KERN_RESERVED;
455 }
456 #endif
457 region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
458 pmap_type = mptr->Type;
459
460 switch (mptr->Type) {
461 case kEfiLoaderCode:
462 case kEfiLoaderData:
463 case kEfiBootServicesCode:
464 case kEfiBootServicesData:
465 case kEfiConventionalMemory:
466 /*
467 * Consolidate usable memory types into one.
468 */
469 pmap_type = kEfiConventionalMemory;
470 sane_size += region_bytes;
471 firmware_Conventional_bytes += region_bytes;
472 break;
473 /*
474 * sane_size should reflect the total amount of physical
475 * RAM in the system, not just the amount that is
476 * available for the OS to use.
477 * We now get this value from SMBIOS tables
478 * rather than reverse engineering the memory map.
479 * But the legacy computation of "sane_size" is kept
480 * for diagnostic information.
481 */
482
483 case kEfiRuntimeServicesCode:
484 case kEfiRuntimeServicesData:
485 firmware_RuntimeServices_bytes += region_bytes;
486 sane_size += region_bytes;
487 break;
488 case kEfiACPIReclaimMemory:
489 firmware_ACPIReclaim_bytes += region_bytes;
490 sane_size += region_bytes;
491 break;
492 case kEfiACPIMemoryNVS:
493 firmware_ACPINVS_bytes += region_bytes;
494 sane_size += region_bytes;
495 break;
496 case kEfiPalCode:
497 firmware_PalCode_bytes += region_bytes;
498 sane_size += region_bytes;
499 break;
500
501 case kEfiReservedMemoryType:
502 firmware_Reserved_bytes += region_bytes;
503 break;
504 case kEfiUnusableMemory:
505 firmware_Unusable_bytes += region_bytes;
506 break;
507 case kEfiMemoryMappedIO:
508 case kEfiMemoryMappedIOPortSpace:
509 firmware_MMIO_bytes += region_bytes;
510 break;
511 default:
512 firmware_other_bytes += region_bytes;
513 break;
514 }
515
516 DPRINTF("EFI region %d: type %u/%d, base 0x%x, top 0x%x %s\n",
517 i, mptr->Type, pmap_type, base, top,
518 (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)? "RESERVED" :
519 (mptr->Attribute & EFI_MEMORY_RUNTIME)? "RUNTIME" : "");
520
521 if (maxpg) {
522 if (base >= maxpg) {
523 break;
524 }
525 top = (top > maxpg) ? maxpg : top;
526 }
527
528 /*
529 * handle each region
530 */
531 if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
532 pmap_type != kEfiConventionalMemory) {
533 prev_pmptr = 0;
534 continue;
535 } else {
536 /*
537 * Usable memory region
538 */
539 if (top < I386_LOWMEM_RESERVED ||
540 !pal_is_usable_memory(base, top)) {
541 prev_pmptr = 0;
542 continue;
543 }
544 /*
545 * A range may be marked with the
546 * EFI_MEMORY_KERN_RESERVED attribute
547 * on some systems, to indicate that the range
548 * must not be made available to devices.
549 */
550
551 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
552 if (++pmap_reserved_ranges > PMAP_MAX_RESERVED_RANGES) {
553 panic("Too many reserved ranges %u\n", pmap_reserved_ranges);
554 }
555 }
556
557 if (top < fap) {
558 /*
559 * entire range below first_avail
560 * salvage some low memory pages
561 * we use some very low memory at startup
562 * mark as already allocated here
563 */
564 if (base >= I386_LOWMEM_RESERVED) {
565 pmptr->base = base;
566 } else {
567 pmptr->base = I386_LOWMEM_RESERVED;
568 }
569
570 pmptr->end = top;
571
572
573 if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
574 (top < vm_kernel_base_page)) {
575 pmptr->alloc_up = pmptr->base;
576 pmptr->alloc_down = pmptr->end;
577 RESET_FRAG(pmptr);
578 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
579 } else {
580 /*
581 * mark as already mapped
582 */
583 pmptr->alloc_up = top + 1;
584 pmptr->alloc_down = top;
585 RESET_FRAG(pmptr);
586 }
587 pmptr->type = pmap_type;
588 pmptr->attribute = mptr->Attribute;
589 } else if ((base < fap) && (top > fap)) {
590 /*
591 * spans first_avail
592 * put mem below first avail in table but
593 * mark already allocated
594 */
595 pmptr->base = base;
596 pmptr->end = (fap - 1);
597 pmptr->alloc_up = pmptr->end + 1;
598 pmptr->alloc_down = pmptr->end;
599 RESET_FRAG(pmptr);
600 pmptr->type = pmap_type;
601 pmptr->attribute = mptr->Attribute;
602 /*
603 * we bump these here inline so the accounting
604 * below works correctly
605 */
606 pmptr++;
607 pmap_memory_region_count++;
608
609 pmptr->alloc_up = pmptr->base = fap;
610 pmptr->type = pmap_type;
611 pmptr->attribute = mptr->Attribute;
612 pmptr->alloc_down = pmptr->end = top;
613 RESET_FRAG(pmptr);
614
615 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
616 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
617 }
618 } else {
619 /*
620 * entire range usable
621 */
622 pmptr->alloc_up = pmptr->base = base;
623 pmptr->type = pmap_type;
624 pmptr->attribute = mptr->Attribute;
625 pmptr->alloc_down = pmptr->end = top;
626 RESET_FRAG(pmptr);
627 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) {
628 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count;
629 }
630 }
631
632 if (i386_ptob(pmptr->end) > avail_end) {
633 avail_end = i386_ptob(pmptr->end);
634 }
635
636 avail_remaining += (pmptr->end - pmptr->base);
637 coalescing_permitted = (prev_pmptr && (pmptr->attribute == prev_pmptr->attribute) && ((pmptr->attribute & EFI_MEMORY_KERN_RESERVED) == 0));
638 /*
639 * Consolidate contiguous memory regions, if possible
640 */
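/*
 * Regions are merged only when they are physically adjacent, share the
 * same type and attributes, are not kernel-reserved, and neither side
 * has already had pages carved out of it.
 */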
641 if (prev_pmptr &&
642 (pmptr->type == prev_pmptr->type) &&
643 (coalescing_permitted) &&
644 (pmptr->base == pmptr->alloc_up) &&
645 (prev_pmptr->end == prev_pmptr->alloc_down) &&
646 (pmptr->base == (prev_pmptr->end + 1))) {
647 prev_pmptr->end = pmptr->end;
648 prev_pmptr->alloc_down = pmptr->alloc_down;
649 RESET_FRAG(pmptr);
650 } else {
651 pmap_memory_region_count++;
652 prev_pmptr = pmptr;
653 pmptr++;
654 }
655 }
656 }
657
658 if (memmap) {
659 kprint_memmap(maddr, msize, mcount);
660 }
661
662 avail_start = first_avail;
663 mem_actual = args->PhysicalMemorySize;
664
665 /*
666 * For user-visible memory size, round up to 128 MB
667 * - accounting for the various stolen memory not reported by EFI.
668 * This is maintained for historical, comparison purposes but
669 * we now use the memory size reported by EFI/Booter.
670 */
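/*
 * Since 128 * MB is a power of two, adding (128 MB - 1) and masking
 * with ~(128 MB - 1) rounds up to the next 128 MB boundary; for
 * example 0x1f7500000 (8053 MB) becomes 0x1f8000000 (8064 MB).
 */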
671 sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));
672 if (sane_size != mem_actual) {
673 printf("mem_actual: 0x%llx\n legacy sane_size: 0x%llx\n",
674 mem_actual, sane_size);
675 }
676 sane_size = mem_actual;
677
678 /*
679 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 96GB for K64),
680 * unless overridden by the maxmem= boot-arg
681 * -- which is a non-zero maxmem argument to this function.
682 */
683 if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
684 maxmem = KERNEL_MAXMEM;
685 printf("Physical memory %lld bytes capped at %dGB\n",
686 sane_size, (uint32_t) (KERNEL_MAXMEM / GB));
687 }
688
689 /*
690 * if user set maxmem, reduce memory sizes
691 */
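/*
 * Walk the regions from the bottom, counting off the pages that will
 * still be used; the region holding the cutoff is truncated and any
 * regions above it are dropped from the table.
 */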
692 if ((maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
693 ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
694 ppnum_t highest_pn = 0;
695 ppnum_t cur_end = 0;
696 uint64_t pages_to_use;
697 unsigned cur_region = 0;
698
699 sane_size = maxmem;
700
701 if (avail_remaining > discarded_pages) {
702 avail_remaining -= discarded_pages;
703 } else {
704 avail_remaining = 0;
705 }
706
707 pages_to_use = avail_remaining;
708
709 while (cur_region < pmap_memory_region_count && pages_to_use) {
710 for (cur_end = pmap_memory_regions[cur_region].base;
711 cur_end < pmap_memory_regions[cur_region].end && pages_to_use;
712 cur_end++) {
713 if (cur_end > highest_pn) {
714 highest_pn = cur_end;
715 }
716 pages_to_use--;
717 }
718 if (pages_to_use == 0) {
719 pmap_memory_regions[cur_region].end = cur_end;
720 pmap_memory_regions[cur_region].alloc_down = cur_end;
721 RESET_FRAG(&pmap_memory_regions[cur_region]);
722 }
723
724 cur_region++;
725 }
726 pmap_memory_region_count = cur_region;
727
728 avail_end = i386_ptob(highest_pn + 1);
729 }
730
731 /*
732 * mem_size is only a 32-bit container... follow the PPC route
733 * and pin it to a 2 GB maximum
734 */
735 if (sane_size > (FOURGIG >> 1)) {
736 mem_size = (vm_size_t)(FOURGIG >> 1);
737 } else {
738 mem_size = (vm_size_t)sane_size;
739 }
740 max_mem = sane_size;
741
742 kprintf("Physical memory %llu MB\n", sane_size / MB);
743
744 max_valid_low_ppnum = (2 * GB) / PAGE_SIZE;
745
746 if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof(maxdmaaddr))) {
747 max_valid_dma_address = (uint64_t)4 * (uint64_t)GB;
748 } else {
749 max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;
750
751 if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum) {
752 max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE);
753 }
754 }
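/*
 * Size the pool of low physical pages (kept below max_valid_low_ppnum)
 * reserved for allocations that must stay below the maximum valid DMA
 * address, e.g. buffers for 32-bit DMA devices.  The reserve scales
 * with memory size, can be set with the maxloreserve boot-arg (in MB),
 * and may be grown by the mbuf cluster reserve.
 */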
755 if (avail_end >= max_valid_dma_address) {
756 if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof(maxloreserve))) {
757 if (sane_size >= (ONEGIG * 15)) {
758 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4;
759 } else if (sane_size >= (ONEGIG * 7)) {
760 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2;
761 } else {
762 maxloreserve = MAXLORESERVE / PAGE_SIZE;
763 }
764
765 #if SOCKETS
766 mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE;
767 #endif
768 } else {
769 maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE;
770 }
771
772 if (maxloreserve) {
773 vm_lopage_free_limit = maxloreserve;
774
775 if (mbuf_override == TRUE) {
776 vm_lopage_free_limit += mbuf_reserve;
777 vm_lopage_lowater = 0;
778 } else {
779 vm_lopage_lowater = vm_lopage_free_limit / 16;
780 }
781
782 vm_lopage_refill = TRUE;
783 vm_lopage_needed = TRUE;
784 }
785 }
786
787 /*
788 * Initialize kernel physical map.
789 * Kernel virtual address starts at VM_MIN_KERNEL_ADDRESS.
790 */
791 kprintf("avail_remaining = 0x%lx\n", (unsigned long)avail_remaining);
792 pmap_bootstrap(0, IA32e);
793 }
794
795
796 unsigned int
797 pmap_free_pages(void)
798 {
799 return (unsigned int)avail_remaining;
800 }
801
802 boolean_t pmap_next_page_reserved(ppnum_t *);
803
804 /*
805 * Pick a page from a "kernel private" reserved range; works around
806 * errata on some hardware. EFI marks pages which can't be used for
807 * certain kinds of I/O-ish activities as reserved. We reserve them for
808 * kernel internal usage and prevent them from ever going on regular
809 * free list.
810 */
811 boolean_t
812 pmap_next_page_reserved(
813 ppnum_t *pn)
814 {
815 uint32_t n;
816 pmap_memory_region_t *region;
817 uint32_t reserved_index;
818
819 if (pmap_reserved_ranges) {
820 for (n = 0; n < pmap_last_reserved_range_index; n++) {
821 reserved_index = pmap_reserved_range_indices[n];
822 region = &pmap_memory_regions[reserved_index];
823 if (region->alloc_up <= region->alloc_down) {
824 *pn = region->alloc_up++;
825 } else if (region->alloc_frag_up <= region->alloc_frag_down) {
826 *pn = region->alloc_frag_up++;
827 } else {
828 continue;
829 }
830 avail_remaining--;
831
832 if (*pn > max_ppnum) {
833 max_ppnum = *pn;
834 }
835
836 pmap_reserved_pages_allocated++;
837 #if DEBUG
838 if (region->alloc_up > region->alloc_down) {
839 kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute);
840 }
841 #endif
842 return TRUE;
843 }
844 }
845 return FALSE;
846 }
847
848 /*
849 * Return the highest large page available. Fails once there are no more large pages.
850 */
851 kern_return_t
852 pmap_next_page_large(
853 ppnum_t *pn)
854 {
855 int r;
856 pmap_memory_region_t *region;
857 ppnum_t frag_start;
858 ppnum_t lgpg;
859
860 if (avail_remaining < LG_PPNUM_PAGES) {
861 return KERN_FAILURE;
862 }
863
864 for (r = pmap_memory_region_count - 1; r >= 0; r--) {
865 region = &pmap_memory_regions[r];
866
867 /*
868 * First check if there is enough memory.
869 */
870 if (region->alloc_down < region->alloc_up ||
871 (region->alloc_down - region->alloc_up + 1) < LG_PPNUM_PAGES) {
872 continue;
873 }
874
875 /*
876 * Find the starting large page, creating a fragment if needed.
877 */
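/*
 * When alloc_down does not fall on a large page boundary, the trailing
 * pages above the last whole large page become the region's single
 * fragment (alloc_frag_up .. alloc_frag_down) and remain available for
 * later small-page allocations.
 */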
878 if ((region->alloc_down & LG_PPNUM_MASK) == LG_PPNUM_MASK) {
879 lgpg = (region->alloc_down & ~LG_PPNUM_MASK);
880 } else {
881 /* Can only have 1 fragment per region at a time */
882 if (region->alloc_frag_up <= region->alloc_frag_down) {
883 continue;
884 }
885
886 /* Check for enough room below any fragment. */
887 frag_start = (region->alloc_down & ~LG_PPNUM_MASK);
888 if (frag_start < region->alloc_up ||
889 frag_start - region->alloc_up < LG_PPNUM_PAGES) {
890 continue;
891 }
892
893 lgpg = frag_start - LG_PPNUM_PAGES;
894 region->alloc_frag_up = frag_start;
895 region->alloc_frag_down = region->alloc_down;
896 }
897
898 *pn = lgpg;
899 region->alloc_down = lgpg - 1;
900
901
902 avail_remaining -= LG_PPNUM_PAGES;
903 if (*pn + LG_PPNUM_MASK > max_ppnum) {
904 max_ppnum = *pn + LG_PPNUM_MASK;
905 }
906
907 return KERN_SUCCESS;
908 }
909 return KERN_FAILURE;
910 }
911
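/*
 * Return the next available page from highest memory for general use,
 * drawing from the kernel-reserved ranges first (unless the page might
 * later be freed), then from any large page fragment, and finally from
 * the top of the highest region that still has pages.
 */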
912 boolean_t
913 pmap_next_page_hi(
914 ppnum_t *pn,
915 boolean_t might_free)
916 {
917 pmap_memory_region_t *region;
918 int n;
919
920 if (!might_free && pmap_next_page_reserved(pn)) {
921 return TRUE;
922 }
923
924 if (avail_remaining) {
925 for (n = pmap_memory_region_count - 1; n >= 0; n--) {
926 region = &pmap_memory_regions[n];
927 if (region->alloc_frag_up <= region->alloc_frag_down) {
928 *pn = region->alloc_frag_down--;
929 } else if (region->alloc_down >= region->alloc_up) {
930 *pn = region->alloc_down--;
931 } else {
932 continue;
933 }
934
935 avail_remaining--;
936
937 if (*pn > max_ppnum) {
938 max_ppnum = *pn;
939 }
940
941 return TRUE;
942 }
943 }
944 return FALSE;
945 }
946
947 /*
948 * Record which high pages have been allocated so far,
949 * so that pmap_init() can mark them PMAP_NOENCRYPT, which
950 * makes hibernation faster.
951 *
952 * Because of the code in pmap_next_page_large(), we could
953 * theoretically have fragments in several regions.
954 * In practice that just doesn't happen. The last pmap region
955 * is normally the largest and will satisfy all pmap_next_hi/large()
956 * allocations. Since this information is used as an optimization
957 * and it's ok to be conservative, we'll just record the information
958 * for the final region.
959 */
960 void
961 pmap_hi_pages_done(void)
962 {
963 pmap_memory_region_t *r;
964
965 r = &pmap_memory_regions[pmap_memory_region_count - 1];
966 pmap_high_used_top = r->end;
967 if (r->alloc_frag_up <= r->alloc_frag_down) {
968 pmap_high_used_bottom = r->alloc_frag_down + 1;
969 pmap_middle_used_top = r->alloc_frag_up - 1;
970 if (r->alloc_up <= r->alloc_down) {
971 pmap_middle_used_bottom = r->alloc_down + 1;
972 } else {
973 pmap_high_used_bottom = r->base;
974 }
975 } else {
976 if (r->alloc_up <= r->alloc_down) {
977 pmap_high_used_bottom = r->alloc_down + 1;
978 } else {
979 pmap_high_used_bottom = r->base;
980 }
981 }
982 #if DEBUG || DEVELOPMENT
983 kprintf("pmap_high_used_top 0x%x\n", pmap_high_used_top);
984 kprintf("pmap_high_used_bottom 0x%x\n", pmap_high_used_bottom);
985 kprintf("pmap_middle_used_top 0x%x\n", pmap_middle_used_top);
986 kprintf("pmap_middle_used_bottom 0x%x\n", pmap_middle_used_bottom);
987 #endif
988 }
989
990 /*
991 * Return the next available page from lowest memory for general use.
992 */
993 boolean_t
994 pmap_next_page(
995 ppnum_t *pn)
996 {
997 pmap_memory_region_t *region;
998
999 if (avail_remaining) {
1000 while (pmap_memory_region_current < pmap_memory_region_count) {
1001 region = &pmap_memory_regions[pmap_memory_region_current];
1002 if (region->alloc_up <= region->alloc_down) {
1003 *pn = region->alloc_up++;
1004 } else if (region->alloc_frag_up <= region->alloc_frag_down) {
1005 *pn = region->alloc_frag_up++;
1006 } else {
1007 pmap_memory_region_current++;
1008 continue;
1009 }
1010 avail_remaining--;
1011
1012 if (*pn > max_ppnum) {
1013 max_ppnum = *pn;
1014 }
1015
1016 return TRUE;
1017 }
1018 }
1019 return FALSE;
1020 }
1021
1022
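/*
 * Return TRUE if the given physical page number falls within one of
 * the known physical memory regions.
 */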
1023 boolean_t
1024 pmap_valid_page(
1025 ppnum_t pn)
1026 {
1027 unsigned int i;
1028 pmap_memory_region_t *pmptr = pmap_memory_regions;
1029
1030 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
1031 if ((pn >= pmptr->base) && (pn <= pmptr->end)) {
1032 return TRUE;
1033 }
1034 }
1035 return FALSE;
1036 }