]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/i386_vm_init.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_vm_init.c
CommitLineData
55e303ae 1/*
b0d623f7 2 * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved.
55e303ae 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
55e303ae 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
55e303ae
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
55e303ae
A
57#include <platforms.h>
58#include <mach_kdb.h>
55e303ae
A
59
60#include <mach/i386/vm_param.h>
61
62#include <string.h>
63#include <mach/vm_param.h>
64#include <mach/vm_prot.h>
65#include <mach/machine.h>
66#include <mach/time_value.h>
55e303ae
A
67#include <kern/spl.h>
68#include <kern/assert.h>
69#include <kern/debug.h>
70#include <kern/misc_protos.h>
71#include <kern/cpu_data.h>
72#include <kern/processor.h>
73#include <vm/vm_page.h>
74#include <vm/pmap.h>
75#include <vm/vm_kern.h>
76#include <i386/pmap.h>
55e303ae 77#include <i386/misc_protos.h>
b0d623f7 78#include <i386/ipl.h>
91447636 79#include <i386/cpuid.h>
55e303ae 80#include <mach/thread_status.h>
0c530ab8 81#include <pexpert/i386/efi.h>
b0d623f7
A
82#include <i386/i386_lowmem.h>
83#include <i386/lowglobals.h>
84
85#include <mach-o/loader.h>
86#include <libkern/kernel_mach_header.h>
87
88#if DEBUG
89#define DBG(x...) kprintf("DBG: " x)
90#define PRINT_PMAP_MEMORY_TABLE
91#else
92#define DBG(x...)
93#endif
55e303ae
A
94
/* Memory size as a vm_size_t; i386_vm_init() pins it to at most 2 GB. */
95vm_size_t mem_size = 0;
55e303ae 96vm_offset_t first_avail = 0;/* first after page tables */
55e303ae 97
0c530ab8
A
98uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem */
/* sane_size as summed from the EFI memory map, before rounding up to 128 MB. */
99uint64_t mem_actual;
100uint64_t sane_size = 0; /* Memory size to use for defaults calculations */
101
0c530ab8
A
/*
 * Base size in bytes of the low-memory reserve; i386_vm_init() converts it
 * to pages and doubles/quadruples it when sane_size is >= 7 GB / >= 15 GB.
 */
102#define MAXLORESERVE ( 32 * 1024 * 1024)
103
0b4c1975
A
/*
 * Page-number watermarks maintained by the boot-time page allocators below:
 * max_ppnum and lowest_lo are updated by pmap_next_page(),
 * pmap_next_page_hi() and pmap_next_page_reserved();
 * lowest_hi and highest_hi are updated only by pmap_next_page_hi().
 */
104ppnum_t max_ppnum = 0;
105ppnum_t lowest_lo = 0;
106ppnum_t lowest_hi = 0;
107ppnum_t highest_hi = 0;
0c530ab8 108
060df5ea
A
/* Number of pages handed out by pmap_next_page_reserved(). */
109uint32_t pmap_reserved_pages_allocated = 0;
/*
 * Index in pmap_memory_regions[] of the last EFI_MEMORY_KERN_RESERVED range
 * found below the kernel image base; 0xFFFFFFFF means none was recorded.
 */
110uint32_t pmap_last_reserved_range = 0xFFFFFFFF;
/* Count of EFI_MEMORY_KERN_RESERVED ranges seen below first_avail. */
111uint32_t pmap_reserved_ranges = 0;
112
0b4c1975 113extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
0c530ab8 114
/*
 * avail_start is set to first_avail; avail_end is the byte address derived
 * from the highest usable page found in the memory map.
 */
2d21ac55 115pmap_paddr_t avail_start, avail_end;
55e303ae 116vm_offset_t virtual_avail, virtual_end;
/* Pages still unallocated in pmap_memory_regions[]; decremented per page handed out. */
0c530ab8 117static pmap_paddr_t avail_remaining;
91447636 118vm_offset_t static_memory_end = 0;
55e303ae 119
/*
 * Segment boundaries taken from the Mach-O header in i386_vm_init():
 * __HIB start/end, __TEXT start/end, __DATA start/end.
 * NOTE(review): `end` is not assigned anywhere in the visible code.
 */
b0d623f7 120vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end;
0c530ab8 121
b0d623f7
A
/* TRUE: kernel text stays mapped with 4K pages; cleared in pmap_lowmem_finalize() when text/data are 2MB-aligned. */
122boolean_t kernel_text_ps_4K = TRUE;
/* TRUE: kernel text is write-protected; may be overridden by the "wpkernel" boot-arg. */
123boolean_t wpkernel = TRUE;
0c530ab8 124
b0d623f7 125extern void *KPTphys;
0c530ab8 126
b0d623f7
A
127/*
128 * _mh_execute_header is the mach_header for the currently executing kernel
129 */
/* Segment base address / size pairs filled in by getsegdatafromheader() in i386_vm_init(). */
130void *sectTEXTB; unsigned long sectSizeTEXT;
131void *sectDATAB; unsigned long sectSizeDATA;
132void *sectOBJCB; unsigned long sectSizeOBJC;
133void *sectLINKB; unsigned long sectSizeLINK;
134void *sectPRELINKB; unsigned long sectSizePRELINK;
135void *sectHIBB; unsigned long sectSizeHIB;
136void *sectINITPTB; unsigned long sectSizeINITPT;
b0d623f7
A
137
/* Per-EFI-memory-type byte totals accumulated while walking the firmware memory map. */
138extern uint64_t firmware_Conventional_bytes;
139extern uint64_t firmware_RuntimeServices_bytes;
140extern uint64_t firmware_ACPIReclaim_bytes;
141extern uint64_t firmware_ACPINVS_bytes;
142extern uint64_t firmware_PalCode_bytes;
143extern uint64_t firmware_Reserved_bytes;
144extern uint64_t firmware_Unusable_bytes;
145extern uint64_t firmware_other_bytes;
146uint64_t firmware_MMIO_bytes;
55e303ae
A
147
148/*
149 * Basic VM initialization.
150 */
151void
0c530ab8
A
152i386_vm_init(uint64_t maxmem,
153 boolean_t IA32e,
154 boot_args *args)
55e303ae 155{
91447636 156 pmap_memory_region_t *pmptr;
0c530ab8
A
157 pmap_memory_region_t *prev_pmptr;
158 EfiMemoryRange *mptr;
159 unsigned int mcount;
160 unsigned int msize;
91447636
A
161 ppnum_t fap;
162 unsigned int i;
0c530ab8
A
163 unsigned int safeboot;
164 ppnum_t maxpg = 0;
165 uint32_t pmap_type;
0c530ab8 166 uint32_t maxdmaaddr;
55e303ae 167
0c530ab8
A
168 /*
169 * Now retrieve addresses for end, edata, and etext
55e303ae
A
170 * from MACH-O headers.
171 */
172
173 sectTEXTB = (void *) getsegdatafromheader(
174 &_mh_execute_header, "__TEXT", &sectSizeTEXT);
175 sectDATAB = (void *) getsegdatafromheader(
176 &_mh_execute_header, "__DATA", &sectSizeDATA);
177 sectOBJCB = (void *) getsegdatafromheader(
178 &_mh_execute_header, "__OBJC", &sectSizeOBJC);
179 sectLINKB = (void *) getsegdatafromheader(
180 &_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
91447636
A
181 sectHIBB = (void *)getsegdatafromheader(
182 &_mh_execute_header, "__HIB", &sectSizeHIB);
b0d623f7
A
183 sectINITPTB = (void *)getsegdatafromheader(
184 &_mh_execute_header, "__INITPT", &sectSizeINITPT);
55e303ae 185 sectPRELINKB = (void *) getsegdatafromheader(
b0d623f7 186 &_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);
55e303ae 187
b0d623f7
A
188 sHIB = (vm_offset_t) sectHIBB;
189 eHIB = (vm_offset_t) sectHIBB + sectSizeHIB;
190 /* Zero-padded from ehib to stext if text is 2M-aligned */
191 stext = (vm_offset_t) sectTEXTB;
55e303ae 192 etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
b0d623f7
A
193 /* Zero-padded from etext to sdata if text is 2M-aligned */
194 sdata = (vm_offset_t) sectDATAB;
55e303ae 195 edata = (vm_offset_t) sectDATAB + sectSizeDATA;
55e303ae 196
b0d623f7
A
197#if DEBUG
198 kprintf("sectTEXTB = %p\n", sectTEXTB);
199 kprintf("sectDATAB = %p\n", sectDATAB);
200 kprintf("sectOBJCB = %p\n", sectOBJCB);
201 kprintf("sectLINKB = %p\n", sectLINKB);
202 kprintf("sectHIBB = %p\n", sectHIBB);
203 kprintf("sectPRELINKB = %p\n", sectPRELINKB);
204 kprintf("eHIB = %p\n", (void *) eHIB);
205 kprintf("stext = %p\n", (void *) stext);
206 kprintf("etext = %p\n", (void *) etext);
207 kprintf("sdata = %p\n", (void *) sdata);
208 kprintf("edata = %p\n", (void *) edata);
209#endif
210
55e303ae
A
211 vm_set_page_size();
212
55e303ae
A
213 /*
214 * Compute the memory size.
215 */
216
593a1d5f 217 if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
0c530ab8
A
218 maxpg = 1 << (32 - I386_PGSHIFT);
219 }
91447636
A
220 avail_remaining = 0;
221 avail_end = 0;
222 pmptr = pmap_memory_regions;
0c530ab8 223 prev_pmptr = 0;
91447636
A
224 pmap_memory_region_count = pmap_memory_region_current = 0;
225 fap = (ppnum_t) i386_btop(first_avail);
91447636 226
b0d623f7 227 mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap);
0c530ab8
A
228 if (args->MemoryMapDescriptorSize == 0)
229 panic("Invalid memory map descriptor size");
230 msize = args->MemoryMapDescriptorSize;
231 mcount = args->MemoryMapSize / msize;
232
6601e61a 233#define FOURGIG 0x0000000100000000ULL
0b4c1975 234#define ONEGIG 0x0000000040000000ULL
0c530ab8
A
235
236 for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
237 ppnum_t base, top;
b0d623f7 238 uint64_t region_bytes = 0;
0c530ab8
A
239
240 if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
241 kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
242 break;
243 }
244 base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
b0d623f7
A
245 top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
246 region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
247 pmap_type = mptr->Type;
0c530ab8
A
248
249 switch (mptr->Type) {
250 case kEfiLoaderCode:
251 case kEfiLoaderData:
252 case kEfiBootServicesCode:
253 case kEfiBootServicesData:
254 case kEfiConventionalMemory:
255 /*
256 * Consolidate usable memory types into one.
257 */
258 pmap_type = kEfiConventionalMemory;
b0d623f7
A
259 sane_size += region_bytes;
260 firmware_Conventional_bytes += region_bytes;
0c530ab8 261 break;
b0d623f7
A
262 /*
263 * sane_size should reflect the total amount of physical
264 * RAM in the system, not just the amount that is
265 * available for the OS to use.
266 * FIXME:Consider deriving this value from SMBIOS tables
267 * rather than reverse engineering the memory map.
268 * Alternatively, see
269 * <rdar://problem/4642773> Memory map should
270 * describe all memory
271 * Firmware on some systems guarantees that the memory
272 * map is complete via the "RomReservedMemoryTracked"
273 * feature field--consult that where possible to
274 * avoid the "round up to 128M" workaround below.
275 */
0c530ab8
A
276
277 case kEfiRuntimeServicesCode:
278 case kEfiRuntimeServicesData:
b0d623f7
A
279 firmware_RuntimeServices_bytes += region_bytes;
280 sane_size += region_bytes;
281 break;
0c530ab8 282 case kEfiACPIReclaimMemory:
b0d623f7
A
283 firmware_ACPIReclaim_bytes += region_bytes;
284 sane_size += region_bytes;
285 break;
0c530ab8 286 case kEfiACPIMemoryNVS:
b0d623f7
A
287 firmware_ACPINVS_bytes += region_bytes;
288 sane_size += region_bytes;
289 break;
0c530ab8 290 case kEfiPalCode:
b0d623f7
A
291 firmware_PalCode_bytes += region_bytes;
292 sane_size += region_bytes;
293 break;
294
b0d623f7
A
295 case kEfiReservedMemoryType:
296 firmware_Reserved_bytes += region_bytes;
297 break;
0c530ab8 298 case kEfiUnusableMemory:
b0d623f7
A
299 firmware_Unusable_bytes += region_bytes;
300 break;
0c530ab8
A
301 case kEfiMemoryMappedIO:
302 case kEfiMemoryMappedIOPortSpace:
b0d623f7
A
303 firmware_MMIO_bytes += region_bytes;
304 break;
0c530ab8 305 default:
b0d623f7
A
306 firmware_other_bytes += region_bytes;
307 break;
0c530ab8
A
308 }
309
b0d623f7
A
310 kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
311 i, mptr->Type, pmap_type, base, top);
0c530ab8
A
312
313 if (maxpg) {
314 if (base >= maxpg)
315 break;
316 top = (top > maxpg) ? maxpg : top;
317 }
318
319 /*
320 * handle each region
321 */
2d21ac55
A
322 if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
323 pmap_type != kEfiConventionalMemory) {
0c530ab8
A
324 prev_pmptr = 0;
325 continue;
326 } else {
327 /*
328 * Usable memory region
329 */
330 if (top < I386_LOWMEM_RESERVED) {
331 prev_pmptr = 0;
332 continue;
333 }
334 if (top < fap) {
335 /*
336 * entire range below first_avail
337 * salvage some low memory pages
338 * we use some very low memory at startup
339 * mark as already allocated here
340 */
341 if (base >= I386_LOWMEM_RESERVED)
342 pmptr->base = base;
343 else
344 pmptr->base = I386_LOWMEM_RESERVED;
060df5ea
A
345
346 pmptr->end = top;
347
0c530ab8 348 /*
060df5ea
A
349 * A range may be marked with with the
350 * EFI_MEMORY_KERN_RESERVED attribute
351 * on some systems, to indicate that the range
352 * must not be made available to devices.
353 * Simplifying assumptions are made regarding
354 * the placement of the range.
0c530ab8 355 */
060df5ea
A
356 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)
357 pmap_reserved_ranges++;
358
359 if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
360 (top < I386_KERNEL_IMAGE_BASE_PAGE)) {
361 pmptr->alloc = pmptr->base;
362 pmap_last_reserved_range = pmap_memory_region_count;
363 }
364 else {
365 /*
366 * mark as already mapped
367 */
368 pmptr->alloc = top;
369 }
0c530ab8
A
370 pmptr->type = pmap_type;
371 }
372 else if ( (base < fap) && (top > fap) ) {
373 /*
374 * spans first_avail
375 * put mem below first avail in table but
376 * mark already allocated
377 */
378 pmptr->base = base;
379 pmptr->alloc = pmptr->end = (fap - 1);
380 pmptr->type = pmap_type;
381 /*
382 * we bump these here inline so the accounting
383 * below works correctly
384 */
385 pmptr++;
386 pmap_memory_region_count++;
387 pmptr->alloc = pmptr->base = fap;
388 pmptr->type = pmap_type;
389 pmptr->end = top;
390 }
391 else {
392 /*
393 * entire range useable
394 */
395 pmptr->alloc = pmptr->base = base;
396 pmptr->type = pmap_type;
397 pmptr->end = top;
398 }
399
400 if (i386_ptob(pmptr->end) > avail_end )
401 avail_end = i386_ptob(pmptr->end);
402
403 avail_remaining += (pmptr->end - pmptr->base);
404
405 /*
406 * Consolidate contiguous memory regions, if possible
407 */
408 if (prev_pmptr &&
409 pmptr->type == prev_pmptr->type &&
410 pmptr->base == pmptr->alloc &&
411 pmptr->base == (prev_pmptr->end + 1)) {
412 prev_pmptr->end = pmptr->end;
413 } else {
414 pmap_memory_region_count++;
415 prev_pmptr = pmptr;
416 pmptr++;
417 }
418 }
6601e61a 419 }
0c530ab8 420
91447636 421#ifdef PRINT_PMAP_MEMORY_TABLE
0c530ab8
A
422 {
423 unsigned int j;
424 pmap_memory_region_t *p = pmap_memory_regions;
b0d623f7
A
425 addr64_t region_start, region_end;
426 addr64_t efi_start, efi_end;
0c530ab8 427 for (j=0;j<pmap_memory_region_count;j++, p++) {
b0d623f7
A
428 kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n",
429 j, p->type,
430 (addr64_t) p->base << I386_PGSHIFT,
431 (addr64_t) p->alloc << I386_PGSHIFT,
432 (addr64_t) p->end << I386_PGSHIFT);
433 region_start = (addr64_t) p->base << I386_PGSHIFT;
434 region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
435 mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap);
0c530ab8
A
436 for (i=0; i<mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
437 if (mptr->Type != kEfiLoaderCode &&
438 mptr->Type != kEfiLoaderData &&
439 mptr->Type != kEfiBootServicesCode &&
440 mptr->Type != kEfiBootServicesData &&
441 mptr->Type != kEfiConventionalMemory) {
b0d623f7 442 efi_start = (addr64_t)mptr->PhysicalStart;
0c530ab8
A
443 efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
444 if ((efi_start >= region_start && efi_start <= region_end) ||
445 (efi_end >= region_start && efi_end <= region_end)) {
446 kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
447 }
b0d623f7 448 }
0c530ab8 449 }
b0d623f7 450 }
0c530ab8 451 }
91447636 452#endif
55e303ae 453
91447636 454 avail_start = first_avail;
0c530ab8 455 mem_actual = sane_size;
4452a7af 456
0c530ab8
A
457 /*
458 * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory
459 * not reported by EFI.
460 */
461
b0d623f7 462 sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));
0c530ab8 463
c910b4d9 464 /*
b0d623f7 465 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64).
c910b4d9
A
466 * Unless overriden by the maxmem= boot-arg
467 * -- which is a non-zero maxmem argument to this function.
468 */
b0d623f7
A
469 if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
470 maxmem = KERNEL_MAXMEM;
471 printf("Physical memory %lld bytes capped at %dGB\n",
472 sane_size, (uint32_t) (KERNEL_MAXMEM/GB));
c910b4d9 473 }
b0d623f7 474
0c530ab8
A
475 /*
476 * if user set maxmem, reduce memory sizes
477 */
478 if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
b0d623f7 479 ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
2d21ac55
A
480 ppnum_t highest_pn = 0;
481 ppnum_t cur_alloc = 0;
482 uint64_t pages_to_use;
483 unsigned cur_region = 0;
484
485 sane_size = maxmem;
486
0c530ab8
A
487 if (avail_remaining > discarded_pages)
488 avail_remaining -= discarded_pages;
489 else
490 avail_remaining = 0;
2d21ac55
A
491
492 pages_to_use = avail_remaining;
493
494 while (cur_region < pmap_memory_region_count && pages_to_use) {
495 for (cur_alloc = pmap_memory_regions[cur_region].alloc;
496 cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use;
497 cur_alloc++) {
498 if (cur_alloc > highest_pn)
499 highest_pn = cur_alloc;
500 pages_to_use--;
501 }
502 if (pages_to_use == 0)
503 pmap_memory_regions[cur_region].end = cur_alloc;
504
505 cur_region++;
506 }
507 pmap_memory_region_count = cur_region;
508
509 avail_end = i386_ptob(highest_pn + 1);
55e303ae 510 }
4452a7af 511
0c530ab8
A
512 /*
513 * mem_size is only a 32 bit container... follow the PPC route
514 * and pin it to a 2 Gbyte maximum
515 */
516 if (sane_size > (FOURGIG >> 1))
517 mem_size = (vm_size_t)(FOURGIG >> 1);
518 else
519 mem_size = (vm_size_t)sane_size;
6601e61a 520 max_mem = sane_size;
5d5c5d0d 521
b0d623f7 522 kprintf("Physical memory %llu MB\n", sane_size/MB);
4452a7af 523
0b4c1975 524 max_valid_low_ppnum = (2 * GB) / PAGE_SIZE;
0c530ab8 525
0b4c1975
A
526 if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) {
527 max_valid_dma_address = (uint64_t)4 * (uint64_t)GB;
528 } else {
529 max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;
21362eb3 530
0b4c1975
A
531 if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum)
532 max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE);
533 }
0c530ab8 534 if (avail_end >= max_valid_dma_address) {
0b4c1975
A
535 uint32_t maxloreserve;
536 uint32_t mbuf_reserve = 0;
537 boolean_t mbuf_override = FALSE;
538
539 if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) {
540
541 if (sane_size >= (ONEGIG * 15))
542 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4;
543 else if (sane_size >= (ONEGIG * 7))
544 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2;
545 else
546 maxloreserve = MAXLORESERVE / PAGE_SIZE;
547
548 mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE;
549 } else
550 maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE;
551
552 if (maxloreserve) {
553 vm_lopage_free_limit = maxloreserve;
554
555 if (mbuf_override == TRUE) {
556 vm_lopage_free_limit += mbuf_reserve;
557 vm_lopage_lowater = 0;
558 } else
559 vm_lopage_lowater = vm_lopage_free_limit / 16;
560
561 vm_lopage_refill = TRUE;
562 vm_lopage_needed = TRUE;
563 }
0c530ab8 564 }
0c530ab8
A
565 /*
566 * Initialize kernel physical map.
567 * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
568 */
569 pmap_bootstrap(0, IA32e);
6601e61a
A
570}
571
0c530ab8 572
55e303ae
A
573unsigned int
574pmap_free_pages(void)
575{
b0d623f7 576 return (unsigned int)avail_remaining;
55e303ae
A
577}
578
060df5ea
A
579boolean_t pmap_next_page_reserved(ppnum_t *);
580
581/*
582 * Pick a page from a "kernel private" reserved range; works around
583 * errata on some hardware.
584 */
585boolean_t
586pmap_next_page_reserved(ppnum_t *pn) {
587 if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) {
588 uint32_t n;
589 pmap_memory_region_t *region;
590 for (n = 0; n <= pmap_last_reserved_range; n++) {
591 region = &pmap_memory_regions[n];
592 if (region->alloc < region->end) {
593 *pn = region->alloc++;
594 avail_remaining--;
595
596 if (*pn > max_ppnum)
597 max_ppnum = *pn;
598
599 if (lowest_lo == 0 || *pn < lowest_lo)
600 lowest_lo = *pn;
601
602 pmap_reserved_pages_allocated++;
603 return TRUE;
604 }
605 }
606 }
607 return FALSE;
608}
609
610
b0d623f7 611boolean_t
0b4c1975
A
612pmap_next_page_hi(
613 ppnum_t *pn)
b0d623f7 614{
0b4c1975
A
615 pmap_memory_region_t *region;
616 int n;
617
060df5ea
A
618 if (pmap_next_page_reserved(pn))
619 return TRUE;
620
0b4c1975
A
621 if (avail_remaining) {
622 for (n = pmap_memory_region_count - 1; n >= 0; n--) {
623 region = &pmap_memory_regions[n];
624
625 if (region->alloc != region->end) {
626 *pn = region->alloc++;
627 avail_remaining--;
628
629 if (*pn > max_ppnum)
630 max_ppnum = *pn;
631
632 if (lowest_lo == 0 || *pn < lowest_lo)
633 lowest_lo = *pn;
634
635 if (lowest_hi == 0 || *pn < lowest_hi)
636 lowest_hi = *pn;
637
638 if (*pn > highest_hi)
639 highest_hi = *pn;
640
641 return TRUE;
642 }
b0d623f7
A
643 }
644 }
0b4c1975 645 return FALSE;
b0d623f7 646}
0b4c1975 647
0c530ab8 648
55e303ae
A
649boolean_t
650pmap_next_page(
651 ppnum_t *pn)
652{
0c530ab8 653 if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) {
b0d623f7
A
654 if (pmap_memory_regions[pmap_memory_region_current].alloc ==
655 pmap_memory_regions[pmap_memory_region_current].end) {
656 pmap_memory_region_current++;
0c530ab8
A
657 continue;
658 }
659 *pn = pmap_memory_regions[pmap_memory_region_current].alloc++;
660 avail_remaining--;
661
0b4c1975
A
662 if (*pn > max_ppnum)
663 max_ppnum = *pn;
664
665 if (lowest_lo == 0 || *pn < lowest_lo)
666 lowest_lo = *pn;
667
0c530ab8 668 return TRUE;
91447636
A
669 }
670 return FALSE;
55e303ae
A
671}
672
0c530ab8 673
55e303ae
A
674boolean_t
675pmap_valid_page(
91447636 676 ppnum_t pn)
55e303ae 677{
0c530ab8
A
678 unsigned int i;
679 pmap_memory_region_t *pmptr = pmap_memory_regions;
680
0c530ab8 681 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
2d21ac55 682 if ( (pn >= pmptr->base) && (pn <= pmptr->end) )
0c530ab8
A
683 return TRUE;
684 }
685 return FALSE;
686}
687
b0d623f7
A
688/*
689 * Called once VM is fully initialized so that we can release unused
690 * sections of low memory to the general pool.
691 * Also complete the set-up of identity-mapped sections of the kernel:
692 * 1) write-protect kernel text
693 * 2) map kernel text using large pages if possible
694 * 3) read and write-protect page zero (for K32)
695 * 4) map the global page at the appropriate virtual address.
696 *
697 * Use of large pages
698 * ------------------
699 * To effectively map and write-protect all kernel text pages, the text
700 * must be 2M-aligned at the base, and the data section above must also be
701 * 2M-aligned. That is, there's padding below and above. This is achieved
702 * through linker directives. Large pages are used only if this alignment
703 * exists (and not overridden by the -kernel_text_ps_4K boot-arg). The
704 * memory layout is:
705 *
706 * : :
707 * | __DATA |
708 * sdata: ================== 2Meg
709 * | |
710 * | zero-padding |
711 * | |
712 * etext: ------------------
713 * | |
714 * : :
715 * | |
716 * | __TEXT |
717 * | |
718 * : :
719 * | |
720 * stext: ================== 2Meg
721 * | |
722 * | zero-padding |
723 * | |
724 * eHIB: ------------------
725 * | __HIB |
726 * : :
727 *
728 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
729 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
730 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
731 * The now unused level-1 PTE pages are also freed.
732 */
733void
734pmap_lowmem_finalize(void)
735{
736 spl_t spl;
737 int i;
738
739 /* Check the kernel is linked at the expected base address */
740 if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
741 I386_KERNEL_IMAGE_BASE_PAGE)
742 panic("pmap_lowmem_finalize() unexpected kernel base address");
743
744 /*
745 * Free all pages in pmap regions below the base:
746 * rdar://6332712
747 * We can't free all the pages to VM that EFI reports available.
748 * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
749 * There's also a size miscalculation here: pend is one page less
750 * than it should be but this is not fixed to be backwards
751 * compatible.
752 * Due to this current EFI limitation, we take only the first
753 * entry in the memory region table. However, the loop is retained
754 * (with the intended termination criteria commented out) in the
755 * hope that some day we can free all low-memory ranges.
060df5ea
A
756 * This loop assumes the first range does not span the kernel
757 * image base & avail_start. We skip this process on systems
758 * with "kernel reserved" ranges, as the low memory reclamation
759 * is handled in the initial memory map processing loop on
760 * such systems.
b0d623f7
A
761 */
762 for (i = 0;
763// pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
060df5ea 764 i < 1 && (pmap_reserved_ranges == 0);
b0d623f7
A
765 i++) {
766 vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
767 vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
768// vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1);
769
770 DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
771 (void *) ml_static_ptovirt(pbase),
772 (void *) (pend - pbase), i);
773 ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
774 }
775
776 /*
777 * If text and data are both 2MB-aligned,
778 * we can map text with large-pages,
779 * unless the -kernel_text_ps_4K boot-arg overrides.
780 */
781 if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
782 kprintf("Kernel text is 2MB aligned");
783 kernel_text_ps_4K = FALSE;
784 if (PE_parse_boot_argn("-kernel_text_ps_4K",
785 &kernel_text_ps_4K,
786 sizeof (kernel_text_ps_4K)))
787 kprintf(" but will be mapped with 4K pages\n");
788 else
789 kprintf(" and will be mapped with 2M pages\n");
790 }
791
792 (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
793 if (wpkernel)
794 kprintf("Kernel text %p-%p to be write-protected\n",
795 (void *) stext, (void *) etext);
796
797 spl = splhigh();
798
799 /*
800 * Scan over text if mappings are to be changed:
801 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
802 * - Change to large-pages if possible and not overriden.
803 */
804 if (kernel_text_ps_4K && wpkernel) {
805 vm_offset_t myva;
806 for (myva = stext; myva < etext; myva += PAGE_SIZE) {
807 pt_entry_t *ptep;
808
809 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
810 if (ptep)
811 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
812 }
813 }
814
815 if (!kernel_text_ps_4K) {
816 vm_offset_t myva;
817
818 /*
819 * Release zero-filled page padding used for 2M-alignment.
820 */
821 DBG("ml_static_mfree(%p,%p) for padding below text\n",
822 (void *) eHIB, (void *) (stext - eHIB));
823 ml_static_mfree(eHIB, stext - eHIB);
824 DBG("ml_static_mfree(%p,%p) for padding above text\n",
825 (void *) etext, (void *) (sdata - etext));
826 ml_static_mfree(etext, sdata - etext);
827
828 /*
829 * Coalesce text pages into large pages.
830 */
831 for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
832 pt_entry_t *ptep;
833 vm_offset_t pte_phys;
834 pt_entry_t *pdep;
835 pt_entry_t pde;
836
837 pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
838 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
839 DBG("myva: %p pdep: %p ptep: %p\n",
840 (void *) myva, (void *) pdep, (void *) ptep);
841 if ((*ptep & INTEL_PTE_VALID) == 0)
842 continue;
843 pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
844 pde = *pdep & PTMASK; /* page attributes from pde */
845 pde |= INTEL_PTE_PS; /* make it a 2M entry */
846 pde |= pte_phys; /* take page frame from pte */
847
848 if (wpkernel)
849 pde &= ~INTEL_PTE_RW;
850 DBG("pmap_store_pte(%p,0x%llx)\n",
851 (void *)pdep, pde);
852 pmap_store_pte(pdep, pde);
853
854 /*
855 * Free the now-unused level-1 pte.
856 * Note: ptep is a virtual address to the pte in the
857 * recursive map. We can't use this address to free
858 * the page. Instead we need to compute its address
859 * in the Idle PTEs in "low memory".
860 */
861 vm_offset_t vm_ptep = (vm_offset_t) KPTphys
862 + (pte_phys >> PTPGSHIFT);
863 DBG("ml_static_mfree(%p,0x%x) for pte\n",
864 (void *) vm_ptep, PAGE_SIZE);
865 ml_static_mfree(vm_ptep, PAGE_SIZE);
866 }
867
868 /* Change variable read by sysctl machdep.pmap */
869 pmap_kernel_text_ps = I386_LPGBYTES;
870 }
871
872#if defined(__i386__)
873 /* no matter what, kernel page zero is not accessible */
874 pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
875#endif
876
877 /* map lowmem global page into fixed addr */
878 pt_entry_t *pte = NULL;
879 if (0 == (pte = pmap_pte(kernel_pmap,
880 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
881 panic("lowmem pte");
882 /* make sure it is defined on page boundary */
883 assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
884 pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
885 | INTEL_PTE_REF
886 | INTEL_PTE_MOD
887 | INTEL_PTE_WIRED
888 | INTEL_PTE_VALID
889 | INTEL_PTE_RW);
890 splx(spl);
891 flush_tlb();
892}
893