]>
Commit | Line | Data |
---|---|---|
55e303ae | 1 | /* |
b0d623f7 | 2 | * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. |
55e303ae | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
55e303ae | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
55e303ae A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
55e303ae A |
57 | #include <platforms.h> |
58 | #include <mach_kdb.h> | |
55e303ae A |
59 | |
60 | #include <mach/i386/vm_param.h> | |
61 | ||
62 | #include <string.h> | |
63 | #include <mach/vm_param.h> | |
64 | #include <mach/vm_prot.h> | |
65 | #include <mach/machine.h> | |
66 | #include <mach/time_value.h> | |
55e303ae A |
67 | #include <kern/spl.h> |
68 | #include <kern/assert.h> | |
69 | #include <kern/debug.h> | |
70 | #include <kern/misc_protos.h> | |
71 | #include <kern/cpu_data.h> | |
72 | #include <kern/processor.h> | |
73 | #include <vm/vm_page.h> | |
74 | #include <vm/pmap.h> | |
75 | #include <vm/vm_kern.h> | |
76 | #include <i386/pmap.h> | |
55e303ae | 77 | #include <i386/misc_protos.h> |
b0d623f7 | 78 | #include <i386/ipl.h> |
91447636 | 79 | #include <i386/cpuid.h> |
55e303ae | 80 | #include <mach/thread_status.h> |
0c530ab8 | 81 | #include <pexpert/i386/efi.h> |
b0d623f7 A |
82 | #include <i386/i386_lowmem.h> |
83 | #include <i386/lowglobals.h> | |
84 | ||
85 | #include <mach-o/loader.h> | |
86 | #include <libkern/kernel_mach_header.h> | |
87 | ||
88 | #if DEBUG | |
89 | #define DBG(x...) kprintf("DBG: " x) | |
90 | #define PRINT_PMAP_MEMORY_TABLE | |
91 | #else | |
92 | #define DBG(x...) | |
93 | #endif | |
55e303ae A |
94 | |
95 | vm_size_t mem_size = 0; | |
55e303ae | 96 | vm_offset_t first_avail = 0;/* first after page tables */ |
55e303ae | 97 | |
0c530ab8 A |
98 | uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem */ |
99 | uint64_t mem_actual; | |
100 | uint64_t sane_size = 0; /* Memory size to use for defaults calculations */ | |
101 | ||
0c530ab8 A |
102 | #define MAXLORESERVE ( 32 * 1024 * 1024) |
103 | ||
0b4c1975 A |
104 | ppnum_t max_ppnum = 0; |
105 | ppnum_t lowest_lo = 0; | |
106 | ppnum_t lowest_hi = 0; | |
107 | ppnum_t highest_hi = 0; | |
0c530ab8 | 108 | |
060df5ea A |
109 | uint32_t pmap_reserved_pages_allocated = 0; |
110 | uint32_t pmap_last_reserved_range = 0xFFFFFFFF; | |
111 | uint32_t pmap_reserved_ranges = 0; | |
112 | ||
0b4c1975 | 113 | extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *); |
0c530ab8 | 114 | |
2d21ac55 | 115 | pmap_paddr_t avail_start, avail_end; |
55e303ae | 116 | vm_offset_t virtual_avail, virtual_end; |
0c530ab8 | 117 | static pmap_paddr_t avail_remaining; |
91447636 | 118 | vm_offset_t static_memory_end = 0; |
55e303ae | 119 | |
b0d623f7 | 120 | vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end; |
0c530ab8 | 121 | |
b0d623f7 A |
122 | boolean_t kernel_text_ps_4K = TRUE; |
123 | boolean_t wpkernel = TRUE; | |
0c530ab8 | 124 | |
b0d623f7 | 125 | extern void *KPTphys; |
0c530ab8 | 126 | |
b0d623f7 A |
127 | /* |
128 | * _mh_execute_header is the mach_header for the currently executing kernel | |
129 | */ | |
130 | void *sectTEXTB; unsigned long sectSizeTEXT; | |
131 | void *sectDATAB; unsigned long sectSizeDATA; | |
132 | void *sectOBJCB; unsigned long sectSizeOBJC; | |
133 | void *sectLINKB; unsigned long sectSizeLINK; | |
134 | void *sectPRELINKB; unsigned long sectSizePRELINK; | |
135 | void *sectHIBB; unsigned long sectSizeHIB; | |
136 | void *sectINITPTB; unsigned long sectSizeINITPT; | |
b0d623f7 A |
137 | |
138 | extern uint64_t firmware_Conventional_bytes; | |
139 | extern uint64_t firmware_RuntimeServices_bytes; | |
140 | extern uint64_t firmware_ACPIReclaim_bytes; | |
141 | extern uint64_t firmware_ACPINVS_bytes; | |
142 | extern uint64_t firmware_PalCode_bytes; | |
143 | extern uint64_t firmware_Reserved_bytes; | |
144 | extern uint64_t firmware_Unusable_bytes; | |
145 | extern uint64_t firmware_other_bytes; | |
146 | uint64_t firmware_MMIO_bytes; | |
55e303ae A |
147 | |
148 | /* | |
149 | * Basic VM initialization. | |
150 | */ | |
151 | void | |
0c530ab8 A |
152 | i386_vm_init(uint64_t maxmem, |
153 | boolean_t IA32e, | |
154 | boot_args *args) | |
55e303ae | 155 | { |
91447636 | 156 | pmap_memory_region_t *pmptr; |
0c530ab8 A |
157 | pmap_memory_region_t *prev_pmptr; |
158 | EfiMemoryRange *mptr; | |
159 | unsigned int mcount; | |
160 | unsigned int msize; | |
91447636 A |
161 | ppnum_t fap; |
162 | unsigned int i; | |
0c530ab8 A |
163 | unsigned int safeboot; |
164 | ppnum_t maxpg = 0; | |
165 | uint32_t pmap_type; | |
0c530ab8 | 166 | uint32_t maxdmaaddr; |
55e303ae | 167 | |
0c530ab8 A |
168 | /* |
169 | * Now retrieve addresses for end, edata, and etext | |
55e303ae A |
170 | * from MACH-O headers. |
171 | */ | |
172 | ||
173 | sectTEXTB = (void *) getsegdatafromheader( | |
174 | &_mh_execute_header, "__TEXT", §SizeTEXT); | |
175 | sectDATAB = (void *) getsegdatafromheader( | |
176 | &_mh_execute_header, "__DATA", §SizeDATA); | |
177 | sectOBJCB = (void *) getsegdatafromheader( | |
178 | &_mh_execute_header, "__OBJC", §SizeOBJC); | |
179 | sectLINKB = (void *) getsegdatafromheader( | |
180 | &_mh_execute_header, "__LINKEDIT", §SizeLINK); | |
91447636 A |
181 | sectHIBB = (void *)getsegdatafromheader( |
182 | &_mh_execute_header, "__HIB", §SizeHIB); | |
b0d623f7 A |
183 | sectINITPTB = (void *)getsegdatafromheader( |
184 | &_mh_execute_header, "__INITPT", §SizeINITPT); | |
55e303ae | 185 | sectPRELINKB = (void *) getsegdatafromheader( |
b0d623f7 | 186 | &_mh_execute_header, "__PRELINK_TEXT", §SizePRELINK); |
55e303ae | 187 | |
b0d623f7 A |
188 | sHIB = (vm_offset_t) sectHIBB; |
189 | eHIB = (vm_offset_t) sectHIBB + sectSizeHIB; | |
190 | /* Zero-padded from ehib to stext if text is 2M-aligned */ | |
191 | stext = (vm_offset_t) sectTEXTB; | |
55e303ae | 192 | etext = (vm_offset_t) sectTEXTB + sectSizeTEXT; |
b0d623f7 A |
193 | /* Zero-padded from etext to sdata if text is 2M-aligned */ |
194 | sdata = (vm_offset_t) sectDATAB; | |
55e303ae | 195 | edata = (vm_offset_t) sectDATAB + sectSizeDATA; |
55e303ae | 196 | |
b0d623f7 A |
197 | #if DEBUG |
198 | kprintf("sectTEXTB = %p\n", sectTEXTB); | |
199 | kprintf("sectDATAB = %p\n", sectDATAB); | |
200 | kprintf("sectOBJCB = %p\n", sectOBJCB); | |
201 | kprintf("sectLINKB = %p\n", sectLINKB); | |
202 | kprintf("sectHIBB = %p\n", sectHIBB); | |
203 | kprintf("sectPRELINKB = %p\n", sectPRELINKB); | |
204 | kprintf("eHIB = %p\n", (void *) eHIB); | |
205 | kprintf("stext = %p\n", (void *) stext); | |
206 | kprintf("etext = %p\n", (void *) etext); | |
207 | kprintf("sdata = %p\n", (void *) sdata); | |
208 | kprintf("edata = %p\n", (void *) edata); | |
209 | #endif | |
210 | ||
55e303ae A |
211 | vm_set_page_size(); |
212 | ||
55e303ae A |
213 | /* |
214 | * Compute the memory size. | |
215 | */ | |
216 | ||
593a1d5f | 217 | if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) { |
0c530ab8 A |
218 | maxpg = 1 << (32 - I386_PGSHIFT); |
219 | } | |
91447636 A |
220 | avail_remaining = 0; |
221 | avail_end = 0; | |
222 | pmptr = pmap_memory_regions; | |
0c530ab8 | 223 | prev_pmptr = 0; |
91447636 A |
224 | pmap_memory_region_count = pmap_memory_region_current = 0; |
225 | fap = (ppnum_t) i386_btop(first_avail); | |
91447636 | 226 | |
b0d623f7 | 227 | mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap); |
0c530ab8 A |
228 | if (args->MemoryMapDescriptorSize == 0) |
229 | panic("Invalid memory map descriptor size"); | |
230 | msize = args->MemoryMapDescriptorSize; | |
231 | mcount = args->MemoryMapSize / msize; | |
232 | ||
6601e61a | 233 | #define FOURGIG 0x0000000100000000ULL |
0b4c1975 | 234 | #define ONEGIG 0x0000000040000000ULL |
0c530ab8 A |
235 | |
236 | for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { | |
237 | ppnum_t base, top; | |
b0d623f7 | 238 | uint64_t region_bytes = 0; |
0c530ab8 A |
239 | |
240 | if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) { | |
241 | kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count); | |
242 | break; | |
243 | } | |
244 | base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); | |
b0d623f7 A |
245 | top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1); |
246 | region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); | |
247 | pmap_type = mptr->Type; | |
0c530ab8 A |
248 | |
249 | switch (mptr->Type) { | |
250 | case kEfiLoaderCode: | |
251 | case kEfiLoaderData: | |
252 | case kEfiBootServicesCode: | |
253 | case kEfiBootServicesData: | |
254 | case kEfiConventionalMemory: | |
255 | /* | |
256 | * Consolidate usable memory types into one. | |
257 | */ | |
258 | pmap_type = kEfiConventionalMemory; | |
b0d623f7 A |
259 | sane_size += region_bytes; |
260 | firmware_Conventional_bytes += region_bytes; | |
0c530ab8 | 261 | break; |
b0d623f7 A |
262 | /* |
263 | * sane_size should reflect the total amount of physical | |
264 | * RAM in the system, not just the amount that is | |
265 | * available for the OS to use. | |
266 | * FIXME:Consider deriving this value from SMBIOS tables | |
267 | * rather than reverse engineering the memory map. | |
268 | * Alternatively, see | |
269 | * <rdar://problem/4642773> Memory map should | |
270 | * describe all memory | |
271 | * Firmware on some systems guarantees that the memory | |
272 | * map is complete via the "RomReservedMemoryTracked" | |
273 | * feature field--consult that where possible to | |
274 | * avoid the "round up to 128M" workaround below. | |
275 | */ | |
0c530ab8 A |
276 | |
277 | case kEfiRuntimeServicesCode: | |
278 | case kEfiRuntimeServicesData: | |
b0d623f7 A |
279 | firmware_RuntimeServices_bytes += region_bytes; |
280 | sane_size += region_bytes; | |
281 | break; | |
0c530ab8 | 282 | case kEfiACPIReclaimMemory: |
b0d623f7 A |
283 | firmware_ACPIReclaim_bytes += region_bytes; |
284 | sane_size += region_bytes; | |
285 | break; | |
0c530ab8 | 286 | case kEfiACPIMemoryNVS: |
b0d623f7 A |
287 | firmware_ACPINVS_bytes += region_bytes; |
288 | sane_size += region_bytes; | |
289 | break; | |
0c530ab8 | 290 | case kEfiPalCode: |
b0d623f7 A |
291 | firmware_PalCode_bytes += region_bytes; |
292 | sane_size += region_bytes; | |
293 | break; | |
294 | ||
b0d623f7 A |
295 | case kEfiReservedMemoryType: |
296 | firmware_Reserved_bytes += region_bytes; | |
297 | break; | |
0c530ab8 | 298 | case kEfiUnusableMemory: |
b0d623f7 A |
299 | firmware_Unusable_bytes += region_bytes; |
300 | break; | |
0c530ab8 A |
301 | case kEfiMemoryMappedIO: |
302 | case kEfiMemoryMappedIOPortSpace: | |
b0d623f7 A |
303 | firmware_MMIO_bytes += region_bytes; |
304 | break; | |
0c530ab8 | 305 | default: |
b0d623f7 A |
306 | firmware_other_bytes += region_bytes; |
307 | break; | |
0c530ab8 A |
308 | } |
309 | ||
b0d623f7 A |
310 | kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n", |
311 | i, mptr->Type, pmap_type, base, top); | |
0c530ab8 A |
312 | |
313 | if (maxpg) { | |
314 | if (base >= maxpg) | |
315 | break; | |
316 | top = (top > maxpg) ? maxpg : top; | |
317 | } | |
318 | ||
319 | /* | |
320 | * handle each region | |
321 | */ | |
2d21ac55 A |
322 | if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME || |
323 | pmap_type != kEfiConventionalMemory) { | |
0c530ab8 A |
324 | prev_pmptr = 0; |
325 | continue; | |
326 | } else { | |
327 | /* | |
328 | * Usable memory region | |
329 | */ | |
330 | if (top < I386_LOWMEM_RESERVED) { | |
331 | prev_pmptr = 0; | |
332 | continue; | |
333 | } | |
334 | if (top < fap) { | |
335 | /* | |
336 | * entire range below first_avail | |
337 | * salvage some low memory pages | |
338 | * we use some very low memory at startup | |
339 | * mark as already allocated here | |
340 | */ | |
341 | if (base >= I386_LOWMEM_RESERVED) | |
342 | pmptr->base = base; | |
343 | else | |
344 | pmptr->base = I386_LOWMEM_RESERVED; | |
060df5ea A |
345 | |
346 | pmptr->end = top; | |
347 | ||
0c530ab8 | 348 | /* |
060df5ea A |
349 | * A range may be marked with with the |
350 | * EFI_MEMORY_KERN_RESERVED attribute | |
351 | * on some systems, to indicate that the range | |
352 | * must not be made available to devices. | |
353 | * Simplifying assumptions are made regarding | |
354 | * the placement of the range. | |
0c530ab8 | 355 | */ |
060df5ea A |
356 | if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) |
357 | pmap_reserved_ranges++; | |
358 | ||
359 | if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) && | |
360 | (top < I386_KERNEL_IMAGE_BASE_PAGE)) { | |
361 | pmptr->alloc = pmptr->base; | |
362 | pmap_last_reserved_range = pmap_memory_region_count; | |
363 | } | |
364 | else { | |
365 | /* | |
366 | * mark as already mapped | |
367 | */ | |
368 | pmptr->alloc = top; | |
369 | } | |
0c530ab8 A |
370 | pmptr->type = pmap_type; |
371 | } | |
372 | else if ( (base < fap) && (top > fap) ) { | |
373 | /* | |
374 | * spans first_avail | |
375 | * put mem below first avail in table but | |
376 | * mark already allocated | |
377 | */ | |
378 | pmptr->base = base; | |
379 | pmptr->alloc = pmptr->end = (fap - 1); | |
380 | pmptr->type = pmap_type; | |
381 | /* | |
382 | * we bump these here inline so the accounting | |
383 | * below works correctly | |
384 | */ | |
385 | pmptr++; | |
386 | pmap_memory_region_count++; | |
387 | pmptr->alloc = pmptr->base = fap; | |
388 | pmptr->type = pmap_type; | |
389 | pmptr->end = top; | |
390 | } | |
391 | else { | |
392 | /* | |
393 | * entire range useable | |
394 | */ | |
395 | pmptr->alloc = pmptr->base = base; | |
396 | pmptr->type = pmap_type; | |
397 | pmptr->end = top; | |
398 | } | |
399 | ||
400 | if (i386_ptob(pmptr->end) > avail_end ) | |
401 | avail_end = i386_ptob(pmptr->end); | |
402 | ||
403 | avail_remaining += (pmptr->end - pmptr->base); | |
404 | ||
405 | /* | |
406 | * Consolidate contiguous memory regions, if possible | |
407 | */ | |
408 | if (prev_pmptr && | |
409 | pmptr->type == prev_pmptr->type && | |
410 | pmptr->base == pmptr->alloc && | |
411 | pmptr->base == (prev_pmptr->end + 1)) { | |
412 | prev_pmptr->end = pmptr->end; | |
413 | } else { | |
414 | pmap_memory_region_count++; | |
415 | prev_pmptr = pmptr; | |
416 | pmptr++; | |
417 | } | |
418 | } | |
6601e61a | 419 | } |
0c530ab8 | 420 | |
91447636 | 421 | #ifdef PRINT_PMAP_MEMORY_TABLE |
0c530ab8 A |
422 | { |
423 | unsigned int j; | |
424 | pmap_memory_region_t *p = pmap_memory_regions; | |
b0d623f7 A |
425 | addr64_t region_start, region_end; |
426 | addr64_t efi_start, efi_end; | |
0c530ab8 | 427 | for (j=0;j<pmap_memory_region_count;j++, p++) { |
b0d623f7 A |
428 | kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n", |
429 | j, p->type, | |
430 | (addr64_t) p->base << I386_PGSHIFT, | |
431 | (addr64_t) p->alloc << I386_PGSHIFT, | |
432 | (addr64_t) p->end << I386_PGSHIFT); | |
433 | region_start = (addr64_t) p->base << I386_PGSHIFT; | |
434 | region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1; | |
435 | mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap); | |
0c530ab8 A |
436 | for (i=0; i<mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { |
437 | if (mptr->Type != kEfiLoaderCode && | |
438 | mptr->Type != kEfiLoaderData && | |
439 | mptr->Type != kEfiBootServicesCode && | |
440 | mptr->Type != kEfiBootServicesData && | |
441 | mptr->Type != kEfiConventionalMemory) { | |
b0d623f7 | 442 | efi_start = (addr64_t)mptr->PhysicalStart; |
0c530ab8 A |
443 | efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1; |
444 | if ((efi_start >= region_start && efi_start <= region_end) || | |
445 | (efi_end >= region_start && efi_end <= region_end)) { | |
446 | kprintf(" *** Overlapping region with EFI runtime region %d\n", i); | |
447 | } | |
b0d623f7 | 448 | } |
0c530ab8 | 449 | } |
b0d623f7 | 450 | } |
0c530ab8 | 451 | } |
91447636 | 452 | #endif |
55e303ae | 453 | |
91447636 | 454 | avail_start = first_avail; |
0c530ab8 | 455 | mem_actual = sane_size; |
4452a7af | 456 | |
0c530ab8 A |
457 | /* |
458 | * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory | |
459 | * not reported by EFI. | |
460 | */ | |
461 | ||
b0d623f7 | 462 | sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1)); |
0c530ab8 | 463 | |
c910b4d9 | 464 | /* |
b0d623f7 | 465 | * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64). |
c910b4d9 A |
466 | * Unless overriden by the maxmem= boot-arg |
467 | * -- which is a non-zero maxmem argument to this function. | |
468 | */ | |
b0d623f7 A |
469 | if (maxmem == 0 && sane_size > KERNEL_MAXMEM) { |
470 | maxmem = KERNEL_MAXMEM; | |
471 | printf("Physical memory %lld bytes capped at %dGB\n", | |
472 | sane_size, (uint32_t) (KERNEL_MAXMEM/GB)); | |
c910b4d9 | 473 | } |
b0d623f7 | 474 | |
0c530ab8 A |
475 | /* |
476 | * if user set maxmem, reduce memory sizes | |
477 | */ | |
478 | if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) { | |
b0d623f7 | 479 | ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT); |
2d21ac55 A |
480 | ppnum_t highest_pn = 0; |
481 | ppnum_t cur_alloc = 0; | |
482 | uint64_t pages_to_use; | |
483 | unsigned cur_region = 0; | |
484 | ||
485 | sane_size = maxmem; | |
486 | ||
0c530ab8 A |
487 | if (avail_remaining > discarded_pages) |
488 | avail_remaining -= discarded_pages; | |
489 | else | |
490 | avail_remaining = 0; | |
2d21ac55 A |
491 | |
492 | pages_to_use = avail_remaining; | |
493 | ||
494 | while (cur_region < pmap_memory_region_count && pages_to_use) { | |
495 | for (cur_alloc = pmap_memory_regions[cur_region].alloc; | |
496 | cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use; | |
497 | cur_alloc++) { | |
498 | if (cur_alloc > highest_pn) | |
499 | highest_pn = cur_alloc; | |
500 | pages_to_use--; | |
501 | } | |
502 | if (pages_to_use == 0) | |
503 | pmap_memory_regions[cur_region].end = cur_alloc; | |
504 | ||
505 | cur_region++; | |
506 | } | |
507 | pmap_memory_region_count = cur_region; | |
508 | ||
509 | avail_end = i386_ptob(highest_pn + 1); | |
55e303ae | 510 | } |
4452a7af | 511 | |
0c530ab8 A |
512 | /* |
513 | * mem_size is only a 32 bit container... follow the PPC route | |
514 | * and pin it to a 2 Gbyte maximum | |
515 | */ | |
516 | if (sane_size > (FOURGIG >> 1)) | |
517 | mem_size = (vm_size_t)(FOURGIG >> 1); | |
518 | else | |
519 | mem_size = (vm_size_t)sane_size; | |
6601e61a | 520 | max_mem = sane_size; |
5d5c5d0d | 521 | |
b0d623f7 | 522 | kprintf("Physical memory %llu MB\n", sane_size/MB); |
4452a7af | 523 | |
0b4c1975 | 524 | max_valid_low_ppnum = (2 * GB) / PAGE_SIZE; |
0c530ab8 | 525 | |
0b4c1975 A |
526 | if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) { |
527 | max_valid_dma_address = (uint64_t)4 * (uint64_t)GB; | |
528 | } else { | |
529 | max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB; | |
21362eb3 | 530 | |
0b4c1975 A |
531 | if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum) |
532 | max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE); | |
533 | } | |
0c530ab8 | 534 | if (avail_end >= max_valid_dma_address) { |
0b4c1975 A |
535 | uint32_t maxloreserve; |
536 | uint32_t mbuf_reserve = 0; | |
537 | boolean_t mbuf_override = FALSE; | |
538 | ||
539 | if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) { | |
540 | ||
541 | if (sane_size >= (ONEGIG * 15)) | |
542 | maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4; | |
543 | else if (sane_size >= (ONEGIG * 7)) | |
544 | maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2; | |
545 | else | |
546 | maxloreserve = MAXLORESERVE / PAGE_SIZE; | |
547 | ||
548 | mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE; | |
549 | } else | |
550 | maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE; | |
551 | ||
552 | if (maxloreserve) { | |
553 | vm_lopage_free_limit = maxloreserve; | |
554 | ||
555 | if (mbuf_override == TRUE) { | |
556 | vm_lopage_free_limit += mbuf_reserve; | |
557 | vm_lopage_lowater = 0; | |
558 | } else | |
559 | vm_lopage_lowater = vm_lopage_free_limit / 16; | |
560 | ||
561 | vm_lopage_refill = TRUE; | |
562 | vm_lopage_needed = TRUE; | |
563 | } | |
0c530ab8 | 564 | } |
0c530ab8 A |
565 | /* |
566 | * Initialize kernel physical map. | |
567 | * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. | |
568 | */ | |
569 | pmap_bootstrap(0, IA32e); | |
6601e61a A |
570 | } |
571 | ||
0c530ab8 | 572 | |
55e303ae A |
573 | unsigned int |
574 | pmap_free_pages(void) | |
575 | { | |
b0d623f7 | 576 | return (unsigned int)avail_remaining; |
55e303ae A |
577 | } |
578 | ||
060df5ea A |
579 | boolean_t pmap_next_page_reserved(ppnum_t *); |
580 | ||
581 | /* | |
582 | * Pick a page from a "kernel private" reserved range; works around | |
583 | * errata on some hardware. | |
584 | */ | |
585 | boolean_t | |
586 | pmap_next_page_reserved(ppnum_t *pn) { | |
587 | if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) { | |
588 | uint32_t n; | |
589 | pmap_memory_region_t *region; | |
590 | for (n = 0; n <= pmap_last_reserved_range; n++) { | |
591 | region = &pmap_memory_regions[n]; | |
592 | if (region->alloc < region->end) { | |
593 | *pn = region->alloc++; | |
594 | avail_remaining--; | |
595 | ||
596 | if (*pn > max_ppnum) | |
597 | max_ppnum = *pn; | |
598 | ||
599 | if (lowest_lo == 0 || *pn < lowest_lo) | |
600 | lowest_lo = *pn; | |
601 | ||
602 | pmap_reserved_pages_allocated++; | |
603 | return TRUE; | |
604 | } | |
605 | } | |
606 | } | |
607 | return FALSE; | |
608 | } | |
609 | ||
610 | ||
b0d623f7 | 611 | boolean_t |
0b4c1975 A |
612 | pmap_next_page_hi( |
613 | ppnum_t *pn) | |
b0d623f7 | 614 | { |
0b4c1975 A |
615 | pmap_memory_region_t *region; |
616 | int n; | |
617 | ||
060df5ea A |
618 | if (pmap_next_page_reserved(pn)) |
619 | return TRUE; | |
620 | ||
0b4c1975 A |
621 | if (avail_remaining) { |
622 | for (n = pmap_memory_region_count - 1; n >= 0; n--) { | |
623 | region = &pmap_memory_regions[n]; | |
624 | ||
625 | if (region->alloc != region->end) { | |
626 | *pn = region->alloc++; | |
627 | avail_remaining--; | |
628 | ||
629 | if (*pn > max_ppnum) | |
630 | max_ppnum = *pn; | |
631 | ||
632 | if (lowest_lo == 0 || *pn < lowest_lo) | |
633 | lowest_lo = *pn; | |
634 | ||
635 | if (lowest_hi == 0 || *pn < lowest_hi) | |
636 | lowest_hi = *pn; | |
637 | ||
638 | if (*pn > highest_hi) | |
639 | highest_hi = *pn; | |
640 | ||
641 | return TRUE; | |
642 | } | |
b0d623f7 A |
643 | } |
644 | } | |
0b4c1975 | 645 | return FALSE; |
b0d623f7 | 646 | } |
0b4c1975 | 647 | |
0c530ab8 | 648 | |
55e303ae A |
649 | boolean_t |
650 | pmap_next_page( | |
651 | ppnum_t *pn) | |
652 | { | |
0c530ab8 | 653 | if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { |
b0d623f7 A |
654 | if (pmap_memory_regions[pmap_memory_region_current].alloc == |
655 | pmap_memory_regions[pmap_memory_region_current].end) { | |
656 | pmap_memory_region_current++; | |
0c530ab8 A |
657 | continue; |
658 | } | |
659 | *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; | |
660 | avail_remaining--; | |
661 | ||
0b4c1975 A |
662 | if (*pn > max_ppnum) |
663 | max_ppnum = *pn; | |
664 | ||
665 | if (lowest_lo == 0 || *pn < lowest_lo) | |
666 | lowest_lo = *pn; | |
667 | ||
0c530ab8 | 668 | return TRUE; |
91447636 A |
669 | } |
670 | return FALSE; | |
55e303ae A |
671 | } |
672 | ||
0c530ab8 | 673 | |
55e303ae A |
674 | boolean_t |
675 | pmap_valid_page( | |
91447636 | 676 | ppnum_t pn) |
55e303ae | 677 | { |
0c530ab8 A |
678 | unsigned int i; |
679 | pmap_memory_region_t *pmptr = pmap_memory_regions; | |
680 | ||
0c530ab8 | 681 | for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { |
2d21ac55 | 682 | if ( (pn >= pmptr->base) && (pn <= pmptr->end) ) |
0c530ab8 A |
683 | return TRUE; |
684 | } | |
685 | return FALSE; | |
686 | } | |
687 | ||
b0d623f7 A |
688 | /* |
689 | * Called once VM is fully initialized so that we can release unused | |
690 | * sections of low memory to the general pool. | |
691 | * Also complete the set-up of identity-mapped sections of the kernel: | |
692 | * 1) write-protect kernel text | |
693 | * 2) map kernel text using large pages if possible | |
694 | * 3) read and write-protect page zero (for K32) | |
695 | * 4) map the global page at the appropriate virtual address. | |
696 | * | |
697 | * Use of large pages | |
698 | * ------------------ | |
699 | * To effectively map and write-protect all kernel text pages, the text | |
700 | * must be 2M-aligned at the base, and the data section above must also be | |
701 | * 2M-aligned. That is, there's padding below and above. This is achieved | |
702 | * through linker directives. Large pages are used only if this alignment | |
703 | * exists (and not overriden by the -kernel_text_page_4K boot-arg). The | |
704 | * memory layout is: | |
705 | * | |
706 | * : : | |
707 | * | __DATA | | |
708 | * sdata: ================== 2Meg | |
709 | * | | | |
710 | * | zero-padding | | |
711 | * | | | |
712 | * etext: ------------------ | |
713 | * | | | |
714 | * : : | |
715 | * | | | |
716 | * | __TEXT | | |
717 | * | | | |
718 | * : : | |
719 | * | | | |
720 | * stext: ================== 2Meg | |
721 | * | | | |
722 | * | zero-padding | | |
723 | * | | | |
724 | * eHIB: ------------------ | |
725 | * | __HIB | | |
726 | * : : | |
727 | * | |
728 | * Prior to changing the mapping from 4K to 2M, the zero-padding pages | |
729 | * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the | |
730 | * 4K pages covering [stext,etext] are coalesced as 2M large pages. | |
731 | * The now unused level-1 PTE pages are also freed. | |
732 | */ | |
733 | void | |
734 | pmap_lowmem_finalize(void) | |
735 | { | |
736 | spl_t spl; | |
737 | int i; | |
738 | ||
739 | /* Check the kernel is linked at the expected base address */ | |
740 | if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) != | |
741 | I386_KERNEL_IMAGE_BASE_PAGE) | |
742 | panic("pmap_lowmem_finalize() unexpected kernel base address"); | |
743 | ||
744 | /* | |
745 | * Free all pages in pmap regions below the base: | |
746 | * rdar://6332712 | |
747 | * We can't free all the pages to VM that EFI reports available. | |
748 | * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake. | |
749 | * There's also a size miscalculation here: pend is one page less | |
750 | * than it should be but this is not fixed to be backwards | |
751 | * compatible. | |
752 | * Due to this current EFI limitation, we take only the first | |
753 | * entry in the memory region table. However, the loop is retained | |
754 | * (with the intended termination criteria commented out) in the | |
755 | * hope that some day we can free all low-memory ranges. | |
060df5ea A |
756 | * This loop assumes the first range does not span the kernel |
757 | * image base & avail_start. We skip this process on systems | |
758 | * with "kernel reserved" ranges, as the low memory reclamation | |
759 | * is handled in the initial memory map processing loop on | |
760 | * such systems. | |
b0d623f7 A |
761 | */ |
762 | for (i = 0; | |
763 | // pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE; | |
060df5ea | 764 | i < 1 && (pmap_reserved_ranges == 0); |
b0d623f7 A |
765 | i++) { |
766 | vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base); | |
767 | vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end); | |
768 | // vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1); | |
769 | ||
770 | DBG("ml_static_mfree(%p,%p) for pmap region %d\n", | |
771 | (void *) ml_static_ptovirt(pbase), | |
772 | (void *) (pend - pbase), i); | |
773 | ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase); | |
774 | } | |
775 | ||
776 | /* | |
777 | * If text and data are both 2MB-aligned, | |
778 | * we can map text with large-pages, | |
779 | * unless the -kernel_text_ps_4K boot-arg overrides. | |
780 | */ | |
781 | if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) { | |
782 | kprintf("Kernel text is 2MB aligned"); | |
783 | kernel_text_ps_4K = FALSE; | |
784 | if (PE_parse_boot_argn("-kernel_text_ps_4K", | |
785 | &kernel_text_ps_4K, | |
786 | sizeof (kernel_text_ps_4K))) | |
787 | kprintf(" but will be mapped with 4K pages\n"); | |
788 | else | |
789 | kprintf(" and will be mapped with 2M pages\n"); | |
790 | } | |
791 | ||
792 | (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel)); | |
793 | if (wpkernel) | |
794 | kprintf("Kernel text %p-%p to be write-protected\n", | |
795 | (void *) stext, (void *) etext); | |
796 | ||
797 | spl = splhigh(); | |
798 | ||
799 | /* | |
800 | * Scan over text if mappings are to be changed: | |
801 | * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 | |
802 | * - Change to large-pages if possible and not overriden. | |
803 | */ | |
804 | if (kernel_text_ps_4K && wpkernel) { | |
805 | vm_offset_t myva; | |
806 | for (myva = stext; myva < etext; myva += PAGE_SIZE) { | |
807 | pt_entry_t *ptep; | |
808 | ||
809 | ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); | |
810 | if (ptep) | |
811 | pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); | |
812 | } | |
813 | } | |
814 | ||
815 | if (!kernel_text_ps_4K) { | |
816 | vm_offset_t myva; | |
817 | ||
818 | /* | |
819 | * Release zero-filled page padding used for 2M-alignment. | |
820 | */ | |
821 | DBG("ml_static_mfree(%p,%p) for padding below text\n", | |
822 | (void *) eHIB, (void *) (stext - eHIB)); | |
823 | ml_static_mfree(eHIB, stext - eHIB); | |
824 | DBG("ml_static_mfree(%p,%p) for padding above text\n", | |
825 | (void *) etext, (void *) (sdata - etext)); | |
826 | ml_static_mfree(etext, sdata - etext); | |
827 | ||
828 | /* | |
829 | * Coalesce text pages into large pages. | |
830 | */ | |
831 | for (myva = stext; myva < sdata; myva += I386_LPGBYTES) { | |
832 | pt_entry_t *ptep; | |
833 | vm_offset_t pte_phys; | |
834 | pt_entry_t *pdep; | |
835 | pt_entry_t pde; | |
836 | ||
837 | pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva); | |
838 | ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); | |
839 | DBG("myva: %p pdep: %p ptep: %p\n", | |
840 | (void *) myva, (void *) pdep, (void *) ptep); | |
841 | if ((*ptep & INTEL_PTE_VALID) == 0) | |
842 | continue; | |
843 | pte_phys = (vm_offset_t)(*ptep & PG_FRAME); | |
844 | pde = *pdep & PTMASK; /* page attributes from pde */ | |
845 | pde |= INTEL_PTE_PS; /* make it a 2M entry */ | |
846 | pde |= pte_phys; /* take page frame from pte */ | |
847 | ||
848 | if (wpkernel) | |
849 | pde &= ~INTEL_PTE_RW; | |
850 | DBG("pmap_store_pte(%p,0x%llx)\n", | |
851 | (void *)pdep, pde); | |
852 | pmap_store_pte(pdep, pde); | |
853 | ||
854 | /* | |
855 | * Free the now-unused level-1 pte. | |
856 | * Note: ptep is a virtual address to the pte in the | |
857 | * recursive map. We can't use this address to free | |
858 | * the page. Instead we need to compute its address | |
859 | * in the Idle PTEs in "low memory". | |
860 | */ | |
861 | vm_offset_t vm_ptep = (vm_offset_t) KPTphys | |
862 | + (pte_phys >> PTPGSHIFT); | |
863 | DBG("ml_static_mfree(%p,0x%x) for pte\n", | |
864 | (void *) vm_ptep, PAGE_SIZE); | |
865 | ml_static_mfree(vm_ptep, PAGE_SIZE); | |
866 | } | |
867 | ||
868 | /* Change variable read by sysctl machdep.pmap */ | |
869 | pmap_kernel_text_ps = I386_LPGBYTES; | |
870 | } | |
871 | ||
872 | #if defined(__i386__) | |
873 | /* no matter what, kernel page zero is not accessible */ | |
874 | pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID); | |
875 | #endif | |
876 | ||
877 | /* map lowmem global page into fixed addr */ | |
878 | pt_entry_t *pte = NULL; | |
879 | if (0 == (pte = pmap_pte(kernel_pmap, | |
880 | VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000))) | |
881 | panic("lowmem pte"); | |
882 | /* make sure it is defined on page boundary */ | |
883 | assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); | |
884 | pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo) | |
885 | | INTEL_PTE_REF | |
886 | | INTEL_PTE_MOD | |
887 | | INTEL_PTE_WIRED | |
888 | | INTEL_PTE_VALID | |
889 | | INTEL_PTE_RW); | |
890 | splx(spl); | |
891 | flush_tlb(); | |
892 | } | |
893 |