]>
Commit | Line | Data |
---|---|---|
55e303ae | 1 | /* |
b0d623f7 | 2 | * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. |
55e303ae | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
55e303ae | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
55e303ae A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
55e303ae A |
57 | #include <platforms.h> |
58 | #include <mach_kdb.h> | |
55e303ae A |
59 | |
60 | #include <mach/i386/vm_param.h> | |
61 | ||
62 | #include <string.h> | |
63 | #include <mach/vm_param.h> | |
64 | #include <mach/vm_prot.h> | |
65 | #include <mach/machine.h> | |
66 | #include <mach/time_value.h> | |
55e303ae A |
67 | #include <kern/spl.h> |
68 | #include <kern/assert.h> | |
69 | #include <kern/debug.h> | |
70 | #include <kern/misc_protos.h> | |
71 | #include <kern/cpu_data.h> | |
72 | #include <kern/processor.h> | |
73 | #include <vm/vm_page.h> | |
74 | #include <vm/pmap.h> | |
75 | #include <vm/vm_kern.h> | |
76 | #include <i386/pmap.h> | |
55e303ae | 77 | #include <i386/misc_protos.h> |
b0d623f7 | 78 | #include <i386/ipl.h> |
91447636 | 79 | #include <i386/cpuid.h> |
55e303ae | 80 | #include <mach/thread_status.h> |
0c530ab8 | 81 | #include <pexpert/i386/efi.h> |
b0d623f7 A |
82 | #include <i386/i386_lowmem.h> |
83 | #include <i386/lowglobals.h> | |
84 | ||
85 | #include <mach-o/loader.h> | |
86 | #include <libkern/kernel_mach_header.h> | |
87 | ||
88 | #if DEBUG | |
89 | #define DBG(x...) kprintf("DBG: " x) | |
90 | #define PRINT_PMAP_MEMORY_TABLE | |
91 | #else | |
92 | #define DBG(x...) | |
93 | #endif | |
55e303ae A |
94 | |
/* User-visible memory size, pinned to a 32-bit container (see i386_vm_init). */
vm_size_t	mem_size = 0;
/* First physical address after the boot page tables; pages below this are
 * already in use by early startup. */
vm_offset_t	first_avail = 0;/* first after page tables */

uint64_t	max_mem;	/* Size of physical memory (bytes), adjusted by maxmem */
uint64_t	mem_actual;	/* Raw physical memory reported by EFI, before rounding */
uint64_t	sane_size = 0;	/* Memory size to use for defaults calculations */

/* Hard caps for the DMA bounce pool and the low-memory page reserve. */
#define MAXBOUNCEPOOL	(128 * 1024 * 1024)
#define MAXLORESERVE	( 32 * 1024 * 1024)

extern unsigned int bsd_mbuf_cluster_reserve(void);


/* Physical base/size of the bounce pool carved out by reserve_bouncepool(). */
uint32_t	bounce_pool_base = 0;
uint32_t	bounce_pool_size = 0;

static void	reserve_bouncepool(uint32_t);


/* Physical address range available for VM allocation. */
pmap_paddr_t	avail_start, avail_end;
vm_offset_t	virtual_avail, virtual_end;
/* Count of physical pages not yet handed out by pmap_next_page(). */
static pmap_paddr_t	avail_remaining;
vm_offset_t	static_memory_end = 0;

/* Kernel image section boundaries, filled in from the Mach-O header. */
vm_offset_t	sHIB, eHIB, stext, etext, sdata, edata, end;

/* Large-page text mapping controls; see pmap_lowmem_finalize(). */
boolean_t	kernel_text_ps_4K = TRUE;
boolean_t	wpkernel = TRUE;

extern void	*KPTphys;

/*
 * _mh_execute_header is the mach_header for the currently executing kernel
 */
void *sectTEXTB; unsigned long sectSizeTEXT;
void *sectDATAB; unsigned long sectSizeDATA;
void *sectOBJCB; unsigned long sectSizeOBJC;
void *sectLINKB; unsigned long sectSizeLINK;
void *sectPRELINKB; unsigned long sectSizePRELINK;
void *sectHIBB; unsigned long sectSizeHIB;
void *sectINITPTB; unsigned long sectSizeINITPT;
extern int srv;

/* Per-EFI-memory-type byte accounting, reported via sysctl/diagnostics. */
extern uint64_t firmware_Conventional_bytes;
extern uint64_t firmware_RuntimeServices_bytes;
extern uint64_t firmware_ACPIReclaim_bytes;
extern uint64_t firmware_ACPINVS_bytes;
extern uint64_t firmware_PalCode_bytes;
extern uint64_t firmware_Reserved_bytes;
extern uint64_t firmware_Unusable_bytes;
extern uint64_t firmware_other_bytes;
uint64_t firmware_MMIO_bytes;
55e303ae A |
147 | |
/*
 * Basic VM initialization.
 *
 * Walks the EFI memory map handed over by the booter, builds the
 * pmap_memory_regions[] table of usable physical memory, computes
 * mem_size / sane_size / max_mem, applies the maxmem= boot-arg cap,
 * optionally reserves the DMA bounce pool, and finally calls
 * pmap_bootstrap() to set up the kernel physical map.
 *
 * maxmem - user-requested memory cap in bytes (0 = no cap requested)
 * IA32e  - TRUE when booting the 64-bit (long mode) kernel pmap
 * args   - boot arguments, including the EFI memory map
 */
void
i386_vm_init(uint64_t	maxmem,
	     boolean_t	IA32e,
	     boot_args	*args)
{
	pmap_memory_region_t *pmptr;
	pmap_memory_region_t *prev_pmptr;
	EfiMemoryRange *mptr;
	unsigned int mcount;
	unsigned int msize;
	ppnum_t fap;
	unsigned int i;
	unsigned int safeboot;
	ppnum_t maxpg = 0;
	uint32_t pmap_type;
	uint32_t maxbouncepoolsize;
	uint32_t maxloreserve;
	uint32_t maxdmaaddr;

	/*
	 * Now retrieve addresses for end, edata, and etext
	 * from MACH-O headers.
	 */

	sectTEXTB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__TEXT", &sectSizeTEXT);
	sectDATAB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__DATA", &sectSizeDATA);
	sectOBJCB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__OBJC", &sectSizeOBJC);
	sectLINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
	sectHIBB = (void *)getsegdatafromheader(
		&_mh_execute_header, "__HIB", &sectSizeHIB);
	sectINITPTB = (void *)getsegdatafromheader(
		&_mh_execute_header, "__INITPT", &sectSizeINITPT);
	sectPRELINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);

	sHIB  = (vm_offset_t) sectHIBB;
	eHIB  = (vm_offset_t) sectHIBB + sectSizeHIB;
	/* Zero-padded from ehib to stext if text is 2M-aligned */
	stext = (vm_offset_t) sectTEXTB;
	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
	/* Zero-padded from etext to sdata if text is 2M-aligned */
	sdata = (vm_offset_t) sectDATAB;
	edata = (vm_offset_t) sectDATAB + sectSizeDATA;

#if DEBUG
	kprintf("sectTEXTB    = %p\n", sectTEXTB);
	kprintf("sectDATAB    = %p\n", sectDATAB);
	kprintf("sectOBJCB    = %p\n", sectOBJCB);
	kprintf("sectLINKB    = %p\n", sectLINKB);
	kprintf("sectHIBB     = %p\n", sectHIBB);
	kprintf("sectPRELINKB = %p\n", sectPRELINKB);
	kprintf("eHIB         = %p\n", (void *) eHIB);
	kprintf("stext        = %p\n", (void *) stext);
	kprintf("etext        = %p\n", (void *) etext);
	kprintf("sdata        = %p\n", (void *) sdata);
	kprintf("edata        = %p\n", (void *) edata);
#endif

	vm_set_page_size();

	/*
	 * Compute the memory size.
	 */

	/*
	 * In high-memory mode or safe boot (-x), restrict usable memory
	 * to the first 4GB worth of pages.
	 */
	if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
		maxpg = 1 << (32 - I386_PGSHIFT);
	}
	avail_remaining = 0;
	avail_end = 0;
	pmptr = pmap_memory_regions;
	prev_pmptr = 0;
	pmap_memory_region_count = pmap_memory_region_current = 0;
	fap = (ppnum_t) i386_btop(first_avail);

	mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap);
	if (args->MemoryMapDescriptorSize == 0)
		panic("Invalid memory map descriptor size");
	/* Descriptors are strided by the firmware-reported size, which may be
	 * larger than sizeof(EfiMemoryRange) — never index the map as an array. */
	msize = args->MemoryMapDescriptorSize;
	mcount = args->MemoryMapSize / msize;

#define FOURGIG 0x0000000100000000ULL

	for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
		ppnum_t base, top;
		uint64_t region_bytes = 0;

		if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
			kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
			break;
		}
		/* base/top are inclusive physical page numbers for the region. */
		base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
		top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
		/* NumberOfPages is a 64-bit EFI field, so the shift is done in
		 * 64-bit arithmetic before byte accounting. */
		region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
		pmap_type = mptr->Type;

		switch (mptr->Type) {
		case kEfiLoaderCode:
		case kEfiLoaderData:
		case kEfiBootServicesCode:
		case kEfiBootServicesData:
		case kEfiConventionalMemory:
			/*
			 * Consolidate usable memory types into one.
			 */
			pmap_type = kEfiConventionalMemory;
			sane_size += region_bytes;
			firmware_Conventional_bytes += region_bytes;
			break;
			/*
			 * sane_size should reflect the total amount of physical
			 * RAM in the system, not just the amount that is
			 * available for the OS to use.
			 * FIXME:Consider deriving this value from SMBIOS tables
			 * rather than reverse engineering the memory map.
			 * Alternatively, see
			 * <rdar://problem/4642773> Memory map should
			 * describe all memory
			 * Firmware on some systems guarantees that the memory
			 * map is complete via the "RomReservedMemoryTracked"
			 * feature field--consult that where possible to
			 * avoid the "round up to 128M" workaround below.
			 */

		case kEfiRuntimeServicesCode:
		case kEfiRuntimeServicesData:
			firmware_RuntimeServices_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIReclaimMemory:
			firmware_ACPIReclaim_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIMemoryNVS:
			firmware_ACPINVS_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiPalCode:
			firmware_PalCode_bytes += region_bytes;
			sane_size += region_bytes;
			break;


		case kEfiReservedMemoryType:
			firmware_Reserved_bytes += region_bytes;
			break;
		case kEfiUnusableMemory:
			firmware_Unusable_bytes += region_bytes;
			break;
		case kEfiMemoryMappedIO:
		case kEfiMemoryMappedIOPortSpace:
			firmware_MMIO_bytes += region_bytes;
			break;
		default:
			firmware_other_bytes += region_bytes;
			break;
		}

		kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
			i, mptr->Type, pmap_type, base, top);

		if (maxpg) {
			if (base >= maxpg)
				break;
			/* NOTE(review): top is inclusive, so clamping to maxpg
			 * (rather than maxpg - 1) keeps one page at the 4GB
			 * boundary — presumably intentional/harmless; confirm. */
			top = (top > maxpg) ? maxpg : top;
		}

		/*
		 * handle each region
		 */
		if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
		    pmap_type != kEfiConventionalMemory) {
			prev_pmptr = 0;
			continue;
		} else {
			/*
			 * Usable memory region
			 */
			if (top < I386_LOWMEM_RESERVED) {
				prev_pmptr = 0;
				continue;
			}
			if (top < fap) {
				/*
				 * entire range below first_avail
				 * salvage some low memory pages
				 * we use some very low memory at startup
				 * mark as already allocated here
				 */
				if (base >= I386_LOWMEM_RESERVED)
					pmptr->base = base;
				else
					pmptr->base = I386_LOWMEM_RESERVED;
				/*
				 * mark as already mapped
				 */
				pmptr->alloc = pmptr->end = top;
				pmptr->type = pmap_type;
			}
			else if ( (base < fap) && (top > fap) ) {
				/*
				 * spans first_avail
				 * put mem below first avail in table but
				 * mark already allocated
				 */
				pmptr->base = base;
				pmptr->alloc = pmptr->end = (fap - 1);
				pmptr->type = pmap_type;
				/*
				 * we bump these here inline so the accounting
				 * below works correctly
				 */
				pmptr++;
				pmap_memory_region_count++;
				pmptr->alloc = pmptr->base = fap;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}
			else {
				/*
				 * entire range useable
				 */
				pmptr->alloc = pmptr->base = base;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}

			if (i386_ptob(pmptr->end) > avail_end )
				avail_end = i386_ptob(pmptr->end);

			avail_remaining += (pmptr->end - pmptr->base);

			/*
			 * Consolidate contiguous memory regions, if possible
			 */
			if (prev_pmptr &&
			    pmptr->type == prev_pmptr->type &&
			    pmptr->base == pmptr->alloc &&
			    pmptr->base == (prev_pmptr->end + 1)) {
				prev_pmptr->end = pmptr->end;
			} else {
				pmap_memory_region_count++;
				prev_pmptr = pmptr;
				pmptr++;
			}
		}
	}

#ifdef PRINT_PMAP_MEMORY_TABLE
	/* Debug dump: print every pmap region and flag overlaps with
	 * non-conventional EFI regions (which should never happen). */
	{
	unsigned int j;
	pmap_memory_region_t *p = pmap_memory_regions;
	addr64_t region_start, region_end;
	addr64_t efi_start, efi_end;
	for (j=0;j<pmap_memory_region_count;j++, p++) {
		kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n",
			j, p->type,
			(addr64_t) p->base << I386_PGSHIFT,
			(addr64_t) p->alloc << I386_PGSHIFT,
			(addr64_t) p->end << I386_PGSHIFT);
		region_start = (addr64_t) p->base << I386_PGSHIFT;
		region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
		mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap);
		for (i=0; i<mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
			if (mptr->Type != kEfiLoaderCode &&
			    mptr->Type != kEfiLoaderData &&
			    mptr->Type != kEfiBootServicesCode &&
			    mptr->Type != kEfiBootServicesData &&
			    mptr->Type != kEfiConventionalMemory) {
				efi_start = (addr64_t)mptr->PhysicalStart;
				efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
				if ((efi_start >= region_start && efi_start <= region_end) ||
				    (efi_end >= region_start && efi_end <= region_end)) {
					kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
				}
			}
		}
	}
	}
#endif

	avail_start = first_avail;
	mem_actual = sane_size;

	/*
	 * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory
	 * not reported by EFI.
	 */

	sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));

	/*
	 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64).
	 * Unless overriden by the maxmem= boot-arg
	 * -- which is a non-zero maxmem argument to this function.
	 */
	if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
		maxmem = KERNEL_MAXMEM;
		printf("Physical memory %lld bytes capped at %dGB\n",
			sane_size, (uint32_t) (KERNEL_MAXMEM/GB));
	}

	/*
	 * if user set maxmem, reduce memory sizes
	 */
	if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
		ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
		ppnum_t highest_pn = 0;
		ppnum_t cur_alloc = 0;
		uint64_t pages_to_use;
		unsigned cur_region = 0;

		sane_size = maxmem;

		if (avail_remaining > discarded_pages)
			avail_remaining -= discarded_pages;
		else
			avail_remaining = 0;

		/* Walk regions low-to-high, keeping only the first
		 * avail_remaining pages and truncating the table there. */
		pages_to_use = avail_remaining;

		while (cur_region < pmap_memory_region_count && pages_to_use) {
			for (cur_alloc = pmap_memory_regions[cur_region].alloc;
			     cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use;
			     cur_alloc++) {
				if (cur_alloc > highest_pn)
					highest_pn = cur_alloc;
				pages_to_use--;
			}
			if (pages_to_use == 0)
				pmap_memory_regions[cur_region].end = cur_alloc;

			cur_region++;
		}
		pmap_memory_region_count = cur_region;

		avail_end = i386_ptob(highest_pn + 1);
	}

	/*
	 * mem_size is only a 32 bit container... follow the PPC route
	 * and pin it to a 2 Gbyte maximum
	 */
	if (sane_size > (FOURGIG >> 1))
		mem_size = (vm_size_t)(FOURGIG >> 1);
	else
		mem_size = (vm_size_t)sane_size;
	max_mem = sane_size;

	kprintf("Physical memory %llu MB\n", sane_size/MB);

	if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr)))
		max_valid_dma_address = 4 * GB;
	else
		max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;

	if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize)))
		maxbouncepoolsize = MAXBOUNCEPOOL;
	else
		maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024);

	/* since bsd_mbuf_cluster_reserve() is going to be called, we need to check for server */
	if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) {
		srv = 1;
	}


	/*
	 * bsd_mbuf_cluster_reserve depends on sane_size being set
	 * in order to correctly determine the size of the mbuf pool
	 * that will be reserved
	 */
	if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve)))
		maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve();
	else
		maxloreserve = maxloreserve * (1024 * 1024);


	/* Only systems with memory above the DMA limit need a bounce pool
	 * and a low-memory page reserve. */
	if (avail_end >= max_valid_dma_address) {
		if (maxbouncepoolsize)
			reserve_bouncepool(maxbouncepoolsize);

		if (maxloreserve)
			vm_lopage_poolsize = maxloreserve / PAGE_SIZE;
	}

	/*
	 * Initialize kernel physical map.
	 * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
	 */
	pmap_bootstrap(0, IA32e);
}
546 | ||
0c530ab8 | 547 | |
55e303ae A |
548 | unsigned int |
549 | pmap_free_pages(void) | |
550 | { | |
b0d623f7 | 551 | return (unsigned int)avail_remaining; |
55e303ae A |
552 | } |
553 | ||
b0d623f7 A |
554 | #if defined(__LP64__) |
555 | /* On large memory systems, early allocations should prefer memory from the | |
556 | * last region, which is typically all physical memory >4GB. This is used | |
557 | * by pmap_steal_memory and pmap_pre_expand during init only. */ | |
558 | boolean_t | |
559 | pmap_next_page_k64( ppnum_t *pn) | |
560 | { | |
561 | if(max_mem >= (32*GB)) { | |
562 | pmap_memory_region_t *last_region = &pmap_memory_regions[pmap_memory_region_count-1]; | |
563 | if (last_region->alloc != last_region->end) { | |
564 | *pn = last_region->alloc++; | |
565 | avail_remaining--; | |
566 | return TRUE; | |
567 | } | |
568 | } | |
569 | return pmap_next_page(pn); | |
570 | } | |
571 | #endif | |
0c530ab8 | 572 | |
55e303ae A |
573 | boolean_t |
574 | pmap_next_page( | |
575 | ppnum_t *pn) | |
576 | { | |
0c530ab8 | 577 | if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { |
b0d623f7 A |
578 | if (pmap_memory_regions[pmap_memory_region_current].alloc == |
579 | pmap_memory_regions[pmap_memory_region_current].end) { | |
580 | pmap_memory_region_current++; | |
0c530ab8 A |
581 | continue; |
582 | } | |
583 | *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; | |
584 | avail_remaining--; | |
585 | ||
586 | return TRUE; | |
91447636 A |
587 | } |
588 | return FALSE; | |
55e303ae A |
589 | } |
590 | ||
0c530ab8 | 591 | |
55e303ae A |
592 | boolean_t |
593 | pmap_valid_page( | |
91447636 | 594 | ppnum_t pn) |
55e303ae | 595 | { |
0c530ab8 A |
596 | unsigned int i; |
597 | pmap_memory_region_t *pmptr = pmap_memory_regions; | |
598 | ||
0c530ab8 | 599 | for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { |
2d21ac55 | 600 | if ( (pn >= pmptr->base) && (pn <= pmptr->end) ) |
0c530ab8 A |
601 | return TRUE; |
602 | } | |
603 | return FALSE; | |
604 | } | |
605 | ||
606 | ||
607 | static void | |
608 | reserve_bouncepool(uint32_t bounce_pool_wanted) | |
609 | { | |
610 | pmap_memory_region_t *pmptr = pmap_memory_regions; | |
611 | pmap_memory_region_t *lowest = NULL; | |
612 | unsigned int i; | |
613 | unsigned int pages_needed; | |
614 | ||
615 | pages_needed = bounce_pool_wanted / PAGE_SIZE; | |
616 | ||
617 | for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { | |
2d21ac55 | 618 | if ( (pmptr->end - pmptr->alloc) >= pages_needed ) { |
0c530ab8 A |
619 | if ( (lowest == NULL) || (pmptr->alloc < lowest->alloc) ) |
620 | lowest = pmptr; | |
621 | } | |
622 | } | |
623 | if ( (lowest != NULL) ) { | |
624 | bounce_pool_base = lowest->alloc * PAGE_SIZE; | |
625 | bounce_pool_size = bounce_pool_wanted; | |
626 | ||
627 | lowest->alloc += pages_needed; | |
628 | avail_remaining -= pages_needed; | |
629 | } | |
55e303ae | 630 | } |
b0d623f7 A |
631 | |
/*
 * Called once VM is fully initialized so that we can release unused
 * sections of low memory to the general pool.
 * Also complete the set-up of identity-mapped sections of the kernel:
 * 1) write-protect kernel text
 * 2) map kernel text using large pages if possible
 * 3) read and write-protect page zero (for K32)
 * 4) map the global page at the appropriate virtual address.
 *
 * Use of large pages
 * ------------------
 * To effectively map and write-protect all kernel text pages, the text
 * must be 2M-aligned at the base, and the data section above must also be
 * 2M-aligned. That is, there's padding below and above. This is achieved
 * through linker directives. Large pages are used only if this alignment
 * exists (and not overriden by the -kernel_text_page_4K boot-arg). The
 * memory layout is:
 *
 *                       :                :
 *                       |     __DATA     |
 *               sdata:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               etext:  ------------------
 *                       |                |
 *                       :                :
 *                       |                |
 *                       |     __TEXT     |
 *                       |                |
 *                       :                :
 *                       |                |
 *               stext:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               eHIB:   ------------------
 *                       |     __HIB      |
 *                       :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
void
pmap_lowmem_finalize(void)
{
	spl_t           spl;
	int		i;

	/* Check the kernel is linked at the expected base address */
	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
	    I386_KERNEL_IMAGE_BASE_PAGE)
		panic("pmap_lowmem_finalize() unexpected kernel base address");

	/*
	 * Free all pages in pmap regions below the base:
	 * rdar://6332712
	 * We can't free all the pages to VM that EFI reports available.
	 * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
	 * There's also a size miscalculation here: pend is one page less
	 * than it should be but this is not fixed to be backwards
	 * compatible.
	 * Due to this current EFI limitation, we take only the first
	 * entry in the memory region table. However, the loop is retained
	 * (with the intended termination criteria commented out) in the
	 * hope that some day we can free all low-memory ranges.
	 */
	for (i = 0;
//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
	     i < 1;
	     i++) {
		vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
		vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
//		vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1);

		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
		    (void *) ml_static_ptovirt(pbase),
		    (void *) (pend - pbase), i);
		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
	}

	/*
	 * If text and data are both 2MB-aligned,
	 * we can map text with large-pages,
	 * unless the -kernel_text_ps_4K boot-arg overrides.
	 */
	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
		kprintf("Kernel text is 2MB aligned");
		kernel_text_ps_4K = FALSE;
		if (PE_parse_boot_argn("-kernel_text_ps_4K",
				       &kernel_text_ps_4K,
				       sizeof (kernel_text_ps_4K)))
			kprintf(" but will be mapped with 4K pages\n");
		else
			kprintf(" and will be mapped with 2M pages\n");
	}

	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
	if (wpkernel)
		kprintf("Kernel text %p-%p to be write-protected\n",
			(void *) stext, (void *) etext);

	/* Block interrupts while page tables are being rewritten. */
	spl = splhigh();

	/*
	 * Scan over text if mappings are to be changed:
	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
	 * - Change to large-pages if possible and not overriden.
	 */
	if (kernel_text_ps_4K && wpkernel) {
		vm_offset_t     myva;
		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
			pt_entry_t     *ptep;

			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep)
				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
		}
	}

	if (!kernel_text_ps_4K) {
		vm_offset_t     myva;

		/*
		 * Release zero-filled page padding used for 2M-alignment.
		 */
		DBG("ml_static_mfree(%p,%p) for padding below text\n",
			(void *) eHIB, (void *) (stext - eHIB));
		ml_static_mfree(eHIB, stext - eHIB);
		DBG("ml_static_mfree(%p,%p) for padding above text\n",
			(void *) etext, (void *) (sdata - etext));
		ml_static_mfree(etext, sdata - etext);

		/*
		 * Coalesce text pages into large pages.
		 */
		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
			pt_entry_t	*ptep;
			vm_offset_t	pte_phys;
			pt_entry_t	*pdep;
			pt_entry_t	pde;

			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			DBG("myva: %p pdep: %p ptep: %p\n",
				(void *) myva, (void *) pdep, (void *) ptep);
			if ((*ptep & INTEL_PTE_VALID) == 0)
				continue;
			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
			pde = *pdep & PTMASK;	/* page attributes from pde */
			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
			pde |= pte_phys;	/* take page frame from pte */

			if (wpkernel)
				pde &= ~INTEL_PTE_RW;
			DBG("pmap_store_pte(%p,0x%llx)\n",
				(void *)pdep, pde);
			pmap_store_pte(pdep, pde);

			/*
			 * Free the now-unused level-1 pte.
			 * Note: ptep is a virtual address to the pte in the
			 * recursive map. We can't use this address to free
			 * the page. Instead we need to compute its address
			 * in the Idle PTEs in "low memory".
			 */
			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
						+ (pte_phys >> PTPGSHIFT);
			DBG("ml_static_mfree(%p,0x%x) for pte\n",
				(void *) vm_ptep, PAGE_SIZE);
			ml_static_mfree(vm_ptep, PAGE_SIZE);
		}

		/* Change variable read by sysctl machdep.pmap */
		pmap_kernel_text_ps = I386_LPGBYTES;
	}

#if defined(__i386__)
	/* no matter what, kernel page zero is not accessible */
	pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
#endif

	/* map lowmem global page into fixed addr */
	pt_entry_t *pte = NULL;
	if (0 == (pte = pmap_pte(kernel_pmap,
				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
		panic("lowmem pte");
	/* make sure it is defined on page boundary */
	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
	/* Wired, referenced, modified, writable mapping of the lowGlo page. */
	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
				| INTEL_PTE_REF
				| INTEL_PTE_MOD
				| INTEL_PTE_WIRED
				| INTEL_PTE_VALID
				| INTEL_PTE_RW);
	splx(spl);
	/* Ensure the new translations are visible before returning. */
	flush_tlb();
}
832 |