/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>

/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 *	Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	if (size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	map_size = vm_map_round_page(size);
	map_mask = (vm_map_offset_t)mask;

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE))
		!= KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map, vm_map_trunc_page(map_addr),
			      vm_map_round_page(map_addr + map_size), 0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
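
/*
 * Example (illustrative sketch only, not a caller in this file): a driver
 * that needs a physically contiguous, wired buffer kept below 4GB for a
 * 32-bit DMA engine might use kmem_alloc_contig() roughly as follows.
 * The buffer size, the atop() bound, and the variable names are assumptions
 * made for the sake of the example.
 *
 *	vm_offset_t	dma_buf;
 *	kern_return_t	kr;
 *
 *	kr = kmem_alloc_contig(kernel_map, &dma_buf, 16 * PAGE_SIZE,
 *			       0, (ppnum_t) atop(0xFFFFFFFFULL), 0, 0);
 *	if (kr == KERN_SUCCESS) {
 *		... program the device with the buffer ...
 *		kmem_free(kernel_map, dma_buf, 16 * PAGE_SIZE);
 *	}
 */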

/*
 *	Master entry point for allocating kernel memory.
 *	NOTE: this routine is _never_ interrupt safe.
 *
 *	map		: map to allocate into
 *	addrp		: pointer to start address of new memory
 *	size		: size of memory requested
 *	flags		: options
 *			  KMA_HERE		*addrp is base address, else "anywhere"
 *			  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *			  KMA_KOBJECT		use kernel_object
 *			  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *						if set and a lomemory pool is available
 *						grab pages from it... this also implies
 *						KMA_NOPAGEWAIT
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	if (size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}
	if (flags & KMA_LOMEM) {
		if ( !(flags & KMA_NOPAGEWAIT) ) {
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
	}

	map_size = vm_map_round_page(size);
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 */
	if (map_size > (1 << 30)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	for (i = 0; i < wired_page_count; i++) {
		uint64_t	unavailable;

		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

			if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t)wired_page_list;
		wired_page_list = mem;
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}
	for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
		if (wired_page_list == NULL)
			panic("kernel_memory_allocate: wired_page_list == NULL");

		mem = wired_page_list;
		wired_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;
		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
			   VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE);
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	vm_page_lockspin_queues();
	vm_page_wire_count += wired_page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalesce
	 */
	if (object == kernel_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
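
/*
 * Example (illustrative sketch): a subsystem allocating a wired, stack-like
 * region guarded at both ends might call this routine as follows.  The size
 * passed in must include the space for the guard pages themselves; on
 * success, addr points at the leading guard page and the four usable wired
 * pages start at addr + PAGE_SIZE.  The size and flag combination here are
 * assumptions chosen for the example, not taken from any caller in this file.
 *
 *	vm_offset_t	addr;
 *	kern_return_t	kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr,
 *				    4 * PAGE_SIZE + 2 * PAGE_SIZE, 0,
 *				    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST);
 *	if (kr == KERN_SUCCESS) {
 *		... use the pages at [addr + PAGE_SIZE, addr + 5 * PAGE_SIZE) ...
 *	}
 */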

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}
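
/*
 * Example (illustrative sketch): the usual pattern for a temporary wired
 * kernel buffer is a kmem_alloc()/kmem_free() pair against kernel_map.
 * Since the memory is not zero-filled, callers typically clear it before
 * use.  The buffer name and size below are assumptions for the example.
 *
 *	vm_offset_t	buf;
 *
 *	if (kmem_alloc(kernel_map, &buf, PAGE_SIZE) == KERN_SUCCESS) {
 *		bzero((void *)buf, PAGE_SIZE);
 *		... use buf, then release both the mapping and the pages ...
 *		kmem_free(kernel_map, buf, PAGE_SIZE);
 *	}
 */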

/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t		map,
	vm_offset_t		oldaddr,
	vm_size_t		oldsize,
	vm_offset_t		*newaddrp,
	vm_size_t		newsize)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr);
	oldmapmax = vm_map_round_page(oldaddr + oldsize);
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize);

	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->size != oldmapsize)
		panic("kmem_realloc");
	object->size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize-oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for(offset = oldmapsize;
		    offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert (newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return (kr);
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}
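
/*
 * Example (illustrative sketch): growing a kmem_alloc'd buffer.  Because the
 * old region stays mapped after a successful realloc (its pages are mapped
 * twice), the caller is expected to kmem_free() the old address once it no
 * longer needs it.  The names and sizes below are assumptions for the example.
 *
 *	vm_offset_t	oldbuf, newbuf;
 *	kern_return_t	kr;
 *
 *	kr = kmem_realloc(kernel_map, oldbuf, 4 * PAGE_SIZE,
 *			  &newbuf, 8 * PAGE_SIZE);
 *	if (kr == KERN_SUCCESS) {
 *		kmem_free(kernel_map, oldbuf, 4 * PAGE_SIZE);
 *		... continue using newbuf ...
 *	}
 */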

/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}
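
/*
 * Example (illustrative sketch): allocations that will never be copied or
 * reallocated, such as long-lived kernel data structures, can be backed by
 * the kernel_object directly.  The variable name is an assumption.
 *
 *	vm_offset_t	table;
 *
 *	if (kmem_alloc_kobject(kernel_map, &table, 2 * PAGE_SIZE) != KERN_SUCCESS)
 *		... handle the allocation failure ...
 */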

/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}
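
/*
 * Example (illustrative sketch): requesting a 64KB region whose start
 * address is 64KB-aligned.  The size must be a power of two because it is
 * also used as the alignment mask (size - 1).
 *
 *	vm_offset_t	region;
 *
 *	if (kmem_alloc_aligned(kernel_map, &region, 64 * 1024) == KERN_SUCCESS) {
 *		... region is 64KB-aligned and wired ...
 *	}
 */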

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + 0x1000;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size);

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}
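
/*
 * Example (illustrative sketch): pageable kernel memory suits large,
 * infrequently touched buffers that may safely be paged out, for instance
 * from the kernel_pageable_map submap.  The names and size are assumptions.
 *
 *	vm_offset_t	cache_buf;
 *
 *	if (kmem_alloc_pageable(kernel_pageable_map, &cache_buf,
 *				32 * PAGE_SIZE) == KERN_SUCCESS) {
 *		... touching cache_buf may fault pages in on demand ...
 *	}
 */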

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if(size == 0) {
#if MACH_ASSERT
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
#endif
		return;
	}

	kr = vm_map_remove(map, vm_map_trunc_page(addr),
			   vm_map_round_page(addr + size),
			   VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}

/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size)
{
	vm_object_size_t		alloc_size;

	alloc_size = vm_object_round_page(size);
	vm_object_lock(object);
	while (alloc_size) {
		register vm_page_t	mem;

		/*
		 *	Allocate a page
		 */
		while (VM_PAGE_NULL ==
		       (mem = vm_page_alloc(object, offset))) {
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;

		alloc_size -= PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}

/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t			map_start;
	vm_map_offset_t			map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start);
	map_end = vm_map_round_page(end);

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	while (map_start < map_end) {
		register vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lockspin_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 * ENCRYPTED SWAP:
		 * The page is supposed to be wired now, so it
		 * shouldn't be encrypted at this point.  It can
		 * safely be entered in the page table.
		 */
		ASSERT_PAGE_DECRYPTED(mem);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */

		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_start, mem, protection,
			   ((unsigned int)(mem->object->wimg_bits))
			    & VM_WIMG_MASK,
			   TRUE);

		map_start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	flags		VM_FLAGS_* options (e.g. VM_FLAGS_ANYWHERE to place
 *			the region anywhere in the parent map)
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size);

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = (flags & VM_FLAGS_ANYWHERE) ?
		vm_map_min(parent) : vm_map_trunc_page(*addr);

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
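
/*
 * Example (illustrative sketch): carving a pageable submap out of the kernel
 * map, in the style of the special-purpose submaps created at bootstrap.
 * The size and variable names are assumptions made for the example.
 *
 *	vm_offset_t	min;
 *	vm_map_t	my_submap;
 *	kern_return_t	kr;
 *
 *	kr = kmem_suballoc(kernel_map, &min, 16 * 1024 * 1024,
 *			   TRUE, VM_FLAGS_ANYWHERE, &my_submap);
 *	if (kr == KERN_SUCCESS) {
 *		... allocate from my_submap, e.g. with kmem_alloc_pageable() ...
 *	}
 */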

/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t	map_start;
	vm_map_offset_t	map_end;

	map_start = vm_map_trunc_page(start);
	map_end = vm_map_round_page(end);

	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_ADDRESS,
				   map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_ADDRESS) {
		vm_map_offset_t map_addr;

		map_addr = VM_MIN_KERNEL_ADDRESS;
		(void) vm_map_enter(kernel_map,
				    &map_addr,
				    (vm_map_size_t)(map_start - VM_MIN_KERNEL_ADDRESS),
				    (vm_map_offset_t) 0,
				    VM_FLAGS_ANYWHERE | VM_FLAGS_NO_PMAP_CHECK,
				    VM_OBJECT_NULL,
				    (vm_object_offset_t) 0, FALSE,
				    VM_PROT_NONE, VM_PROT_NONE,
				    VM_INHERIT_DEFAULT);
	}


	/*
	 * Account for kernel memory (text, data, bss, vm shenanigans).
	 * This may include inaccessible "holes" as determined by what
	 * the machine-dependent init code includes in max_mem.
	 */
	assert(atop_64(max_mem) == (unsigned int) atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem) -
			      (vm_page_free_count +
			       vm_page_active_count +
			       vm_page_inactive_count));

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}


/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	oldmap;

	if (vm_map_pmap(map) == pmap_kernel())
	{
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	}
	else if (current_map() == map)
	{
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	}
	else
	{
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
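
/*
 * Example (illustrative sketch): copying a small structure out of another
 * task's address space, given that task's map.  The struct and variable
 * names are assumptions made for the example.
 *
 *	struct request	req;
 *	kern_return_t	kr;
 *
 *	kr = copyinmap(task_map, user_ptr, &req, sizeof (req));
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *
 * copyoutmap() below is the symmetric operation for writing into a map,
 * though it only supports the kernel map and the current user map.
 */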

/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}

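/*
 *	Routine:	vm_conflict_check
 *	Purpose:
 *		Check whether the range [off, off+len) in the given map
 *		conflicts with an existing mapping of the same pager at
 *		file_off.  Returns KERN_SUCCESS if the range is unrelated
 *		to the pager, KERN_ALREADY_WAITING if it is already mapped
 *		to exactly that file range, and KERN_FAILURE on a partial
 *		or mismatched overlap.
 */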
kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t		off,
	vm_map_size_t		len,
	memory_object_t		pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while(vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		while(obj->shadow) {
			obj_off += obj->shadow_offset;
			obj = obj->shadow;
		}
		if((obj->pager_created) && (obj->pager == pager)) {
			if(((obj->paging_offset) + obj_off) == file_off) {
				if(off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if(kr != KERN_SUCCESS) {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if(len <= ((entry->vme_end - entry->vme_start) -
			   (off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
			       (off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if(map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}