]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_map.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / osfmk / vm / vm_map.c
CommitLineData
1c79356b 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5ba3f43e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
5ba3f43e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
5ba3f43e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
5ba3f43e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
5ba3f43e 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
5ba3f43e 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
5ba3f43e 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5ba3f43e 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
5ba3f43e 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
5ba3f43e 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
d9a64523 87#include <kern/exc_guard.h>
91447636 88#include <kern/kalloc.h>
1c79356b 89#include <kern/zalloc.h>
91447636
A
90
91#include <vm/cpm.h>
d9a64523 92#include <vm/vm_compressor.h>
39236c6e 93#include <vm/vm_compressor_pager.h>
1c79356b
A
94#include <vm/vm_init.h>
95#include <vm/vm_fault.h>
96#include <vm/vm_map.h>
97#include <vm/vm_object.h>
98#include <vm/vm_page.h>
b0d623f7 99#include <vm/vm_pageout.h>
d9a64523 100#include <vm/pmap.h>
1c79356b
A
101#include <vm/vm_kern.h>
102#include <ipc/ipc_port.h>
103#include <kern/sched_prim.h>
104#include <kern/misc_protos.h>
1c79356b 105
91447636
A
106#include <mach/vm_map_server.h>
107#include <mach/mach_host_server.h>
2d21ac55 108#include <vm/vm_protos.h>
b0d623f7 109#include <vm/vm_purgeable_internal.h>
91447636 110
91447636 111#include <vm/vm_protos.h>
2d21ac55 112#include <vm/vm_shared_region.h>
6d2010ae 113#include <vm/vm_map_store.h>
91447636 114
5ba3f43e
A
115#include <san/kasan.h>
116
d9a64523
A
117#include <sys/codesign.h>
118#include <libkern/section_keywords.h>
119#if DEVELOPMENT || DEBUG
120extern int proc_selfcsflags(void);
121#if CONFIG_EMBEDDED
122extern int panic_on_unsigned_execute;
123#endif /* CONFIG_EMBEDDED */
124#endif /* DEVELOPMENT || DEBUG */
125
5ba3f43e 126#if __arm64__
d9a64523
A
127extern const int fourk_binary_compatibility_unsafe;
128extern const int fourk_binary_compatibility_allow_wx;
5ba3f43e 129#endif /* __arm64__ */
39037602
A
130extern int proc_selfpid(void);
131extern char *proc_name_address(void *p);
132
133#if VM_MAP_DEBUG_APPLE_PROTECT
134int vm_map_debug_apple_protect = 0;
135#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136#if VM_MAP_DEBUG_FOURK
137int vm_map_debug_fourk = 0;
138#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 139
d9a64523
A
140SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141int vm_map_executable_immutable_verbose = 0;
5ba3f43e 142
cb323159
A
143os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
0a7de745 145extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
146/* Internal prototypes
147 */
2d21ac55 148
91447636 149static void vm_map_simplify_range(
0a7de745
A
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
39236c6e 205 vm_map_address_t start,
0a7de745 206 boolean_t discard_on_success);
1c79356b 207
0a7de745
A
208static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
2d21ac55 212 vm_map_offset_t start,
0a7de745 213 pmap_t pmap);
1c79356b 214
0a7de745
A
215static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
2d21ac55 217 vm_map_address_t src_addr,
0a7de745
A
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
39037602 226 vm_map_size_t copy_size,
0a7de745
A
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296static void vm_map_region_look_for_page(
297 vm_map_t map,
2d21ac55 298 vm_map_offset_t va,
0a7de745
A
299 vm_object_t object,
300 vm_object_offset_t offset,
2d21ac55
A
301 int max_refcnt,
302 int depth,
39236c6e
A
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
91447636 305
0a7de745
A
306static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
1c79356b 309
b0d623f7 310
0a7de745
A
311static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
b0d623f7 315
0a7de745
A
316static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
b0d623f7 320
0a7de745
A
321static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
b0d623f7 325
0a7de745
A
326static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
b0d623f7 330
3e170ce0 331#if MACH_ASSERT
0a7de745
A
332static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
3e170ce0 336#endif /* MACH_ASSERT */
6d2010ae 337
0a7de745
A
338static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
d9a64523 340
5ba3f43e
A
341pid_t find_largest_process_vm_map_entries(void);
342
1c79356b
A
343/*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
316670eb 352
d9a64523
A
#if !CONFIG_EMBEDDED

/*
 * vm_map_entry_copy() has already copied "used_for_jit" from OLD to NEW
 * by the time this runs.  On macOS the copy is allowed to stay
 * "used for jit", so nothing is modified here; we only sanity-check that
 * the flag was carried over and that the new entry is not marked
 * "pmap_cs_associated".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)                        \
MACRO_BEGIN                                                             \
	assert((NEW)->used_for_jit == (OLD)->used_for_jit);             \
	assert((NEW)->pmap_cs_associated == FALSE);                     \
MACRO_END

#else /* !CONFIG_EMBEDDED */

/*
 * vm_map_entry_copy() has already copied "used_for_jit" from OLD to NEW
 * by the time this runs.  For security reasons, embedded platforms never
 * let the new mapping inherit "used for jit" or "pmap_cs_associated",
 * so both flags are unconditionally cleared on NEW.  OLD is not used.
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)                        \
MACRO_BEGIN                                                             \
	(NEW)->pmap_cs_associated = FALSE;                              \
	(NEW)->used_for_jit = FALSE;                                    \
MACRO_END

#endif /* !CONFIG_EMBEDDED */
380
0a7de745
A
/*
 * vm_map_entry_copy(NEW, OLD):
 *
 * Make *NEW a second entry for the memory described by *OLD.  All of
 * OLD's contents are copied, then the state that must not be shared
 * between the two entries is reset on NEW:
 *   - NEW keeps its own "from_reserved_zone" flag (it describes where
 *     NEW itself was allocated, so it is saved before the structure copy
 *     and restored afterwards);
 *   - wiring counts are zeroed;
 *   - sharing / wakeup / transition / permanent / resilient / atomic /
 *     no-copy-on-read flags are cleared;
 *   - code-signing related flags go through
 *     VM_MAP_ENTRY_COPY_CODE_SIGNING();
 *   - IOKit accounting is converted into regular pmap accounting.
 *
 * Both arguments are evaluated several times; pass plain pointers.
 */
#define vm_map_entry_copy(NEW, OLD)                                     \
MACRO_BEGIN                                                             \
	boolean_t _vmec_saved_zone_flag = (NEW)->from_reserved_zone;    \
	*(NEW) = *(OLD);                                                \
	(NEW)->from_reserved_zone = _vmec_saved_zone_flag;              \
	(NEW)->wired_count = 0;                                         \
	(NEW)->user_wired_count = 0;                                    \
	(NEW)->is_shared = FALSE;                                       \
	(NEW)->needs_wakeup = FALSE;                                    \
	(NEW)->in_transition = FALSE;                                   \
	(NEW)->permanent = FALSE;                                       \
	(NEW)->vme_resilient_codesign = FALSE;                          \
	(NEW)->vme_resilient_media = FALSE;                             \
	(NEW)->vme_atomic = FALSE;                                      \
	(NEW)->vme_no_copy_on_read = FALSE;                             \
	VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD));                    \
	if ((NEW)->iokit_acct) {                                        \
	        assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
	        (NEW)->iokit_acct = FALSE;                              \
	        (NEW)->use_pmap = TRUE;                                 \
	}                                                               \
MACRO_END
403
0a7de745
A
/*
 * vm_map_entry_copy_full(NEW, OLD):
 *
 * Make *NEW an exact duplicate of *OLD (wire counts included), except
 * that NEW keeps its own "from_reserved_zone" flag, which is saved
 * before the structure copy and written back afterwards.
 */
#define vm_map_entry_copy_full(NEW, OLD)                                \
MACRO_BEGIN                                                             \
	boolean_t _vmecf_saved_zone_flag = (NEW)->from_reserved_zone;   \
	*(NEW) = *(OLD);                                                \
	(NEW)->from_reserved_zone = _vmecf_saved_zone_flag;             \
MACRO_END
1c79356b 410
cb323159
A
411/*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413 * These functions evaluate to zero on success and non-zero value on failure.
414 */
415__attribute__((always_inline))
416int
417vm_map_lock_read_to_write(vm_map_t map)
418{
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424}
425
426__attribute__((always_inline))
427boolean_t
428vm_map_try_lock(vm_map_t map)
429{
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435}
436
437__attribute__((always_inline))
438boolean_t
439vm_map_try_lock_read(vm_map_t map)
440{
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446}
447
2d21ac55
A
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 * 32-bit PPC apps		may execute from both stack and data areas
 * 32-bit Intel apps		may execute from data areas but not stack
 * 64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis.  Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */
477
478extern int allow_data_exec, allow_stack_exec;
479
480int
481override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482{
483 int current_abi;
484
0a7de745
A
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
3e170ce0 488
2d21ac55
A
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
0a7de745 493 if (vm_map_is_64bit(map)) {
2d21ac55 494 current_abi = VM_ABI_64;
0a7de745 495 } else {
2d21ac55 496 current_abi = VM_ABI_32;
0a7de745 497 }
2d21ac55
A
498
499 /*
5ba3f43e 500 * Determine if we should allow the execution based on whether it's a
2d21ac55
A
501 * stack or data area and the current architecture.
502 */
503
0a7de745 504 if (user_tag == VM_MEMORY_STACK) {
2d21ac55 505 return allow_stack_exec & current_abi;
0a7de745 506 }
2d21ac55 507
6d2010ae 508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
509}
510
511
1c79356b
A
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	abutting entries.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */
554
0a7de745
A
555static zone_t vm_map_zone; /* zone for vm_map structures */
556zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
560
561
562/*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
0a7de745 568vm_object_t vm_submap_object;
1c79356b 569
0a7de745
A
570static void *map_data;
571static vm_size_t map_data_size;
572static void *kentry_data;
573static vm_size_t kentry_data_size;
574static void *map_holes_data;
575static vm_size_t map_holes_data_size;
1c79356b 576
5ba3f43e 577#if CONFIG_EMBEDDED
0a7de745 578#define NO_COALESCE_LIMIT 0
5ba3f43e 579#else
b0d623f7 580#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
5ba3f43e 581#endif
1c79356b 582
55e303ae 583/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 584unsigned int not_in_kdp = 1;
55e303ae 585
6d2010ae
A
586unsigned int vm_map_set_cache_attr_count = 0;
587
588kern_return_t
589vm_map_set_cache_attr(
0a7de745
A
590 vm_map_t map,
591 vm_map_offset_t va)
6d2010ae 592{
0a7de745
A
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
3e170ce0 607 object = VME_OBJECT(map_entry);
6d2010ae
A
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621done:
622 vm_map_unlock_read(map);
623
624 return kr;
625}
626
627
593a1d5f
A
628#if CONFIG_CODE_DECRYPTION
629/*
630 * vm_map_apple_protected:
5ba3f43e 631 * This remaps the requested part of the object with an object backed by
593a1d5f
A
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
636 */
0c530ab8
A
637kern_return_t
638vm_map_apple_protected(
0a7de745
A
639 vm_map_t map,
640 vm_map_offset_t start,
641 vm_map_offset_t end,
642 vm_object_offset_t crypto_backing_offset,
593a1d5f 643 struct pager_crypt_info *crypt_info)
0c530ab8 644{
0a7de745
A
645 boolean_t map_locked;
646 kern_return_t kr;
647 vm_map_entry_t map_entry;
3e170ce0 648 struct vm_map_entry tmp_entry;
0a7de745
A
649 memory_object_t unprotected_mem_obj;
650 vm_object_t protected_object;
651 vm_map_offset_t map_addr;
652 vm_map_offset_t start_aligned, end_aligned;
653 vm_object_offset_t crypto_start, crypto_end;
654 int vm_flags;
5ba3f43e
A
655 vm_map_kernel_flags_t vmk_flags;
656
657 vm_flags = 0;
658 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
0c530ab8 659
3e170ce0
A
660 map_locked = FALSE;
661 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 662
3e170ce0
A
663 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
664 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
665 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
666 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 667
5ba3f43e
A
668#if __arm64__
669 /*
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
672 *
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
675 * + the center,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678 * Each of these mapping might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
681 */
5ba3f43e 682#endif /* __arm64__ */
b0d623f7 683
3e170ce0
A
684 map_addr = start_aligned;
685 for (map_addr = start_aligned;
0a7de745
A
686 map_addr < end;
687 map_addr = tmp_entry.vme_end) {
3e170ce0
A
688 vm_map_lock(map);
689 map_locked = TRUE;
b0d623f7 690
3e170ce0
A
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map,
0a7de745
A
693 map_addr,
694 &map_entry) ||
3e170ce0
A
695 map_entry->is_sub_map ||
696 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
697 !(map_entry->protection & VM_PROT_EXECUTE)) {
698 /* that memory is not properly mapped */
699 kr = KERN_INVALID_ARGUMENT;
700 goto done;
701 }
b0d623f7 702
3e170ce0
A
703 /* get the protected object to be decrypted */
704 protected_object = VME_OBJECT(map_entry);
705 if (protected_object == VM_OBJECT_NULL) {
706 /* there should be a VM object here at this point */
707 kr = KERN_INVALID_ARGUMENT;
708 goto done;
709 }
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object);
712
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map, map_entry, start_aligned);
715 vm_map_clip_end(map, map_entry, end_aligned);
716
717 tmp_entry = *map_entry;
718 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
719 vm_map_unlock(map);
720 map_locked = FALSE;
721
722 /*
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
725 */
726 crypto_start = 0;
727 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
728 if (tmp_entry.vme_start < start) {
729 if (tmp_entry.vme_start != start_aligned) {
730 kr = KERN_INVALID_ADDRESS;
731 }
732 crypto_start += (start - tmp_entry.vme_start);
733 }
734 if (tmp_entry.vme_end > end) {
735 if (tmp_entry.vme_end != end_aligned) {
736 kr = KERN_INVALID_ADDRESS;
737 }
738 crypto_end -= (tmp_entry.vme_end - end);
739 }
740
741 /*
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
745 */
746 if (crypto_backing_offset == (vm_object_offset_t) -1) {
747 crypto_backing_offset = VME_OFFSET(&tmp_entry);
748 }
0c530ab8 749
3e170ce0
A
750 /*
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
755 * it.
756 */
757 unprotected_mem_obj = apple_protect_pager_setup(
758 protected_object,
759 VME_OFFSET(&tmp_entry),
760 crypto_backing_offset,
761 crypt_info,
762 crypto_start,
763 crypto_end);
764
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object);
767
768 if (unprotected_mem_obj == NULL) {
769 kr = KERN_FAILURE;
770 goto done;
771 }
772
773 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
5ba3f43e
A
774 /* can overwrite an immutable mapping */
775 vmk_flags.vmkf_overwrite_immutable = TRUE;
776#if __arm64__
777 if (tmp_entry.used_for_jit &&
778 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
0a7de745 779 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
5ba3f43e
A
780 fourk_binary_compatibility_unsafe &&
781 fourk_binary_compatibility_allow_wx) {
782 printf("** FOURK_COMPAT [%d]: "
0a7de745
A
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry.vme_start);
5ba3f43e
A
785 vmk_flags.vmkf_map_jit = TRUE;
786 }
787#endif /* __arm64__ */
3e170ce0
A
788
789 /* map this memory object in place of the current one */
790 map_addr = tmp_entry.vme_start;
791 kr = vm_map_enter_mem_object(map,
0a7de745
A
792 &map_addr,
793 (tmp_entry.vme_end -
794 tmp_entry.vme_start),
795 (mach_vm_offset_t) 0,
796 vm_flags,
797 vmk_flags,
798 VM_KERN_MEMORY_NONE,
799 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
800 0,
801 TRUE,
802 tmp_entry.protection,
803 tmp_entry.max_protection,
804 tmp_entry.inheritance);
5ba3f43e 805 assertf(kr == KERN_SUCCESS,
0a7de745 806 "kr = 0x%x\n", kr);
5ba3f43e 807 assertf(map_addr == tmp_entry.vme_start,
0a7de745
A
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
809 (uint64_t)map_addr,
810 (uint64_t) tmp_entry.vme_start,
811 &tmp_entry);
3e170ce0
A
812
813#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
814 if (vm_map_debug_apple_protect) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
0a7de745
A
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
819 map,
820 (uint64_t) map_addr,
821 (uint64_t) (map_addr + (tmp_entry.vme_end -
822 tmp_entry.vme_start)),
823 unprotected_mem_obj,
824 protected_object,
825 VME_OFFSET(&tmp_entry),
826 crypto_backing_offset,
827 crypto_start,
828 crypto_end);
39037602 829 }
3e170ce0 830#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
5ba3f43e 831
3e170ce0
A
832 /*
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
836 * the memory object.
837 */
838 memory_object_deallocate(unprotected_mem_obj);
839 unprotected_mem_obj = MEMORY_OBJECT_NULL;
840
841 /* continue with next map entry */
842 crypto_backing_offset += (tmp_entry.vme_end -
0a7de745 843 tmp_entry.vme_start);
3e170ce0
A
844 crypto_backing_offset -= crypto_start;
845 }
846 kr = KERN_SUCCESS;
0c530ab8
A
847
848done:
849 if (map_locked) {
3e170ce0 850 vm_map_unlock(map);
0c530ab8
A
851 }
852 return kr;
853}
0a7de745 854#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
855
856
0a7de745
A
857lck_grp_t vm_map_lck_grp;
858lck_grp_attr_t vm_map_lck_grp_attr;
859lck_attr_t vm_map_lck_attr;
860lck_attr_t vm_map_lck_rw_attr;
b0d623f7 861
d9a64523
A
862#if CONFIG_EMBEDDED
863int malloc_no_cow = 1;
864#define VM_PROTECT_WX_FAIL 0
865#else /* CONFIG_EMBEDDED */
866int malloc_no_cow = 0;
867#define VM_PROTECT_WX_FAIL 1
868#endif /* CONFIG_EMBEDDED */
869uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
4ba76501
A
870#if DEBUG
871int vm_check_map_sanity = 0;
872#endif
b0d623f7 873
593a1d5f
A
/*
 *	vm_map_init:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:		used to allocate maps.
 *	vm_map_entry_zone:	used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 *	of a new entry.
 */
1c79356b
A
895void
896vm_map_init(
897 void)
898{
7ddcb079 899 vm_size_t entry_zone_alloc_size;
316670eb
A
900 const char *mez_name = "VM map entries";
901
0a7de745
A
902 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
903 PAGE_SIZE, "maps");
0b4c1975 904 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
0a7de745 905#if defined(__LP64__)
7ddcb079
A
906 entry_zone_alloc_size = PAGE_SIZE * 5;
907#else
908 entry_zone_alloc_size = PAGE_SIZE * 6;
909#endif
91447636 910 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
0a7de745
A
911 1024 * 1024, entry_zone_alloc_size,
912 mez_name);
0b4c1975 913 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 914 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 915 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 916
7ddcb079 917 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
0a7de745
A
918 kentry_data_size * 64, kentry_data_size,
919 "Reserved VM map entries");
7ddcb079 920 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
5ba3f43e
A
921 /* Don't quarantine because we always need elements available */
922 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
1c79356b 923
91447636 924 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
0a7de745 925 16 * 1024, PAGE_SIZE, "VM map copies");
0b4c1975 926 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 927
3e170ce0 928 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
0a7de745 929 16 * 1024, PAGE_SIZE, "VM map holes");
3e170ce0
A
930 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
931
1c79356b
A
932 /*
933 * Cram the map and kentry zones with initial data.
7ddcb079 934 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
935 */
936 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 937 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
0a7de745 938 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
7ddcb079
A
939
940 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
941 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
942 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
943 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
944 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 945 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 946 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 947
3e170ce0
A
948 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
949 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
950 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
951 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
952 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
953 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
954
5ba3f43e 955 /*
3e170ce0 956 * Add the stolen memory to zones, adjust zone size and stolen counts.
5ba3f43e 957 * zcram only up to the maximum number of pages for each zone chunk.
3e170ce0 958 */
7ddcb079 959 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
5ba3f43e
A
960
961 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
962 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
963 zcram(vm_map_entry_reserved_zone,
0a7de745
A
964 (vm_offset_t)kentry_data + off,
965 MIN(kentry_data_size - off, stride));
5ba3f43e
A
966 }
967 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
968 zcram(vm_map_holes_zone,
0a7de745
A
969 (vm_offset_t)map_holes_data + off,
970 MIN(map_holes_data_size - off, stride));
5ba3f43e
A
971 }
972
0a7de745
A
973 /*
974 * Since these are covered by zones, remove them from stolen page accounting.
975 */
3e170ce0
A
976 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
977
b0d623f7
A
978 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
979 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
5ba3f43e 980 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 981
fe8ab488
A
982 lck_attr_setdefault(&vm_map_lck_rw_attr);
983 lck_attr_cleardebug(&vm_map_lck_rw_attr);
984
39037602
A
985#if VM_MAP_DEBUG_APPLE_PROTECT
986 PE_parse_boot_argn("vm_map_debug_apple_protect",
0a7de745
A
987 &vm_map_debug_apple_protect,
988 sizeof(vm_map_debug_apple_protect));
39037602
A
989#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
990#if VM_MAP_DEBUG_APPLE_FOURK
991 PE_parse_boot_argn("vm_map_debug_fourk",
0a7de745
A
992 &vm_map_debug_fourk,
993 sizeof(vm_map_debug_fourk));
39037602 994#endif /* VM_MAP_DEBUG_FOURK */
5ba3f43e 995 PE_parse_boot_argn("vm_map_executable_immutable",
0a7de745
A
996 &vm_map_executable_immutable,
997 sizeof(vm_map_executable_immutable));
d9a64523 998 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
0a7de745
A
999 &vm_map_executable_immutable_verbose,
1000 sizeof(vm_map_executable_immutable_verbose));
d9a64523
A
1001
1002 PE_parse_boot_argn("malloc_no_cow",
0a7de745
A
1003 &malloc_no_cow,
1004 sizeof(malloc_no_cow));
d9a64523
A
1005 if (malloc_no_cow) {
1006 vm_memory_malloc_no_cow_mask = 0ULL;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1008 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
0a7de745 1009 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
d9a64523
A
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1011// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1012// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1014 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1015 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1016 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1017// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1018 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
0a7de745
A
1019 &vm_memory_malloc_no_cow_mask,
1020 sizeof(vm_memory_malloc_no_cow_mask));
d9a64523 1021 }
4ba76501
A
1022
1023#if DEBUG
1024 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1025 if (vm_check_map_sanity) {
1026 kprintf("VM sanity checking enabled\n");
1027 } else {
1028 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1029 }
1030#endif /* DEBUG */
1c79356b
A
1031}
1032
1033void
1034vm_map_steal_memory(
1035 void)
1036{
7ddcb079
A
1037 uint32_t kentry_initial_pages;
1038
b0d623f7 1039 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
1040 map_data = pmap_steal_memory(map_data_size);
1041
1c79356b 1042 /*
7ddcb079
A
1043 * kentry_initial_pages corresponds to the number of kernel map entries
1044 * required during bootstrap until the asynchronous replenishment
1045 * scheme is activated and/or entries are available from the general
1046 * map entry pool.
1c79356b 1047 */
0a7de745 1048#if defined(__LP64__)
7ddcb079
A
1049 kentry_initial_pages = 10;
1050#else
1051 kentry_initial_pages = 6;
1c79356b 1052#endif
316670eb
A
1053
1054#if CONFIG_GZALLOC
1055 /* If using the guard allocator, reserve more memory for the kernel
1056 * reserved map entry pool.
0a7de745
A
1057 */
1058 if (gzalloc_enabled()) {
316670eb 1059 kentry_initial_pages *= 1024;
0a7de745 1060 }
316670eb
A
1061#endif
1062
7ddcb079 1063 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 1064 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
1065
1066 map_holes_data_size = kentry_data_size;
1067 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
1068}
1069
5ba3f43e
A
1070boolean_t vm_map_supports_hole_optimization = FALSE;
1071
3e170ce0 1072void
0a7de745
A
1073vm_kernel_reserved_entry_init(void)
1074{
1075 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
5ba3f43e
A
1076
1077 /*
1078 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1079 */
0a7de745 1080 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
5ba3f43e 1081 vm_map_supports_hole_optimization = TRUE;
3e170ce0
A
1082}
1083
1084void
1085vm_map_disable_hole_optimization(vm_map_t map)
1086{
0a7de745 1087 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
3e170ce0
A
1088
1089 if (map->holelistenabled) {
d9a64523 1090 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
1091
1092 while (hole_entry != NULL) {
3e170ce0
A
1093 next_hole_entry = hole_entry->vme_next;
1094
1095 hole_entry->vme_next = NULL;
1096 hole_entry->vme_prev = NULL;
1097 zfree(vm_map_holes_zone, hole_entry);
1098
1099 if (next_hole_entry == head_entry) {
1100 hole_entry = NULL;
1101 } else {
1102 hole_entry = next_hole_entry;
1103 }
1104 }
1105
1106 map->holes_list = NULL;
1107 map->holelistenabled = FALSE;
1108
1109 map->first_free = vm_map_first_entry(map);
1110 SAVE_HINT_HOLE_WRITE(map, NULL);
1111 }
1112}
1113
1114boolean_t
0a7de745
A
1115vm_kernel_map_is_kernel(vm_map_t map)
1116{
1117 return map->pmap == kernel_pmap;
7ddcb079
A
1118}
1119
1c79356b
A
1120/*
1121 * vm_map_create:
1122 *
1123 * Creates and returns a new empty VM map with
1124 * the given physical map structure, and having
1125 * the given lower and upper address bounds.
1126 */
3e170ce0 1127
1c79356b
A
1128vm_map_t
1129vm_map_create(
0a7de745
A
1130 pmap_t pmap,
1131 vm_map_offset_t min,
1132 vm_map_offset_t max,
1133 boolean_t pageable)
d9a64523
A
1134{
1135 int options;
1136
1137 options = 0;
1138 if (pageable) {
1139 options |= VM_MAP_CREATE_PAGEABLE;
1140 }
1141 return vm_map_create_options(pmap, min, max, options);
1142}
1143
1144vm_map_t
1145vm_map_create_options(
0a7de745
A
1146 pmap_t pmap,
1147 vm_map_offset_t min,
d9a64523 1148 vm_map_offset_t max,
0a7de745 1149 int options)
1c79356b 1150{
0a7de745
A
1151 vm_map_t result;
1152 struct vm_map_links *hole_entry = NULL;
1c79356b 1153
d9a64523
A
1154 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1155 /* unknown option */
1156 return VM_MAP_NULL;
1157 }
1158
1c79356b 1159 result = (vm_map_t) zalloc(vm_map_zone);
0a7de745 1160 if (result == VM_MAP_NULL) {
1c79356b 1161 panic("vm_map_create");
0a7de745 1162 }
1c79356b
A
1163
1164 vm_map_first_entry(result) = vm_map_to_entry(result);
1165 vm_map_last_entry(result) = vm_map_to_entry(result);
1166 result->hdr.nentries = 0;
d9a64523
A
1167 if (options & VM_MAP_CREATE_PAGEABLE) {
1168 result->hdr.entries_pageable = TRUE;
1169 } else {
1170 result->hdr.entries_pageable = FALSE;
1171 }
1c79356b 1172
0a7de745 1173 vm_map_store_init( &(result->hdr));
5ba3f43e 1174
39236c6e
A
1175 result->hdr.page_shift = PAGE_SHIFT;
1176
1c79356b 1177 result->size = 0;
0a7de745 1178 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
2d21ac55 1179 result->user_wire_size = 0;
cb323159 1180#if !CONFIG_EMBEDDED
5ba3f43e 1181 result->vmmap_high_start = 0;
cb323159
A
1182#endif
1183 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
0a7de745 1184#if TASK_SWAPPER
1c79356b
A
1185 result->res_count = 1;
1186 result->sw_state = MAP_SW_IN;
0a7de745 1187#endif /* TASK_SWAPPER */
1c79356b
A
1188 result->pmap = pmap;
1189 result->min_offset = min;
1190 result->max_offset = max;
1191 result->wiring_required = FALSE;
1192 result->no_zero_fill = FALSE;
316670eb 1193 result->mapped_in_other_pmaps = FALSE;
1c79356b 1194 result->wait_for_space = FALSE;
b0d623f7 1195 result->switch_protect = FALSE;
6d2010ae
A
1196 result->disable_vmentry_reuse = FALSE;
1197 result->map_disallow_data_exec = FALSE;
39037602 1198 result->is_nested_map = FALSE;
a39ff7e2 1199 result->map_disallow_new_exec = FALSE;
ea3f0419 1200 result->terminated = FALSE;
6d2010ae 1201 result->highest_entry_end = 0;
1c79356b
A
1202 result->first_free = vm_map_to_entry(result);
1203 result->hint = vm_map_to_entry(result);
0a7de745 1204 result->jit_entry_exists = FALSE;
3e170ce0 1205
d9a64523
A
1206 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1207 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1208 result->has_corpse_footprint = TRUE;
1209 result->holelistenabled = FALSE;
1210 result->vmmap_corpse_footprint = NULL;
1211 } else {
1212 result->has_corpse_footprint = FALSE;
1213 if (vm_map_supports_hole_optimization) {
1214 hole_entry = zalloc(vm_map_holes_zone);
3e170ce0 1215
d9a64523 1216 hole_entry->start = min;
5ba3f43e 1217#if defined(__arm__) || defined(__arm64__)
d9a64523 1218 hole_entry->end = result->max_offset;
5ba3f43e 1219#else
d9a64523 1220 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 1221#endif
d9a64523
A
1222 result->holes_list = result->hole_hint = hole_entry;
1223 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1224 result->holelistenabled = TRUE;
1225 } else {
1226 result->holelistenabled = FALSE;
1227 }
3e170ce0
A
1228 }
1229
1c79356b 1230 vm_map_lock_init(result);
b0d623f7 1231 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
5ba3f43e 1232
0a7de745 1233 return result;
1c79356b
A
1234}
1235
1236/*
1237 * vm_map_entry_create: [ internal use only ]
1238 *
1239 * Allocates a VM map entry for insertion in the
1240 * given map (or map copy). No fields are filled.
1241 */
0a7de745 1242#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 1243
0a7de745 1244#define vm_map_copy_entry_create(copy, map_locked) \
7ddcb079
A
1245 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1246unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1247
91447636 1248static vm_map_entry_t
1c79356b 1249_vm_map_entry_create(
0a7de745 1250 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1251{
0a7de745
A
1252 zone_t zone;
1253 vm_map_entry_t entry;
1c79356b 1254
7ddcb079
A
1255 zone = vm_map_entry_zone;
1256
1257 assert(map_header->entries_pageable ? !map_locked : TRUE);
1258
1259 if (map_header->entries_pageable) {
1260 entry = (vm_map_entry_t) zalloc(zone);
0a7de745 1261 } else {
7ddcb079
A
1262 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1263
1264 if (entry == VM_MAP_ENTRY_NULL) {
1265 zone = vm_map_entry_reserved_zone;
1266 entry = (vm_map_entry_t) zalloc(zone);
1267 OSAddAtomic(1, &reserved_zalloc_count);
0a7de745 1268 } else {
7ddcb079 1269 OSAddAtomic(1, &nonreserved_zalloc_count);
0a7de745 1270 }
7ddcb079 1271 }
1c79356b 1272
0a7de745 1273 if (entry == VM_MAP_ENTRY_NULL) {
1c79356b 1274 panic("vm_map_entry_create");
0a7de745 1275 }
7ddcb079
A
1276 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1277
0a7de745
A
1278 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1279#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1280 entry->vme_creation_maphdr = map_header;
39037602 1281 backtrace(&entry->vme_creation_bt[0],
cb323159 1282 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
316670eb 1283#endif
0a7de745 1284 return entry;
1c79356b
A
1285}
1286
1287/*
1288 * vm_map_entry_dispose: [ internal use only ]
1289 *
1290 * Inverse of vm_map_entry_create.
2d21ac55 1291 *
0a7de745 1292 * write map lock held so no need to
2d21ac55 1293 * do anything special to insure correctness
0a7de745 1294 * of the stores
1c79356b 1295 */
0a7de745 1296#define vm_map_entry_dispose(map, entry) \
6d2010ae 1297 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b 1298
0a7de745 1299#define vm_map_copy_entry_dispose(map, entry) \
1c79356b
A
1300 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1301
91447636 1302static void
1c79356b 1303_vm_map_entry_dispose(
0a7de745
A
1304 struct vm_map_header *map_header,
1305 vm_map_entry_t entry)
1c79356b 1306{
0a7de745 1307 zone_t zone;
1c79356b 1308
0a7de745 1309 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
2d21ac55 1310 zone = vm_map_entry_zone;
0a7de745 1311 } else {
7ddcb079 1312 zone = vm_map_entry_reserved_zone;
0a7de745 1313 }
7ddcb079
A
1314
1315 if (!map_header->entries_pageable) {
0a7de745 1316 if (zone == vm_map_entry_zone) {
7ddcb079 1317 OSAddAtomic(-1, &nonreserved_zalloc_count);
0a7de745 1318 } else {
7ddcb079 1319 OSAddAtomic(-1, &reserved_zalloc_count);
0a7de745 1320 }
7ddcb079 1321 }
1c79356b 1322
91447636 1323 zfree(zone, entry);
1c79356b
A
1324}
1325
#if     MACH_ASSERT
/* While FALSE, first_free_is_valid() reports TRUE without checking anything. */
static boolean_t first_free_check = FALSE;

/*
 *	first_free_is_valid:
 *
 *	Debug helper: when first_free_check is set, defers to
 *	first_free_is_valid_store() for the given map; otherwise
 *	always returns TRUE.
 */
boolean_t
first_free_is_valid(
	vm_map_t        map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1c79356b 1339
1c79356b 1340
/*
 * Link / unlink an entry in a map copy's entry list; both forward to the
 * _vm_map_store_entry_* routines, operating on the copy's header.
 */
#define vm_map_copy_entry_link(copy, after_where, entry)                \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, (after_where), (entry))

#define vm_map_copy_entry_unlink(copy, entry)                           \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1346
0a7de745 1347#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1348/*
1349 * vm_map_res_reference:
1350 *
1351 * Adds another valid residence count to the given map.
1352 *
1353 * Map is locked so this function can be called from
1354 * vm_map_swapin.
1355 *
1356 */
0a7de745
A
1357void
1358vm_map_res_reference(vm_map_t map)
1c79356b
A
1359{
1360 /* assert map is locked */
1361 assert(map->res_count >= 0);
cb323159 1362 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1c79356b 1363 if (map->res_count == 0) {
b0d623f7 1364 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1365 vm_map_lock(map);
1366 vm_map_swapin(map);
b0d623f7 1367 lck_mtx_lock(&map->s_lock);
1c79356b
A
1368 ++map->res_count;
1369 vm_map_unlock(map);
0a7de745 1370 } else {
1c79356b 1371 ++map->res_count;
0a7de745 1372 }
1c79356b
A
1373}
1374
1375/*
1376 * vm_map_reference_swap:
1377 *
1378 * Adds valid reference and residence counts to the given map.
1379 *
1380 * The map may not be in memory (i.e. zero residence count).
1381 *
1382 */
0a7de745
A
1383void
1384vm_map_reference_swap(vm_map_t map)
1c79356b
A
1385{
1386 assert(map != VM_MAP_NULL);
b0d623f7 1387 lck_mtx_lock(&map->s_lock);
1c79356b 1388 assert(map->res_count >= 0);
cb323159
A
1389 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1390 os_ref_retain_locked(&map->map_refcnt);
1c79356b 1391 vm_map_res_reference(map);
b0d623f7 1392 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1393}
1394
1395/*
1396 * vm_map_res_deallocate:
1397 *
1398 * Decrement residence count on a map; possibly causing swapout.
1399 *
1400 * The map must be in memory (i.e. non-zero residence count).
1401 *
1402 * The map is locked, so this function is callable from vm_map_deallocate.
1403 *
1404 */
0a7de745
A
1405void
1406vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1407{
1408 assert(map->res_count > 0);
1409 if (--map->res_count == 0) {
b0d623f7 1410 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1411 vm_map_lock(map);
1412 vm_map_swapout(map);
1413 vm_map_unlock(map);
b0d623f7 1414 lck_mtx_lock(&map->s_lock);
1c79356b 1415 }
cb323159 1416 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1c79356b 1417}
0a7de745 1418#endif /* MACH_ASSERT && TASK_SWAPPER */
1c79356b 1419
1c79356b
A
1420/*
1421 * vm_map_destroy:
1422 *
1423 * Actually destroy a map.
1424 */
1425void
1426vm_map_destroy(
0a7de745
A
1427 vm_map_t map,
1428 int flags)
5ba3f43e 1429{
1c79356b 1430 vm_map_lock(map);
2d21ac55 1431
3e170ce0
A
1432 /* final cleanup: no need to unnest shared region */
1433 flags |= VM_MAP_REMOVE_NO_UNNESTING;
5ba3f43e
A
1434 /* final cleanup: ok to remove immutable mappings */
1435 flags |= VM_MAP_REMOVE_IMMUTABLE;
d9a64523
A
1436 /* final cleanup: allow gaps in range */
1437 flags |= VM_MAP_REMOVE_GAPS_OK;
3e170ce0 1438
2d21ac55
A
1439 /* clean up regular map entries */
1440 (void) vm_map_delete(map, map->min_offset, map->max_offset,
0a7de745 1441 flags, VM_MAP_NULL);
2d21ac55 1442 /* clean up leftover special mappings (commpage, etc...) */
0a7de745 1443#if !defined(__arm__) && !defined(__arm64__)
2d21ac55 1444 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
0a7de745 1445 flags, VM_MAP_NULL);
5ba3f43e 1446#endif /* !__arm__ && !__arm64__ */
6d2010ae 1447
3e170ce0 1448 vm_map_disable_hole_optimization(map);
d9a64523
A
1449 vm_map_corpse_footprint_destroy(map);
1450
1c79356b
A
1451 vm_map_unlock(map);
1452
2d21ac55 1453 assert(map->hdr.nentries == 0);
5ba3f43e 1454
0a7de745 1455 if (map->pmap) {
55e303ae 1456 pmap_destroy(map->pmap);
0a7de745 1457 }
1c79356b 1458
39037602
A
1459 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1460 /*
1461 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1462 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1463 * structure or kalloc'ed via lck_mtx_init.
1464 * An example is s_lock_ext within struct _vm_map.
1465 *
1466 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1467 * can add another tag to detect embedded vs alloc'ed indirect external
1468 * mutexes but that'll be additional checks in the lock path and require
1469 * updating dependencies for the old vs new tag.
1470 *
1471 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1472 * just when lock debugging is ON, we choose to forego explicitly destroying
1473 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1474 * count on vm_map_lck_grp, which has no serious side-effect.
1475 */
1476 } else {
1477 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1478 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1479 }
1480
91447636 1481 zfree(vm_map_zone, map);
1c79356b
A
1482}
1483
5ba3f43e
A
1484/*
1485 * Returns pid of the task with the largest number of VM map entries.
1486 * Used in the zone-map-exhaustion jetsam path.
1487 */
1488pid_t
1489find_largest_process_vm_map_entries(void)
1490{
1491 pid_t victim_pid = -1;
1492 int max_vm_map_entries = 0;
1493 task_t task = TASK_NULL;
1494 queue_head_t *task_list = &tasks;
1495
1496 lck_mtx_lock(&tasks_threads_lock);
1497 queue_iterate(task_list, task, task_t, tasks) {
0a7de745 1498 if (task == kernel_task || !task->active) {
5ba3f43e 1499 continue;
0a7de745 1500 }
5ba3f43e
A
1501
1502 vm_map_t task_map = task->map;
1503 if (task_map != VM_MAP_NULL) {
1504 int task_vm_map_entries = task_map->hdr.nentries;
1505 if (task_vm_map_entries > max_vm_map_entries) {
1506 max_vm_map_entries = task_vm_map_entries;
1507 victim_pid = pid_from_task(task);
1508 }
1509 }
1510 }
1511 lck_mtx_unlock(&tasks_threads_lock);
1512
1513 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1514 return victim_pid;
1515}
1516
0a7de745 1517#if TASK_SWAPPER
1c79356b
A
1518/*
1519 * vm_map_swapin/vm_map_swapout
1520 *
5ba3f43e 1521 * Swap a map in and out, either referencing or releasing its resources.
1c79356b
A
1522 * These functions are internal use only; however, they must be exported
1523 * because they may be called from macros, which are exported.
1524 *
5ba3f43e
A
1525 * In the case of swapout, there could be races on the residence count,
1526 * so if the residence count is up, we return, assuming that a
1c79356b
A
1527 * vm_map_deallocate() call in the near future will bring us back.
1528 *
1529 * Locking:
1530 * -- We use the map write lock for synchronization among races.
1531 * -- The map write lock, and not the simple s_lock, protects the
1532 * swap state of the map.
1533 * -- If a map entry is a share map, then we hold both locks, in
1534 * hierarchical order.
1535 *
1536 * Synchronization Notes:
1537 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1538 * will block on the map lock and proceed when swapout is through.
1539 * 2) A vm_map_reference() call at this time is illegal, and will
1540 * cause a panic. vm_map_reference() is only allowed on resident
1541 * maps, since it refuses to block.
5ba3f43e 1542 * 3) A vm_map_swapin() call during a swapin will block, and
1c79356b
A
1543 * proceed when the first swapin is done, turning into a nop.
1544 * This is the reason the res_count is not incremented until
1545 * after the swapin is complete.
1546 * 4) There is a timing hole after the checks of the res_count, before
1547 * the map lock is taken, during which a swapin may get the lock
1548 * before a swapout about to happen. If this happens, the swapin
1549 * will detect the state and increment the reference count, causing
5ba3f43e
A
1550 * the swapout to be a nop, thereby delaying it until a later
1551 * vm_map_deallocate. If the swapout gets the lock first, then
1552 * the swapin will simply block until the swapout is done, and
1c79356b
A
1553 * then proceed.
1554 *
1555 * Because vm_map_swapin() is potentially an expensive operation, it
1556 * should be used with caution.
1557 *
1558 * Invariants:
1559 * 1) A map with a residence count of zero is either swapped, or
1560 * being swapped.
1561 * 2) A map with a non-zero residence count is either resident,
1562 * or being swapped in.
1563 */
1564
1565int vm_map_swap_enable = 1;
1566
0a7de745
A
1567void
1568vm_map_swapin(vm_map_t map)
1c79356b 1569{
39037602 1570 vm_map_entry_t entry;
2d21ac55 1571
0a7de745 1572 if (!vm_map_swap_enable) { /* debug */
1c79356b 1573 return;
0a7de745 1574 }
1c79356b
A
1575
1576 /*
1577 * Map is locked
1578 * First deal with various races.
1579 */
0a7de745 1580 if (map->sw_state == MAP_SW_IN) {
5ba3f43e 1581 /*
1c79356b
A
1582 * we raced with swapout and won. Returning will incr.
1583 * the res_count, turning the swapout into a nop.
1584 */
1585 return;
0a7de745 1586 }
1c79356b
A
1587
1588 /*
1589 * The residence count must be zero. If we raced with another
1590 * swapin, the state would have been IN; if we raced with a
1591 * swapout (after another competing swapin), we must have lost
1592 * the race to get here (see above comment), in which case
1593 * res_count is still 0.
1594 */
1595 assert(map->res_count == 0);
1596
1597 /*
1598 * There are no intermediate states of a map going out or
1599 * coming in, since the map is locked during the transition.
1600 */
1601 assert(map->sw_state == MAP_SW_OUT);
1602
1603 /*
5ba3f43e 1604 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1605 * or share-map, we call vm_map_res_reference upon it.
1606 * If the entry is an object, we call vm_object_res_reference
1607 * (this may iterate through the shadow chain).
1608 * Note that we hold the map locked the entire time,
1609 * even if we get back here via a recursive call in
1610 * vm_map_res_reference.
1611 */
1612 entry = vm_map_first_entry(map);
1613
1614 while (entry != vm_map_to_entry(map)) {
3e170ce0 1615 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1616 if (entry->is_sub_map) {
3e170ce0 1617 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1618 lck_mtx_lock(&lmap->s_lock);
1c79356b 1619 vm_map_res_reference(lmap);
b0d623f7 1620 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1621 } else {
3e170ce0 1622 vm_object_t object = VME_OBEJCT(entry);
1c79356b
A
1623 vm_object_lock(object);
1624 /*
1625 * This call may iterate through the
1626 * shadow chain.
1627 */
1628 vm_object_res_reference(object);
1629 vm_object_unlock(object);
1630 }
1631 }
1632 entry = entry->vme_next;
1633 }
1634 assert(map->sw_state == MAP_SW_OUT);
1635 map->sw_state = MAP_SW_IN;
1636}
1637
0a7de745
A
1638void
1639vm_map_swapout(vm_map_t map)
1c79356b 1640{
39037602 1641 vm_map_entry_t entry;
5ba3f43e 1642
1c79356b
A
1643 /*
1644 * Map is locked
1645 * First deal with various races.
1646 * If we raced with a swapin and lost, the residence count
1647 * will have been incremented to 1, and we simply return.
1648 */
b0d623f7 1649 lck_mtx_lock(&map->s_lock);
1c79356b 1650 if (map->res_count != 0) {
b0d623f7 1651 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1652 return;
1653 }
b0d623f7 1654 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1655
1656 /*
1657 * There are no intermediate states of a map going out or
1658 * coming in, since the map is locked during the transition.
1659 */
1660 assert(map->sw_state == MAP_SW_IN);
1661
0a7de745 1662 if (!vm_map_swap_enable) {
1c79356b 1663 return;
0a7de745 1664 }
1c79356b
A
1665
1666 /*
5ba3f43e 1667 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1668 * or share-map, we call vm_map_res_deallocate upon it.
1669 * If the entry is an object, we call vm_object_res_deallocate
1670 * (this may iterate through the shadow chain).
1671 * Note that we hold the map locked the entire time,
1672 * even if we get back here via a recursive call in
1673 * vm_map_res_deallocate.
1674 */
1675 entry = vm_map_first_entry(map);
1676
1677 while (entry != vm_map_to_entry(map)) {
3e170ce0 1678 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1679 if (entry->is_sub_map) {
3e170ce0 1680 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1681 lck_mtx_lock(&lmap->s_lock);
1c79356b 1682 vm_map_res_deallocate(lmap);
b0d623f7 1683 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1684 } else {
3e170ce0 1685 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1686 vm_object_lock(object);
1687 /*
5ba3f43e
A
1688 * This call may take a long time,
1689 * since it could actively push
1690 * out pages (if we implement it
1c79356b
A
1691 * that way).
1692 */
1693 vm_object_res_deallocate(object);
1694 vm_object_unlock(object);
1695 }
1696 }
1697 entry = entry->vme_next;
1698 }
1699 assert(map->sw_state == MAP_SW_IN);
1700 map->sw_state = MAP_SW_OUT;
1701}
1702
0a7de745 1703#endif /* TASK_SWAPPER */
1c79356b 1704
1c79356b
A
1705/*
1706 * vm_map_lookup_entry: [ internal use only ]
1707 *
5ba3f43e
A
1708 * Calls into the vm map store layer to find the map
1709 * entry containing (or immediately preceding) the
6d2010ae 1710 * specified address in the given map; the entry is returned
1c79356b
A
1711 * in the "entry" parameter. The boolean
1712 * result indicates whether the address is
1713 * actually contained in the map.
1714 */
1715boolean_t
1716vm_map_lookup_entry(
0a7de745
A
1717 vm_map_t map,
1718 vm_map_offset_t address,
1719 vm_map_entry_t *entry) /* OUT */
1c79356b 1720{
0a7de745 1721 return vm_map_store_lookup_entry( map, address, entry );
1c79356b
A
1722}
1723
1724/*
1725 * Routine: vm_map_find_space
1726 * Purpose:
1727 * Allocate a range in the specified virtual address map,
1728 * returning the entry allocated for that range.
1729 * Used by kmem_alloc, etc.
1730 *
1731 * The map must NOT be locked. It will be returned locked
1732 * on KERN_SUCCESS, unlocked on failure.
1733 *
1734 * If an entry is allocated, the object/offset fields
1735 * are initialized to zero.
1736 */
1737kern_return_t
1738vm_map_find_space(
0a7de745
A
1739 vm_map_t map,
1740 vm_map_offset_t *address, /* OUT */
1741 vm_map_size_t size,
1742 vm_map_offset_t mask,
1743 int flags __unused,
1744 vm_map_kernel_flags_t vmk_flags,
1745 vm_tag_t tag,
1746 vm_map_entry_t *o_entry) /* OUT */
1c79356b 1747{
0a7de745
A
1748 vm_map_entry_t entry, new_entry;
1749 vm_map_offset_t start;
1750 vm_map_offset_t end;
1751 vm_map_entry_t hole_entry;
91447636
A
1752
1753 if (size == 0) {
1754 *address = 0;
1755 return KERN_INVALID_ARGUMENT;
1756 }
1c79356b 1757
5ba3f43e 1758 if (vmk_flags.vmkf_guard_after) {
2d21ac55 1759 /* account for the back guard page in the size */
39236c6e 1760 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1761 }
1762
7ddcb079 1763 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1764
1765 /*
1766 * Look for the first possible address; if there's already
1767 * something at this address, we have to start after it.
1768 */
1769
1770 vm_map_lock(map);
1771
0a7de745 1772 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae
A
1773 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1774 } else {
3e170ce0 1775 if (map->holelistenabled) {
d9a64523 1776 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
1777
1778 if (hole_entry == NULL) {
1779 /*
1780 * No more space in the map?
1781 */
1782 vm_map_entry_dispose(map, new_entry);
1783 vm_map_unlock(map);
0a7de745 1784 return KERN_NO_SPACE;
3e170ce0
A
1785 }
1786
1787 entry = hole_entry;
1788 start = entry->vme_start;
1789 } else {
1790 assert(first_free_is_valid(map));
0a7de745 1791 if ((entry = map->first_free) == vm_map_to_entry(map)) {
3e170ce0 1792 start = map->min_offset;
0a7de745 1793 } else {
3e170ce0 1794 start = entry->vme_end;
0a7de745 1795 }
3e170ce0 1796 }
6d2010ae 1797 }
1c79356b
A
1798
1799 /*
1800 * In any case, the "entry" always precedes
1801 * the proposed new region throughout the loop:
1802 */
1803
1804 while (TRUE) {
0a7de745 1805 vm_map_entry_t next;
1c79356b
A
1806
1807 /*
1808 * Find the end of the proposed new region.
1809 * Be sure we didn't go beyond the end, or
1810 * wrap around the address.
1811 */
1812
5ba3f43e 1813 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1814 /* reserve space for the front guard page */
39236c6e 1815 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1816 }
1c79356b 1817 end = ((start + mask) & ~mask);
5ba3f43e 1818
1c79356b
A
1819 if (end < start) {
1820 vm_map_entry_dispose(map, new_entry);
1821 vm_map_unlock(map);
0a7de745 1822 return KERN_NO_SPACE;
1c79356b
A
1823 }
1824 start = end;
d9a64523 1825 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1c79356b 1826 end += size;
d9a64523 1827 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b
A
1828
1829 if ((end > map->max_offset) || (end < start)) {
1830 vm_map_entry_dispose(map, new_entry);
1831 vm_map_unlock(map);
0a7de745 1832 return KERN_NO_SPACE;
1c79356b
A
1833 }
1834
1c79356b 1835 next = entry->vme_next;
1c79356b 1836
3e170ce0 1837 if (map->holelistenabled) {
0a7de745 1838 if (entry->vme_end >= end) {
3e170ce0 1839 break;
0a7de745 1840 }
3e170ce0
A
1841 } else {
1842 /*
1843 * If there are no more entries, we must win.
1844 *
1845 * OR
1846 *
1847 * If there is another entry, it must be
1848 * after the end of the potential new region.
1849 */
1c79356b 1850
0a7de745 1851 if (next == vm_map_to_entry(map)) {
3e170ce0 1852 break;
0a7de745 1853 }
3e170ce0 1854
0a7de745 1855 if (next->vme_start >= end) {
3e170ce0 1856 break;
0a7de745 1857 }
3e170ce0 1858 }
1c79356b
A
1859
1860 /*
1861 * Didn't fit -- move to the next entry.
1862 */
1863
1864 entry = next;
3e170ce0
A
1865
1866 if (map->holelistenabled) {
d9a64523 1867 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
1868 /*
1869 * Wrapped around
1870 */
1871 vm_map_entry_dispose(map, new_entry);
1872 vm_map_unlock(map);
0a7de745 1873 return KERN_NO_SPACE;
3e170ce0
A
1874 }
1875 start = entry->vme_start;
1876 } else {
1877 start = entry->vme_end;
1878 }
1879 }
1880
1881 if (map->holelistenabled) {
1882 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1883 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1884 }
1c79356b
A
1885 }
1886
1887 /*
1888 * At this point,
1889 * "start" and "end" should define the endpoints of the
1890 * available new range, and
1891 * "entry" should refer to the region before the new
1892 * range, and
1893 *
1894 * the map should be locked.
1895 */
1896
5ba3f43e 1897 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1898 /* go back for the front guard page */
39236c6e 1899 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1900 }
1c79356b
A
1901 *address = start;
1902
e2d2fc5c 1903 assert(start < end);
1c79356b
A
1904 new_entry->vme_start = start;
1905 new_entry->vme_end = end;
1906 assert(page_aligned(new_entry->vme_start));
1907 assert(page_aligned(new_entry->vme_end));
39236c6e 1908 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 1909 VM_MAP_PAGE_MASK(map)));
39236c6e 1910 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 1911 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1912
1913 new_entry->is_shared = FALSE;
1914 new_entry->is_sub_map = FALSE;
fe8ab488 1915 new_entry->use_pmap = TRUE;
3e170ce0
A
1916 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1917 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1918
1919 new_entry->needs_copy = FALSE;
1920
1921 new_entry->inheritance = VM_INHERIT_DEFAULT;
1922 new_entry->protection = VM_PROT_DEFAULT;
1923 new_entry->max_protection = VM_PROT_ALL;
1924 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1925 new_entry->wired_count = 0;
1926 new_entry->user_wired_count = 0;
1927
1928 new_entry->in_transition = FALSE;
1929 new_entry->needs_wakeup = FALSE;
2d21ac55 1930 new_entry->no_cache = FALSE;
b0d623f7 1931 new_entry->permanent = FALSE;
39236c6e
A
1932 new_entry->superpage_size = FALSE;
1933 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1934 new_entry->map_aligned = TRUE;
1935 } else {
1936 new_entry->map_aligned = FALSE;
1937 }
2d21ac55 1938
3e170ce0 1939 new_entry->used_for_jit = FALSE;
d9a64523 1940 new_entry->pmap_cs_associated = FALSE;
b0d623f7 1941 new_entry->zero_wired_pages = FALSE;
fe8ab488 1942 new_entry->iokit_acct = FALSE;
3e170ce0
A
1943 new_entry->vme_resilient_codesign = FALSE;
1944 new_entry->vme_resilient_media = FALSE;
0a7de745 1945 if (vmk_flags.vmkf_atomic_entry) {
39037602 1946 new_entry->vme_atomic = TRUE;
0a7de745 1947 } else {
39037602 1948 new_entry->vme_atomic = FALSE;
0a7de745 1949 }
1c79356b 1950
5ba3f43e 1951 VME_ALIAS_SET(new_entry, tag);
0c530ab8 1952
1c79356b
A
1953 /*
1954 * Insert the new entry into the list
1955 */
1956
d9a64523 1957 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
1958
1959 map->size += size;
1960
1961 /*
1962 * Update the lookup hint
1963 */
0c530ab8 1964 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1965
1966 *o_entry = new_entry;
0a7de745 1967 return KERN_SUCCESS;
1c79356b
A
1968}
1969
1970int vm_map_pmap_enter_print = FALSE;
1971int vm_map_pmap_enter_enable = FALSE;
1972
1973/*
91447636 1974 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1975 *
1976 * Description:
1977 * Force pages from the specified object to be entered into
1978 * the pmap at the specified address if they are present.
1979 * As soon as a page not found in the object the scan ends.
1980 *
1981 * Returns:
5ba3f43e 1982 * Nothing.
1c79356b
A
1983 *
1984 * In/out conditions:
1985 * The source map should not be locked on entry.
1986 */
fe8ab488 1987__unused static void
1c79356b 1988vm_map_pmap_enter(
0a7de745
A
1989 vm_map_t map,
1990 vm_map_offset_t addr,
1991 vm_map_offset_t end_addr,
1992 vm_object_t object,
1993 vm_object_offset_t offset,
1994 vm_prot_t protection)
1c79356b 1995{
0a7de745
A
1996 int type_of_fault;
1997 kern_return_t kr;
d9a64523 1998 struct vm_object_fault_info fault_info = {};
0b4e3aa0 1999
0a7de745 2000 if (map->pmap == 0) {
55e303ae 2001 return;
0a7de745 2002 }
55e303ae 2003
1c79356b 2004 while (addr < end_addr) {
0a7de745 2005 vm_page_t m;
1c79356b 2006
fe8ab488
A
2007
2008 /*
0a7de745 2009 * TODO:
fe8ab488
A
2010 * From vm_map_enter(), we come into this function without the map
2011 * lock held or the object lock held.
2012 * We haven't taken a reference on the object either.
2013 * We should do a proper lookup on the map to make sure
2014 * that things are sane before we go locking objects that
2015 * could have been deallocated from under us.
2016 */
2017
1c79356b 2018 vm_object_lock(object);
1c79356b
A
2019
2020 m = vm_page_lookup(object, offset);
5ba3f43e 2021
d9a64523 2022 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
0a7de745 2023 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
1c79356b
A
2024 vm_object_unlock(object);
2025 return;
2026 }
2027
1c79356b
A
2028 if (vm_map_pmap_enter_print) {
2029 printf("vm_map_pmap_enter:");
2d21ac55 2030 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
0a7de745 2031 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 2032 }
2d21ac55 2033 type_of_fault = DBG_CACHE_HIT_FAULT;
d9a64523 2034 kr = vm_fault_enter(m, map->pmap,
0a7de745
A
2035 addr, protection, protection,
2036 VM_PAGE_WIRED(m),
2037 FALSE, /* change_wiring */
2038 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2039 &fault_info,
2040 NULL, /* need_retry */
2041 &type_of_fault);
1c79356b 2042
1c79356b
A
2043 vm_object_unlock(object);
2044
2045 offset += PAGE_SIZE_64;
2046 addr += PAGE_SIZE;
2047 }
2048}
2049
91447636 2050boolean_t vm_map_pmap_is_empty(
0a7de745
A
2051 vm_map_t map,
2052 vm_map_offset_t start,
91447636 2053 vm_map_offset_t end);
0a7de745
A
2054boolean_t
2055vm_map_pmap_is_empty(
2056 vm_map_t map,
2057 vm_map_offset_t start,
2058 vm_map_offset_t end)
91447636 2059{
2d21ac55
A
2060#ifdef MACHINE_PMAP_IS_EMPTY
2061 return pmap_is_empty(map->pmap, start, end);
0a7de745
A
2062#else /* MACHINE_PMAP_IS_EMPTY */
2063 vm_map_offset_t offset;
2064 ppnum_t phys_page;
91447636
A
2065
2066 if (map->pmap == NULL) {
2067 return TRUE;
2068 }
2d21ac55 2069
91447636 2070 for (offset = start;
0a7de745
A
2071 offset < end;
2072 offset += PAGE_SIZE) {
91447636
A
2073 phys_page = pmap_find_phys(map->pmap, offset);
2074 if (phys_page) {
2075 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
0a7de745
A
2076 "page %d at 0x%llx\n",
2077 map, (long long)start, (long long)end,
2078 phys_page, (long long)offset);
91447636
A
2079 return FALSE;
2080 }
2081 }
2082 return TRUE;
0a7de745 2083#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
2084}
2085
0a7de745 2086#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
316670eb
A
2087kern_return_t
2088vm_map_random_address_for_size(
0a7de745
A
2089 vm_map_t map,
2090 vm_map_offset_t *address,
2091 vm_map_size_t size)
316670eb 2092{
0a7de745
A
2093 kern_return_t kr = KERN_SUCCESS;
2094 int tries = 0;
2095 vm_map_offset_t random_addr = 0;
316670eb
A
2096 vm_map_offset_t hole_end;
2097
0a7de745
A
2098 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2099 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2100 vm_map_size_t vm_hole_size = 0;
2101 vm_map_size_t addr_space_size;
316670eb
A
2102
2103 addr_space_size = vm_map_max(map) - vm_map_min(map);
2104
2105 assert(page_aligned(size));
2106
2107 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2108 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e 2109 random_addr = vm_map_trunc_page(
0a7de745 2110 vm_map_min(map) + (random_addr % addr_space_size),
39236c6e 2111 VM_MAP_PAGE_MASK(map));
316670eb
A
2112
2113 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2114 if (prev_entry == vm_map_to_entry(map)) {
2115 next_entry = vm_map_first_entry(map);
2116 } else {
2117 next_entry = prev_entry->vme_next;
2118 }
2119 if (next_entry == vm_map_to_entry(map)) {
2120 hole_end = vm_map_max(map);
2121 } else {
2122 hole_end = next_entry->vme_start;
2123 }
2124 vm_hole_size = hole_end - random_addr;
2125 if (vm_hole_size >= size) {
2126 *address = random_addr;
2127 break;
2128 }
2129 }
2130 tries++;
2131 }
2132
2133 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2134 kr = KERN_NO_SPACE;
2135 }
2136 return kr;
2137}
2138
d9a64523
A
2139static boolean_t
2140vm_memory_malloc_no_cow(
2141 int alias)
2142{
2143 uint64_t alias_mask;
2144
cb323159
A
2145 if (alias > 63) {
2146 return FALSE;
2147 }
2148
d9a64523
A
2149 alias_mask = 1ULL << alias;
2150 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2151 return TRUE;
2152 }
2153 return FALSE;
2154}
2155
1c79356b
A
2156/*
2157 * Routine: vm_map_enter
2158 *
2159 * Description:
2160 * Allocate a range in the specified virtual address map.
2161 * The resulting range will refer to memory defined by
2162 * the given memory object and offset into that object.
2163 *
2164 * Arguments are as defined in the vm_map call.
2165 */
91447636
A
2166int _map_enter_debug = 0;
2167static unsigned int vm_map_enter_restore_successes = 0;
2168static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
2169kern_return_t
2170vm_map_enter(
0a7de745
A
2171 vm_map_t map,
2172 vm_map_offset_t *address, /* IN/OUT */
2173 vm_map_size_t size,
2174 vm_map_offset_t mask,
2175 int flags,
2176 vm_map_kernel_flags_t vmk_flags,
2177 vm_tag_t alias,
2178 vm_object_t object,
2179 vm_object_offset_t offset,
2180 boolean_t needs_copy,
2181 vm_prot_t cur_protection,
2182 vm_prot_t max_protection,
2183 vm_inherit_t inheritance)
1c79356b 2184{
0a7de745
A
2185 vm_map_entry_t entry, new_entry;
2186 vm_map_offset_t start, tmp_start, tmp_offset;
2187 vm_map_offset_t end, tmp_end;
2188 vm_map_offset_t tmp2_start, tmp2_end;
2189 vm_map_offset_t desired_empty_end;
2190 vm_map_offset_t step;
2191 kern_return_t result = KERN_SUCCESS;
2192 vm_map_t zap_old_map = VM_MAP_NULL;
2193 vm_map_t zap_new_map = VM_MAP_NULL;
2194 boolean_t map_locked = FALSE;
2195 boolean_t pmap_empty = TRUE;
2196 boolean_t new_mapping_established = FALSE;
2197 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2198 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2199 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2200 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2201 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2202 boolean_t is_submap = vmk_flags.vmkf_submap;
2203 boolean_t permanent = vmk_flags.vmkf_permanent;
cb323159 2204 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
0a7de745
A
2205 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2206 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2207 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2208 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2209 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2210 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2211 vm_tag_t user_alias;
2212 vm_map_offset_t effective_min_offset, effective_max_offset;
2213 kern_return_t kr;
2214 boolean_t clear_map_aligned = FALSE;
2215 vm_map_entry_t hole_entry;
2216 vm_map_size_t chunk_size = 0;
593a1d5f 2217
5ba3f43e
A
2218 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2219
a39ff7e2
A
2220 if (flags & VM_FLAGS_4GB_CHUNK) {
2221#if defined(__LP64__)
2222 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2223#else /* __LP64__ */
2224 chunk_size = ANON_CHUNK_SIZE;
2225#endif /* __LP64__ */
2226 } else {
2227 chunk_size = ANON_CHUNK_SIZE;
2228 }
2229
b0d623f7
A
2230 if (superpage_size) {
2231 switch (superpage_size) {
2232 /*
2233 * Note that the current implementation only supports
2234 * a single size for superpages, SUPERPAGE_SIZE, per
2235 * architecture. As soon as more sizes are supposed
2236 * to be supported, SUPERPAGE_SIZE has to be replaced
2237 * with a lookup of the size depending on superpage_size.
2238 */
2239#ifdef __x86_64__
0a7de745
A
2240 case SUPERPAGE_SIZE_ANY:
2241 /* handle it like 2 MB and round up to page size */
2242 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2243 case SUPERPAGE_SIZE_2MB:
2244 break;
b0d623f7 2245#endif
0a7de745
A
2246 default:
2247 return KERN_INVALID_ARGUMENT;
b0d623f7 2248 }
0a7de745
A
2249 mask = SUPERPAGE_SIZE - 1;
2250 if (size & (SUPERPAGE_SIZE - 1)) {
b0d623f7 2251 return KERN_INVALID_ARGUMENT;
0a7de745
A
2252 }
2253 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
b0d623f7
A
2254 }
2255
6d2010ae 2256
d9a64523
A
2257 if ((cur_protection & VM_PROT_WRITE) &&
2258 (cur_protection & VM_PROT_EXECUTE) &&
2259#if !CONFIG_EMBEDDED
2260 map != kernel_map &&
2261 (cs_process_global_enforcement() ||
0a7de745
A
2262 (vmk_flags.vmkf_cs_enforcement_override
2263 ? vmk_flags.vmkf_cs_enforcement
2264 : cs_process_enforcement(NULL))) &&
d9a64523
A
2265#endif /* !CONFIG_EMBEDDED */
2266 !entry_for_jit) {
2267 DTRACE_VM3(cs_wx,
0a7de745
A
2268 uint64_t, 0,
2269 uint64_t, 0,
2270 vm_prot_t, cur_protection);
d9a64523
A
2271 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2272#if VM_PROTECT_WX_FAIL
0a7de745 2273 "failing\n",
d9a64523 2274#else /* VM_PROTECT_WX_FAIL */
0a7de745 2275 "turning off execute\n",
d9a64523 2276#endif /* VM_PROTECT_WX_FAIL */
0a7de745
A
2277 proc_selfpid(),
2278 (current_task()->bsd_info
2279 ? proc_name_address(current_task()->bsd_info)
2280 : "?"),
2281 __FUNCTION__);
d9a64523
A
2282 cur_protection &= ~VM_PROT_EXECUTE;
2283#if VM_PROTECT_WX_FAIL
2284 return KERN_PROTECTION_FAILURE;
2285#endif /* VM_PROTECT_WX_FAIL */
5ba3f43e 2286 }
1c79356b 2287
a39ff7e2
A
2288 /*
2289 * If the task has requested executable lockdown,
2290 * deny any new executable mapping.
2291 */
2292 if (map->map_disallow_new_exec == TRUE) {
2293 if (cur_protection & VM_PROT_EXECUTE) {
2294 return KERN_PROTECTION_FAILURE;
2295 }
2296 }
2297
cb323159
A
2298 if (resilient_codesign) {
2299 assert(!is_submap);
3e170ce0
A
2300 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2301 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2302 return KERN_PROTECTION_FAILURE;
2303 }
2304 }
2305
cb323159
A
2306 if (resilient_media) {
2307 assert(!is_submap);
2308// assert(!needs_copy);
2309 if (object != VM_OBJECT_NULL &&
2310 !object->internal) {
2311 /*
2312 * This mapping is directly backed by an external
2313 * memory manager (e.g. a vnode pager for a file):
2314 * we would not have any safe place to inject
2315 * a zero-filled page if an actual page is not
2316 * available, without possibly impacting the actual
2317 * contents of the mapped object (e.g. the file),
2318 * so we can't provide any media resiliency here.
2319 */
2320 return KERN_INVALID_ARGUMENT;
2321 }
2322 }
2323
2d21ac55
A
2324 if (is_submap) {
2325 if (purgable) {
2326 /* submaps can not be purgeable */
2327 return KERN_INVALID_ARGUMENT;
2328 }
2329 if (object == VM_OBJECT_NULL) {
2330 /* submaps can not be created lazily */
2331 return KERN_INVALID_ARGUMENT;
2332 }
2333 }
5ba3f43e 2334 if (vmk_flags.vmkf_already) {
2d21ac55
A
2335 /*
2336 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2337 * is already present. For it to be meaningful, the requested
2338 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2339 * we shouldn't try and remove what was mapped there first
2340 * (!VM_FLAGS_OVERWRITE).
2341 */
2342 if ((flags & VM_FLAGS_ANYWHERE) ||
2343 (flags & VM_FLAGS_OVERWRITE)) {
2344 return KERN_INVALID_ARGUMENT;
2345 }
2346 }
2347
6d2010ae 2348 effective_min_offset = map->min_offset;
b0d623f7 2349
5ba3f43e 2350 if (vmk_flags.vmkf_beyond_max) {
2d21ac55 2351 /*
b0d623f7 2352 * Allow an insertion beyond the map's max offset.
2d21ac55 2353 */
0a7de745
A
2354#if !defined(__arm__) && !defined(__arm64__)
2355 if (vm_map_is_64bit(map)) {
2d21ac55 2356 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
0a7de745
A
2357 } else
2358#endif /* __arm__ */
2359 effective_max_offset = 0x00000000FFFFF000ULL;
2d21ac55 2360 } else {
cb323159
A
2361#if !defined(CONFIG_EMBEDDED)
2362 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2363 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2364 } else {
2365 effective_max_offset = map->max_offset;
2366 }
2367#else
2d21ac55 2368 effective_max_offset = map->max_offset;
cb323159 2369#endif
2d21ac55
A
2370 }
2371
2372 if (size == 0 ||
2373 (offset & PAGE_MASK_64) != 0) {
91447636
A
2374 *address = 0;
2375 return KERN_INVALID_ARGUMENT;
2376 }
2377
3e170ce0
A
2378 if (map->pmap == kernel_pmap) {
2379 user_alias = VM_KERN_MEMORY_NONE;
2380 } else {
2381 user_alias = alias;
2382 }
2d21ac55 2383
0a7de745
A
2384 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2385 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2386 }
2387
2388#define RETURN(value) { result = value; goto BailOut; }
1c79356b
A
2389
2390 assert(page_aligned(*address));
2391 assert(page_aligned(size));
91447636 2392
39236c6e
A
2393 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2394 /*
2395 * In most cases, the caller rounds the size up to the
2396 * map's page size.
2397 * If we get a size that is explicitly not map-aligned here,
2398 * we'll have to respect the caller's wish and mark the
2399 * mapping as "not map-aligned" to avoid tripping the
2400 * map alignment checks later.
2401 */
2402 clear_map_aligned = TRUE;
2403 }
5ba3f43e 2404 if (!anywhere &&
fe8ab488
A
2405 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2406 /*
2407 * We've been asked to map at a fixed address and that
2408 * address is not aligned to the map's specific alignment.
2409 * The caller should know what it's doing (i.e. most likely
2410 * mapping some fragmented copy map, transferring memory from
2411 * a VM map with a different alignment), so clear map_aligned
2412 * for this new VM map entry and proceed.
2413 */
2414 clear_map_aligned = TRUE;
2415 }
39236c6e 2416
91447636
A
2417 /*
2418 * Only zero-fill objects are allowed to be purgable.
2419 * LP64todo - limit purgable objects to 32-bits for now
2420 */
2421 if (purgable &&
2422 (offset != 0 ||
0a7de745
A
2423 (object != VM_OBJECT_NULL &&
2424 (object->vo_size != size ||
2425 object->purgable == VM_PURGABLE_DENY))
2426 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
91447636 2427 return KERN_INVALID_ARGUMENT;
0a7de745 2428 }
91447636
A
2429
2430 if (!anywhere && overwrite) {
2431 /*
2432 * Create a temporary VM map to hold the old mappings in the
2433 * affected area while we create the new one.
2434 * This avoids releasing the VM map lock in
2435 * vm_map_entry_delete() and allows atomicity
2436 * when we want to replace some mappings with a new one.
2437 * It also allows us to restore the old VM mappings if the
2438 * new mapping fails.
2439 */
2440 zap_old_map = vm_map_create(PMAP_NULL,
0a7de745
A
2441 *address,
2442 *address + size,
2443 map->hdr.entries_pageable);
39236c6e 2444 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2445 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2446 }
2447
0a7de745 2448StartAgain:;
1c79356b
A
2449
2450 start = *address;
2451
2452 if (anywhere) {
2453 vm_map_lock(map);
91447636 2454 map_locked = TRUE;
5ba3f43e 2455
316670eb 2456 if (entry_for_jit) {
d9a64523 2457#if CONFIG_EMBEDDED
316670eb
A
2458 if (map->jit_entry_exists) {
2459 result = KERN_INVALID_ARGUMENT;
2460 goto BailOut;
2461 }
39037602 2462 random_address = TRUE;
d9a64523 2463#endif /* CONFIG_EMBEDDED */
39037602
A
2464 }
2465
2466 if (random_address) {
316670eb
A
2467 /*
2468 * Get a random start address.
2469 */
2470 result = vm_map_random_address_for_size(map, address, size);
2471 if (result != KERN_SUCCESS) {
2472 goto BailOut;
2473 }
2474 start = *address;
6d2010ae 2475 }
cb323159 2476#if !CONFIG_EMBEDDED
5ba3f43e 2477 else if ((start == 0 || start == vm_map_min(map)) &&
0a7de745
A
2478 !map->disable_vmentry_reuse &&
2479 map->vmmap_high_start != 0) {
5ba3f43e
A
2480 start = map->vmmap_high_start;
2481 }
cb323159 2482#endif
1c79356b 2483
316670eb 2484
1c79356b
A
2485 /*
2486 * Calculate the first possible address.
2487 */
2488
0a7de745 2489 if (start < effective_min_offset) {
2d21ac55 2490 start = effective_min_offset;
0a7de745
A
2491 }
2492 if (start > effective_max_offset) {
1c79356b 2493 RETURN(KERN_NO_SPACE);
0a7de745 2494 }
1c79356b
A
2495
2496 /*
2497 * Look for the first possible address;
2498 * if there's already something at this
2499 * address, we have to start after it.
2500 */
2501
0a7de745 2502 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 2503 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2504 } else {
3e170ce0 2505 if (map->holelistenabled) {
d9a64523 2506 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
2507
2508 if (hole_entry == NULL) {
2509 /*
2510 * No more space in the map?
2511 */
2512 result = KERN_NO_SPACE;
2513 goto BailOut;
2514 } else {
3e170ce0
A
2515 boolean_t found_hole = FALSE;
2516
2517 do {
2518 if (hole_entry->vme_start >= start) {
2519 start = hole_entry->vme_start;
2520 found_hole = TRUE;
2521 break;
2522 }
2523
2524 if (hole_entry->vme_end > start) {
2525 found_hole = TRUE;
2526 break;
2527 }
2528 hole_entry = hole_entry->vme_next;
d9a64523 2529 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
2530
2531 if (found_hole == FALSE) {
2532 result = KERN_NO_SPACE;
2533 goto BailOut;
2534 }
2535
2536 entry = hole_entry;
6d2010ae 2537
0a7de745 2538 if (start == 0) {
3e170ce0 2539 start += PAGE_SIZE_64;
0a7de745 2540 }
3e170ce0 2541 }
6d2010ae 2542 } else {
3e170ce0
A
2543 assert(first_free_is_valid(map));
2544
2545 entry = map->first_free;
2546
2547 if (entry == vm_map_to_entry(map)) {
6d2010ae 2548 entry = NULL;
3e170ce0 2549 } else {
0a7de745
A
2550 if (entry->vme_next == vm_map_to_entry(map)) {
2551 /*
2552 * Hole at the end of the map.
2553 */
3e170ce0 2554 entry = NULL;
0a7de745
A
2555 } else {
2556 if (start < (entry->vme_next)->vme_start) {
3e170ce0
A
2557 start = entry->vme_end;
2558 start = vm_map_round_page(start,
0a7de745 2559 VM_MAP_PAGE_MASK(map));
3e170ce0
A
2560 } else {
2561 /*
2562 * Need to do a lookup.
2563 */
2564 entry = NULL;
2565 }
0a7de745 2566 }
3e170ce0
A
2567 }
2568
2569 if (entry == NULL) {
0a7de745 2570 vm_map_entry_t tmp_entry;
3e170ce0
A
2571 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2572 assert(!entry_for_jit);
2573 start = tmp_entry->vme_end;
39236c6e 2574 start = vm_map_round_page(start,
0a7de745 2575 VM_MAP_PAGE_MASK(map));
6d2010ae 2576 }
3e170ce0 2577 entry = tmp_entry;
316670eb 2578 }
6d2010ae 2579 }
1c79356b
A
2580 }
2581
2582 /*
2583 * In any case, the "entry" always precedes
2584 * the proposed new region throughout the
2585 * loop:
2586 */
2587
2588 while (TRUE) {
0a7de745 2589 vm_map_entry_t next;
1c79356b 2590
2d21ac55 2591 /*
1c79356b
A
2592 * Find the end of the proposed new region.
2593 * Be sure we didn't go beyond the end, or
2594 * wrap around the address.
2595 */
2596
2597 end = ((start + mask) & ~mask);
39236c6e 2598 end = vm_map_round_page(end,
0a7de745
A
2599 VM_MAP_PAGE_MASK(map));
2600 if (end < start) {
1c79356b 2601 RETURN(KERN_NO_SPACE);
0a7de745 2602 }
1c79356b 2603 start = end;
39236c6e 2604 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 2605 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2606 end += size;
2607
d9a64523
A
2608 /* We want an entire page of empty space, but don't increase the allocation size. */
2609 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2610
2611 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
1c79356b 2612 if (map->wait_for_space) {
fe8ab488 2613 assert(!keep_map_locked);
2d21ac55 2614 if (size <= (effective_max_offset -
0a7de745 2615 effective_min_offset)) {
1c79356b 2616 assert_wait((event_t)map,
0a7de745 2617 THREAD_ABORTSAFE);
1c79356b 2618 vm_map_unlock(map);
91447636
A
2619 map_locked = FALSE;
2620 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2621 goto StartAgain;
2622 }
2623 }
2624 RETURN(KERN_NO_SPACE);
2625 }
2626
1c79356b 2627 next = entry->vme_next;
1c79356b 2628
3e170ce0 2629 if (map->holelistenabled) {
0a7de745 2630 if (entry->vme_end >= desired_empty_end) {
3e170ce0 2631 break;
0a7de745 2632 }
3e170ce0
A
2633 } else {
2634 /*
2635 * If there are no more entries, we must win.
2636 *
2637 * OR
2638 *
2639 * If there is another entry, it must be
2640 * after the end of the potential new region.
2641 */
1c79356b 2642
0a7de745 2643 if (next == vm_map_to_entry(map)) {
3e170ce0 2644 break;
0a7de745 2645 }
3e170ce0 2646
0a7de745 2647 if (next->vme_start >= desired_empty_end) {
3e170ce0 2648 break;
0a7de745 2649 }
3e170ce0 2650 }
1c79356b
A
2651
2652 /*
2653 * Didn't fit -- move to the next entry.
2654 */
2655
2656 entry = next;
3e170ce0
A
2657
2658 if (map->holelistenabled) {
d9a64523 2659 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
2660 /*
2661 * Wrapped around
2662 */
2663 result = KERN_NO_SPACE;
2664 goto BailOut;
2665 }
2666 start = entry->vme_start;
2667 } else {
2668 start = entry->vme_end;
2669 }
2670
39236c6e 2671 start = vm_map_round_page(start,
0a7de745 2672 VM_MAP_PAGE_MASK(map));
1c79356b 2673 }
3e170ce0
A
2674
2675 if (map->holelistenabled) {
2676 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2677 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2678 }
2679 }
2680
1c79356b 2681 *address = start;
39236c6e 2682 assert(VM_MAP_PAGE_ALIGNED(*address,
0a7de745 2683 VM_MAP_PAGE_MASK(map)));
1c79356b 2684 } else {
1c79356b
A
2685 /*
2686 * Verify that:
2687 * the address doesn't itself violate
2688 * the mask requirement.
2689 */
2690
2691 vm_map_lock(map);
91447636 2692 map_locked = TRUE;
0a7de745 2693 if ((start & mask) != 0) {
1c79356b 2694 RETURN(KERN_NO_SPACE);
0a7de745 2695 }
1c79356b
A
2696
2697 /*
2698 * ... the address is within bounds
2699 */
2700
2701 end = start + size;
2702
2d21ac55
A
2703 if ((start < effective_min_offset) ||
2704 (end > effective_max_offset) ||
1c79356b
A
2705 (start >= end)) {
2706 RETURN(KERN_INVALID_ADDRESS);
2707 }
2708
91447636 2709 if (overwrite && zap_old_map != VM_MAP_NULL) {
5ba3f43e 2710 int remove_flags;
91447636
A
2711 /*
2712 * Fixed mapping and "overwrite" flag: attempt to
2713 * remove all existing mappings in the specified
2714 * address range, saving them in our "zap_old_map".
2715 */
5ba3f43e
A
2716 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2717 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2718 if (vmk_flags.vmkf_overwrite_immutable) {
2719 /* we can overwrite immutable mappings */
2720 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2721 }
91447636 2722 (void) vm_map_delete(map, start, end,
0a7de745
A
2723 remove_flags,
2724 zap_old_map);
91447636
A
2725 }
2726
1c79356b
A
2727 /*
2728 * ... the starting address isn't allocated
2729 */
2730
2d21ac55 2731 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745 2732 if (!(vmk_flags.vmkf_already)) {
2d21ac55
A
2733 RETURN(KERN_NO_SPACE);
2734 }
2735 /*
2736 * Check if what's already there is what we want.
2737 */
2738 tmp_start = start;
2739 tmp_offset = offset;
2740 if (entry->vme_start < start) {
2741 tmp_start -= start - entry->vme_start;
2742 tmp_offset -= start - entry->vme_start;
2d21ac55
A
2743 }
2744 for (; entry->vme_start < end;
0a7de745 2745 entry = entry->vme_next) {
4a3eedf9
A
2746 /*
2747 * Check if the mapping's attributes
2748 * match the existing map entry.
2749 */
2d21ac55
A
2750 if (entry == vm_map_to_entry(map) ||
2751 entry->vme_start != tmp_start ||
2752 entry->is_sub_map != is_submap ||
3e170ce0 2753 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2754 entry->needs_copy != needs_copy ||
2755 entry->protection != cur_protection ||
2756 entry->max_protection != max_protection ||
2757 entry->inheritance != inheritance ||
fe8ab488 2758 entry->iokit_acct != iokit_acct ||
3e170ce0 2759 VME_ALIAS(entry) != alias) {
2d21ac55
A
2760 /* not the same mapping ! */
2761 RETURN(KERN_NO_SPACE);
2762 }
4a3eedf9
A
2763 /*
2764 * Check if the same object is being mapped.
2765 */
2766 if (is_submap) {
3e170ce0 2767 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2768 (vm_map_t) object) {
2769 /* not the same submap */
2770 RETURN(KERN_NO_SPACE);
2771 }
2772 } else {
3e170ce0 2773 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2774 /* not the same VM object... */
2775 vm_object_t obj2;
2776
3e170ce0 2777 obj2 = VME_OBJECT(entry);
4a3eedf9 2778 if ((obj2 == VM_OBJECT_NULL ||
0a7de745 2779 obj2->internal) &&
4a3eedf9 2780 (object == VM_OBJECT_NULL ||
0a7de745 2781 object->internal)) {
4a3eedf9
A
2782 /*
2783 * ... but both are
2784 * anonymous memory,
2785 * so equivalent.
2786 */
2787 } else {
2788 RETURN(KERN_NO_SPACE);
2789 }
2790 }
2791 }
2792
2d21ac55
A
2793 tmp_offset += entry->vme_end - entry->vme_start;
2794 tmp_start += entry->vme_end - entry->vme_start;
2795 if (entry->vme_end >= end) {
2796 /* reached the end of our mapping */
2797 break;
2798 }
2799 }
2800 /* it all matches: let's use what's already there ! */
2801 RETURN(KERN_MEMORY_PRESENT);
2802 }
1c79356b
A
2803
2804 /*
2805 * ... the next region doesn't overlap the
2806 * end point.
2807 */
2808
2809 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745 2810 (entry->vme_next->vme_start < end)) {
1c79356b 2811 RETURN(KERN_NO_SPACE);
0a7de745 2812 }
1c79356b
A
2813 }
2814
2815 /*
2816 * At this point,
2817 * "start" and "end" should define the endpoints of the
2818 * available new range, and
2819 * "entry" should refer to the region before the new
2820 * range, and
2821 *
2822 * the map should be locked.
2823 */
2824
2825 /*
2826 * See whether we can avoid creating a new entry (and object) by
2827 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2828 * extend from below.] Note that we can never extend/join
2829 * purgable objects because they need to remain distinct
2830 * entities in order to implement their "volatile object"
2831 * semantics.
1c79356b
A
2832 */
2833
d9a64523
A
2834 if (purgable ||
2835 entry_for_jit ||
2836 vm_memory_malloc_no_cow(user_alias)) {
91447636
A
2837 if (object == VM_OBJECT_NULL) {
2838 object = vm_object_allocate(size);
2839 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
d9a64523 2840 object->true_share = FALSE;
316670eb 2841 if (purgable) {
fe8ab488 2842 task_t owner;
316670eb 2843 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2844 if (map->pmap == kernel_pmap) {
2845 /*
2846 * Purgeable mappings made in a kernel
2847 * map are "owned" by the kernel itself
2848 * rather than the current user task
2849 * because they're likely to be used by
2850 * more than this user task (see
2851 * execargs_purgeable_allocate(), for
2852 * example).
2853 */
2854 owner = kernel_task;
2855 } else {
2856 owner = current_task();
2857 }
d9a64523 2858 assert(object->vo_owner == NULL);
fe8ab488
A
2859 assert(object->resident_page_count == 0);
2860 assert(object->wired_page_count == 0);
2861 vm_object_lock(object);
2862 vm_purgeable_nonvolatile_enqueue(object, owner);
2863 vm_object_unlock(object);
316670eb 2864 }
91447636
A
2865 offset = (vm_object_offset_t)0;
2866 }
2d21ac55 2867 } else if ((is_submap == FALSE) &&
0a7de745
A
2868 (object == VM_OBJECT_NULL) &&
2869 (entry != vm_map_to_entry(map)) &&
2870 (entry->vme_end == start) &&
2871 (!entry->is_shared) &&
2872 (!entry->is_sub_map) &&
2873 (!entry->in_transition) &&
2874 (!entry->needs_wakeup) &&
2875 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2876 (entry->protection == cur_protection) &&
2877 (entry->max_protection == max_protection) &&
2878 (entry->inheritance == inheritance) &&
2879 ((user_alias == VM_MEMORY_REALLOC) ||
2880 (VME_ALIAS(entry) == alias)) &&
2881 (entry->no_cache == no_cache) &&
2882 (entry->permanent == permanent) &&
2883 /* no coalescing for immutable executable mappings */
2884 !((entry->protection & VM_PROT_EXECUTE) &&
2885 entry->permanent) &&
2886 (!entry->superpage_size && !superpage_size) &&
2887 /*
2888 * No coalescing if not map-aligned, to avoid propagating
2889 * that condition any further than needed:
2890 */
2891 (!entry->map_aligned || !clear_map_aligned) &&
2892 (!entry->zero_wired_pages) &&
2893 (!entry->used_for_jit && !entry_for_jit) &&
2894 (!entry->pmap_cs_associated) &&
2895 (entry->iokit_acct == iokit_acct) &&
2896 (!entry->vme_resilient_codesign) &&
2897 (!entry->vme_resilient_media) &&
2898 (!entry->vme_atomic) &&
cb323159 2899 (entry->vme_no_copy_on_read == no_copy_on_read) &&
0a7de745
A
2900
2901 ((entry->vme_end - entry->vme_start) + size <=
2902 (user_alias == VM_MEMORY_REALLOC ?
2903 ANON_CHUNK_SIZE :
2904 NO_COALESCE_LIMIT)) &&
2905
2906 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2907 if (vm_object_coalesce(VME_OBJECT(entry),
0a7de745
A
2908 VM_OBJECT_NULL,
2909 VME_OFFSET(entry),
2910 (vm_object_offset_t) 0,
2911 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2912 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2913 /*
2914 * Coalesced the two objects - can extend
2915 * the previous map entry to include the
2916 * new range.
2917 */
2918 map->size += (end - entry->vme_end);
e2d2fc5c 2919 assert(entry->vme_start < end);
39236c6e 2920 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745
A
2921 VM_MAP_PAGE_MASK(map)));
2922 if (__improbable(vm_debug_events)) {
3e170ce0 2923 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
0a7de745 2924 }
1c79356b 2925 entry->vme_end = end;
3e170ce0
A
2926 if (map->holelistenabled) {
2927 vm_map_store_update_first_free(map, entry, TRUE);
2928 } else {
2929 vm_map_store_update_first_free(map, map->first_free, TRUE);
2930 }
fe8ab488 2931 new_mapping_established = TRUE;
1c79356b
A
2932 RETURN(KERN_SUCCESS);
2933 }
2934 }
2935
b0d623f7
A
2936 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2937 new_entry = NULL;
2938
0a7de745 2939 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
b0d623f7
A
2940 tmp2_end = tmp2_start + step;
2941 /*
2942 * Create a new entry
b0d623f7
A
2943 *
2944 * XXX FBDP
2945 * The reserved "page zero" in each process's address space can
a39ff7e2 2946 * be arbitrarily large. Splitting it into separate objects and
b0d623f7
A
2947 * therefore different VM map entries serves no purpose and just
2948 * slows down operations on the VM map, so let's not split the
a39ff7e2 2949 * allocation into chunks if the max protection is NONE. That
b0d623f7
A
2950 * memory should never be accessible, so it will never get to the
2951 * default pager.
2952 */
2953 tmp_start = tmp2_start;
2954 if (object == VM_OBJECT_NULL &&
a39ff7e2 2955 size > chunk_size &&
b0d623f7 2956 max_protection != VM_PROT_NONE &&
0a7de745 2957 superpage_size == 0) {
a39ff7e2 2958 tmp_end = tmp_start + chunk_size;
0a7de745 2959 } else {
b0d623f7 2960 tmp_end = tmp2_end;
0a7de745 2961 }
b0d623f7 2962 do {
5ba3f43e
A
2963 new_entry = vm_map_entry_insert(
2964 map, entry, tmp_start, tmp_end,
0a7de745 2965 object, offset, needs_copy,
5ba3f43e
A
2966 FALSE, FALSE,
2967 cur_protection, max_protection,
2968 VM_BEHAVIOR_DEFAULT,
2969 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2970 0,
2971 no_cache,
2972 permanent,
cb323159 2973 no_copy_on_read,
5ba3f43e
A
2974 superpage_size,
2975 clear_map_aligned,
2976 is_submap,
2977 entry_for_jit,
2978 alias);
3e170ce0
A
2979
2980 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
6d2010ae 2981
3e170ce0 2982 if (resilient_codesign &&
0a7de745
A
2983 !((cur_protection | max_protection) &
2984 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
3e170ce0
A
2985 new_entry->vme_resilient_codesign = TRUE;
2986 }
2987
2988 if (resilient_media &&
cb323159
A
2989 (object == VM_OBJECT_NULL ||
2990 object->internal)) {
3e170ce0
A
2991 new_entry->vme_resilient_media = TRUE;
2992 }
2993
fe8ab488
A
2994 assert(!new_entry->iokit_acct);
2995 if (!is_submap &&
2996 object != VM_OBJECT_NULL &&
d9a64523 2997 (object->purgable != VM_PURGABLE_DENY ||
0a7de745 2998 object->vo_ledger_tag)) {
fe8ab488
A
2999 assert(new_entry->use_pmap);
3000 assert(!new_entry->iokit_acct);
3001 /*
3002 * Turn off pmap accounting since
d9a64523 3003 * purgeable (or tagged) objects have their
fe8ab488
A
3004 * own ledgers.
3005 */
3006 new_entry->use_pmap = FALSE;
3007 } else if (!is_submap &&
0a7de745
A
3008 iokit_acct &&
3009 object != VM_OBJECT_NULL &&
3010 object->internal) {
fe8ab488
A
3011 /* alternate accounting */
3012 assert(!new_entry->iokit_acct);
3013 assert(new_entry->use_pmap);
3014 new_entry->iokit_acct = TRUE;
3015 new_entry->use_pmap = FALSE;
ecc0ceb4
A
3016 DTRACE_VM4(
3017 vm_map_iokit_mapped_region,
3018 vm_map_t, map,
3019 vm_map_offset_t, new_entry->vme_start,
3020 vm_map_offset_t, new_entry->vme_end,
3021 int, VME_ALIAS(new_entry));
fe8ab488
A
3022 vm_map_iokit_mapped_region(
3023 map,
3024 (new_entry->vme_end -
0a7de745 3025 new_entry->vme_start));
fe8ab488
A
3026 } else if (!is_submap) {
3027 assert(!new_entry->iokit_acct);
3028 assert(new_entry->use_pmap);
3029 }
3030
b0d623f7 3031 if (is_submap) {
0a7de745
A
3032 vm_map_t submap;
3033 boolean_t submap_is_64bit;
3034 boolean_t use_pmap;
b0d623f7 3035
fe8ab488
A
3036 assert(new_entry->is_sub_map);
3037 assert(!new_entry->use_pmap);
3038 assert(!new_entry->iokit_acct);
b0d623f7
A
3039 submap = (vm_map_t) object;
3040 submap_is_64bit = vm_map_is_64bit(submap);
cb323159 3041 use_pmap = vmk_flags.vmkf_nested_pmap;
fe8ab488 3042#ifndef NO_NESTED_PMAP
b0d623f7 3043 if (use_pmap && submap->pmap == NULL) {
316670eb 3044 ledger_t ledger = map->pmap->ledger;
b0d623f7 3045 /* we need a sub pmap to nest... */
cb323159
A
3046 submap->pmap = pmap_create_options(ledger, 0,
3047 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
b0d623f7
A
3048 if (submap->pmap == NULL) {
3049 /* let's proceed without nesting... */
3050 }
0a7de745 3051#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
3052 else {
3053 pmap_set_nested(submap->pmap);
3054 }
3055#endif
2d21ac55 3056 }
b0d623f7
A
3057 if (use_pmap && submap->pmap != NULL) {
3058 kr = pmap_nest(map->pmap,
0a7de745
A
3059 submap->pmap,
3060 tmp_start,
3061 tmp_start,
3062 tmp_end - tmp_start);
b0d623f7
A
3063 if (kr != KERN_SUCCESS) {
3064 printf("vm_map_enter: "
0a7de745
A
3065 "pmap_nest(0x%llx,0x%llx) "
3066 "error 0x%x\n",
3067 (long long)tmp_start,
3068 (long long)tmp_end,
3069 kr);
b0d623f7
A
3070 } else {
3071 /* we're now nested ! */
3072 new_entry->use_pmap = TRUE;
3073 pmap_empty = FALSE;
3074 }
3075 }
fe8ab488 3076#endif /* NO_NESTED_PMAP */
2d21ac55 3077 }
b0d623f7
A
3078 entry = new_entry;
3079
3080 if (superpage_size) {
3081 vm_page_t pages, m;
3082 vm_object_t sp_object;
5ba3f43e 3083 vm_object_offset_t sp_offset;
b0d623f7 3084
3e170ce0 3085 VME_OFFSET_SET(entry, 0);
b0d623f7
A
3086
3087 /* allocate one superpage */
0a7de745 3088 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
2d21ac55 3089 if (kr != KERN_SUCCESS) {
3e170ce0
A
3090 /* deallocate whole range... */
3091 new_mapping_established = TRUE;
3092 /* ... but only up to "tmp_end" */
3093 size -= end - tmp_end;
b0d623f7
A
3094 RETURN(kr);
3095 }
3096
3097 /* create one vm_object per superpage */
3098 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3099 sp_object->phys_contiguous = TRUE;
0a7de745 3100 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3e170ce0 3101 VME_OBJECT_SET(entry, sp_object);
fe8ab488 3102 assert(entry->use_pmap);
b0d623f7
A
3103
3104 /* enter the base pages into the object */
3105 vm_object_lock(sp_object);
5ba3f43e 3106 for (sp_offset = 0;
0a7de745
A
3107 sp_offset < SUPERPAGE_SIZE;
3108 sp_offset += PAGE_SIZE) {
b0d623f7 3109 m = pages;
39037602 3110 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
3111 pages = NEXT_PAGE(m);
3112 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5ba3f43e 3113 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 3114 }
b0d623f7 3115 vm_object_unlock(sp_object);
2d21ac55 3116 }
5ba3f43e 3117 } while (tmp_end != tmp2_end &&
0a7de745
A
3118 (tmp_start = tmp_end) &&
3119 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3120 tmp_end + chunk_size : tmp2_end));
b0d623f7 3121 }
91447636 3122
91447636 3123 new_mapping_established = TRUE;
1c79356b 3124
fe8ab488
A
3125BailOut:
3126 assert(map_locked == TRUE);
2d21ac55 3127
593a1d5f
A
3128 if (result == KERN_SUCCESS) {
3129 vm_prot_t pager_prot;
3130 memory_object_t pager;
91447636 3131
fe8ab488 3132#if DEBUG
593a1d5f 3133 if (pmap_empty &&
5ba3f43e 3134 !(vmk_flags.vmkf_no_pmap_check)) {
593a1d5f 3135 assert(vm_map_pmap_is_empty(map,
0a7de745
A
3136 *address,
3137 *address + size));
593a1d5f 3138 }
fe8ab488 3139#endif /* DEBUG */
593a1d5f
A
3140
3141 /*
3142 * For "named" VM objects, let the pager know that the
3143 * memory object is being mapped. Some pagers need to keep
3144 * track of this, to know when they can reclaim the memory
3145 * object, for example.
3146 * VM calls memory_object_map() for each mapping (specifying
3147 * the protection of each mapping) and calls
3148 * memory_object_last_unmap() when all the mappings are gone.
3149 */
3150 pager_prot = max_protection;
3151 if (needs_copy) {
3152 /*
3153 * Copy-On-Write mapping: won't modify
3154 * the memory object.
3155 */
3156 pager_prot &= ~VM_PROT_WRITE;
3157 }
3158 if (!is_submap &&
3159 object != VM_OBJECT_NULL &&
3160 object->named &&
3161 object->pager != MEMORY_OBJECT_NULL) {
3162 vm_object_lock(object);
3163 pager = object->pager;
3164 if (object->named &&
3165 pager != MEMORY_OBJECT_NULL) {
3166 assert(object->pager_ready);
3167 vm_object_mapping_wait(object, THREAD_UNINT);
3168 vm_object_mapping_begin(object);
3169 vm_object_unlock(object);
3170
3171 kr = memory_object_map(pager, pager_prot);
3172 assert(kr == KERN_SUCCESS);
3173
3174 vm_object_lock(object);
3175 vm_object_mapping_end(object);
3176 }
3177 vm_object_unlock(object);
3178 }
fe8ab488
A
3179 }
3180
3181 assert(map_locked == TRUE);
3182
3183 if (!keep_map_locked) {
3184 vm_map_unlock(map);
3185 map_locked = FALSE;
3186 }
3187
3188 /*
3189 * We can't hold the map lock if we enter this block.
3190 */
3191
3192 if (result == KERN_SUCCESS) {
fe8ab488
A
3193 /* Wire down the new entry if the user
3194 * requested all new map entries be wired.
3195 */
0a7de745 3196 if ((map->wiring_required) || (superpage_size)) {
fe8ab488
A
3197 assert(!keep_map_locked);
3198 pmap_empty = FALSE; /* pmap won't be empty */
5ba3f43e 3199 kr = vm_map_wire_kernel(map, start, end,
0a7de745
A
3200 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3201 TRUE);
fe8ab488
A
3202 result = kr;
3203 }
3204
3205 }
3206
3207 if (result != KERN_SUCCESS) {
91447636
A
3208 if (new_mapping_established) {
3209 /*
3210 * We have to get rid of the new mappings since we
3211 * won't make them available to the user.
3212 * Try and do that atomically, to minimize the risk
3213 * that someone else create new mappings that range.
3214 */
3215 zap_new_map = vm_map_create(PMAP_NULL,
0a7de745
A
3216 *address,
3217 *address + size,
3218 map->hdr.entries_pageable);
39236c6e 3219 vm_map_set_page_shift(zap_new_map,
0a7de745 3220 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
3221 vm_map_disable_hole_optimization(zap_new_map);
3222
91447636
A
3223 if (!map_locked) {
3224 vm_map_lock(map);
3225 map_locked = TRUE;
3226 }
0a7de745
A
3227 (void) vm_map_delete(map, *address, *address + size,
3228 (VM_MAP_REMOVE_SAVE_ENTRIES |
3229 VM_MAP_REMOVE_NO_MAP_ALIGN),
3230 zap_new_map);
91447636
A
3231 }
3232 if (zap_old_map != VM_MAP_NULL &&
3233 zap_old_map->hdr.nentries != 0) {
0a7de745 3234 vm_map_entry_t entry1, entry2;
91447636
A
3235
3236 /*
3237 * The new mapping failed. Attempt to restore
3238 * the old mappings, saved in the "zap_old_map".
3239 */
3240 if (!map_locked) {
3241 vm_map_lock(map);
3242 map_locked = TRUE;
3243 }
3244
3245 /* first check if the coast is still clear */
3246 start = vm_map_first_entry(zap_old_map)->vme_start;
3247 end = vm_map_last_entry(zap_old_map)->vme_end;
3248 if (vm_map_lookup_entry(map, start, &entry1) ||
3249 vm_map_lookup_entry(map, end, &entry2) ||
3250 entry1 != entry2) {
3251 /*
3252 * Part of that range has already been
3253 * re-mapped: we can't restore the old
3254 * mappings...
3255 */
3256 vm_map_enter_restore_failures++;
3257 } else {
3258 /*
3259 * Transfer the saved map entries from
3260 * "zap_old_map" to the original "map",
3261 * inserting them all after "entry1".
3262 */
3263 for (entry2 = vm_map_first_entry(zap_old_map);
0a7de745
A
3264 entry2 != vm_map_to_entry(zap_old_map);
3265 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
3266 vm_map_size_t entry_size;
3267
3268 entry_size = (entry2->vme_end -
0a7de745 3269 entry2->vme_start);
6d2010ae 3270 vm_map_store_entry_unlink(zap_old_map,
0a7de745 3271 entry2);
2d21ac55 3272 zap_old_map->size -= entry_size;
d9a64523 3273 vm_map_store_entry_link(map, entry1, entry2,
0a7de745 3274 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55 3275 map->size += entry_size;
91447636
A
3276 entry1 = entry2;
3277 }
3278 if (map->wiring_required) {
3279 /*
3280 * XXX TODO: we should rewire the
3281 * old pages here...
3282 */
3283 }
3284 vm_map_enter_restore_successes++;
3285 }
3286 }
3287 }
3288
fe8ab488
A
3289 /*
3290 * The caller is responsible for releasing the lock if it requested to
3291 * keep the map locked.
3292 */
3293 if (map_locked && !keep_map_locked) {
91447636
A
3294 vm_map_unlock(map);
3295 }
3296
3297 /*
3298 * Get rid of the "zap_maps" and all the map entries that
3299 * they may still contain.
3300 */
3301 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 3302 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3303 zap_old_map = VM_MAP_NULL;
3304 }
3305 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 3306 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3307 zap_new_map = VM_MAP_NULL;
3308 }
3309
3310 return result;
1c79356b 3311
0a7de745 3312#undef RETURN
1c79356b
A
3313}
3314
5ba3f43e
A
#if __arm64__
extern const struct memory_object_pager_ops fourk_pager_ops;

/*
 *	Routine:	vm_map_enter_fourk
 *
 *	Description:
 *		Establish a fixed-address mapping of "object" at 4K granularity.
 *		The requested [*address, *address + size) range, which must be
 *		4K-aligned, is widened to the map's own page boundaries and backed
 *		by a "4K" pager; the caller's "object" is then installed in the
 *		pager slot(s) (at most 4 slots of FOURK_PAGE_SIZE each) covered by
 *		the request.
 *
 *		If the widened range is already mapped by an entry that is exactly
 *		SIXTEENK_PAGE_SIZE long and ultimately backed by a "4K" pager, that
 *		pager is re-used and the entry's protections are widened to include
 *		the requested ones.  Otherwise a new "4K" pager is created and its
 *		copy object is inserted as a new map entry.
 *
 *	Parameters:
 *		map		target VM map
 *		address		IN/OUT: requested address; reset to 0 when the
 *				size/offset arguments are rejected
 *		size		size of the mapping (must be non-zero)
 *		mask		alignment mask the page-truncated address must satisfy
 *		flags		VM_FLAGS_* (only fixed, non-overwrite, non-purgable,
 *				non-superpage requests are supported)
 *		vmk_flags	kernel-only mapping flags
 *		alias		VM tag recorded in the new map entry
 *		object		object to install in the "4K" pager (may be
 *				VM_OBJECT_NULL)
 *		offset		offset in "object"; must be 4K-aligned
 *		needs_copy	TRUE for a copy-on-write mapping (only affects the
 *				protection reported to a named object's pager)
 *		cur_protection	current protection of the mapping
 *		max_protection	maximum protection of the mapping
 *		inheritance	inheritance of the new map entry
 *
 *	Returns:
 *		KERN_SUCCESS		the mapping was established
 *		KERN_NOT_SUPPORTED	superpage, submap, "already", purgable, JIT,
 *					"beyond max", "anywhere" or "overwrite" request
 *		KERN_PROTECTION_FAILURE	executable mapping requested in a map that
 *					disallows new executable mappings
 *		KERN_INVALID_ARGUMENT	zero size or "offset" not 4K-aligned
 *		KERN_INVALID_ADDRESS	range outside of the map's bounds
 *		KERN_NO_SPACE		mask violation, or the range is occupied by
 *					something other than a single "4K" mapping
 *		or the result of vm_map_wire_kernel() if the map requires wiring.
 *
 *	Locking:
 *		Takes the map lock.  The map is left locked on return only when
 *		the lock was acquired and "vmk_flags.vmkf_keep_map_locked" is set;
 *		the early "return" paths never take the lock.
 */
kern_return_t
vm_map_enter_fourk(
	vm_map_t                map,
	vm_map_offset_t         *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                alias,
	vm_object_t             object,
	vm_object_offset_t      offset,
	boolean_t               needs_copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_entry_t          entry, new_entry;
	vm_map_offset_t         start, fourk_start;
	vm_map_offset_t         end, fourk_end;
	vm_map_size_t           fourk_size;
	kern_return_t           result = KERN_SUCCESS;
	vm_map_t                zap_old_map = VM_MAP_NULL;
	vm_map_t                zap_new_map = VM_MAP_NULL;
	boolean_t               map_locked = FALSE;
	boolean_t               pmap_empty = TRUE;
	boolean_t               new_mapping_established = FALSE;
	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t               is_submap = vmk_flags.vmkf_submap;
	boolean_t               permanent = vmk_flags.vmkf_permanent;
	/*
	 * Take "no copy on read" from its own flag: it used to be
	 * initialized from "vmkf_permanent", which made every permanent
	 * mapping "no copy on read" and ignored the caller's actual request.
	 */
	boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
//	boolean_t		iokit_acct = vmk_flags.vmkf_iokit_acct;
	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	vm_map_offset_t         effective_min_offset, effective_max_offset;
	kern_return_t           kr;
	boolean_t               clear_map_aligned = FALSE;
	memory_object_t         fourk_mem_obj;
	vm_object_t             fourk_object;
	vm_map_offset_t         fourk_pager_offset;
	int                     fourk_pager_index_start, fourk_pager_index_num;
	int                     cur_idx;
	boolean_t               fourk_copy;
	vm_object_t             copy_object;
	vm_object_offset_t      copy_offset;

	fourk_mem_obj = MEMORY_OBJECT_NULL;
	fourk_object = VM_OBJECT_NULL;

	if (superpage_size) {
		return KERN_NOT_SUPPORTED;
	}

	/*
	 * Write+execute is only tolerated for JIT mappings: strip the
	 * "execute" permission otherwise.
	 */
	if ((cur_protection & VM_PROT_WRITE) &&
	    (cur_protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
	    map != kernel_map &&
	    cs_process_enforcement(NULL) &&
#endif /* !CONFIG_EMBEDDED */
	    !entry_for_jit) {
		DTRACE_VM3(cs_wx,
		    uint64_t, 0,
		    uint64_t, 0,
		    vm_prot_t, cur_protection);
		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
		    "turning off execute\n",
		    proc_selfpid(),
		    (current_task()->bsd_info
		    ? proc_name_address(current_task()->bsd_info)
		    : "?"),
		    __FUNCTION__);
		cur_protection &= ~VM_PROT_EXECUTE;
	}

	/*
	 * If the task has requested executable lockdown,
	 * deny any new executable mapping.
	 */
	if (map->map_disallow_new_exec == TRUE) {
		if (cur_protection & VM_PROT_EXECUTE) {
			return KERN_PROTECTION_FAILURE;
		}
	}

	/* none of these flavors of mapping are supported at 4K granularity */
	if (is_submap) {
		return KERN_NOT_SUPPORTED;
	}
	if (vmk_flags.vmkf_already) {
		return KERN_NOT_SUPPORTED;
	}
	if (purgable || entry_for_jit) {
		return KERN_NOT_SUPPORTED;
	}

	effective_min_offset = map->min_offset;

	if (vmk_flags.vmkf_beyond_max) {
		return KERN_NOT_SUPPORTED;
	} else {
		effective_max_offset = map->max_offset;
	}

	if (size == 0 ||
	    (offset & FOURK_PAGE_MASK) != 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * RETURN() must only be used once the map is locked: "BailOut"
	 * asserts that the lock is held.
	 */
#define RETURN(value)   { result = value; goto BailOut; }

	assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
	assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));

	if (!anywhere && overwrite) {
		return KERN_NOT_SUPPORTED;
	}
	/*
	 * NOTE: because of the early return just above, this block is
	 * currently unreachable and "zap_old_map" stays VM_MAP_NULL.
	 */
	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
		    *address,
		    *address + size,
		    map->hdr.entries_pageable);
		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
		vm_map_disable_hole_optimization(zap_old_map);
	}

	/* remember the exact 4K-granular range that was requested... */
	fourk_start = *address;
	fourk_size = size;
	fourk_end = fourk_start + fourk_size;

	/* ... and widen the range to the map's own page boundaries */
	start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
	size = end - start;

	if (anywhere) {
		return KERN_NOT_SUPPORTED;
	} else {
		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0) {
			RETURN(KERN_NO_SPACE);
		}

		/*
		 *	...	the address is within bounds
		 */

		end = start + size;

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			(void) vm_map_delete(map, start, end,
			    (VM_MAP_REMOVE_SAVE_ENTRIES |
			    VM_MAP_REMOVE_NO_MAP_ALIGN),
			    zap_old_map);
		}

		/*
		 *	...	the starting address isn't allocated
		 */
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_object_t cur_object, shadow_object;

			/*
			 * We might already have some 4K mappings
			 * in a 16K page here.
			 */

			if (entry->vme_end - entry->vme_start
			    != SIXTEENK_PAGE_SIZE) {
				RETURN(KERN_NO_SPACE);
			}
			if (entry->is_sub_map) {
				RETURN(KERN_NO_SPACE);
			}
			if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
				RETURN(KERN_NO_SPACE);
			}

			/*
			 * go all the way down the shadow chain, locking the
			 * next object before unlocking the current one
			 */
			cur_object = VME_OBJECT(entry);
			vm_object_lock(cur_object);
			while (cur_object->shadow != VM_OBJECT_NULL) {
				shadow_object = cur_object->shadow;
				vm_object_lock(shadow_object);
				vm_object_unlock(cur_object);
				cur_object = shadow_object;
				shadow_object = VM_OBJECT_NULL;
			}
			/* the bottom object must be backed by a "4K" pager */
			if (cur_object->internal ||
			    cur_object->pager == NULL) {
				vm_object_unlock(cur_object);
				RETURN(KERN_NO_SPACE);
			}
			if (cur_object->pager->mo_pager_ops
			    != &fourk_pager_ops) {
				vm_object_unlock(cur_object);
				RETURN(KERN_NO_SPACE);
			}
			fourk_object = cur_object;
			fourk_mem_obj = fourk_object->pager;

			/* keep the "4K" object alive */
			vm_object_reference_locked(fourk_object);
			vm_object_unlock(fourk_object);

			/* merge permissions */
			entry->protection |= cur_protection;
			entry->max_protection |= max_protection;
			if ((entry->protection & (VM_PROT_WRITE |
			    VM_PROT_EXECUTE)) ==
			    (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
			    fourk_binary_compatibility_unsafe &&
			    fourk_binary_compatibility_allow_wx) {
				/* write+execute: need to be "jit" */
				entry->used_for_jit = TRUE;
			}

			/*
			 * No new entry gets created on this path, but the
			 * wiring code at the end of this function reads
			 * "new_entry->protection": point "new_entry" at the
			 * entry we are re-using so that this is not an
			 * uninitialized pointer dereference.
			 */
			new_entry = entry;

			goto map_in_fourk_pager;
		}

		/*
		 *	...	the next region doesn't overlap the
		 *		end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			RETURN(KERN_NO_SPACE);
		}
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	/* create a new "4K" pager */
	fourk_mem_obj = fourk_pager_create();
	fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
	assert(fourk_object);

	/* keep the "4K" object alive */
	vm_object_reference(fourk_object);

	/* create a "copy" object, to map the "4K" object copy-on-write */
	fourk_copy = TRUE;
	result = vm_object_copy_strategically(fourk_object,
	    0,
	    end - start,
	    &copy_object,
	    &copy_offset,
	    &fourk_copy);
	assert(result == KERN_SUCCESS);
	assert(copy_object != VM_OBJECT_NULL);
	assert(copy_offset == 0);

	/* take a reference on the copy object, for this mapping */
	vm_object_reference(copy_object);

	/* map the "4K" pager's copy object */
	new_entry =
	    vm_map_entry_insert(map, entry,
	    vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(map)),
	    vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(map)),
	    copy_object,
	    0,                         /* offset */
	    FALSE,                         /* needs_copy */
	    FALSE,
	    FALSE,
	    cur_protection, max_protection,
	    VM_BEHAVIOR_DEFAULT,
	    ((entry_for_jit)
	    ? VM_INHERIT_NONE
	    : inheritance),
	    0,
	    no_cache,
	    permanent,
	    no_copy_on_read,
	    superpage_size,
	    clear_map_aligned,
	    is_submap,
	    FALSE,                         /* jit */
	    alias);
	entry = new_entry;

#if VM_MAP_DEBUG_FOURK
	if (vm_map_debug_fourk) {
		printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
		    map,
		    (uint64_t) entry->vme_start,
		    (uint64_t) entry->vme_end,
		    fourk_mem_obj);
	}
#endif /* VM_MAP_DEBUG_FOURK */

	new_mapping_established = TRUE;

map_in_fourk_pager:
	/* "map" the original "object" where it belongs in the "4K" pager */
	fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
	fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
	/* never populate past the 4th (last) slot of the pager */
	if (fourk_size > SIXTEENK_PAGE_SIZE) {
		fourk_pager_index_num = 4;
	} else {
		fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
	}
	if (fourk_pager_index_start + fourk_pager_index_num > 4) {
		fourk_pager_index_num = 4 - fourk_pager_index_start;
	}
	for (cur_idx = 0;
	    cur_idx < fourk_pager_index_num;
	    cur_idx++) {
		vm_object_t             old_object;
		vm_object_offset_t      old_offset;

		kr = fourk_pager_populate(fourk_mem_obj,
		    TRUE,                       /* overwrite */
		    fourk_pager_index_start + cur_idx,
		    object,
		    (object
		    ? (offset +
		    (cur_idx * FOURK_PAGE_SIZE))
		    : 0),
		    &old_object,
		    &old_offset);
#if VM_MAP_DEBUG_FOURK
		if (vm_map_debug_fourk) {
			if (old_object == (vm_object_t) -1 &&
			    old_offset == (vm_object_offset_t) -1) {
				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
				    "pager [%p:0x%llx] "
				    "populate[%d] "
				    "[object:%p,offset:0x%llx]\n",
				    map,
				    (uint64_t) entry->vme_start,
				    (uint64_t) entry->vme_end,
				    fourk_mem_obj,
				    VME_OFFSET(entry),
				    fourk_pager_index_start + cur_idx,
				    object,
				    (object
				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
				    : 0));
			} else {
				printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
				    "pager [%p:0x%llx] "
				    "populate[%d] [object:%p,offset:0x%llx] "
				    "old [%p:0x%llx]\n",
				    map,
				    (uint64_t) entry->vme_start,
				    (uint64_t) entry->vme_end,
				    fourk_mem_obj,
				    VME_OFFSET(entry),
				    fourk_pager_index_start + cur_idx,
				    object,
				    (object
				    ? (offset + (cur_idx * FOURK_PAGE_SIZE))
				    : 0),
				    old_object,
				    old_offset);
			}
		}
#endif /* VM_MAP_DEBUG_FOURK */

		assert(kr == KERN_SUCCESS);
		/*
		 * The slot now refers to "object" instead of "old_object":
		 * adjust the reference counts accordingly, unless the slot's
		 * object did not change.
		 */
		if (object != old_object &&
		    object != VM_OBJECT_NULL &&
		    object != (vm_object_t) -1) {
			vm_object_reference(object);
		}
		if (object != old_object &&
		    old_object != VM_OBJECT_NULL &&
		    old_object != (vm_object_t) -1) {
			vm_object_deallocate(old_object);
		}
	}

BailOut:
	assert(map_locked == TRUE);

	/* drop the reference that kept the "4K" object alive */
	if (fourk_object != VM_OBJECT_NULL) {
		vm_object_deallocate(fourk_object);
		fourk_object = VM_OBJECT_NULL;
		fourk_mem_obj = MEMORY_OBJECT_NULL;
	}

	if (result == KERN_SUCCESS) {
		vm_prot_t pager_prot;
		memory_object_t pager;

#if DEBUG
		if (pmap_empty &&
		    !(vmk_flags.vmkf_no_pmap_check)) {
			assert(vm_map_pmap_is_empty(map,
			    *address,
			    *address + size));
		}
#endif /* DEBUG */

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (needs_copy) {
			/*
			 * Copy-On-Write mapping: won't modify
			 * the memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		if (!is_submap &&
		    object != VM_OBJECT_NULL &&
		    object->named &&
		    object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(object);
			pager = object->pager;
			/* re-check now that the object is locked */
			if (object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(object->pager_ready);
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				kr = memory_object_map(pager, pager_prot);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			vm_object_unlock(object);
		}
		/*
		 * NOTE: "fourk_object" has been reset to VM_OBJECT_NULL
		 * at the top of "BailOut", so this block is never entered.
		 */
		if (!is_submap &&
		    fourk_object != VM_OBJECT_NULL &&
		    fourk_object->named &&
		    fourk_object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(fourk_object);
			pager = fourk_object->pager;
			if (fourk_object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(fourk_object->pager_ready);
				vm_object_mapping_wait(fourk_object,
				    THREAD_UNINT);
				vm_object_mapping_begin(fourk_object);
				vm_object_unlock(fourk_object);

				kr = memory_object_map(pager, VM_PROT_READ);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(fourk_object);
				vm_object_mapping_end(fourk_object);
			}
			vm_object_unlock(fourk_object);
		}
	}

	assert(map_locked == TRUE);

	if (!keep_map_locked) {
		vm_map_unlock(map);
		map_locked = FALSE;
	}

	/*
	 * We can't hold the map lock if we enter this block.
	 */

	if (result == KERN_SUCCESS) {
		/*	Wire down the new entry if the user
		 *	requested all new map entries be wired.
		 */
		if ((map->wiring_required) || (superpage_size)) {
			assert(!keep_map_locked);
			pmap_empty = FALSE; /* pmap won't be empty */
			kr = vm_map_wire_kernel(map, start, end,
			    new_entry->protection, VM_KERN_MEMORY_MLOCK,
			    TRUE);
			result = kr;
		}
	}

	if (result != KERN_SUCCESS) {
		if (new_mapping_established) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try and do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
			 */
			zap_new_map = vm_map_create(PMAP_NULL,
			    *address,
			    *address + size,
			    map->hdr.entries_pageable);
			vm_map_set_page_shift(zap_new_map,
			    VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_new_map);

			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}
			(void) vm_map_delete(map, *address, *address + size,
			    (VM_MAP_REMOVE_SAVE_ENTRIES |
			    VM_MAP_REMOVE_NO_MAP_ALIGN),
			    zap_new_map);
		}
		if (zap_old_map != VM_MAP_NULL &&
		    zap_old_map->hdr.nentries != 0) {
			vm_map_entry_t  entry1, entry2;

			/*
			 * The new mapping failed.  Attempt to restore
			 * the old mappings, saved in the "zap_old_map".
			 */
			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}

			/* first check if the coast is still clear */
			start = vm_map_first_entry(zap_old_map)->vme_start;
			end = vm_map_last_entry(zap_old_map)->vme_end;
			if (vm_map_lookup_entry(map, start, &entry1) ||
			    vm_map_lookup_entry(map, end, &entry2) ||
			    entry1 != entry2) {
				/*
				 * Part of that range has already been
				 * re-mapped:  we can't restore the old
				 * mappings...
				 */
				vm_map_enter_restore_failures++;
			} else {
				/*
				 * Transfer the saved map entries from
				 * "zap_old_map" to the original "map",
				 * inserting them all after "entry1".
				 */
				for (entry2 = vm_map_first_entry(zap_old_map);
				    entry2 != vm_map_to_entry(zap_old_map);
				    entry2 = vm_map_first_entry(zap_old_map)) {
					vm_map_size_t entry_size;

					entry_size = (entry2->vme_end -
					    entry2->vme_start);
					vm_map_store_entry_unlink(zap_old_map,
					    entry2);
					zap_old_map->size -= entry_size;
					vm_map_store_entry_link(map, entry1, entry2,
					    VM_MAP_KERNEL_FLAGS_NONE);
					map->size += entry_size;
					entry1 = entry2;
				}
				if (map->wiring_required) {
					/*
					 * XXX TODO: we should rewire the
					 * old pages here...
					 */
				}
				vm_map_enter_restore_successes++;
			}
		}
	}

	/*
	 * The caller is responsible for releasing the lock if it requested to
	 * keep the map locked.
	 */
	if (map_locked && !keep_map_locked) {
		vm_map_unlock(map);
	}

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_old_map != VM_MAP_NULL) {
		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_old_map = VM_MAP_NULL;
	}
	if (zap_new_map != VM_MAP_NULL) {
		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_new_map = VM_MAP_NULL;
	}

	return result;

#undef  RETURN
}
#endif /* __arm64__ */
3947
/*
 * Statistics for the prefault optimization performed by
 * vm_map_enter_mem_object_helper():
 *
 *   vm_prefault_nb_pages    bumped once per page that was successfully
 *                           entered in the pmap by the prefault loop.
 *   vm_prefault_nb_bailout  bumped each time entering a page failed and
 *                           the prefault loop gave up.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;
3953
3954static kern_return_t
3955vm_map_enter_mem_object_helper(
0a7de745
A
3956 vm_map_t target_map,
3957 vm_map_offset_t *address,
3958 vm_map_size_t initial_size,
3959 vm_map_offset_t mask,
3960 int flags,
3961 vm_map_kernel_flags_t vmk_flags,
3962 vm_tag_t tag,
3963 ipc_port_t port,
3964 vm_object_offset_t offset,
3965 boolean_t copy,
3966 vm_prot_t cur_protection,
3967 vm_prot_t max_protection,
3968 vm_inherit_t inheritance,
3969 upl_page_list_ptr_t page_list,
3970 unsigned int page_list_count)
5ba3f43e 3971{
0a7de745
A
3972 vm_map_address_t map_addr;
3973 vm_map_size_t map_size;
3974 vm_object_t object;
3975 vm_object_size_t size;
3976 kern_return_t result;
3977 boolean_t mask_cur_protection, mask_max_protection;
3978 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3979 vm_map_offset_t offset_in_mapping = 0;
5ba3f43e 3980#if __arm64__
0a7de745 3981 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e
A
3982#endif /* __arm64__ */
3983
3984 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3985
3986 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3987 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3988 cur_protection &= ~VM_PROT_IS_MASK;
3989 max_protection &= ~VM_PROT_IS_MASK;
3990
3991 /*
3992 * Check arguments for validity
3993 */
3994 if ((target_map == VM_MAP_NULL) ||
3995 (cur_protection & ~VM_PROT_ALL) ||
3996 (max_protection & ~VM_PROT_ALL) ||
3997 (inheritance > VM_INHERIT_LAST_VALID) ||
3998 (try_prefault && (copy || !page_list)) ||
3999 initial_size == 0) {
4000 return KERN_INVALID_ARGUMENT;
4001 }
4002
4003#if __arm64__
4004 if (fourk) {
4005 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4006 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4007 } else
4008#endif /* __arm64__ */
4009 {
4010 map_addr = vm_map_trunc_page(*address,
0a7de745 4011 VM_MAP_PAGE_MASK(target_map));
5ba3f43e 4012 map_size = vm_map_round_page(initial_size,
0a7de745 4013 VM_MAP_PAGE_MASK(target_map));
5ba3f43e
A
4014 }
4015 size = vm_object_round_page(initial_size);
4016
4017 /*
4018 * Find the vm object (if any) corresponding to this port.
4019 */
4020 if (!IP_VALID(port)) {
4021 object = VM_OBJECT_NULL;
4022 offset = 0;
4023 copy = FALSE;
4024 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
0a7de745 4025 vm_named_entry_t named_entry;
5ba3f43e 4026
ea3f0419 4027 named_entry = (vm_named_entry_t) ip_get_kobject(port);
5ba3f43e
A
4028
4029 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4030 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
5ba3f43e
A
4031 offset += named_entry->data_offset;
4032 }
4033
4034 /* a few checks to make sure user is obeying rules */
4035 if (size == 0) {
0a7de745 4036 if (offset >= named_entry->size) {
5ba3f43e 4037 return KERN_INVALID_RIGHT;
0a7de745 4038 }
5ba3f43e
A
4039 size = named_entry->size - offset;
4040 }
4041 if (mask_max_protection) {
4042 max_protection &= named_entry->protection;
4043 }
4044 if (mask_cur_protection) {
4045 cur_protection &= named_entry->protection;
4046 }
4047 if ((named_entry->protection & max_protection) !=
0a7de745 4048 max_protection) {
5ba3f43e 4049 return KERN_INVALID_RIGHT;
0a7de745 4050 }
5ba3f43e 4051 if ((named_entry->protection & cur_protection) !=
0a7de745 4052 cur_protection) {
5ba3f43e 4053 return KERN_INVALID_RIGHT;
0a7de745 4054 }
5ba3f43e
A
4055 if (offset + size < offset) {
4056 /* overflow */
4057 return KERN_INVALID_ARGUMENT;
4058 }
4059 if (named_entry->size < (offset + initial_size)) {
4060 return KERN_INVALID_ARGUMENT;
4061 }
4062
4063 if (named_entry->is_copy) {
4064 /* for a vm_map_copy, we can only map it whole */
4065 if ((size != named_entry->size) &&
4066 (vm_map_round_page(size,
0a7de745
A
4067 VM_MAP_PAGE_MASK(target_map)) ==
4068 named_entry->size)) {
5ba3f43e 4069 /* XXX FBDP use the rounded size... */
39236c6e
A
4070 size = vm_map_round_page(
4071 size,
4072 VM_MAP_PAGE_MASK(target_map));
4073 }
5ba3f43e 4074
fe8ab488
A
4075 if (!(flags & VM_FLAGS_ANYWHERE) &&
4076 (offset != 0 ||
0a7de745 4077 size != named_entry->size)) {
fe8ab488
A
4078 /*
4079 * XXX for a mapping at a "fixed" address,
4080 * we can't trim after mapping the whole
4081 * memory entry, so reject a request for a
4082 * partial mapping.
4083 */
39236c6e
A
4084 return KERN_INVALID_ARGUMENT;
4085 }
4086 }
4087
2d21ac55
A
4088 /* the callers parameter offset is defined to be the */
4089 /* offset from beginning of named entry offset in object */
4090 offset = offset + named_entry->offset;
5ba3f43e 4091
0a7de745
A
4092 if (!VM_MAP_PAGE_ALIGNED(size,
4093 VM_MAP_PAGE_MASK(target_map))) {
39236c6e
A
4094 /*
4095 * Let's not map more than requested;
4096 * vm_map_enter() will handle this "not map-aligned"
4097 * case.
4098 */
4099 map_size = size;
4100 }
4101
2d21ac55
A
4102 named_entry_lock(named_entry);
4103 if (named_entry->is_sub_map) {
0a7de745 4104 vm_map_t submap;
2d21ac55 4105
3e170ce0 4106 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4107 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4108 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4109 }
4110
2d21ac55
A
4111 submap = named_entry->backing.map;
4112 vm_map_lock(submap);
4113 vm_map_reference(submap);
4114 vm_map_unlock(submap);
4115 named_entry_unlock(named_entry);
4116
5ba3f43e
A
4117 vmk_flags.vmkf_submap = TRUE;
4118
2d21ac55 4119 result = vm_map_enter(target_map,
0a7de745
A
4120 &map_addr,
4121 map_size,
4122 mask,
4123 flags,
4124 vmk_flags,
4125 tag,
4126 (vm_object_t)(uintptr_t) submap,
4127 offset,
4128 copy,
4129 cur_protection,
4130 max_protection,
4131 inheritance);
2d21ac55
A
4132 if (result != KERN_SUCCESS) {
4133 vm_map_deallocate(submap);
4134 } else {
4135 /*
4136 * No need to lock "submap" just to check its
4137 * "mapped" flag: that flag is never reset
4138 * once it's been set and if we race, we'll
4139 * just end up setting it twice, which is OK.
4140 */
316670eb
A
4141 if (submap->mapped_in_other_pmaps == FALSE &&
4142 vm_map_pmap(submap) != PMAP_NULL &&
4143 vm_map_pmap(submap) !=
4144 vm_map_pmap(target_map)) {
2d21ac55 4145 /*
316670eb
A
4146 * This submap is being mapped in a map
4147 * that uses a different pmap.
4148 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 4149 * to indicate that we now need to
316670eb
A
4150 * remove mappings from all pmaps rather
4151 * than just the submap's pmap.
2d21ac55
A
4152 */
4153 vm_map_lock(submap);
316670eb 4154 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
4155 vm_map_unlock(submap);
4156 }
4157 *address = map_addr;
4158 }
4159 return result;
39236c6e 4160 } else if (named_entry->is_copy) {
0a7de745
A
4161 kern_return_t kr;
4162 vm_map_copy_t copy_map;
4163 vm_map_entry_t copy_entry;
4164 vm_map_offset_t copy_addr;
39236c6e
A
4165
4166 if (flags & ~(VM_FLAGS_FIXED |
0a7de745
A
4167 VM_FLAGS_ANYWHERE |
4168 VM_FLAGS_OVERWRITE |
4169 VM_FLAGS_RETURN_4K_DATA_ADDR |
4170 VM_FLAGS_RETURN_DATA_ADDR |
4171 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
4172 named_entry_unlock(named_entry);
4173 return KERN_INVALID_ARGUMENT;
4174 }
4175
3e170ce0 4176 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4177 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 4178 offset_in_mapping = offset - vm_object_trunc_page(offset);
0a7de745 4179 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
3e170ce0 4180 offset_in_mapping &= ~((signed)(0xFFF));
0a7de745 4181 }
39236c6e
A
4182 offset = vm_object_trunc_page(offset);
4183 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4184 }
4185
4186 copy_map = named_entry->backing.copy;
4187 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4188 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4189 /* unsupported type; should not happen */
4190 printf("vm_map_enter_mem_object: "
0a7de745
A
4191 "memory_entry->backing.copy "
4192 "unsupported type 0x%x\n",
4193 copy_map->type);
39236c6e
A
4194 named_entry_unlock(named_entry);
4195 return KERN_INVALID_ARGUMENT;
4196 }
4197
4198 /* reserve a contiguous range */
4199 kr = vm_map_enter(target_map,
0a7de745
A
4200 &map_addr,
4201 /* map whole mem entry, trim later: */
4202 named_entry->size,
4203 mask,
4204 flags & (VM_FLAGS_ANYWHERE |
4205 VM_FLAGS_OVERWRITE |
4206 VM_FLAGS_RETURN_4K_DATA_ADDR |
4207 VM_FLAGS_RETURN_DATA_ADDR),
4208 vmk_flags,
4209 tag,
4210 VM_OBJECT_NULL,
4211 0,
4212 FALSE, /* copy */
4213 cur_protection,
4214 max_protection,
4215 inheritance);
39236c6e
A
4216 if (kr != KERN_SUCCESS) {
4217 named_entry_unlock(named_entry);
4218 return kr;
4219 }
4220
4221 copy_addr = map_addr;
4222
4223 for (copy_entry = vm_map_copy_first_entry(copy_map);
0a7de745
A
4224 copy_entry != vm_map_copy_to_entry(copy_map);
4225 copy_entry = copy_entry->vme_next) {
4226 int remap_flags;
4227 vm_map_kernel_flags_t vmk_remap_flags;
4228 vm_map_t copy_submap;
4229 vm_object_t copy_object;
4230 vm_map_size_t copy_size;
4231 vm_object_offset_t copy_offset;
4232 int copy_vm_alias;
39236c6e 4233
5ba3f43e
A
4234 remap_flags = 0;
4235 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4236
813fb2f6 4237 copy_object = VME_OBJECT(copy_entry);
3e170ce0 4238 copy_offset = VME_OFFSET(copy_entry);
39236c6e 4239 copy_size = (copy_entry->vme_end -
0a7de745 4240 copy_entry->vme_start);
39037602
A
4241 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4242 if (copy_vm_alias == 0) {
4243 /*
4244 * Caller does not want a specific
4245 * alias for this new mapping: use
4246 * the alias of the original mapping.
4247 */
4248 copy_vm_alias = VME_ALIAS(copy_entry);
4249 }
39236c6e
A
4250
4251 /* sanity check */
fe8ab488
A
4252 if ((copy_addr + copy_size) >
4253 (map_addr +
0a7de745 4254 named_entry->size /* XXX full size */)) {
39236c6e
A
4255 /* over-mapping too much !? */
4256 kr = KERN_INVALID_ARGUMENT;
4257 /* abort */
4258 break;
4259 }
4260
4261 /* take a reference on the object */
4262 if (copy_entry->is_sub_map) {
5ba3f43e 4263 vmk_remap_flags.vmkf_submap = TRUE;
3e170ce0 4264 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
4265 vm_map_lock(copy_submap);
4266 vm_map_reference(copy_submap);
4267 vm_map_unlock(copy_submap);
d9a64523 4268 copy_object = (vm_object_t)(uintptr_t) copy_submap;
813fb2f6 4269 } else if (!copy &&
0a7de745
A
4270 copy_object != VM_OBJECT_NULL &&
4271 (copy_entry->needs_copy ||
4272 copy_object->shadowed ||
4273 (!copy_object->true_share &&
4274 !copy_entry->is_shared &&
4275 copy_object->vo_size > copy_size))) {
813fb2f6
A
4276 /*
4277 * We need to resolve our side of this
4278 * "symmetric" copy-on-write now; we
4279 * need a new object to map and share,
4280 * instead of the current one which
4281 * might still be shared with the
4282 * original mapping.
4283 *
4284 * Note: A "vm_map_copy_t" does not
4285 * have a lock but we're protected by
4286 * the named entry's lock here.
4287 */
4288 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4289 VME_OBJECT_SHADOW(copy_entry, copy_size);
4290 if (!copy_entry->needs_copy &&
4291 copy_entry->protection & VM_PROT_WRITE) {
4292 vm_prot_t prot;
4293
4294 prot = copy_entry->protection & ~VM_PROT_WRITE;
4295 vm_object_pmap_protect(copy_object,
0a7de745
A
4296 copy_offset,
4297 copy_size,
4298 PMAP_NULL,
4299 0,
4300 prot);
813fb2f6
A
4301 }
4302
4303 copy_entry->needs_copy = FALSE;
4304 copy_entry->is_shared = TRUE;
4305 copy_object = VME_OBJECT(copy_entry);
4306 copy_offset = VME_OFFSET(copy_entry);
4307 vm_object_lock(copy_object);
4308 vm_object_reference_locked(copy_object);
4309 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4310 /* we're about to make a shared mapping of this object */
4311 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4312 copy_object->true_share = TRUE;
4313 }
4314 vm_object_unlock(copy_object);
39236c6e 4315 } else {
813fb2f6
A
4316 /*
4317 * We already have the right object
4318 * to map.
4319 */
3e170ce0 4320 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
4321 vm_object_reference(copy_object);
4322 }
4323
4324 /* over-map the object into destination */
4325 remap_flags |= flags;
4326 remap_flags |= VM_FLAGS_FIXED;
4327 remap_flags |= VM_FLAGS_OVERWRITE;
4328 remap_flags &= ~VM_FLAGS_ANYWHERE;
813fb2f6
A
4329 if (!copy && !copy_entry->is_sub_map) {
4330 /*
4331 * copy-on-write should have been
4332 * resolved at this point, or we would
4333 * end up sharing instead of copying.
4334 */
4335 assert(!copy_entry->needs_copy);
4336 }
d9a64523
A
4337#if !CONFIG_EMBEDDED
4338 if (copy_entry->used_for_jit) {
4339 vmk_remap_flags.vmkf_map_jit = TRUE;
4340 }
4341#endif /* !CONFIG_EMBEDDED */
39236c6e 4342 kr = vm_map_enter(target_map,
0a7de745
A
4343 &copy_addr,
4344 copy_size,
4345 (vm_map_offset_t) 0,
4346 remap_flags,
4347 vmk_remap_flags,
4348 copy_vm_alias,
4349 copy_object,
4350 copy_offset,
4351 ((copy_object == NULL) ? FALSE : copy),
4352 cur_protection,
4353 max_protection,
4354 inheritance);
39236c6e
A
4355 if (kr != KERN_SUCCESS) {
4356 if (copy_entry->is_sub_map) {
4357 vm_map_deallocate(copy_submap);
4358 } else {
4359 vm_object_deallocate(copy_object);
4360 }
4361 /* abort */
4362 break;
4363 }
4364
4365 /* next mapping */
4366 copy_addr += copy_size;
4367 }
5ba3f43e 4368
39236c6e 4369 if (kr == KERN_SUCCESS) {
3e170ce0 4370 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4371 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4372 *address = map_addr + offset_in_mapping;
4373 } else {
4374 *address = map_addr;
4375 }
fe8ab488
A
4376
4377 if (offset) {
4378 /*
4379 * Trim in front, from 0 to "offset".
4380 */
4381 vm_map_remove(target_map,
0a7de745
A
4382 map_addr,
4383 map_addr + offset,
4384 VM_MAP_REMOVE_NO_FLAGS);
fe8ab488
A
4385 *address += offset;
4386 }
4387 if (offset + map_size < named_entry->size) {
4388 /*
4389 * Trim in back, from
4390 * "offset + map_size" to
4391 * "named_entry->size".
4392 */
4393 vm_map_remove(target_map,
0a7de745
A
4394 (map_addr +
4395 offset + map_size),
4396 (map_addr +
4397 named_entry->size),
4398 VM_MAP_REMOVE_NO_FLAGS);
fe8ab488 4399 }
39236c6e
A
4400 }
4401 named_entry_unlock(named_entry);
4402
4403 if (kr != KERN_SUCCESS) {
0a7de745 4404 if (!(flags & VM_FLAGS_OVERWRITE)) {
39236c6e
A
4405 /* deallocate the contiguous range */
4406 (void) vm_deallocate(target_map,
0a7de745
A
4407 map_addr,
4408 map_size);
39236c6e
A
4409 }
4410 }
4411
4412 return kr;
2d21ac55 4413 } else {
0a7de745
A
4414 unsigned int access;
4415 vm_prot_t protections;
4416 unsigned int wimg_mode;
5ba3f43e
A
4417
4418 /* we are mapping a VM object */
4419
4420 protections = named_entry->protection & VM_PROT_ALL;
4421 access = GET_MAP_MEM(named_entry->protection);
4422
3e170ce0 4423 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4424 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 4425 offset_in_mapping = offset - vm_object_trunc_page(offset);
0a7de745 4426 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
3e170ce0 4427 offset_in_mapping &= ~((signed)(0xFFF));
0a7de745 4428 }
39236c6e
A
4429 offset = vm_object_trunc_page(offset);
4430 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
5ba3f43e 4431 }
39236c6e 4432
2d21ac55
A
4433 object = named_entry->backing.object;
4434 assert(object != VM_OBJECT_NULL);
5ba3f43e 4435 vm_object_lock(object);
2d21ac55 4436 named_entry_unlock(named_entry);
5ba3f43e
A
4437
4438 vm_object_reference_locked(object);
4439
4440 wimg_mode = object->wimg_bits;
0a7de745
A
4441 vm_prot_to_wimg(access, &wimg_mode);
4442 if (object->wimg_bits != wimg_mode) {
5ba3f43e 4443 vm_object_change_wimg_mode(object, wimg_mode);
0a7de745 4444 }
5ba3f43e
A
4445
4446 vm_object_unlock(object);
2d21ac55
A
4447 }
4448 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4449 /*
4450 * JMM - This is temporary until we unify named entries
4451 * and raw memory objects.
4452 *
4453 * Detected fake ip_kotype for a memory object. In
4454 * this case, the port isn't really a port at all, but
4455 * instead is just a raw memory object.
4456 */
3e170ce0 4457 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4458 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4459 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4460 }
4461
5ba3f43e 4462 object = memory_object_to_vm_object((memory_object_t)port);
0a7de745 4463 if (object == VM_OBJECT_NULL) {
2d21ac55 4464 return KERN_INVALID_OBJECT;
0a7de745 4465 }
5ba3f43e 4466 vm_object_reference(object);
2d21ac55
A
4467
4468 /* wait for object (if any) to be ready */
4469 if (object != VM_OBJECT_NULL) {
4470 if (object == kernel_object) {
4471 printf("Warning: Attempt to map kernel object"
0a7de745 4472 " by a non-private kernel entity\n");
2d21ac55
A
4473 return KERN_INVALID_OBJECT;
4474 }
b0d623f7 4475 if (!object->pager_ready) {
2d21ac55 4476 vm_object_lock(object);
b0d623f7
A
4477
4478 while (!object->pager_ready) {
4479 vm_object_wait(object,
0a7de745
A
4480 VM_OBJECT_EVENT_PAGER_READY,
4481 THREAD_UNINT);
b0d623f7
A
4482 vm_object_lock(object);
4483 }
4484 vm_object_unlock(object);
2d21ac55 4485 }
2d21ac55
A
4486 }
4487 } else {
4488 return KERN_INVALID_OBJECT;
4489 }
4490
593a1d5f
A
4491 if (object != VM_OBJECT_NULL &&
4492 object->named &&
4493 object->pager != MEMORY_OBJECT_NULL &&
4494 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4495 memory_object_t pager;
0a7de745
A
4496 vm_prot_t pager_prot;
4497 kern_return_t kr;
593a1d5f
A
4498
4499 /*
4500 * For "named" VM objects, let the pager know that the
4501 * memory object is being mapped. Some pagers need to keep
4502 * track of this, to know when they can reclaim the memory
4503 * object, for example.
4504 * VM calls memory_object_map() for each mapping (specifying
4505 * the protection of each mapping) and calls
4506 * memory_object_last_unmap() when all the mappings are gone.
4507 */
4508 pager_prot = max_protection;
4509 if (copy) {
4510 /*
4511 * Copy-On-Write mapping: won't modify the
4512 * memory object.
4513 */
4514 pager_prot &= ~VM_PROT_WRITE;
4515 }
4516 vm_object_lock(object);
4517 pager = object->pager;
4518 if (object->named &&
4519 pager != MEMORY_OBJECT_NULL &&
4520 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4521 assert(object->pager_ready);
4522 vm_object_mapping_wait(object, THREAD_UNINT);
4523 vm_object_mapping_begin(object);
4524 vm_object_unlock(object);
4525
4526 kr = memory_object_map(pager, pager_prot);
4527 assert(kr == KERN_SUCCESS);
4528
4529 vm_object_lock(object);
4530 vm_object_mapping_end(object);
4531 }
4532 vm_object_unlock(object);
4533 }
4534
2d21ac55
A
4535 /*
4536 * Perform the copy if requested
4537 */
4538
4539 if (copy) {
0a7de745
A
4540 vm_object_t new_object;
4541 vm_object_offset_t new_offset;
2d21ac55 4542
3e170ce0 4543 result = vm_object_copy_strategically(object, offset,
0a7de745
A
4544 map_size,
4545 &new_object, &new_offset,
4546 &copy);
2d21ac55
A
4547
4548
4549 if (result == KERN_MEMORY_RESTART_COPY) {
4550 boolean_t success;
4551 boolean_t src_needs_copy;
4552
4553 /*
4554 * XXX
4555 * We currently ignore src_needs_copy.
4556 * This really is the issue of how to make
4557 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4558 * non-kernel users to use. Solution forthcoming.
4559 * In the meantime, since we don't allow non-kernel
4560 * memory managers to specify symmetric copy,
4561 * we won't run into problems here.
4562 */
4563 new_object = object;
4564 new_offset = offset;
4565 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4566 new_offset,
4567 map_size,
4568 &src_needs_copy,
4569 &copy);
2d21ac55
A
4570 assert(success);
4571 result = KERN_SUCCESS;
4572 }
4573 /*
4574 * Throw away the reference to the
4575 * original object, as it won't be mapped.
4576 */
4577
4578 vm_object_deallocate(object);
4579
3e170ce0 4580 if (result != KERN_SUCCESS) {
2d21ac55 4581 return result;
3e170ce0 4582 }
2d21ac55
A
4583
4584 object = new_object;
4585 offset = new_offset;
4586 }
4587
fe8ab488 4588 /*
5ba3f43e 4589 * If non-kernel users want to try to prefault pages, the mapping and prefault
fe8ab488
A
4590 * needs to be atomic.
4591 */
5ba3f43e
A
4592 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4593 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4594
4595#if __arm64__
4596 if (fourk) {
4597 /* map this object in a "4K" pager */
4598 result = vm_map_enter_fourk(target_map,
0a7de745
A
4599 &map_addr,
4600 map_size,
4601 (vm_map_offset_t) mask,
4602 flags,
4603 vmk_flags,
4604 tag,
4605 object,
4606 offset,
4607 copy,
4608 cur_protection,
4609 max_protection,
4610 inheritance);
5ba3f43e
A
4611 } else
4612#endif /* __arm64__ */
3e170ce0
A
4613 {
4614 result = vm_map_enter(target_map,
0a7de745
A
4615 &map_addr, map_size,
4616 (vm_map_offset_t)mask,
4617 flags,
4618 vmk_flags,
4619 tag,
4620 object, offset,
4621 copy,
4622 cur_protection, max_protection,
4623 inheritance);
4624 }
4625 if (result != KERN_SUCCESS) {
2d21ac55 4626 vm_object_deallocate(object);
0a7de745 4627 }
39236c6e 4628
fe8ab488
A
4629 /*
4630 * Try to prefault, and do not forget to release the vm map lock.
4631 */
4632 if (result == KERN_SUCCESS && try_prefault) {
4633 mach_vm_address_t va = map_addr;
4634 kern_return_t kr = KERN_SUCCESS;
4635 unsigned int i = 0;
39037602
A
4636 int pmap_options;
4637
5ba3f43e 4638 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
39037602
A
4639 if (object->internal) {
4640 pmap_options |= PMAP_OPTIONS_INTERNAL;
4641 }
fe8ab488
A
4642
4643 for (i = 0; i < page_list_count; ++i) {
5ba3f43e
A
4644 if (!UPL_VALID_PAGE(page_list, i)) {
4645 if (kernel_prefault) {
4646 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4647 result = KERN_MEMORY_ERROR;
4648 break;
4649 }
4650 } else {
fe8ab488
A
4651 /*
4652 * If this function call failed, we should stop
4653 * trying to optimize, other calls are likely
4654 * going to fail too.
4655 *
4656 * We are not gonna report an error for such
4657 * failure though. That's an optimization, not
4658 * something critical.
4659 */
4660 kr = pmap_enter_options(target_map->pmap,
0a7de745
A
4661 va, UPL_PHYS_PAGE(page_list, i),
4662 cur_protection, VM_PROT_NONE,
4663 0, TRUE, pmap_options, NULL);
fe8ab488
A
4664 if (kr != KERN_SUCCESS) {
4665 OSIncrementAtomic64(&vm_prefault_nb_bailout);
5ba3f43e
A
4666 if (kernel_prefault) {
4667 result = kr;
4668 }
3e170ce0 4669 break;
fe8ab488
A
4670 }
4671 OSIncrementAtomic64(&vm_prefault_nb_pages);
4672 }
4673
4674 /* Next virtual address */
4675 va += PAGE_SIZE;
4676 }
5ba3f43e
A
4677 if (vmk_flags.vmkf_keep_map_locked) {
4678 vm_map_unlock(target_map);
4679 }
fe8ab488
A
4680 }
4681
3e170ce0 4682 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4683 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4684 *address = map_addr + offset_in_mapping;
4685 } else {
4686 *address = map_addr;
4687 }
2d21ac55
A
4688 return result;
4689}
4690
fe8ab488
A
4691kern_return_t
4692vm_map_enter_mem_object(
0a7de745
A
4693 vm_map_t target_map,
4694 vm_map_offset_t *address,
4695 vm_map_size_t initial_size,
4696 vm_map_offset_t mask,
4697 int flags,
4698 vm_map_kernel_flags_t vmk_flags,
4699 vm_tag_t tag,
4700 ipc_port_t port,
4701 vm_object_offset_t offset,
4702 boolean_t copy,
4703 vm_prot_t cur_protection,
4704 vm_prot_t max_protection,
4705 vm_inherit_t inheritance)
fe8ab488 4706{
5ba3f43e
A
4707 kern_return_t ret;
4708
4709 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4710 address,
4711 initial_size,
4712 mask,
4713 flags,
4714 vmk_flags,
4715 tag,
4716 port,
4717 offset,
4718 copy,
4719 cur_protection,
4720 max_protection,
4721 inheritance,
4722 NULL,
4723 0);
5ba3f43e
A
4724
4725#if KASAN
4726 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4727 kasan_notify_address(*address, initial_size);
4728 }
4729#endif
4730
4731 return ret;
fe8ab488 4732}
b0d623f7 4733
fe8ab488
A
4734kern_return_t
4735vm_map_enter_mem_object_prefault(
0a7de745
A
4736 vm_map_t target_map,
4737 vm_map_offset_t *address,
4738 vm_map_size_t initial_size,
4739 vm_map_offset_t mask,
4740 int flags,
4741 vm_map_kernel_flags_t vmk_flags,
4742 vm_tag_t tag,
4743 ipc_port_t port,
4744 vm_object_offset_t offset,
4745 vm_prot_t cur_protection,
4746 vm_prot_t max_protection,
4747 upl_page_list_ptr_t page_list,
4748 unsigned int page_list_count)
fe8ab488 4749{
5ba3f43e
A
4750 kern_return_t ret;
4751
4752 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4753 address,
4754 initial_size,
4755 mask,
4756 flags,
4757 vmk_flags,
4758 tag,
4759 port,
4760 offset,
4761 FALSE,
4762 cur_protection,
4763 max_protection,
4764 VM_INHERIT_DEFAULT,
4765 page_list,
4766 page_list_count);
5ba3f43e
A
4767
4768#if KASAN
4769 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4770 kasan_notify_address(*address, initial_size);
4771 }
4772#endif
4773
4774 return ret;
fe8ab488 4775}
b0d623f7
A
4776
4777
4778kern_return_t
4779vm_map_enter_mem_object_control(
0a7de745
A
4780 vm_map_t target_map,
4781 vm_map_offset_t *address,
4782 vm_map_size_t initial_size,
4783 vm_map_offset_t mask,
4784 int flags,
4785 vm_map_kernel_flags_t vmk_flags,
4786 vm_tag_t tag,
4787 memory_object_control_t control,
4788 vm_object_offset_t offset,
4789 boolean_t copy,
4790 vm_prot_t cur_protection,
4791 vm_prot_t max_protection,
4792 vm_inherit_t inheritance)
b0d623f7 4793{
0a7de745
A
4794 vm_map_address_t map_addr;
4795 vm_map_size_t map_size;
4796 vm_object_t object;
4797 vm_object_size_t size;
4798 kern_return_t result;
4799 memory_object_t pager;
4800 vm_prot_t pager_prot;
4801 kern_return_t kr;
5ba3f43e 4802#if __arm64__
0a7de745 4803 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e 4804#endif /* __arm64__ */
b0d623f7
A
4805
4806 /*
4807 * Check arguments for validity
4808 */
4809 if ((target_map == VM_MAP_NULL) ||
4810 (cur_protection & ~VM_PROT_ALL) ||
4811 (max_protection & ~VM_PROT_ALL) ||
4812 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 4813 initial_size == 0) {
b0d623f7 4814 return KERN_INVALID_ARGUMENT;
3e170ce0 4815 }
b0d623f7 4816
5ba3f43e
A
4817#if __arm64__
4818 if (fourk) {
4819 map_addr = vm_map_trunc_page(*address,
0a7de745 4820 FOURK_PAGE_MASK);
5ba3f43e 4821 map_size = vm_map_round_page(initial_size,
0a7de745 4822 FOURK_PAGE_MASK);
5ba3f43e
A
4823 } else
4824#endif /* __arm64__ */
3e170ce0
A
4825 {
4826 map_addr = vm_map_trunc_page(*address,
0a7de745 4827 VM_MAP_PAGE_MASK(target_map));
3e170ce0 4828 map_size = vm_map_round_page(initial_size,
0a7de745 4829 VM_MAP_PAGE_MASK(target_map));
3e170ce0
A
4830 }
4831 size = vm_object_round_page(initial_size);
b0d623f7
A
4832
4833 object = memory_object_control_to_vm_object(control);
4834
0a7de745 4835 if (object == VM_OBJECT_NULL) {
b0d623f7 4836 return KERN_INVALID_OBJECT;
0a7de745 4837 }
b0d623f7
A
4838
4839 if (object == kernel_object) {
4840 printf("Warning: Attempt to map kernel object"
0a7de745 4841 " by a non-private kernel entity\n");
b0d623f7
A
4842 return KERN_INVALID_OBJECT;
4843 }
4844
4845 vm_object_lock(object);
4846 object->ref_count++;
4847 vm_object_res_reference(object);
4848
4849 /*
4850 * For "named" VM objects, let the pager know that the
4851 * memory object is being mapped. Some pagers need to keep
4852 * track of this, to know when they can reclaim the memory
4853 * object, for example.
4854 * VM calls memory_object_map() for each mapping (specifying
4855 * the protection of each mapping) and calls
4856 * memory_object_last_unmap() when all the mappings are gone.
4857 */
4858 pager_prot = max_protection;
4859 if (copy) {
4860 pager_prot &= ~VM_PROT_WRITE;
4861 }
4862 pager = object->pager;
4863 if (object->named &&
4864 pager != MEMORY_OBJECT_NULL &&
4865 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4866 assert(object->pager_ready);
4867 vm_object_mapping_wait(object, THREAD_UNINT);
4868 vm_object_mapping_begin(object);
4869 vm_object_unlock(object);
4870
4871 kr = memory_object_map(pager, pager_prot);
4872 assert(kr == KERN_SUCCESS);
4873
4874 vm_object_lock(object);
4875 vm_object_mapping_end(object);
4876 }
4877 vm_object_unlock(object);
4878
4879 /*
4880 * Perform the copy if requested
4881 */
4882
4883 if (copy) {
0a7de745
A
4884 vm_object_t new_object;
4885 vm_object_offset_t new_offset;
b0d623f7
A
4886
4887 result = vm_object_copy_strategically(object, offset, size,
0a7de745
A
4888 &new_object, &new_offset,
4889 &copy);
b0d623f7
A
4890
4891
4892 if (result == KERN_MEMORY_RESTART_COPY) {
4893 boolean_t success;
4894 boolean_t src_needs_copy;
4895
4896 /*
4897 * XXX
4898 * We currently ignore src_needs_copy.
4899 * This really is the issue of how to make
4900 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4901 * non-kernel users to use. Solution forthcoming.
4902 * In the meantime, since we don't allow non-kernel
4903 * memory managers to specify symmetric copy,
4904 * we won't run into problems here.
4905 */
4906 new_object = object;
4907 new_offset = offset;
4908 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4909 new_offset, size,
4910 &src_needs_copy,
4911 &copy);
b0d623f7
A
4912 assert(success);
4913 result = KERN_SUCCESS;
4914 }
4915 /*
4916 * Throw away the reference to the
4917 * original object, as it won't be mapped.
4918 */
4919
4920 vm_object_deallocate(object);
4921
3e170ce0 4922 if (result != KERN_SUCCESS) {
b0d623f7 4923 return result;
3e170ce0 4924 }
b0d623f7
A
4925
4926 object = new_object;
4927 offset = new_offset;
4928 }
4929
5ba3f43e
A
4930#if __arm64__
4931 if (fourk) {
4932 result = vm_map_enter_fourk(target_map,
0a7de745
A
4933 &map_addr,
4934 map_size,
4935 (vm_map_offset_t)mask,
4936 flags,
4937 vmk_flags,
4938 tag,
4939 object, offset,
4940 copy,
4941 cur_protection, max_protection,
4942 inheritance);
5ba3f43e
A
4943 } else
4944#endif /* __arm64__ */
3e170ce0
A
4945 {
4946 result = vm_map_enter(target_map,
0a7de745
A
4947 &map_addr, map_size,
4948 (vm_map_offset_t)mask,
4949 flags,
4950 vmk_flags,
4951 tag,
4952 object, offset,
4953 copy,
4954 cur_protection, max_protection,
4955 inheritance);
4956 }
4957 if (result != KERN_SUCCESS) {
b0d623f7 4958 vm_object_deallocate(object);
0a7de745 4959 }
b0d623f7
A
4960 *address = map_addr;
4961
4962 return result;
4963}
4964
4965
0a7de745 4966#if VM_CPM
2d21ac55
A
4967
4968#ifdef MACH_ASSERT
0a7de745 4969extern pmap_paddr_t avail_start, avail_end;
2d21ac55
A
4970#endif
4971
4972/*
4973 * Allocate memory in the specified map, with the caveat that
4974 * the memory is physically contiguous. This call may fail
4975 * if the system can't find sufficient contiguous memory.
4976 * This call may cause or lead to heart-stopping amounts of
4977 * paging activity.
4978 *
4979 * Memory obtained from this call should be freed in the
4980 * normal way, viz., via vm_deallocate.
4981 */
4982kern_return_t
4983vm_map_enter_cpm(
0a7de745
A
4984 vm_map_t map,
4985 vm_map_offset_t *addr,
4986 vm_map_size_t size,
4987 int flags)
2d21ac55 4988{
0a7de745
A
4989 vm_object_t cpm_obj;
4990 pmap_t pmap;
4991 vm_page_t m, pages;
4992 kern_return_t kr;
4993 vm_map_offset_t va, start, end, offset;
4994#if MACH_ASSERT
4995 vm_map_offset_t prev_addr = 0;
4996#endif /* MACH_ASSERT */
4997
4998 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
4999 vm_tag_t tag;
5000
5001 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 5002
2d21ac55
A
5003 if (size == 0) {
5004 *addr = 0;
5005 return KERN_SUCCESS;
5006 }
0a7de745 5007 if (anywhere) {
2d21ac55 5008 *addr = vm_map_min(map);
0a7de745 5009 } else {
39236c6e 5010 *addr = vm_map_trunc_page(*addr,
0a7de745
A
5011 VM_MAP_PAGE_MASK(map));
5012 }
39236c6e 5013 size = vm_map_round_page(size,
0a7de745 5014 VM_MAP_PAGE_MASK(map));
2d21ac55
A
5015
5016 /*
5017 * LP64todo - cpm_allocate should probably allow
5018 * allocations of >4GB, but not with the current
5019 * algorithm, so just cast down the size for now.
5020 */
0a7de745 5021 if (size > VM_MAX_ADDRESS) {
2d21ac55 5022 return KERN_RESOURCE_SHORTAGE;
0a7de745 5023 }
2d21ac55 5024 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
0a7de745 5025 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
2d21ac55 5026 return kr;
0a7de745 5027 }
2d21ac55
A
5028
5029 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5030 assert(cpm_obj != VM_OBJECT_NULL);
5031 assert(cpm_obj->internal);
316670eb 5032 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
5033 assert(cpm_obj->can_persist == FALSE);
5034 assert(cpm_obj->pager_created == FALSE);
5035 assert(cpm_obj->pageout == FALSE);
5036 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
5037
5038 /*
5039 * Insert pages into object.
5040 */
5041
5042 vm_object_lock(cpm_obj);
5043 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5044 m = pages;
5045 pages = NEXT_PAGE(m);
0c530ab8 5046 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636 5047
d9a64523
A
5048 assert(!m->vmp_gobbled);
5049 assert(!m->vmp_wanted);
5050 assert(!m->vmp_pageout);
5051 assert(!m->vmp_tabled);
b0d623f7 5052 assert(VM_PAGE_WIRED(m));
d9a64523 5053 assert(m->vmp_busy);
0a7de745 5054 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
91447636 5055
d9a64523 5056 m->vmp_busy = FALSE;
91447636
A
5057 vm_page_insert(m, cpm_obj, offset);
5058 }
5059 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5060 vm_object_unlock(cpm_obj);
5061
5062 /*
5063 * Hang onto a reference on the object in case a
5064 * multi-threaded application for some reason decides
5065 * to deallocate the portion of the address space into
5066 * which we will insert this object.
5067 *
5068 * Unfortunately, we must insert the object now before
5069 * we can talk to the pmap module about which addresses
5070 * must be wired down. Hence, the race with a multi-
5071 * threaded app.
5072 */
5073 vm_object_reference(cpm_obj);
5074
5075 /*
5076 * Insert object into map.
5077 */
5078
5079 kr = vm_map_enter(
2d21ac55
A
5080 map,
5081 addr,
5082 size,
5083 (vm_map_offset_t)0,
5084 flags,
5ba3f43e 5085 VM_MAP_KERNEL_FLAGS_NONE,
2d21ac55
A
5086 cpm_obj,
5087 (vm_object_offset_t)0,
5088 FALSE,
5089 VM_PROT_ALL,
5090 VM_PROT_ALL,
5091 VM_INHERIT_DEFAULT);
91447636
A
5092
5093 if (kr != KERN_SUCCESS) {
5094 /*
5095 * A CPM object doesn't have can_persist set,
5096 * so all we have to do is deallocate it to
5097 * free up these pages.
5098 */
5099 assert(cpm_obj->pager_created == FALSE);
5100 assert(cpm_obj->can_persist == FALSE);
5101 assert(cpm_obj->pageout == FALSE);
5102 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5103 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5104 vm_object_deallocate(cpm_obj); /* kill creation ref */
5105 }
5106
5107 /*
5108 * Inform the physical mapping system that the
5109 * range of addresses may not fault, so that
5110 * page tables and such can be locked down as well.
5111 */
5112 start = *addr;
5113 end = start + size;
5114 pmap = vm_map_pmap(map);
5115 pmap_pageable(pmap, start, end, FALSE);
5116
5117 /*
5118 * Enter each page into the pmap, to avoid faults.
5119 * Note that this loop could be coded more efficiently,
5120 * if the need arose, rather than looking up each page
5121 * again.
5122 */
5123 for (offset = 0, va = start; offset < size;
0a7de745
A
5124 va += PAGE_SIZE, offset += PAGE_SIZE) {
5125 int type_of_fault;
2d21ac55 5126
91447636
A
5127 vm_object_lock(cpm_obj);
5128 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 5129 assert(m != VM_PAGE_NULL);
2d21ac55
A
5130
5131 vm_page_zero_fill(m);
5132
5133 type_of_fault = DBG_ZERO_FILL_FAULT;
5134
6d2010ae 5135 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
0a7de745
A
5136 VM_PAGE_WIRED(m),
5137 FALSE, /* change_wiring */
5138 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5139 FALSE, /* no_cache */
5140 FALSE, /* cs_bypass */
5141 0, /* user_tag */
5142 0, /* pmap_options */
5143 NULL, /* need_retry */
5144 &type_of_fault);
2d21ac55
A
5145
5146 vm_object_unlock(cpm_obj);
91447636
A
5147 }
5148
0a7de745 5149#if MACH_ASSERT
91447636
A
5150 /*
5151 * Verify ordering in address space.
5152 */
5153 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5154 vm_object_lock(cpm_obj);
5155 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5156 vm_object_unlock(cpm_obj);
0a7de745 5157 if (m == VM_PAGE_NULL) {
316670eb 5158 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
0a7de745
A
5159 cpm_obj, (uint64_t)offset);
5160 }
d9a64523
A
5161 assert(m->vmp_tabled);
5162 assert(!m->vmp_busy);
5163 assert(!m->vmp_wanted);
5164 assert(!m->vmp_fictitious);
5165 assert(!m->vmp_private);
5166 assert(!m->vmp_absent);
5167 assert(!m->vmp_error);
5168 assert(!m->vmp_cleaning);
5169 assert(!m->vmp_laundry);
5170 assert(!m->vmp_precious);
5171 assert(!m->vmp_clustered);
91447636 5172 if (offset != 0) {
39037602 5173 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb 5174 printf("start 0x%llx end 0x%llx va 0x%llx\n",
0a7de745 5175 (uint64_t)start, (uint64_t)end, (uint64_t)va);
316670eb
A
5176 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5177 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
5178 panic("vm_allocate_cpm: pages not contig!");
5179 }
5180 }
39037602 5181 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636 5182 }
0a7de745 5183#endif /* MACH_ASSERT */
91447636
A
5184
5185 vm_object_deallocate(cpm_obj); /* kill extra ref */
5186
5187 return kr;
5188}
5189
5190
0a7de745 5191#else /* VM_CPM */
91447636
A
5192
5193/*
5194 * Interface is defined in all cases, but unless the kernel
5195 * is built explicitly for this option, the interface does
5196 * nothing.
5197 */
5198
/*
 * Stub used when the kernel is built without VM_CPM: every argument is
 * ignored and the request always fails with KERN_FAILURE.
 */
5199kern_return_t
5200vm_map_enter_cpm(
0a7de745
A
5201 __unused vm_map_t map,
5202 __unused vm_map_offset_t *addr,
5203 __unused vm_map_size_t size,
5204 __unused int flags)
91447636
A
5205{
5206 return KERN_FAILURE;
5207}
5208#endif /* VM_CPM */
5209
b0d623f7
A
5210/* Not used without nested pmaps */
5211#ifndef NO_NESTED_PMAP
2d21ac55
A
5212/*
5213 * Clip and unnest a portion of a nested submap mapping.
5214 */
b0d623f7
A
5215
5216
2d21ac55
A
5217static void
5218vm_map_clip_unnest(
0a7de745
A
5219 vm_map_t map,
5220 vm_map_entry_t entry,
5221 vm_map_offset_t start_unnest,
5222 vm_map_offset_t end_unnest)
2d21ac55 5223{
b0d623f7
A
5224 vm_map_offset_t old_start_unnest = start_unnest;
5225 vm_map_offset_t old_end_unnest = end_unnest;
5226
2d21ac55 5227 assert(entry->is_sub_map);
3e170ce0 5228 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 5229 assert(entry->use_pmap);
2d21ac55 5230
b0d623f7
A
5231 /*
5232 * Query the platform for the optimal unnest range.
5233 * DRK: There's some duplication of effort here, since
5234 * callers may have adjusted the range to some extent. This
5235 * routine was introduced to support 1GiB subtree nesting
5236 * for x86 platforms, which can also nest on 2MiB boundaries
5237 * depending on size/alignment.
5238 */
5239 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
5240 assert(VME_SUBMAP(entry)->is_nested_map);
5241 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5242 log_unnest_badness(map,
0a7de745
A
5243 old_start_unnest,
5244 old_end_unnest,
5245 VME_SUBMAP(entry)->is_nested_map,
5246 (entry->vme_start +
5247 VME_SUBMAP(entry)->lowest_unnestable_start -
5248 VME_OFFSET(entry)));
b0d623f7
A
5249 }
5250
2d21ac55
A
5251 if (entry->vme_start > start_unnest ||
5252 entry->vme_end < end_unnest) {
5253 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
0a7de745
A
5254 "bad nested entry: start=0x%llx end=0x%llx\n",
5255 (long long)start_unnest, (long long)end_unnest,
5256 (long long)entry->vme_start, (long long)entry->vme_end);
2d21ac55 5257 }
b0d623f7 5258
2d21ac55
A
5259 if (start_unnest > entry->vme_start) {
5260 _vm_map_clip_start(&map->hdr,
0a7de745
A
5261 entry,
5262 start_unnest);
3e170ce0
A
5263 if (map->holelistenabled) {
5264 vm_map_store_update_first_free(map, NULL, FALSE);
5265 } else {
5266 vm_map_store_update_first_free(map, map->first_free, FALSE);
5267 }
2d21ac55
A
5268 }
5269 if (entry->vme_end > end_unnest) {
5270 _vm_map_clip_end(&map->hdr,
0a7de745
A
5271 entry,
5272 end_unnest);
3e170ce0
A
5273 if (map->holelistenabled) {
5274 vm_map_store_update_first_free(map, NULL, FALSE);
5275 } else {
5276 vm_map_store_update_first_free(map, map->first_free, FALSE);
5277 }
2d21ac55
A
5278 }
5279
5280 pmap_unnest(map->pmap,
0a7de745
A
5281 entry->vme_start,
5282 entry->vme_end - entry->vme_start);
cb323159 5283 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
2d21ac55
A
5284 /* clean up parent map/maps */
5285 vm_map_submap_pmap_clean(
5286 map, entry->vme_start,
5287 entry->vme_end,
3e170ce0
A
5288 VME_SUBMAP(entry),
5289 VME_OFFSET(entry));
2d21ac55
A
5290 }
5291 entry->use_pmap = FALSE;
3e170ce0
A
5292 if ((map->pmap != kernel_pmap) &&
5293 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5294 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 5295 }
2d21ac55 5296}
0a7de745 5297#endif /* NO_NESTED_PMAP */
2d21ac55 5298
1c79356b
A
5299/*
5300 * vm_map_clip_start: [ internal use only ]
5301 *
5302 * Asserts that the given entry begins at or after
5303 * the specified address; if necessary,
5304 * it splits the entry into two.
5305 */
e2d2fc5c 5306void
2d21ac55 5307vm_map_clip_start(
0a7de745
A
5308 vm_map_t map,
5309 vm_map_entry_t entry,
5310 vm_map_offset_t startaddr)
2d21ac55 5311{
0c530ab8 5312#ifndef NO_NESTED_PMAP
fe8ab488
A
5313 if (entry->is_sub_map &&
5314 entry->use_pmap &&
2d21ac55 5315 startaddr >= entry->vme_start) {
0a7de745 5316 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5317
5318 /*
5319 * Make sure "startaddr" is no longer in a nested range
5320 * before we clip. Unnest only the minimum range the platform
5321 * can handle.
b0d623f7
A
5322 * vm_map_clip_unnest may perform additional adjustments to
5323 * the unnest range.
2d21ac55
A
5324 */
5325 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5326 end_unnest = start_unnest + pmap_nesting_size_min;
5327 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5328 }
5329#endif /* NO_NESTED_PMAP */
5330 if (startaddr > entry->vme_start) {
3e170ce0 5331 if (VME_OBJECT(entry) &&
2d21ac55 5332 !entry->is_sub_map &&
3e170ce0 5333 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5334 pmap_remove(map->pmap,
0a7de745
A
5335 (addr64_t)(entry->vme_start),
5336 (addr64_t)(entry->vme_end));
2d21ac55 5337 }
39037602
A
5338 if (entry->vme_atomic) {
5339 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5ba3f43e 5340 }
d9a64523
A
5341
5342 DTRACE_VM5(
5343 vm_map_clip_start,
5344 vm_map_t, map,
5345 vm_map_offset_t, entry->vme_start,
5346 vm_map_offset_t, entry->vme_end,
5347 vm_map_offset_t, startaddr,
5348 int, VME_ALIAS(entry));
5349
2d21ac55 5350 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
5351 if (map->holelistenabled) {
5352 vm_map_store_update_first_free(map, NULL, FALSE);
5353 } else {
5354 vm_map_store_update_first_free(map, map->first_free, FALSE);
5355 }
2d21ac55
A
5356 }
5357}
5358
1c79356b
A
5359
/*
 * vm_map_copy_clip_start: clip "entry" of the copy object "copy" so that
 * it starts at "startaddr"; a no-op unless "startaddr" is past the entry's
 * current start.  Operates on the copy's own header (cpy_hdr).
 */
5360#define vm_map_copy_clip_start(copy, entry, startaddr) \
5361 MACRO_BEGIN \
5362 if ((startaddr) > (entry)->vme_start) \
0a7de745 5363 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
1c79356b
A
5364 MACRO_END
5365
5366/*
5367 * This routine is called only when it is known that
5368 * the entry must be split.
5369 */
91447636 5370static void
1c79356b 5371_vm_map_clip_start(
0a7de745
A
5372 struct vm_map_header *map_header,
5373 vm_map_entry_t entry,
5374 vm_map_offset_t start)
1c79356b 5375{
0a7de745 5376 vm_map_entry_t new_entry;
1c79356b
A
5377
5378 /*
5379 * Split off the front portion --
5380 * note that we must insert the new
5381 * entry BEFORE this one, so that
5382 * this entry has the specified starting
5383 * address.
5384 */
5385
fe8ab488
A
5386 if (entry->map_aligned) {
5387 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 5388 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5389 }
5390
7ddcb079 5391 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5392 vm_map_entry_copy_full(new_entry, entry);
5393
5394 new_entry->vme_end = start;
e2d2fc5c 5395 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 5396 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 5397 assert(start < entry->vme_end);
1c79356b
A
5398 entry->vme_start = start;
5399
6d2010ae 5400 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b 5401
0a7de745 5402 if (entry->is_sub_map) {
3e170ce0 5403 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5404 } else {
3e170ce0 5405 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5406 }
1c79356b
A
5407}
5408
5409
5410/*
5411 * vm_map_clip_end: [ internal use only ]
5412 *
5413 * Asserts that the given entry ends at or before
5414 * the specified address; if necessary,
5415 * it splits the entry into two.
5416 */
e2d2fc5c 5417void
2d21ac55 5418vm_map_clip_end(
0a7de745
A
5419 vm_map_t map,
5420 vm_map_entry_t entry,
5421 vm_map_offset_t endaddr)
2d21ac55
A
5422{
5423 if (endaddr > entry->vme_end) {
5424 /*
5425 * Within the scope of this clipping, limit "endaddr" to
5426 * the end of this map entry...
5427 */
5428 endaddr = entry->vme_end;
5429 }
5430#ifndef NO_NESTED_PMAP
fe8ab488 5431 if (entry->is_sub_map && entry->use_pmap) {
0a7de745 5432 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5433
5434 /*
5435 * Make sure the range between the start of this entry and
5436 * the new "endaddr" is no longer nested before we clip.
5437 * Unnest only the minimum range the platform can handle.
b0d623f7
A
5438 * vm_map_clip_unnest may perform additional adjustments to
5439 * the unnest range.
2d21ac55
A
5440 */
5441 start_unnest = entry->vme_start;
5442 end_unnest =
0a7de745
A
5443 (endaddr + pmap_nesting_size_min - 1) &
5444 ~(pmap_nesting_size_min - 1);
2d21ac55
A
5445 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5446 }
5447#endif /* NO_NESTED_PMAP */
5448 if (endaddr < entry->vme_end) {
3e170ce0 5449 if (VME_OBJECT(entry) &&
2d21ac55 5450 !entry->is_sub_map &&
3e170ce0 5451 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5452 pmap_remove(map->pmap,
0a7de745
A
5453 (addr64_t)(entry->vme_start),
5454 (addr64_t)(entry->vme_end));
2d21ac55 5455 }
39037602
A
5456 if (entry->vme_atomic) {
5457 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5458 }
d9a64523
A
5459 DTRACE_VM5(
5460 vm_map_clip_end,
5461 vm_map_t, map,
5462 vm_map_offset_t, entry->vme_start,
5463 vm_map_offset_t, entry->vme_end,
5464 vm_map_offset_t, endaddr,
5465 int, VME_ALIAS(entry));
5466
2d21ac55 5467 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
5468 if (map->holelistenabled) {
5469 vm_map_store_update_first_free(map, NULL, FALSE);
5470 } else {
5471 vm_map_store_update_first_free(map, map->first_free, FALSE);
5472 }
2d21ac55
A
5473 }
5474}
0c530ab8 5475
1c79356b
A
5476
/*
 * vm_map_copy_clip_end: clip "entry" of the copy object "copy" so that
 * it ends at "endaddr"; a no-op unless "endaddr" is before the entry's
 * current end.  Operates on the copy's own header (cpy_hdr).
 */
5477#define vm_map_copy_clip_end(copy, entry, endaddr) \
5478 MACRO_BEGIN \
5479 if ((endaddr) < (entry)->vme_end) \
0a7de745 5480 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
1c79356b
A
5481 MACRO_END
5482
5483/*
5484 * This routine is called only when it is known that
5485 * the entry must be split.
5486 */
91447636 5487static void
1c79356b 5488_vm_map_clip_end(
0a7de745
A
5489 struct vm_map_header *map_header,
5490 vm_map_entry_t entry,
5491 vm_map_offset_t end)
1c79356b 5492{
0a7de745 5493 vm_map_entry_t new_entry;
1c79356b
A
5494
5495 /*
5496 * Create a new entry and insert it
5497 * AFTER the specified entry
5498 */
5499
fe8ab488
A
5500 if (entry->map_aligned) {
5501 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745 5502 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5503 }
5504
7ddcb079 5505 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5506 vm_map_entry_copy_full(new_entry, entry);
5507
e2d2fc5c 5508 assert(entry->vme_start < end);
1c79356b 5509 new_entry->vme_start = entry->vme_end = end;
3e170ce0 5510 VME_OFFSET_SET(new_entry,
0a7de745 5511 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 5512 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 5513
6d2010ae 5514 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b 5515
0a7de745 5516 if (entry->is_sub_map) {
3e170ce0 5517 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5518 } else {
3e170ce0 5519 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5520 }
1c79356b
A
5521}
5522
5523
5524/*
5525 * VM_MAP_RANGE_CHECK: [ internal use only ]
5526 *
5527 * Clamps the starting and ending region addresses
5528 * so that they fall within the valid range of the map.
5529 */
0a7de745
A
5530#define VM_MAP_RANGE_CHECK(map, start, end) \
5531 MACRO_BEGIN \
5532 if (start < vm_map_min(map)) \
5533 start = vm_map_min(map); \
5534 if (end > vm_map_max(map)) \
5535 end = vm_map_max(map); \
5536 if (start > end) \
5537 start = end; \
2d21ac55 5538 MACRO_END
1c79356b
A
5539
5540/*
5541 * vm_map_range_check: [ internal use only ]
5ba3f43e 5542 *
1c79356b
A
5543 * Check that the region defined by the specified start and
5544 * end addresses are wholly contained within a single map
5545 * entry or set of adjacent map entries of the specified map,
5546 * i.e. the specified region contains no unmapped space.
5547 * If any or all of the region is unmapped, FALSE is returned.
5548 * Otherwise, TRUE is returned and if the output argument 'entry'
5549 * is not NULL it points to the map entry containing the start
5550 * of the region.
5551 *
5552 * The map is locked for reading on entry and is left locked.
5553 */
91447636 5554static boolean_t
1c79356b 5555vm_map_range_check(
0a7de745
A
5556 vm_map_t map,
5557 vm_map_offset_t start,
5558 vm_map_offset_t end,
5559 vm_map_entry_t *entry)
1c79356b 5560{
0a7de745
A
5561 vm_map_entry_t cur;
5562 vm_map_offset_t prev;
1c79356b
A
5563
5564 /*
0a7de745 5565 * Basic sanity checks first
1c79356b 5566 */
0a7de745
A
5567 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5568 return FALSE;
5569 }
1c79356b
A
5570
5571 /*
0a7de745 5572 * Check first if the region starts within a valid
1c79356b
A
5573 * mapping for the map.
5574 */
0a7de745
A
5575 if (!vm_map_lookup_entry(map, start, &cur)) {
5576 return FALSE;
5577 }
1c79356b
A
5578
5579 /*
5ba3f43e 5580 * Optimize for the case that the region is contained
1c79356b
A
5581 * in a single map entry.
5582 */
0a7de745 5583 if (entry != (vm_map_entry_t *) NULL) {
1c79356b 5584 *entry = cur;
0a7de745
A
5585 }
5586 if (end <= cur->vme_end) {
5587 return TRUE;
5588 }
1c79356b
A
5589
5590 /*
0a7de745
A
5591 * If the region is not wholly contained within a
5592 * single entry, walk the entries looking for holes.
1c79356b
A
5593 */
5594 prev = cur->vme_end;
5595 cur = cur->vme_next;
5596 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
0a7de745
A
5597 if (end <= cur->vme_end) {
5598 return TRUE;
5599 }
1c79356b
A
5600 prev = cur->vme_end;
5601 cur = cur->vme_next;
5602 }
0a7de745 5603 return FALSE;
1c79356b
A
5604}
5605
5606/*
5607 * vm_map_submap: [ kernel use only ]
5608 *
5609 * Mark the given range as handled by a subordinate map.
5610 *
5611 * This range must have been created with vm_map_find using
5612 * the vm_submap_object, and no other operations may have been
5613 * performed on this range prior to calling vm_map_submap.
5614 *
5615 * Only a limited number of operations can be performed
5616 * within this range after calling vm_map_submap:
5617 * vm_fault
5618 * [Don't try vm_map_copyin!]
5619 *
5620 * To remove a submapping, one must first remove the
5621 * range from the superior map, and then destroy the
5622 * submap (if desired). [Better yet, don't try it.]
5623 */
5624kern_return_t
5625vm_map_submap(
0a7de745
A
5626 vm_map_t map,
5627 vm_map_offset_t start,
5628 vm_map_offset_t end,
5629 vm_map_t submap,
5630 vm_map_offset_t offset,
0c530ab8 5631#ifdef NO_NESTED_PMAP
91447636 5632 __unused
0a7de745
A
5633#endif /* NO_NESTED_PMAP */
5634 boolean_t use_pmap)
1c79356b 5635{
0a7de745
A
5636 vm_map_entry_t entry;
5637 kern_return_t result = KERN_INVALID_ARGUMENT;
5638 vm_object_t object;
1c79356b
A
5639
5640 vm_map_lock(map);
5641
0a7de745 5642 if (!vm_map_lookup_entry(map, start, &entry)) {
1c79356b 5643 entry = entry->vme_next;
2d21ac55 5644 }
1c79356b 5645
2d21ac55
A
5646 if (entry == vm_map_to_entry(map) ||
5647 entry->is_sub_map) {
1c79356b
A
5648 vm_map_unlock(map);
5649 return KERN_INVALID_ARGUMENT;
5650 }
5651
2d21ac55 5652 vm_map_clip_start(map, entry, start);
1c79356b
A
5653 vm_map_clip_end(map, entry, end);
5654
5655 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5656 (!entry->is_sub_map) &&
3e170ce0 5657 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
5658 (object->resident_page_count == 0) &&
5659 (object->copy == VM_OBJECT_NULL) &&
5660 (object->shadow == VM_OBJECT_NULL) &&
5661 (!object->pager_created)) {
3e170ce0
A
5662 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5663 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
5664 vm_object_deallocate(object);
5665 entry->is_sub_map = TRUE;
fe8ab488 5666 entry->use_pmap = FALSE;
3e170ce0 5667 VME_SUBMAP_SET(entry, submap);
2d21ac55 5668 vm_map_reference(submap);
316670eb
A
5669 if (submap->mapped_in_other_pmaps == FALSE &&
5670 vm_map_pmap(submap) != PMAP_NULL &&
5671 vm_map_pmap(submap) != vm_map_pmap(map)) {
5672 /*
5673 * This submap is being mapped in a map
5674 * that uses a different pmap.
5675 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 5676 * to indicate that we now need to
316670eb
A
5677 * remove mappings from all pmaps rather
5678 * than just the submap's pmap.
5679 */
5680 submap->mapped_in_other_pmaps = TRUE;
5681 }
2d21ac55 5682
0c530ab8 5683#ifndef NO_NESTED_PMAP
2d21ac55
A
5684 if (use_pmap) {
5685 /* nest if platform code will allow */
0a7de745 5686 if (submap->pmap == NULL) {
316670eb 5687 ledger_t ledger = map->pmap->ledger;
cb323159
A
5688 submap->pmap = pmap_create_options(ledger,
5689 (vm_map_size_t) 0, 0);
0a7de745 5690 if (submap->pmap == PMAP_NULL) {
2d21ac55 5691 vm_map_unlock(map);
0a7de745 5692 return KERN_NO_SPACE;
55e303ae 5693 }
0a7de745 5694#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
5695 pmap_set_nested(submap->pmap);
5696#endif
55e303ae 5697 }
2d21ac55 5698 result = pmap_nest(map->pmap,
0a7de745
A
5699 (VME_SUBMAP(entry))->pmap,
5700 (addr64_t)start,
5701 (addr64_t)start,
5702 (uint64_t)(end - start));
5703 if (result) {
2d21ac55 5704 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
0a7de745 5705 }
2d21ac55
A
5706 entry->use_pmap = TRUE;
5707 }
0a7de745 5708#else /* NO_NESTED_PMAP */
2d21ac55 5709 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0a7de745 5710#endif /* NO_NESTED_PMAP */
2d21ac55 5711 result = KERN_SUCCESS;
1c79356b
A
5712 }
5713 vm_map_unlock(map);
5714
0a7de745 5715 return result;
1c79356b
A
5716}
5717
5718/*
5719 * vm_map_protect:
5720 *
5721 * Sets the protection of the specified address
5722 * region in the target map. If "set_max" is
5723 * specified, the maximum protection is to be set;
5724 * otherwise, only the current protection is affected.
5725 */
5726kern_return_t
5727vm_map_protect(
0a7de745
A
5728 vm_map_t map,
5729 vm_map_offset_t start,
5730 vm_map_offset_t end,
5731 vm_prot_t new_prot,
5732 boolean_t set_max)
39037602 5733{
0a7de745
A
5734 vm_map_entry_t current;
5735 vm_map_offset_t prev;
5736 vm_map_entry_t entry;
5737 vm_prot_t new_max;
5738 int pmap_options = 0;
5739 kern_return_t kr;
1c79356b 5740
5c9f4661 5741 if (new_prot & VM_PROT_COPY) {
0a7de745
A
5742 vm_map_offset_t new_start;
5743 vm_prot_t cur_prot, max_prot;
5744 vm_map_kernel_flags_t kflags;
5c9f4661
A
5745
5746 /* LP64todo - see below */
5747 if (start >= map->max_offset) {
5748 return KERN_INVALID_ADDRESS;
5749 }
5750
d9a64523
A
5751#if VM_PROTECT_WX_FAIL
5752 if ((new_prot & VM_PROT_EXECUTE) &&
5753 map != kernel_map &&
5754 cs_process_enforcement(NULL)) {
5755 DTRACE_VM3(cs_wx,
0a7de745
A
5756 uint64_t, (uint64_t) start,
5757 uint64_t, (uint64_t) end,
5758 vm_prot_t, new_prot);
d9a64523 5759 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5760 proc_selfpid(),
5761 (current_task()->bsd_info
5762 ? proc_name_address(current_task()->bsd_info)
5763 : "?"),
5764 __FUNCTION__);
d9a64523
A
5765 return KERN_PROTECTION_FAILURE;
5766 }
5767#endif /* VM_PROTECT_WX_FAIL */
5768
5769 /*
5770 * Let vm_map_remap_extract() know that it will need to:
5771 * + make a copy of the mapping
5772 * + add VM_PROT_WRITE to the max protections
5773 * + remove any protections that are no longer allowed from the
5774 * max protections (to avoid any WRITE/EXECUTE conflict, for
5775 * example).
5776 * Note that "max_prot" is an IN/OUT parameter only for this
5777 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5778 * only.
5779 */
5780 max_prot = new_prot & VM_PROT_ALL;
5c9f4661
A
5781 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5782 kflags.vmkf_remap_prot_copy = TRUE;
d9a64523 5783 kflags.vmkf_overwrite_immutable = TRUE;
5c9f4661
A
5784 new_start = start;
5785 kr = vm_map_remap(map,
0a7de745
A
5786 &new_start,
5787 end - start,
5788 0, /* mask */
5789 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5790 kflags,
5791 0,
5792 map,
5793 start,
5794 TRUE, /* copy-on-write remapping! */
5795 &cur_prot,
5796 &max_prot,
5797 VM_INHERIT_DEFAULT);
5c9f4661
A
5798 if (kr != KERN_SUCCESS) {
5799 return kr;
5800 }
5801 new_prot &= ~VM_PROT_COPY;
5802 }
5803
1c79356b
A
5804 vm_map_lock(map);
5805
91447636
A
5806 /* LP64todo - remove this check when vm_map_commpage64()
5807 * no longer has to stuff in a map_entry for the commpage
5808 * above the map's max_offset.
5809 */
5810 if (start >= map->max_offset) {
5811 vm_map_unlock(map);
0a7de745 5812 return KERN_INVALID_ADDRESS;
91447636
A
5813 }
5814
0a7de745 5815 while (1) {
b0d623f7 5816 /*
0a7de745 5817 * Lookup the entry. If it doesn't start in a valid
b0d623f7
A
5818 * entry, return an error.
5819 */
0a7de745 5820 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 5821 vm_map_unlock(map);
0a7de745 5822 return KERN_INVALID_ADDRESS;
b0d623f7
A
5823 }
5824
0a7de745 5825 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
b0d623f7
A
5826 start = SUPERPAGE_ROUND_DOWN(start);
5827 continue;
5828 }
5829 break;
0a7de745
A
5830 }
5831 if (entry->superpage_size) {
5832 end = SUPERPAGE_ROUND_UP(end);
5833 }
1c79356b
A
5834
5835 /*
5836 * Make a first pass to check for protection and address
5837 * violations.
5838 */
5839
5840 current = entry;
5841 prev = current->vme_start;
5842 while ((current != vm_map_to_entry(map)) &&
0a7de745 5843 (current->vme_start < end)) {
1c79356b
A
5844 /*
5845 * If there is a hole, return an error.
5846 */
5847 if (current->vme_start != prev) {
5848 vm_map_unlock(map);
0a7de745 5849 return KERN_INVALID_ADDRESS;
1c79356b
A
5850 }
5851
5852 new_max = current->max_protection;
5c9f4661
A
5853 if ((new_prot & new_max) != new_prot) {
5854 vm_map_unlock(map);
0a7de745 5855 return KERN_PROTECTION_FAILURE;
1c79356b 5856 }
5ba3f43e 5857
d9a64523
A
5858 if ((new_prot & VM_PROT_WRITE) &&
5859 (new_prot & VM_PROT_EXECUTE) &&
5860#if !CONFIG_EMBEDDED
5861 map != kernel_map &&
5862 cs_process_enforcement(NULL) &&
5863#endif /* !CONFIG_EMBEDDED */
5864 !(current->used_for_jit)) {
5865 DTRACE_VM3(cs_wx,
0a7de745
A
5866 uint64_t, (uint64_t) current->vme_start,
5867 uint64_t, (uint64_t) current->vme_end,
5868 vm_prot_t, new_prot);
d9a64523 5869 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5870 proc_selfpid(),
5871 (current_task()->bsd_info
5872 ? proc_name_address(current_task()->bsd_info)
5873 : "?"),
5874 __FUNCTION__);
d9a64523
A
5875 new_prot &= ~VM_PROT_EXECUTE;
5876#if VM_PROTECT_WX_FAIL
5877 vm_map_unlock(map);
5878 return KERN_PROTECTION_FAILURE;
5879#endif /* VM_PROTECT_WX_FAIL */
5ba3f43e 5880 }
593a1d5f 5881
a39ff7e2
A
5882 /*
5883 * If the task has requested executable lockdown,
5884 * deny both:
5885 * - adding executable protections OR
5886 * - adding write protections to an existing executable mapping.
5887 */
5888 if (map->map_disallow_new_exec == TRUE) {
5889 if ((new_prot & VM_PROT_EXECUTE) ||
5890 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5891 vm_map_unlock(map);
0a7de745 5892 return KERN_PROTECTION_FAILURE;
a39ff7e2
A
5893 }
5894 }
5895
1c79356b
A
5896 prev = current->vme_end;
5897 current = current->vme_next;
5898 }
39037602 5899
5ba3f43e
A
5900#if __arm64__
5901 if (end > prev &&
5902 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5903 vm_map_entry_t prev_entry;
5904
5905 prev_entry = current->vme_prev;
5906 if (prev_entry != vm_map_to_entry(map) &&
5907 !prev_entry->map_aligned &&
5908 (vm_map_round_page(prev_entry->vme_end,
0a7de745
A
5909 VM_MAP_PAGE_MASK(map))
5910 == end)) {
5ba3f43e
A
5911 /*
5912 * The last entry in our range is not "map-aligned"
5913 * but it would have reached all the way to "end"
5914 * if it had been map-aligned, so this is not really
5915 * a hole in the range and we can proceed.
5916 */
5917 prev = end;
5918 }
5919 }
5920#endif /* __arm64__ */
39037602 5921
1c79356b
A
5922 if (end > prev) {
5923 vm_map_unlock(map);
0a7de745 5924 return KERN_INVALID_ADDRESS;
1c79356b
A
5925 }
5926
5927 /*
5928 * Go back and fix up protections.
5929 * Clip to start here if the range starts within
5930 * the entry.
5931 */
5932
5933 current = entry;
2d21ac55
A
5934 if (current != vm_map_to_entry(map)) {
5935 /* clip and unnest if necessary */
5936 vm_map_clip_start(map, current, start);
1c79356b 5937 }
2d21ac55 5938
1c79356b 5939 while ((current != vm_map_to_entry(map)) &&
0a7de745
A
5940 (current->vme_start < end)) {
5941 vm_prot_t old_prot;
1c79356b
A
5942
5943 vm_map_clip_end(map, current, end);
5944
fe8ab488
A
5945 if (current->is_sub_map) {
5946 /* clipping did unnest if needed */
5947 assert(!current->use_pmap);
5948 }
2d21ac55 5949
1c79356b
A
5950 old_prot = current->protection;
5951
5c9f4661
A
5952 if (set_max) {
5953 current->max_protection = new_prot;
5954 current->protection = new_prot & old_prot;
5955 } else {
5956 current->protection = new_prot;
5957 }
1c79356b
A
5958
5959 /*
5960 * Update physical map if necessary.
5ba3f43e
A
5961 * If the request is to turn off write protection,
5962 * we won't do it for real (in pmap). This is because
5963 * it would cause copy-on-write to fail. We've already
5964 * set, the new protection in the map, so if a
5965 * write-protect fault occurred, it will be fixed up
1c79356b
A
5966 * properly, COW or not.
5967 */
1c79356b 5968 if (current->protection != old_prot) {
1c79356b
A
5969 /* Look one level in we support nested pmaps */
5970 /* from mapped submaps which are direct entries */
5971 /* in our map */
0c530ab8 5972
2d21ac55 5973 vm_prot_t prot;
0c530ab8 5974
39037602
A
5975 prot = current->protection;
5976 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
0a7de745
A
5977 prot &= ~VM_PROT_WRITE;
5978 } else {
5979 assert(!VME_OBJECT(current)->code_signed);
5980 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
39037602 5981 }
2d21ac55 5982
0a7de745
A
5983 if (override_nx(map, VME_ALIAS(current)) && prot) {
5984 prot |= VM_PROT_EXECUTE;
5985 }
2d21ac55 5986
5ba3f43e
A
5987#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5988 if (!(old_prot & VM_PROT_EXECUTE) &&
5989 (prot & VM_PROT_EXECUTE) &&
d9a64523
A
5990 panic_on_unsigned_execute &&
5991 (proc_selfcsflags() & CS_KILL)) {
5ba3f43e
A
5992 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5993 }
5994#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5995
5996 if (pmap_has_prot_policy(prot)) {
5997 if (current->wired_count) {
5998 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
0a7de745 5999 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5ba3f43e
A
6000 }
6001
6002 /* If the pmap layer cares about this
6003 * protection type, force a fault for
6004 * each page so that vm_fault will
6005 * repopulate the page with the full
6006 * set of protections.
6007 */
6008 /*
6009 * TODO: We don't seem to need this,
6010 * but this is due to an internal
6011 * implementation detail of
6012 * pmap_protect. Do we want to rely
6013 * on this?
6014 */
6015 prot = VM_PROT_NONE;
6016 }
490019cf 6017
0c530ab8 6018 if (current->is_sub_map && current->use_pmap) {
5ba3f43e 6019 pmap_protect(VME_SUBMAP(current)->pmap,
0a7de745
A
6020 current->vme_start,
6021 current->vme_end,
6022 prot);
1c79356b 6023 } else {
5ba3f43e
A
6024 if (prot & VM_PROT_WRITE) {
6025 if (VME_OBJECT(current) == compressor_object) {
6026 /*
6027 * For write requests on the
6028 * compressor, we wil ask the
6029 * pmap layer to prevent us from
6030 * taking a write fault when we
6031 * attempt to access the mapping
6032 * next.
6033 */
6034 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6035 }
6036 }
6037
6038 pmap_protect_options(map->pmap,
0a7de745
A
6039 current->vme_start,
6040 current->vme_end,
6041 prot,
6042 pmap_options,
6043 NULL);
1c79356b 6044 }
1c79356b
A
6045 }
6046 current = current->vme_next;
6047 }
6048
5353443c 6049 current = entry;
91447636 6050 while ((current != vm_map_to_entry(map)) &&
0a7de745 6051 (current->vme_start <= end)) {
5353443c
A
6052 vm_map_simplify_entry(map, current);
6053 current = current->vme_next;
6054 }
6055
1c79356b 6056 vm_map_unlock(map);
0a7de745 6057 return KERN_SUCCESS;
1c79356b
A
6058}
6059
6060/*
6061 * vm_map_inherit:
6062 *
6063 * Sets the inheritance of the specified address
6064 * range in the target map. Inheritance
6065 * affects how the map will be shared with
6066 * child maps at the time of vm_map_fork.
6067 */
6068kern_return_t
6069vm_map_inherit(
0a7de745
A
6070 vm_map_t map,
6071 vm_map_offset_t start,
6072 vm_map_offset_t end,
6073 vm_inherit_t new_inheritance)
1c79356b 6074{
0a7de745
A
6075 vm_map_entry_t entry;
6076 vm_map_entry_t temp_entry;
1c79356b
A
6077
6078 vm_map_lock(map);
6079
6080 VM_MAP_RANGE_CHECK(map, start, end);
6081
6082 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6083 entry = temp_entry;
0a7de745 6084 } else {
1c79356b
A
6085 temp_entry = temp_entry->vme_next;
6086 entry = temp_entry;
6087 }
6088
6089 /* first check entire range for submaps which can't support the */
6090 /* given inheritance. */
6091 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
0a7de745
A
6092 if (entry->is_sub_map) {
6093 if (new_inheritance == VM_INHERIT_COPY) {
91447636 6094 vm_map_unlock(map);
0a7de745 6095 return KERN_INVALID_ARGUMENT;
91447636 6096 }
1c79356b
A
6097 }
6098
6099 entry = entry->vme_next;
6100 }
6101
6102 entry = temp_entry;
2d21ac55
A
6103 if (entry != vm_map_to_entry(map)) {
6104 /* clip and unnest if necessary */
6105 vm_map_clip_start(map, entry, start);
6106 }
1c79356b
A
6107
6108 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6109 vm_map_clip_end(map, entry, end);
fe8ab488
A
6110 if (entry->is_sub_map) {
6111 /* clip did unnest if needed */
6112 assert(!entry->use_pmap);
6113 }
1c79356b
A
6114
6115 entry->inheritance = new_inheritance;
6116
6117 entry = entry->vme_next;
6118 }
6119
6120 vm_map_unlock(map);
0a7de745 6121 return KERN_SUCCESS;
1c79356b
A
6122}
6123
2d21ac55
A
6124/*
6125 * Update the accounting for the amount of wired memory in this map. If the user has
6126 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6127 */
6128
6129static kern_return_t
6130add_wire_counts(
0a7de745
A
6131 vm_map_t map,
6132 vm_map_entry_t entry,
6133 boolean_t user_wire)
5ba3f43e 6134{
0a7de745 6135 vm_map_size_t size;
2d21ac55
A
6136
6137 if (user_wire) {
6d2010ae 6138 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
6139
6140 /*
6141 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6142 * this map entry.
6143 */
6144
6145 if (entry->user_wired_count == 0) {
6146 size = entry->vme_end - entry->vme_start;
5ba3f43e 6147
2d21ac55
A
6148 /*
6149 * Since this is the first time the user is wiring this map entry, check to see if we're
6150 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4ba76501 6151 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
2d21ac55
A
6152 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6153 * limit, then we fail.
6154 */
6155
4ba76501
A
6156 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6157 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
2d21ac55 6158 return KERN_RESOURCE_SHORTAGE;
0a7de745 6159 }
2d21ac55
A
6160
6161 /*
6162 * The first time the user wires an entry, we also increment the wired_count and add this to
6163 * the total that has been wired in the map.
6164 */
6165
0a7de745 6166 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6167 return KERN_FAILURE;
0a7de745 6168 }
2d21ac55
A
6169
6170 entry->wired_count++;
6171 map->user_wire_size += size;
6172 }
6173
0a7de745 6174 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6175 return KERN_FAILURE;
0a7de745 6176 }
2d21ac55
A
6177
6178 entry->user_wired_count++;
2d21ac55 6179 } else {
2d21ac55
A
6180 /*
6181 * The kernel's wiring the memory. Just bump the count and continue.
6182 */
6183
0a7de745 6184 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6185 panic("vm_map_wire: too many wirings");
0a7de745 6186 }
2d21ac55
A
6187
6188 entry->wired_count++;
6189 }
6190
6191 return KERN_SUCCESS;
6192}
6193
6194/*
6195 * Update the memory wiring accounting now that the given map entry is being unwired.
6196 */
6197
6198static void
6199subtract_wire_counts(
0a7de745
A
6200 vm_map_t map,
6201 vm_map_entry_t entry,
6202 boolean_t user_wire)
5ba3f43e 6203{
2d21ac55 6204 if (user_wire) {
2d21ac55
A
6205 /*
6206 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6207 */
6208
6209 if (entry->user_wired_count == 1) {
2d21ac55
A
6210 /*
6211 * We're removing the last user wire reference. Decrement the wired_count and the total
6212 * user wired memory for this map.
6213 */
6214
6215 assert(entry->wired_count >= 1);
6216 entry->wired_count--;
6217 map->user_wire_size -= entry->vme_end - entry->vme_start;
6218 }
6219
6220 assert(entry->user_wired_count >= 1);
6221 entry->user_wired_count--;
2d21ac55 6222 } else {
2d21ac55
A
6223 /*
6224 * The kernel is unwiring the memory. Just update the count.
6225 */
6226
6227 assert(entry->wired_count >= 1);
6228 entry->wired_count--;
6229 }
6230}
6231
/*
 * Count of wire requests that vm_map_wire_nested() rejected because the
 * target map entry was executable.
 */
int cs_executable_wire = 0;
39037602 6233
1c79356b
A
6234/*
6235 * vm_map_wire:
6236 *
6237 * Sets the pageability of the specified address range in the
6238 * target map as wired. Regions specified as not pageable require
6239 * locked-down physical memory and physical page maps. The
6240 * access_type variable indicates types of accesses that must not
6241 * generate page faults. This is checked against protection of
6242 * memory being locked-down.
6243 *
6244 * The map must not be locked, but a reference must remain to the
6245 * map throughout the call.
6246 */
91447636 6247static kern_return_t
1c79356b 6248vm_map_wire_nested(
0a7de745
A
6249 vm_map_t map,
6250 vm_map_offset_t start,
6251 vm_map_offset_t end,
6252 vm_prot_t caller_prot,
6253 vm_tag_t tag,
6254 boolean_t user_wire,
6255 pmap_t map_pmap,
6256 vm_map_offset_t pmap_addr,
6257 ppnum_t *physpage_p)
1c79356b 6258{
0a7de745
A
6259 vm_map_entry_t entry;
6260 vm_prot_t access_type;
6261 struct vm_map_entry *first_entry, tmp_entry;
6262 vm_map_t real_map;
6263 vm_map_offset_t s, e;
6264 kern_return_t rc;
6265 boolean_t need_wakeup;
6266 boolean_t main_map = FALSE;
6267 wait_interrupt_t interruptible_state;
6268 thread_t cur_thread;
6269 unsigned int last_timestamp;
6270 vm_map_size_t size;
6271 boolean_t wire_and_extract;
fe8ab488 6272
3e170ce0
A
6273 access_type = (caller_prot & VM_PROT_ALL);
6274
fe8ab488
A
6275 wire_and_extract = FALSE;
6276 if (physpage_p != NULL) {
6277 /*
6278 * The caller wants the physical page number of the
6279 * wired page. We return only one physical page number
6280 * so this works for only one page at a time.
6281 */
6282 if ((end - start) != PAGE_SIZE) {
6283 return KERN_INVALID_ARGUMENT;
6284 }
6285 wire_and_extract = TRUE;
6286 *physpage_p = 0;
6287 }
1c79356b
A
6288
6289 vm_map_lock(map);
0a7de745 6290 if (map_pmap == NULL) {
1c79356b 6291 main_map = TRUE;
0a7de745 6292 }
1c79356b
A
6293 last_timestamp = map->timestamp;
6294
6295 VM_MAP_RANGE_CHECK(map, start, end);
6296 assert(page_aligned(start));
6297 assert(page_aligned(end));
39236c6e
A
6298 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6299 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
6300 if (start == end) {
6301 /* We wired what the caller asked for, zero pages */
6302 vm_map_unlock(map);
6303 return KERN_SUCCESS;
6304 }
1c79356b 6305
2d21ac55
A
6306 need_wakeup = FALSE;
6307 cur_thread = current_thread();
6308
6309 s = start;
6310 rc = KERN_SUCCESS;
6311
6312 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 6313 entry = first_entry;
2d21ac55
A
6314 /*
6315 * vm_map_clip_start will be done later.
6316 * We don't want to unnest any nested submaps here !
6317 */
1c79356b
A
6318 } else {
6319 /* Start address is not in map */
2d21ac55
A
6320 rc = KERN_INVALID_ADDRESS;
6321 goto done;
1c79356b
A
6322 }
6323
2d21ac55
A
6324 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6325 /*
6326 * At this point, we have wired from "start" to "s".
6327 * We still need to wire from "s" to "end".
6328 *
6329 * "entry" hasn't been clipped, so it could start before "s"
6330 * and/or end after "end".
6331 */
6332
6333 /* "e" is how far we want to wire in this entry */
6334 e = entry->vme_end;
0a7de745 6335 if (e > end) {
2d21ac55 6336 e = end;
0a7de745 6337 }
2d21ac55 6338
1c79356b
A
6339 /*
6340 * If another thread is wiring/unwiring this entry then
6341 * block after informing other thread to wake us up.
6342 */
6343 if (entry->in_transition) {
9bccf70c
A
6344 wait_result_t wait_result;
6345
1c79356b
A
6346 /*
6347 * We have not clipped the entry. Make sure that
6348 * the start address is in range so that the lookup
6349 * below will succeed.
2d21ac55
A
6350 * "s" is the current starting point: we've already
6351 * wired from "start" to "s" and we still have
6352 * to wire from "s" to "end".
1c79356b 6353 */
1c79356b
A
6354
6355 entry->needs_wakeup = TRUE;
6356
6357 /*
6358 * wake up anybody waiting on entries that we have
6359 * already wired.
6360 */
6361 if (need_wakeup) {
6362 vm_map_entry_wakeup(map);
6363 need_wakeup = FALSE;
6364 }
6365 /*
6366 * User wiring is interruptible
6367 */
5ba3f43e 6368 wait_result = vm_map_entry_wait(map,
0a7de745
A
6369 (user_wire) ? THREAD_ABORTSAFE :
6370 THREAD_UNINT);
6371 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6372 /*
6373 * undo the wirings we have done so far
6374 * We do not clear the needs_wakeup flag,
6375 * because we cannot tell if we were the
6376 * only one waiting.
6377 */
2d21ac55
A
6378 rc = KERN_FAILURE;
6379 goto done;
1c79356b
A
6380 }
6381
1c79356b
A
6382 /*
6383 * Cannot avoid a lookup here. reset timestamp.
6384 */
6385 last_timestamp = map->timestamp;
6386
6387 /*
6388 * The entry could have been clipped, look it up again.
6389 * Worse that can happen is, it may not exist anymore.
6390 */
6391 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6392 /*
6393 * User: undo everything upto the previous
6394 * entry. let vm_map_unwire worry about
6395 * checking the validity of the range.
6396 */
2d21ac55
A
6397 rc = KERN_FAILURE;
6398 goto done;
1c79356b
A
6399 }
6400 entry = first_entry;
6401 continue;
6402 }
5ba3f43e 6403
2d21ac55 6404 if (entry->is_sub_map) {
0a7de745
A
6405 vm_map_offset_t sub_start;
6406 vm_map_offset_t sub_end;
6407 vm_map_offset_t local_start;
6408 vm_map_offset_t local_end;
6409 pmap_t pmap;
2d21ac55 6410
fe8ab488
A
6411 if (wire_and_extract) {
6412 /*
6413 * Wiring would result in copy-on-write
6414 * which would not be compatible with
6415 * the sharing we have with the original
6416 * provider of this memory.
6417 */
6418 rc = KERN_INVALID_ARGUMENT;
6419 goto done;
6420 }
6421
2d21ac55 6422 vm_map_clip_start(map, entry, s);
1c79356b
A
6423 vm_map_clip_end(map, entry, end);
6424
3e170ce0 6425 sub_start = VME_OFFSET(entry);
2d21ac55 6426 sub_end = entry->vme_end;
3e170ce0 6427 sub_end += VME_OFFSET(entry) - entry->vme_start;
5ba3f43e 6428
1c79356b 6429 local_end = entry->vme_end;
0a7de745
A
6430 if (map_pmap == NULL) {
6431 vm_object_t object;
6432 vm_object_offset_t offset;
6433 vm_prot_t prot;
6434 boolean_t wired;
6435 vm_map_entry_t local_entry;
6436 vm_map_version_t version;
6437 vm_map_t lookup_map;
6438
6439 if (entry->use_pmap) {
3e170ce0 6440 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
6441 /* ppc implementation requires that */
6442 /* submaps pmap address ranges line */
6443 /* up with parent map */
6444#ifdef notdef
6445 pmap_addr = sub_start;
6446#endif
2d21ac55 6447 pmap_addr = s;
1c79356b
A
6448 } else {
6449 pmap = map->pmap;
2d21ac55 6450 pmap_addr = s;
1c79356b 6451 }
2d21ac55 6452
1c79356b 6453 if (entry->wired_count) {
0a7de745 6454 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6455 goto done;
0a7de745 6456 }
2d21ac55
A
6457
6458 /*
6459 * The map was not unlocked:
6460 * no need to goto re-lookup.
6461 * Just go directly to next entry.
6462 */
1c79356b 6463 entry = entry->vme_next;
2d21ac55 6464 s = entry->vme_start;
1c79356b 6465 continue;
2d21ac55 6466 }
9bccf70c 6467
2d21ac55
A
6468 /* call vm_map_lookup_locked to */
6469 /* cause any needs copy to be */
6470 /* evaluated */
6471 local_start = entry->vme_start;
6472 lookup_map = map;
6473 vm_map_lock_write_to_read(map);
0a7de745
A
6474 if (vm_map_lookup_locked(
6475 &lookup_map, local_start,
6476 access_type | VM_PROT_COPY,
6477 OBJECT_LOCK_EXCLUSIVE,
6478 &version, &object,
6479 &offset, &prot, &wired,
6480 NULL,
6481 &real_map)) {
2d21ac55 6482 vm_map_unlock_read(lookup_map);
4bd07ac2 6483 assert(map_pmap == NULL);
2d21ac55 6484 vm_map_unwire(map, start,
0a7de745
A
6485 s, user_wire);
6486 return KERN_FAILURE;
2d21ac55 6487 }
316670eb 6488 vm_object_unlock(object);
0a7de745 6489 if (real_map != lookup_map) {
2d21ac55 6490 vm_map_unlock(real_map);
0a7de745 6491 }
2d21ac55
A
6492 vm_map_unlock_read(lookup_map);
6493 vm_map_lock(map);
1c79356b 6494
2d21ac55 6495 /* we unlocked, so must re-lookup */
5ba3f43e 6496 if (!vm_map_lookup_entry(map,
0a7de745
A
6497 local_start,
6498 &local_entry)) {
2d21ac55
A
6499 rc = KERN_FAILURE;
6500 goto done;
6501 }
6502
6503 /*
6504 * entry could have been "simplified",
6505 * so re-clip
6506 */
6507 entry = local_entry;
6508 assert(s == local_start);
6509 vm_map_clip_start(map, entry, s);
6510 vm_map_clip_end(map, entry, end);
6511 /* re-compute "e" */
6512 e = entry->vme_end;
0a7de745 6513 if (e > end) {
2d21ac55 6514 e = end;
0a7de745 6515 }
2d21ac55
A
6516
6517 /* did we have a change of type? */
6518 if (!entry->is_sub_map) {
6519 last_timestamp = map->timestamp;
6520 continue;
1c79356b
A
6521 }
6522 } else {
9bccf70c 6523 local_start = entry->vme_start;
2d21ac55
A
6524 pmap = map_pmap;
6525 }
6526
0a7de745 6527 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6528 goto done;
0a7de745 6529 }
2d21ac55
A
6530
6531 entry->in_transition = TRUE;
6532
6533 vm_map_unlock(map);
5ba3f43e 6534 rc = vm_map_wire_nested(VME_SUBMAP(entry),
0a7de745
A
6535 sub_start, sub_end,
6536 caller_prot, tag,
6537 user_wire, pmap, pmap_addr,
6538 NULL);
2d21ac55 6539 vm_map_lock(map);
9bccf70c 6540
1c79356b
A
6541 /*
6542 * Find the entry again. It could have been clipped
6543 * after we unlocked the map.
6544 */
9bccf70c 6545 if (!vm_map_lookup_entry(map, local_start,
0a7de745 6546 &first_entry)) {
9bccf70c 6547 panic("vm_map_wire: re-lookup failed");
0a7de745 6548 }
9bccf70c 6549 entry = first_entry;
1c79356b 6550
2d21ac55
A
6551 assert(local_start == s);
6552 /* re-compute "e" */
6553 e = entry->vme_end;
0a7de745 6554 if (e > end) {
2d21ac55 6555 e = end;
0a7de745 6556 }
2d21ac55 6557
1c79356b
A
6558 last_timestamp = map->timestamp;
6559 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6560 (entry->vme_start < e)) {
1c79356b
A
6561 assert(entry->in_transition);
6562 entry->in_transition = FALSE;
6563 if (entry->needs_wakeup) {
6564 entry->needs_wakeup = FALSE;
6565 need_wakeup = TRUE;
6566 }
6567 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 6568 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6569 }
6570 entry = entry->vme_next;
6571 }
0a7de745 6572 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6573 goto done;
1c79356b 6574 }
2d21ac55
A
6575
6576 /* no need to relookup again */
6577 s = entry->vme_start;
1c79356b
A
6578 continue;
6579 }
6580
6581 /*
6582 * If this entry is already wired then increment
6583 * the appropriate wire reference count.
6584 */
9bccf70c 6585 if (entry->wired_count) {
fe8ab488
A
6586 if ((entry->protection & access_type) != access_type) {
6587 /* found a protection problem */
6588
6589 /*
6590 * XXX FBDP
6591 * We should always return an error
6592 * in this case but since we didn't
6593 * enforce it before, let's do
6594 * it only for the new "wire_and_extract"
6595 * code path for now...
6596 */
6597 if (wire_and_extract) {
6598 rc = KERN_PROTECTION_FAILURE;
6599 goto done;
6600 }
6601 }
6602
1c79356b
A
6603 /*
6604 * entry is already wired down, get our reference
6605 * after clipping to our range.
6606 */
2d21ac55 6607 vm_map_clip_start(map, entry, s);
1c79356b 6608 vm_map_clip_end(map, entry, end);
1c79356b 6609
0a7de745 6610 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6611 goto done;
0a7de745 6612 }
2d21ac55 6613
fe8ab488 6614 if (wire_and_extract) {
0a7de745
A
6615 vm_object_t object;
6616 vm_object_offset_t offset;
6617 vm_page_t m;
fe8ab488
A
6618
6619 /*
6620 * We don't have to "wire" the page again
6621 * bit we still have to "extract" its
6622 * physical page number, after some sanity
6623 * checks.
6624 */
6625 assert((entry->vme_end - entry->vme_start)
0a7de745 6626 == PAGE_SIZE);
fe8ab488
A
6627 assert(!entry->needs_copy);
6628 assert(!entry->is_sub_map);
3e170ce0 6629 assert(VME_OBJECT(entry));
fe8ab488 6630 if (((entry->vme_end - entry->vme_start)
0a7de745 6631 != PAGE_SIZE) ||
fe8ab488
A
6632 entry->needs_copy ||
6633 entry->is_sub_map ||
3e170ce0 6634 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6635 rc = KERN_INVALID_ARGUMENT;
6636 goto done;
6637 }
6638
3e170ce0
A
6639 object = VME_OBJECT(entry);
6640 offset = VME_OFFSET(entry);
fe8ab488
A
6641 /* need exclusive lock to update m->dirty */
6642 if (entry->protection & VM_PROT_WRITE) {
6643 vm_object_lock(object);
6644 } else {
6645 vm_object_lock_shared(object);
6646 }
6647 m = vm_page_lookup(object, offset);
6648 assert(m != VM_PAGE_NULL);
39037602
A
6649 assert(VM_PAGE_WIRED(m));
6650 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6651 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
6652 if (entry->protection & VM_PROT_WRITE) {
6653 vm_object_lock_assert_exclusive(
39037602 6654 object);
d9a64523 6655 m->vmp_dirty = TRUE;
fe8ab488
A
6656 }
6657 } else {
6658 /* not already wired !? */
6659 *physpage_p = 0;
6660 }
6661 vm_object_unlock(object);
6662 }
6663
2d21ac55 6664 /* map was not unlocked: no need to relookup */
1c79356b 6665 entry = entry->vme_next;
2d21ac55 6666 s = entry->vme_start;
1c79356b
A
6667 continue;
6668 }
6669
6670 /*
6671 * Unwired entry or wire request transmitted via submap
6672 */
6673
5ba3f43e
A
6674 /*
6675 * Wiring would copy the pages to the shadow object.
6676 * The shadow object would not be code-signed so
6677 * attempting to execute code from these copied pages
6678 * would trigger a code-signing violation.
6679 */
d9a64523
A
6680
6681 if ((entry->protection & VM_PROT_EXECUTE)
6682#if !CONFIG_EMBEDDED
6683 &&
6684 map != kernel_map &&
6685 cs_process_enforcement(NULL)
6686#endif /* !CONFIG_EMBEDDED */
0a7de745 6687 ) {
5ba3f43e
A
6688#if MACH_ASSERT
6689 printf("pid %d[%s] wiring executable range from "
0a7de745
A
6690 "0x%llx to 0x%llx: rejected to preserve "
6691 "code-signing\n",
6692 proc_selfpid(),
6693 (current_task()->bsd_info
6694 ? proc_name_address(current_task()->bsd_info)
6695 : "?"),
6696 (uint64_t) entry->vme_start,
6697 (uint64_t) entry->vme_end);
5ba3f43e
A
6698#endif /* MACH_ASSERT */
6699 DTRACE_VM2(cs_executable_wire,
0a7de745
A
6700 uint64_t, (uint64_t)entry->vme_start,
6701 uint64_t, (uint64_t)entry->vme_end);
5ba3f43e
A
6702 cs_executable_wire++;
6703 rc = KERN_PROTECTION_FAILURE;
6704 goto done;
6705 }
39037602 6706
1c79356b
A
6707 /*
6708 * Perform actions of vm_map_lookup that need the write
6709 * lock on the map: create a shadow object for a
6710 * copy-on-write region, or an object for a zero-fill
6711 * region.
6712 */
6713 size = entry->vme_end - entry->vme_start;
6714 /*
6715 * If wiring a copy-on-write page, we need to copy it now
6716 * even if we're only (currently) requesting read access.
6717 * This is aggressive, but once it's wired we can't move it.
6718 */
6719 if (entry->needs_copy) {
fe8ab488
A
6720 if (wire_and_extract) {
6721 /*
6722 * We're supposed to share with the original
6723 * provider so should not be "needs_copy"
6724 */
6725 rc = KERN_INVALID_ARGUMENT;
6726 goto done;
6727 }
3e170ce0
A
6728
6729 VME_OBJECT_SHADOW(entry, size);
1c79356b 6730 entry->needs_copy = FALSE;
3e170ce0 6731 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6732 if (wire_and_extract) {
6733 /*
6734 * We're supposed to share with the original
6735 * provider so should already have an object.
6736 */
6737 rc = KERN_INVALID_ARGUMENT;
6738 goto done;
6739 }
3e170ce0
A
6740 VME_OBJECT_SET(entry, vm_object_allocate(size));
6741 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 6742 assert(entry->use_pmap);
1c79356b
A
6743 }
6744
2d21ac55 6745 vm_map_clip_start(map, entry, s);
1c79356b
A
6746 vm_map_clip_end(map, entry, end);
6747
2d21ac55 6748 /* re-compute "e" */
1c79356b 6749 e = entry->vme_end;
0a7de745 6750 if (e > end) {
2d21ac55 6751 e = end;
0a7de745 6752 }
1c79356b
A
6753
6754 /*
6755 * Check for holes and protection mismatch.
6756 * Holes: Next entry should be contiguous unless this
6757 * is the end of the region.
6758 * Protection: Access requested must be allowed, unless
6759 * wiring is by protection class
6760 */
2d21ac55
A
6761 if ((entry->vme_end < end) &&
6762 ((entry->vme_next == vm_map_to_entry(map)) ||
0a7de745 6763 (entry->vme_next->vme_start > entry->vme_end))) {
2d21ac55
A
6764 /* found a hole */
6765 rc = KERN_INVALID_ADDRESS;
6766 goto done;
6767 }
6768 if ((entry->protection & access_type) != access_type) {
6769 /* found a protection problem */
6770 rc = KERN_PROTECTION_FAILURE;
6771 goto done;
1c79356b
A
6772 }
6773
6774 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6775
0a7de745 6776 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6777 goto done;
0a7de745 6778 }
1c79356b
A
6779
6780 entry->in_transition = TRUE;
6781
6782 /*
6783 * This entry might get split once we unlock the map.
6784 * In vm_fault_wire(), we need the current range as
6785 * defined by this entry. In order for this to work
6786 * along with a simultaneous clip operation, we make a
6787 * temporary copy of this entry and use that for the
6788 * wiring. Note that the underlying objects do not
6789 * change during a clip.
6790 */
6791 tmp_entry = *entry;
6792
6793 /*
6794 * The in_transition state guarentees that the entry
6795 * (or entries for this range, if split occured) will be
6796 * there when the map lock is acquired for the second time.
6797 */
6798 vm_map_unlock(map);
0b4e3aa0 6799
0a7de745 6800 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6801 interruptible_state = thread_interrupt_level(THREAD_UNINT);
0a7de745 6802 } else {
91447636 6803 interruptible_state = THREAD_UNINT;
0a7de745 6804 }
9bccf70c 6805
0a7de745 6806 if (map_pmap) {
5ba3f43e 6807 rc = vm_fault_wire(map,
0a7de745
A
6808 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6809 physpage_p);
6810 } else {
5ba3f43e 6811 rc = vm_fault_wire(map,
0a7de745
A
6812 &tmp_entry, caller_prot, tag, map->pmap,
6813 tmp_entry.vme_start,
6814 physpage_p);
6815 }
0b4e3aa0 6816
0a7de745 6817 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6818 thread_interrupt_level(interruptible_state);
0a7de745 6819 }
0b4e3aa0 6820
1c79356b
A
6821 vm_map_lock(map);
6822
0a7de745 6823 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
6824 /*
6825 * Find the entry again. It could have been clipped
6826 * after we unlocked the map.
6827 */
6828 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745 6829 &first_entry)) {
1c79356b 6830 panic("vm_map_wire: re-lookup failed");
0a7de745 6831 }
1c79356b
A
6832
6833 entry = first_entry;
6834 }
6835
6836 last_timestamp = map->timestamp;
6837
6838 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6839 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
6840 assert(entry->in_transition);
6841 entry->in_transition = FALSE;
6842 if (entry->needs_wakeup) {
6843 entry->needs_wakeup = FALSE;
6844 need_wakeup = TRUE;
6845 }
0a7de745 6846 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6847 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6848 }
6849 entry = entry->vme_next;
6850 }
6851
0a7de745 6852 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6853 goto done;
1c79356b 6854 }
2d21ac55 6855
d190cdc3
A
6856 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6857 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6858 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6859 /* found a "new" hole */
6860 s = tmp_entry.vme_end;
6861 rc = KERN_INVALID_ADDRESS;
6862 goto done;
6863 }
6864
2d21ac55 6865 s = entry->vme_start;
1c79356b 6866 } /* end while loop through map entries */
2d21ac55
A
6867
6868done:
6869 if (rc == KERN_SUCCESS) {
6870 /* repair any damage we may have made to the VM map */
6871 vm_map_simplify_range(map, start, end);
6872 }
6873
1c79356b
A
6874 vm_map_unlock(map);
6875
6876 /*
6877 * wake up anybody waiting on entries we wired.
6878 */
0a7de745 6879 if (need_wakeup) {
1c79356b 6880 vm_map_entry_wakeup(map);
0a7de745 6881 }
1c79356b 6882
2d21ac55
A
6883 if (rc != KERN_SUCCESS) {
6884 /* undo what has been wired so far */
4bd07ac2 6885 vm_map_unwire_nested(map, start, s, user_wire,
0a7de745 6886 map_pmap, pmap_addr);
fe8ab488
A
6887 if (physpage_p) {
6888 *physpage_p = 0;
6889 }
2d21ac55
A
6890 }
6891
6892 return rc;
1c79356b
A
6893}
6894
6895kern_return_t
3e170ce0 6896vm_map_wire_external(
0a7de745
A
6897 vm_map_t map,
6898 vm_map_offset_t start,
6899 vm_map_offset_t end,
6900 vm_prot_t caller_prot,
6901 boolean_t user_wire)
1c79356b 6902{
0a7de745 6903 kern_return_t kret;
3e170ce0 6904
5ba3f43e 6905 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
0a7de745 6906 user_wire, (pmap_t)NULL, 0, NULL);
3e170ce0
A
6907 return kret;
6908}
1c79356b 6909
3e170ce0 6910kern_return_t
5ba3f43e 6911vm_map_wire_kernel(
0a7de745
A
6912 vm_map_t map,
6913 vm_map_offset_t start,
6914 vm_map_offset_t end,
6915 vm_prot_t caller_prot,
6916 vm_tag_t tag,
6917 boolean_t user_wire)
3e170ce0 6918{
0a7de745 6919 kern_return_t kret;
1c79356b 6920
5ba3f43e 6921 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
0a7de745 6922 user_wire, (pmap_t)NULL, 0, NULL);
fe8ab488
A
6923 return kret;
6924}
6925
6926kern_return_t
3e170ce0 6927vm_map_wire_and_extract_external(
0a7de745
A
6928 vm_map_t map,
6929 vm_map_offset_t start,
6930 vm_prot_t caller_prot,
6931 boolean_t user_wire,
6932 ppnum_t *physpage_p)
fe8ab488 6933{
0a7de745 6934 kern_return_t kret;
3e170ce0 6935
3e170ce0 6936 kret = vm_map_wire_nested(map,
0a7de745
A
6937 start,
6938 start + VM_MAP_PAGE_SIZE(map),
6939 caller_prot,
6940 vm_tag_bt(),
6941 user_wire,
6942 (pmap_t)NULL,
6943 0,
6944 physpage_p);
3e170ce0
A
6945 if (kret != KERN_SUCCESS &&
6946 physpage_p != NULL) {
6947 *physpage_p = 0;
6948 }
6949 return kret;
6950}
fe8ab488 6951
3e170ce0 6952kern_return_t
5ba3f43e 6953vm_map_wire_and_extract_kernel(
0a7de745
A
6954 vm_map_t map,
6955 vm_map_offset_t start,
6956 vm_prot_t caller_prot,
6957 vm_tag_t tag,
6958 boolean_t user_wire,
6959 ppnum_t *physpage_p)
3e170ce0 6960{
0a7de745 6961 kern_return_t kret;
fe8ab488
A
6962
6963 kret = vm_map_wire_nested(map,
0a7de745
A
6964 start,
6965 start + VM_MAP_PAGE_SIZE(map),
6966 caller_prot,
6967 tag,
6968 user_wire,
6969 (pmap_t)NULL,
6970 0,
6971 physpage_p);
fe8ab488
A
6972 if (kret != KERN_SUCCESS &&
6973 physpage_p != NULL) {
6974 *physpage_p = 0;
6975 }
1c79356b
A
6976 return kret;
6977}
6978
6979/*
6980 * vm_map_unwire:
6981 *
6982 * Sets the pageability of the specified address range in the target
6983 * as pageable. Regions specified must have been wired previously.
6984 *
6985 * The map must not be locked, but a reference must remain to the map
6986 * throughout the call.
6987 *
6988 * Kernel will panic on failures. User unwire ignores holes and
6989 * unwired and intransition entries to avoid losing memory by leaving
6990 * it unwired.
6991 */
91447636 6992static kern_return_t
1c79356b 6993vm_map_unwire_nested(
0a7de745
A
6994 vm_map_t map,
6995 vm_map_offset_t start,
6996 vm_map_offset_t end,
6997 boolean_t user_wire,
6998 pmap_t map_pmap,
6999 vm_map_offset_t pmap_addr)
1c79356b 7000{
0a7de745
A
7001 vm_map_entry_t entry;
7002 struct vm_map_entry *first_entry, tmp_entry;
7003 boolean_t need_wakeup;
7004 boolean_t main_map = FALSE;
7005 unsigned int last_timestamp;
1c79356b
A
7006
7007 vm_map_lock(map);
0a7de745 7008 if (map_pmap == NULL) {
1c79356b 7009 main_map = TRUE;
0a7de745 7010 }
1c79356b
A
7011 last_timestamp = map->timestamp;
7012
7013 VM_MAP_RANGE_CHECK(map, start, end);
7014 assert(page_aligned(start));
7015 assert(page_aligned(end));
39236c6e
A
7016 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7017 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 7018
2d21ac55
A
7019 if (start == end) {
7020 /* We unwired what the caller asked for: zero pages */
7021 vm_map_unlock(map);
7022 return KERN_SUCCESS;
7023 }
7024
1c79356b
A
7025 if (vm_map_lookup_entry(map, start, &first_entry)) {
7026 entry = first_entry;
2d21ac55
A
7027 /*
7028 * vm_map_clip_start will be done later.
7029 * We don't want to unnest any nested sub maps here !
7030 */
0a7de745 7031 } else {
2d21ac55
A
7032 if (!user_wire) {
7033 panic("vm_map_unwire: start not found");
7034 }
1c79356b
A
7035 /* Start address is not in map. */
7036 vm_map_unlock(map);
0a7de745 7037 return KERN_INVALID_ADDRESS;
1c79356b
A
7038 }
7039
b0d623f7
A
7040 if (entry->superpage_size) {
7041 /* superpages are always wired */
7042 vm_map_unlock(map);
7043 return KERN_INVALID_ADDRESS;
7044 }
7045
1c79356b
A
7046 need_wakeup = FALSE;
7047 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7048 if (entry->in_transition) {
7049 /*
7050 * 1)
7051 * Another thread is wiring down this entry. Note
7052 * that if it is not for the other thread we would
7053 * be unwiring an unwired entry. This is not
7054 * permitted. If we wait, we will be unwiring memory
7055 * we did not wire.
7056 *
7057 * 2)
7058 * Another thread is unwiring this entry. We did not
7059 * have a reference to it, because if we did, this
7060 * entry will not be getting unwired now.
7061 */
2d21ac55
A
7062 if (!user_wire) {
7063 /*
7064 * XXX FBDP
7065 * This could happen: there could be some
7066 * overlapping vslock/vsunlock operations
7067 * going on.
7068 * We should probably just wait and retry,
7069 * but then we have to be careful that this
5ba3f43e 7070 * entry could get "simplified" after
2d21ac55
A
7071 * "in_transition" gets unset and before
7072 * we re-lookup the entry, so we would
7073 * have to re-clip the entry to avoid
7074 * re-unwiring what we have already unwired...
7075 * See vm_map_wire_nested().
7076 *
7077 * Or we could just ignore "in_transition"
7078 * here and proceed to decement the wired
7079 * count(s) on this entry. That should be fine
7080 * as long as "wired_count" doesn't drop all
7081 * the way to 0 (and we should panic if THAT
7082 * happens).
7083 */
1c79356b 7084 panic("vm_map_unwire: in_transition entry");
2d21ac55 7085 }
1c79356b
A
7086
7087 entry = entry->vme_next;
7088 continue;
7089 }
7090
2d21ac55 7091 if (entry->is_sub_map) {
0a7de745
A
7092 vm_map_offset_t sub_start;
7093 vm_map_offset_t sub_end;
7094 vm_map_offset_t local_end;
7095 pmap_t pmap;
5ba3f43e 7096
1c79356b
A
7097 vm_map_clip_start(map, entry, start);
7098 vm_map_clip_end(map, entry, end);
7099
3e170ce0 7100 sub_start = VME_OFFSET(entry);
1c79356b 7101 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 7102 sub_end += VME_OFFSET(entry);
1c79356b 7103 local_end = entry->vme_end;
0a7de745
A
7104 if (map_pmap == NULL) {
7105 if (entry->use_pmap) {
3e170ce0 7106 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 7107 pmap_addr = sub_start;
2d21ac55 7108 } else {
1c79356b 7109 pmap = map->pmap;
9bccf70c 7110 pmap_addr = start;
2d21ac55
A
7111 }
7112 if (entry->wired_count == 0 ||
7113 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7114 if (!user_wire) {
2d21ac55 7115 panic("vm_map_unwire: entry is unwired");
0a7de745 7116 }
2d21ac55
A
7117 entry = entry->vme_next;
7118 continue;
7119 }
7120
7121 /*
7122 * Check for holes
7123 * Holes: Next entry should be contiguous unless
7124 * this is the end of the region.
7125 */
5ba3f43e 7126 if (((entry->vme_end < end) &&
0a7de745
A
7127 ((entry->vme_next == vm_map_to_entry(map)) ||
7128 (entry->vme_next->vme_start
7129 > entry->vme_end)))) {
7130 if (!user_wire) {
2d21ac55 7131 panic("vm_map_unwire: non-contiguous region");
0a7de745 7132 }
1c79356b 7133/*
0a7de745
A
7134 * entry = entry->vme_next;
7135 * continue;
7136 */
2d21ac55 7137 }
1c79356b 7138
2d21ac55 7139 subtract_wire_counts(map, entry, user_wire);
1c79356b 7140
2d21ac55
A
7141 if (entry->wired_count != 0) {
7142 entry = entry->vme_next;
7143 continue;
7144 }
1c79356b 7145
2d21ac55
A
7146 entry->in_transition = TRUE;
7147 tmp_entry = *entry;/* see comment in vm_map_wire() */
7148
7149 /*
7150 * We can unlock the map now. The in_transition state
7151 * guarantees existance of the entry.
7152 */
7153 vm_map_unlock(map);
5ba3f43e 7154 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745 7155 sub_start, sub_end, user_wire, pmap, pmap_addr);
2d21ac55 7156 vm_map_lock(map);
1c79356b 7157
0a7de745 7158 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7159 /*
5ba3f43e 7160 * Find the entry again. It could have been
2d21ac55
A
7161 * clipped or deleted after we unlocked the map.
7162 */
5ba3f43e 7163 if (!vm_map_lookup_entry(map,
0a7de745
A
7164 tmp_entry.vme_start,
7165 &first_entry)) {
7166 if (!user_wire) {
2d21ac55 7167 panic("vm_map_unwire: re-lookup failed");
0a7de745 7168 }
2d21ac55 7169 entry = first_entry->vme_next;
0a7de745 7170 } else {
2d21ac55 7171 entry = first_entry;
0a7de745 7172 }
2d21ac55
A
7173 }
7174 last_timestamp = map->timestamp;
1c79356b 7175
1c79356b 7176 /*
2d21ac55 7177 * clear transition bit for all constituent entries
5ba3f43e 7178 * that were in the original entry (saved in
2d21ac55
A
7179 * tmp_entry). Also check for waiters.
7180 */
7181 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7182 (entry->vme_start < tmp_entry.vme_end)) {
2d21ac55
A
7183 assert(entry->in_transition);
7184 entry->in_transition = FALSE;
7185 if (entry->needs_wakeup) {
7186 entry->needs_wakeup = FALSE;
7187 need_wakeup = TRUE;
7188 }
7189 entry = entry->vme_next;
1c79356b 7190 }
2d21ac55 7191 continue;
1c79356b 7192 } else {
2d21ac55 7193 vm_map_unlock(map);
3e170ce0 7194 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745
A
7195 sub_start, sub_end, user_wire, map_pmap,
7196 pmap_addr);
2d21ac55 7197 vm_map_lock(map);
1c79356b 7198
0a7de745 7199 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7200 /*
5ba3f43e 7201 * Find the entry again. It could have been
2d21ac55
A
7202 * clipped or deleted after we unlocked the map.
7203 */
5ba3f43e 7204 if (!vm_map_lookup_entry(map,
0a7de745
A
7205 tmp_entry.vme_start,
7206 &first_entry)) {
7207 if (!user_wire) {
2d21ac55 7208 panic("vm_map_unwire: re-lookup failed");
0a7de745 7209 }
2d21ac55 7210 entry = first_entry->vme_next;
0a7de745 7211 } else {
2d21ac55 7212 entry = first_entry;
0a7de745 7213 }
2d21ac55
A
7214 }
7215 last_timestamp = map->timestamp;
1c79356b
A
7216 }
7217 }
7218
7219
9bccf70c 7220 if ((entry->wired_count == 0) ||
2d21ac55 7221 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7222 if (!user_wire) {
1c79356b 7223 panic("vm_map_unwire: entry is unwired");
0a7de745 7224 }
1c79356b
A
7225
7226 entry = entry->vme_next;
7227 continue;
7228 }
5ba3f43e 7229
1c79356b 7230 assert(entry->wired_count > 0 &&
0a7de745 7231 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
7232
7233 vm_map_clip_start(map, entry, start);
7234 vm_map_clip_end(map, entry, end);
7235
7236 /*
7237 * Check for holes
7238 * Holes: Next entry should be contiguous unless
7239 * this is the end of the region.
7240 */
5ba3f43e 7241 if (((entry->vme_end < end) &&
0a7de745
A
7242 ((entry->vme_next == vm_map_to_entry(map)) ||
7243 (entry->vme_next->vme_start > entry->vme_end)))) {
7244 if (!user_wire) {
1c79356b 7245 panic("vm_map_unwire: non-contiguous region");
0a7de745 7246 }
1c79356b
A
7247 entry = entry->vme_next;
7248 continue;
7249 }
7250
2d21ac55 7251 subtract_wire_counts(map, entry, user_wire);
1c79356b 7252
9bccf70c 7253 if (entry->wired_count != 0) {
1c79356b
A
7254 entry = entry->vme_next;
7255 continue;
1c79356b
A
7256 }
7257
0a7de745 7258 if (entry->zero_wired_pages) {
b0d623f7
A
7259 entry->zero_wired_pages = FALSE;
7260 }
7261
1c79356b 7262 entry->in_transition = TRUE;
0a7de745 7263 tmp_entry = *entry; /* see comment in vm_map_wire() */
1c79356b
A
7264
7265 /*
7266 * We can unlock the map now. The in_transition state
7267 * guarantees existance of the entry.
7268 */
7269 vm_map_unlock(map);
0a7de745 7270 if (map_pmap) {
5ba3f43e 7271 vm_fault_unwire(map,
0a7de745 7272 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 7273 } else {
5ba3f43e 7274 vm_fault_unwire(map,
0a7de745
A
7275 &tmp_entry, FALSE, map->pmap,
7276 tmp_entry.vme_start);
1c79356b
A
7277 }
7278 vm_map_lock(map);
7279
0a7de745 7280 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
7281 /*
7282 * Find the entry again. It could have been clipped
7283 * or deleted after we unlocked the map.
7284 */
7285 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745
A
7286 &first_entry)) {
7287 if (!user_wire) {
2d21ac55 7288 panic("vm_map_unwire: re-lookup failed");
0a7de745 7289 }
1c79356b 7290 entry = first_entry->vme_next;
0a7de745 7291 } else {
1c79356b 7292 entry = first_entry;
0a7de745 7293 }
1c79356b
A
7294 }
7295 last_timestamp = map->timestamp;
7296
7297 /*
7298 * clear transition bit for all constituent entries that
7299 * were in the original entry (saved in tmp_entry). Also
7300 * check for waiters.
7301 */
7302 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7303 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
7304 assert(entry->in_transition);
7305 entry->in_transition = FALSE;
7306 if (entry->needs_wakeup) {
7307 entry->needs_wakeup = FALSE;
7308 need_wakeup = TRUE;
7309 }
7310 entry = entry->vme_next;
7311 }
7312 }
91447636
A
7313
7314 /*
7315 * We might have fragmented the address space when we wired this
7316 * range of addresses. Attempt to re-coalesce these VM map entries
7317 * with their neighbors now that they're no longer wired.
7318 * Under some circumstances, address space fragmentation can
7319 * prevent VM object shadow chain collapsing, which can cause
7320 * swap space leaks.
7321 */
7322 vm_map_simplify_range(map, start, end);
7323
1c79356b
A
7324 vm_map_unlock(map);
7325 /*
7326 * wake up anybody waiting on entries that we have unwired.
7327 */
0a7de745 7328 if (need_wakeup) {
1c79356b 7329 vm_map_entry_wakeup(map);
0a7de745
A
7330 }
7331 return KERN_SUCCESS;
1c79356b
A
7332}
7333
7334kern_return_t
7335vm_map_unwire(
0a7de745
A
7336 vm_map_t map,
7337 vm_map_offset_t start,
7338 vm_map_offset_t end,
7339 boolean_t user_wire)
1c79356b 7340{
5ba3f43e 7341 return vm_map_unwire_nested(map, start, end,
0a7de745 7342 user_wire, (pmap_t)NULL, 0);
1c79356b
A
7343}
7344
7345
7346/*
7347 * vm_map_entry_delete: [ internal use only ]
7348 *
7349 * Deallocate the given entry from the target map.
5ba3f43e 7350 */
91447636 7351static void
1c79356b 7352vm_map_entry_delete(
0a7de745
A
7353 vm_map_t map,
7354 vm_map_entry_t entry)
1c79356b 7355{
0a7de745
A
7356 vm_map_offset_t s, e;
7357 vm_object_t object;
7358 vm_map_t submap;
1c79356b
A
7359
7360 s = entry->vme_start;
7361 e = entry->vme_end;
7362 assert(page_aligned(s));
7363 assert(page_aligned(e));
39236c6e
A
7364 if (entry->map_aligned == TRUE) {
7365 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7366 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7367 }
1c79356b
A
7368 assert(entry->wired_count == 0);
7369 assert(entry->user_wired_count == 0);
b0d623f7 7370 assert(!entry->permanent);
1c79356b
A
7371
7372 if (entry->is_sub_map) {
7373 object = NULL;
3e170ce0 7374 submap = VME_SUBMAP(entry);
1c79356b
A
7375 } else {
7376 submap = NULL;
3e170ce0 7377 object = VME_OBJECT(entry);
1c79356b
A
7378 }
7379
6d2010ae 7380 vm_map_store_entry_unlink(map, entry);
1c79356b
A
7381 map->size -= e - s;
7382
7383 vm_map_entry_dispose(map, entry);
7384
7385 vm_map_unlock(map);
7386 /*
7387 * Deallocate the object only after removing all
7388 * pmap entries pointing to its pages.
7389 */
0a7de745 7390 if (submap) {
1c79356b 7391 vm_map_deallocate(submap);
0a7de745 7392 } else {
2d21ac55 7393 vm_object_deallocate(object);
0a7de745 7394 }
1c79356b
A
7395}
7396
7397void
7398vm_map_submap_pmap_clean(
0a7de745
A
7399 vm_map_t map,
7400 vm_map_offset_t start,
7401 vm_map_offset_t end,
7402 vm_map_t sub_map,
7403 vm_map_offset_t offset)
1c79356b 7404{
0a7de745
A
7405 vm_map_offset_t submap_start;
7406 vm_map_offset_t submap_end;
7407 vm_map_size_t remove_size;
7408 vm_map_entry_t entry;
1c79356b
A
7409
7410 submap_end = offset + (end - start);
7411 submap_start = offset;
b7266188
A
7412
7413 vm_map_lock_read(sub_map);
0a7de745 7414 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
1c79356b 7415 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7416 if (offset > entry->vme_start) {
1c79356b 7417 remove_size -= offset - entry->vme_start;
0a7de745 7418 }
5ba3f43e 7419
1c79356b 7420
0a7de745 7421 if (submap_end < entry->vme_end) {
1c79356b 7422 remove_size -=
0a7de745 7423 entry->vme_end - submap_end;
1c79356b 7424 }
0a7de745 7425 if (entry->is_sub_map) {
1c79356b
A
7426 vm_map_submap_pmap_clean(
7427 sub_map,
7428 start,
7429 start + remove_size,
3e170ce0
A
7430 VME_SUBMAP(entry),
7431 VME_OFFSET(entry));
1c79356b 7432 } else {
cb323159
A
7433 if (map->mapped_in_other_pmaps &&
7434 os_ref_get_count(&map->map_refcnt) != 0 &&
7435 VME_OBJECT(entry) != NULL) {
3e170ce0
A
7436 vm_object_pmap_protect_options(
7437 VME_OBJECT(entry),
7438 (VME_OFFSET(entry) +
0a7de745
A
7439 offset -
7440 entry->vme_start),
9bccf70c
A
7441 remove_size,
7442 PMAP_NULL,
7443 entry->vme_start,
3e170ce0
A
7444 VM_PROT_NONE,
7445 PMAP_OPTIONS_REMOVE);
9bccf70c 7446 } else {
5ba3f43e 7447 pmap_remove(map->pmap,
0a7de745
A
7448 (addr64_t)start,
7449 (addr64_t)(start + remove_size));
9bccf70c 7450 }
1c79356b
A
7451 }
7452 }
7453
7454 entry = entry->vme_next;
2d21ac55 7455
0a7de745
A
7456 while ((entry != vm_map_to_entry(sub_map))
7457 && (entry->vme_start < submap_end)) {
5ba3f43e 7458 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7459 if (submap_end < entry->vme_end) {
1c79356b
A
7460 remove_size -= entry->vme_end - submap_end;
7461 }
0a7de745 7462 if (entry->is_sub_map) {
1c79356b
A
7463 vm_map_submap_pmap_clean(
7464 sub_map,
7465 (start + entry->vme_start) - offset,
7466 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
7467 VME_SUBMAP(entry),
7468 VME_OFFSET(entry));
1c79356b 7469 } else {
cb323159
A
7470 if (map->mapped_in_other_pmaps &&
7471 os_ref_get_count(&map->map_refcnt) != 0 &&
7472 VME_OBJECT(entry) != NULL) {
3e170ce0
A
7473 vm_object_pmap_protect_options(
7474 VME_OBJECT(entry),
7475 VME_OFFSET(entry),
9bccf70c
A
7476 remove_size,
7477 PMAP_NULL,
7478 entry->vme_start,
3e170ce0
A
7479 VM_PROT_NONE,
7480 PMAP_OPTIONS_REMOVE);
9bccf70c 7481 } else {
5ba3f43e 7482 pmap_remove(map->pmap,
0a7de745
A
7483 (addr64_t)((start + entry->vme_start)
7484 - offset),
7485 (addr64_t)(((start + entry->vme_start)
7486 - offset) + remove_size));
9bccf70c 7487 }
1c79356b
A
7488 }
7489 entry = entry->vme_next;
b7266188
A
7490 }
7491 vm_map_unlock_read(sub_map);
1c79356b
A
7492 return;
7493}
7494
d9a64523
A
7495/*
7496 * virt_memory_guard_ast:
7497 *
7498 * Handle the AST callout for a virtual memory guard.
7499 * raise an EXC_GUARD exception and terminate the task
7500 * if configured to do so.
7501 */
7502void
7503virt_memory_guard_ast(
7504 thread_t thread,
7505 mach_exception_data_type_t code,
7506 mach_exception_data_type_t subcode)
7507{
7508 task_t task = thread->task;
7509 assert(task != kernel_task);
7510 assert(task == current_task());
7511 uint32_t behavior;
7512
7513 behavior = task->task_exc_guard;
7514
7515 /* Is delivery enabled */
7516 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7517 return;
7518 }
7519
7520 /* If only once, make sure we're that once */
7521 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7522 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7523
7524 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7525 break;
7526 }
7527 behavior = task->task_exc_guard;
7528 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7529 return;
7530 }
7531 }
7532
7533 /* Raise exception via corpse fork or synchronously */
7534 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7535 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7536 task_violated_guard(code, subcode, NULL);
7537 } else {
7538 task_exception_notify(EXC_GUARD, code, subcode);
7539 }
7540
7541 /* Terminate the task if desired */
7542 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7543 task_bsdtask_kill(current_task());
7544 }
7545}
7546
7547/*
7548 * vm_map_guard_exception:
7549 *
7550 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7551 *
7552 * Right now, we do this when we find nothing mapped, or a
7553 * gap in the mapping when a user address space deallocate
7554 * was requested. We report the address of the first gap found.
7555 */
7556static void
7557vm_map_guard_exception(
7558 vm_map_offset_t gap_start,
7559 unsigned reason)
7560{
7561 mach_exception_code_t code = 0;
7562 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7563 unsigned int target = 0; /* should we pass in pid associated with map? */
7564 mach_exception_data_type_t subcode = (uint64_t)gap_start;
cb323159
A
7565 boolean_t fatal = FALSE;
7566
7567 task_t task = current_task();
d9a64523
A
7568
7569 /* Can't deliver exceptions to kernel task */
cb323159 7570 if (task == kernel_task) {
d9a64523 7571 return;
0a7de745 7572 }
d9a64523
A
7573
7574 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7575 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7576 EXC_GUARD_ENCODE_TARGET(code, target);
cb323159
A
7577
7578 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7579 fatal = TRUE;
7580 }
7581 thread_guard_violation(current_thread(), code, subcode, fatal);
d9a64523
A
7582}
7583
1c79356b
A
7584/*
7585 * vm_map_delete: [ internal use only ]
7586 *
7587 * Deallocates the given address range from the target map.
7588 * Removes all user wirings. Unwires one kernel wiring if
7589 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7590 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7591 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7592 *
7593 * This routine is called with map locked and leaves map locked.
7594 */
91447636 7595static kern_return_t
1c79356b 7596vm_map_delete(
0a7de745
A
7597 vm_map_t map,
7598 vm_map_offset_t start,
7599 vm_map_offset_t end,
7600 int flags,
7601 vm_map_t zap_map)
1c79356b 7602{
0a7de745
A
7603 vm_map_entry_t entry, next;
7604 struct vm_map_entry *first_entry, tmp_entry;
7605 vm_map_offset_t s;
7606 vm_object_t object;
7607 boolean_t need_wakeup;
7608 unsigned int last_timestamp = ~0; /* unlikely value */
7609 int interruptible;
7610 vm_map_offset_t gap_start;
cb323159
A
7611 __unused vm_map_offset_t save_start = start;
7612 __unused vm_map_offset_t save_end = end;
0a7de745
A
7613 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7614 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7615
ea3f0419 7616 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
d9a64523 7617 gap_start = FIND_GAP;
0a7de745 7618 } else {
d9a64523 7619 gap_start = GAPS_OK;
0a7de745 7620 }
1c79356b 7621
5ba3f43e 7622 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
0a7de745 7623 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
7624
7625 /*
7626 * All our DMA I/O operations in IOKit are currently done by
7627 * wiring through the map entries of the task requesting the I/O.
7628 * Because of this, we must always wait for kernel wirings
7629 * to go away on the entries before deleting them.
7630 *
7631 * Any caller who wants to actually remove a kernel wiring
7632 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7633 * properly remove one wiring instead of blasting through
7634 * them all.
7635 */
7636 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7637
0a7de745 7638 while (1) {
b0d623f7
A
7639 /*
7640 * Find the start of the region, and clip it
7641 */
7642 if (vm_map_lookup_entry(map, start, &first_entry)) {
7643 entry = first_entry;
fe8ab488
A
7644 if (map == kalloc_map &&
7645 (entry->vme_start != start ||
0a7de745 7646 entry->vme_end != end)) {
fe8ab488 7647 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7648 "mismatched entry %p [0x%llx:0x%llx]\n",
7649 map,
7650 (uint64_t)start,
7651 (uint64_t)end,
7652 entry,
7653 (uint64_t)entry->vme_start,
7654 (uint64_t)entry->vme_end);
fe8ab488 7655 }
d9a64523
A
7656
7657 /*
7658 * If in a superpage, extend the range to include the start of the mapping.
7659 */
7660 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
b0d623f7
A
7661 start = SUPERPAGE_ROUND_DOWN(start);
7662 continue;
7663 }
d9a64523 7664
b0d623f7
A
7665 if (start == entry->vme_start) {
7666 /*
7667 * No need to clip. We don't want to cause
7668 * any unnecessary unnesting in this case...
7669 */
7670 } else {
fe8ab488
A
7671 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7672 entry->map_aligned &&
7673 !VM_MAP_PAGE_ALIGNED(
7674 start,
7675 VM_MAP_PAGE_MASK(map))) {
7676 /*
7677 * The entry will no longer be
7678 * map-aligned after clipping
7679 * and the caller said it's OK.
7680 */
7681 entry->map_aligned = FALSE;
7682 }
7683 if (map == kalloc_map) {
7684 panic("vm_map_delete(%p,0x%llx,0x%llx):"
0a7de745
A
7685 " clipping %p at 0x%llx\n",
7686 map,
7687 (uint64_t)start,
7688 (uint64_t)end,
7689 entry,
7690 (uint64_t)start);
fe8ab488 7691 }
b0d623f7
A
7692 vm_map_clip_start(map, entry, start);
7693 }
7694
2d21ac55 7695 /*
b0d623f7
A
7696 * Fix the lookup hint now, rather than each
7697 * time through the loop.
2d21ac55 7698 */
b0d623f7 7699 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 7700 } else {
fe8ab488 7701 if (map->pmap == kernel_pmap &&
cb323159 7702 os_ref_get_count(&map->map_refcnt) != 0) {
fe8ab488 7703 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7704 "no map entry at 0x%llx\n",
7705 map,
7706 (uint64_t)start,
7707 (uint64_t)end,
7708 (uint64_t)start);
fe8ab488 7709 }
b0d623f7 7710 entry = first_entry->vme_next;
0a7de745 7711 if (gap_start == FIND_GAP) {
d9a64523 7712 gap_start = start;
0a7de745 7713 }
2d21ac55 7714 }
b0d623f7 7715 break;
1c79356b 7716 }
0a7de745 7717 if (entry->superpage_size) {
b0d623f7 7718 end = SUPERPAGE_ROUND_UP(end);
0a7de745 7719 }
1c79356b
A
7720
7721 need_wakeup = FALSE;
7722 /*
7723 * Step through all entries in this region
7724 */
2d21ac55
A
7725 s = entry->vme_start;
7726 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7727 /*
7728 * At this point, we have deleted all the memory entries
7729 * between "start" and "s". We still need to delete
7730 * all memory entries between "s" and "end".
7731 * While we were blocked and the map was unlocked, some
7732 * new memory entries could have been re-allocated between
7733 * "start" and "s" and we don't want to mess with those.
7734 * Some of those entries could even have been re-assembled
7735 * with an entry after "s" (in vm_map_simplify_entry()), so
7736 * we may have to vm_map_clip_start() again.
7737 */
1c79356b 7738
2d21ac55
A
7739 if (entry->vme_start >= s) {
7740 /*
7741 * This entry starts on or after "s"
7742 * so no need to clip its start.
7743 */
7744 } else {
7745 /*
7746 * This entry has been re-assembled by a
7747 * vm_map_simplify_entry(). We need to
7748 * re-clip its start.
7749 */
fe8ab488
A
7750 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7751 entry->map_aligned &&
7752 !VM_MAP_PAGE_ALIGNED(s,
0a7de745 7753 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7754 /*
7755 * The entry will no longer be map-aligned
7756 * after clipping and the caller said it's OK.
7757 */
7758 entry->map_aligned = FALSE;
7759 }
7760 if (map == kalloc_map) {
7761 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7762 "clipping %p at 0x%llx\n",
7763 map,
7764 (uint64_t)start,
7765 (uint64_t)end,
7766 entry,
7767 (uint64_t)s);
fe8ab488 7768 }
2d21ac55
A
7769 vm_map_clip_start(map, entry, s);
7770 }
7771 if (entry->vme_end <= end) {
7772 /*
7773 * This entry is going away completely, so no need
7774 * to clip and possibly cause an unnecessary unnesting.
7775 */
7776 } else {
fe8ab488
A
7777 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7778 entry->map_aligned &&
7779 !VM_MAP_PAGE_ALIGNED(end,
0a7de745 7780 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7781 /*
7782 * The entry will no longer be map-aligned
7783 * after clipping and the caller said it's OK.
7784 */
7785 entry->map_aligned = FALSE;
7786 }
7787 if (map == kalloc_map) {
7788 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7789 "clipping %p at 0x%llx\n",
7790 map,
7791 (uint64_t)start,
7792 (uint64_t)end,
7793 entry,
7794 (uint64_t)end);
fe8ab488 7795 }
2d21ac55
A
7796 vm_map_clip_end(map, entry, end);
7797 }
b0d623f7
A
7798
7799 if (entry->permanent) {
5ba3f43e
A
7800 if (map->pmap == kernel_pmap) {
7801 panic("%s(%p,0x%llx,0x%llx): "
0a7de745
A
7802 "attempt to remove permanent "
7803 "VM map entry "
7804 "%p [0x%llx:0x%llx]\n",
7805 __FUNCTION__,
7806 map,
7807 (uint64_t) start,
7808 (uint64_t) end,
7809 entry,
7810 (uint64_t) entry->vme_start,
7811 (uint64_t) entry->vme_end);
5ba3f43e
A
7812 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7813// printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7814 entry->permanent = FALSE;
d9a64523
A
7815#if PMAP_CS
7816 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7817 entry->permanent = FALSE;
7818
7819 printf("%d[%s] %s(0x%llx,0x%llx): "
0a7de745
A
7820 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7821 "prot 0x%x/0x%x\n",
7822 proc_selfpid(),
7823 (current_task()->bsd_info
7824 ? proc_name_address(current_task()->bsd_info)
7825 : "?"),
7826 __FUNCTION__,
7827 (uint64_t) start,
7828 (uint64_t) end,
7829 (uint64_t)entry->vme_start,
7830 (uint64_t)entry->vme_end,
7831 entry->protection,
7832 entry->max_protection);
d9a64523 7833#endif
5ba3f43e 7834 } else {
d9a64523 7835 if (vm_map_executable_immutable_verbose) {
5ba3f43e 7836 printf("%d[%s] %s(0x%llx,0x%llx): "
0a7de745
A
7837 "permanent entry [0x%llx:0x%llx] "
7838 "prot 0x%x/0x%x\n",
7839 proc_selfpid(),
7840 (current_task()->bsd_info
7841 ? proc_name_address(current_task()->bsd_info)
7842 : "?"),
7843 __FUNCTION__,
7844 (uint64_t) start,
7845 (uint64_t) end,
7846 (uint64_t)entry->vme_start,
7847 (uint64_t)entry->vme_end,
7848 entry->protection,
7849 entry->max_protection);
5ba3f43e
A
7850 }
7851 /*
7852 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7853 */
7854 DTRACE_VM5(vm_map_delete_permanent,
0a7de745
A
7855 vm_map_offset_t, entry->vme_start,
7856 vm_map_offset_t, entry->vme_end,
7857 vm_prot_t, entry->protection,
7858 vm_prot_t, entry->max_protection,
7859 int, VME_ALIAS(entry));
5ba3f43e 7860 }
b0d623f7
A
7861 }
7862
7863
1c79356b 7864 if (entry->in_transition) {
9bccf70c
A
7865 wait_result_t wait_result;
7866
1c79356b
A
7867 /*
7868 * Another thread is wiring/unwiring this entry.
7869 * Let the other thread know we are waiting.
7870 */
2d21ac55 7871 assert(s == entry->vme_start);
1c79356b
A
7872 entry->needs_wakeup = TRUE;
7873
7874 /*
7875 * wake up anybody waiting on entries that we have
7876 * already unwired/deleted.
7877 */
7878 if (need_wakeup) {
7879 vm_map_entry_wakeup(map);
7880 need_wakeup = FALSE;
7881 }
7882
9bccf70c 7883 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
7884
7885 if (interruptible &&
9bccf70c 7886 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
7887 /*
7888 * We do not clear the needs_wakeup flag,
7889 * since we cannot tell if we were the only one.
7890 */
7891 return KERN_ABORTED;
9bccf70c 7892 }
1c79356b
A
7893
7894 /*
7895 * The entry could have been clipped or it
7896 * may not exist anymore. Look it up again.
7897 */
7898 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
7899 /*
7900 * User: use the next entry
7901 */
0a7de745 7902 if (gap_start == FIND_GAP) {
d9a64523 7903 gap_start = s;
0a7de745 7904 }
1c79356b 7905 entry = first_entry->vme_next;
2d21ac55 7906 s = entry->vme_start;
1c79356b
A
7907 } else {
7908 entry = first_entry;
0c530ab8 7909 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7910 }
9bccf70c 7911 last_timestamp = map->timestamp;
1c79356b
A
7912 continue;
7913 } /* end in_transition */
7914
7915 if (entry->wired_count) {
0a7de745 7916 boolean_t user_wire;
2d21ac55
A
7917
7918 user_wire = entry->user_wired_count > 0;
7919
1c79356b 7920 /*
0a7de745 7921 * Remove a kernel wiring if requested
1c79356b 7922 */
b0d623f7 7923 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 7924 entry->wired_count--;
b0d623f7 7925 }
5ba3f43e 7926
b0d623f7
A
7927 /*
7928 * Remove all user wirings for proper accounting
7929 */
7930 if (entry->user_wired_count > 0) {
0a7de745 7931 while (entry->user_wired_count) {
b0d623f7 7932 subtract_wire_counts(map, entry, user_wire);
0a7de745 7933 }
b0d623f7 7934 }
1c79356b
A
7935
7936 if (entry->wired_count != 0) {
2d21ac55 7937 assert(map != kernel_map);
1c79356b
A
7938 /*
7939 * Cannot continue. Typical case is when
7940 * a user thread has physical io pending on
7941 * on this page. Either wait for the
7942 * kernel wiring to go away or return an
7943 * error.
7944 */
7945 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 7946 wait_result_t wait_result;
1c79356b 7947
2d21ac55 7948 assert(s == entry->vme_start);
1c79356b 7949 entry->needs_wakeup = TRUE;
9bccf70c 7950 wait_result = vm_map_entry_wait(map,
0a7de745 7951 interruptible);
1c79356b
A
7952
7953 if (interruptible &&
2d21ac55 7954 wait_result == THREAD_INTERRUPTED) {
1c79356b 7955 /*
5ba3f43e
A
7956 * We do not clear the
7957 * needs_wakeup flag, since we
7958 * cannot tell if we were the
1c79356b 7959 * only one.
2d21ac55 7960 */
1c79356b 7961 return KERN_ABORTED;
9bccf70c 7962 }
1c79356b
A
7963
7964 /*
2d21ac55 7965 * The entry could have been clipped or
1c79356b
A
7966 * it may not exist anymore. Look it
7967 * up again.
2d21ac55 7968 */
5ba3f43e 7969 if (!vm_map_lookup_entry(map, s,
0a7de745 7970 &first_entry)) {
2d21ac55 7971 assert(map != kernel_map);
1c79356b 7972 /*
2d21ac55
A
7973 * User: use the next entry
7974 */
0a7de745 7975 if (gap_start == FIND_GAP) {
d9a64523 7976 gap_start = s;
0a7de745 7977 }
1c79356b 7978 entry = first_entry->vme_next;
2d21ac55 7979 s = entry->vme_start;
1c79356b
A
7980 } else {
7981 entry = first_entry;
0c530ab8 7982 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7983 }
9bccf70c 7984 last_timestamp = map->timestamp;
1c79356b 7985 continue;
0a7de745 7986 } else {
1c79356b
A
7987 return KERN_FAILURE;
7988 }
7989 }
7990
7991 entry->in_transition = TRUE;
7992 /*
7993 * copy current entry. see comment in vm_map_wire()
7994 */
7995 tmp_entry = *entry;
2d21ac55 7996 assert(s == entry->vme_start);
1c79356b
A
7997
7998 /*
7999 * We can unlock the map now. The in_transition
8000 * state guarentees existance of the entry.
8001 */
8002 vm_map_unlock(map);
2d21ac55
A
8003
8004 if (tmp_entry.is_sub_map) {
8005 vm_map_t sub_map;
8006 vm_map_offset_t sub_start, sub_end;
8007 pmap_t pmap;
8008 vm_map_offset_t pmap_addr;
5ba3f43e 8009
2d21ac55 8010
3e170ce0
A
8011 sub_map = VME_SUBMAP(&tmp_entry);
8012 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55 8013 sub_end = sub_start + (tmp_entry.vme_end -
0a7de745 8014 tmp_entry.vme_start);
2d21ac55
A
8015 if (tmp_entry.use_pmap) {
8016 pmap = sub_map->pmap;
8017 pmap_addr = tmp_entry.vme_start;
8018 } else {
8019 pmap = map->pmap;
8020 pmap_addr = tmp_entry.vme_start;
8021 }
8022 (void) vm_map_unwire_nested(sub_map,
0a7de745
A
8023 sub_start, sub_end,
8024 user_wire,
8025 pmap, pmap_addr);
2d21ac55 8026 } else {
3e170ce0 8027 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
8028 pmap_protect_options(
8029 map->pmap,
8030 tmp_entry.vme_start,
8031 tmp_entry.vme_end,
8032 VM_PROT_NONE,
8033 PMAP_OPTIONS_REMOVE,
8034 NULL);
8035 }
2d21ac55 8036 vm_fault_unwire(map, &tmp_entry,
0a7de745
A
8037 VME_OBJECT(&tmp_entry) == kernel_object,
8038 map->pmap, tmp_entry.vme_start);
2d21ac55
A
8039 }
8040
1c79356b
A
8041 vm_map_lock(map);
8042
0a7de745 8043 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
8044 /*
8045 * Find the entry again. It could have
8046 * been clipped after we unlocked the map.
8047 */
0a7de745 8048 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5ba3f43e 8049 assert((map != kernel_map) &&
0a7de745
A
8050 (!entry->is_sub_map));
8051 if (gap_start == FIND_GAP) {
d9a64523 8052 gap_start = s;
0a7de745 8053 }
1c79356b 8054 first_entry = first_entry->vme_next;
2d21ac55 8055 s = first_entry->vme_start;
1c79356b 8056 } else {
0c530ab8 8057 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
8058 }
8059 } else {
0c530ab8 8060 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
8061 first_entry = entry;
8062 }
8063
8064 last_timestamp = map->timestamp;
8065
8066 entry = first_entry;
8067 while ((entry != vm_map_to_entry(map)) &&
0a7de745 8068 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
8069 assert(entry->in_transition);
8070 entry->in_transition = FALSE;
8071 if (entry->needs_wakeup) {
8072 entry->needs_wakeup = FALSE;
8073 need_wakeup = TRUE;
8074 }
8075 entry = entry->vme_next;
8076 }
8077 /*
8078 * We have unwired the entry(s). Go back and
8079 * delete them.
8080 */
8081 entry = first_entry;
8082 continue;
8083 }
8084
8085 /* entry is unwired */
8086 assert(entry->wired_count == 0);
8087 assert(entry->user_wired_count == 0);
8088
2d21ac55
A
8089 assert(s == entry->vme_start);
8090
8091 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8092 /*
8093 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8094 * vm_map_delete(), some map entries might have been
8095 * transferred to a "zap_map", which doesn't have a
8096 * pmap. The original pmap has already been flushed
8097 * in the vm_map_delete() call targeting the original
8098 * map, but when we get to destroying the "zap_map",
8099 * we don't have any pmap to flush, so let's just skip
8100 * all this.
8101 */
8102 } else if (entry->is_sub_map) {
8103 if (entry->use_pmap) {
0c530ab8 8104#ifndef NO_NESTED_PMAP
3e170ce0
A
8105 int pmap_flags;
8106
8107 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8108 /*
8109 * This is the final cleanup of the
8110 * address space being terminated.
8111 * No new mappings are expected and
8112 * we don't really need to unnest the
8113 * shared region (and lose the "global"
8114 * pmap mappings, if applicable).
8115 *
8116 * Tell the pmap layer that we're
8117 * "clean" wrt nesting.
8118 */
8119 pmap_flags = PMAP_UNNEST_CLEAN;
8120 } else {
8121 /*
8122 * We're unmapping part of the nested
8123 * shared region, so we can't keep the
8124 * nested pmap.
8125 */
8126 pmap_flags = 0;
8127 }
8128 pmap_unnest_options(
8129 map->pmap,
8130 (addr64_t)entry->vme_start,
8131 entry->vme_end - entry->vme_start,
8132 pmap_flags);
0a7de745 8133#endif /* NO_NESTED_PMAP */
cb323159
A
8134 if (map->mapped_in_other_pmaps &&
8135 os_ref_get_count(&map->map_refcnt) != 0) {
9bccf70c
A
8136 /* clean up parent map/maps */
8137 vm_map_submap_pmap_clean(
8138 map, entry->vme_start,
8139 entry->vme_end,
3e170ce0
A
8140 VME_SUBMAP(entry),
8141 VME_OFFSET(entry));
9bccf70c 8142 }
2d21ac55 8143 } else {
1c79356b
A
8144 vm_map_submap_pmap_clean(
8145 map, entry->vme_start, entry->vme_end,
3e170ce0
A
8146 VME_SUBMAP(entry),
8147 VME_OFFSET(entry));
2d21ac55 8148 }
3e170ce0 8149 } else if (VME_OBJECT(entry) != kernel_object &&
0a7de745 8150 VME_OBJECT(entry) != compressor_object) {
3e170ce0 8151 object = VME_OBJECT(entry);
cb323159
A
8152 if (map->mapped_in_other_pmaps &&
8153 os_ref_get_count(&map->map_refcnt) != 0) {
39236c6e 8154 vm_object_pmap_protect_options(
3e170ce0 8155 object, VME_OFFSET(entry),
55e303ae
A
8156 entry->vme_end - entry->vme_start,
8157 PMAP_NULL,
8158 entry->vme_start,
39236c6e
A
8159 VM_PROT_NONE,
8160 PMAP_OPTIONS_REMOVE);
3e170ce0 8161 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
0a7de745 8162 (map->pmap == kernel_pmap)) {
39236c6e
A
8163 /* Remove translations associated
8164 * with this range unless the entry
8165 * does not have an object, or
8166 * it's the kernel map or a descendant
8167 * since the platform could potentially
8168 * create "backdoor" mappings invisible
8169 * to the VM. It is expected that
8170 * objectless, non-kernel ranges
8171 * do not have such VM invisible
8172 * translations.
8173 */
8174 pmap_remove_options(map->pmap,
0a7de745
A
8175 (addr64_t)entry->vme_start,
8176 (addr64_t)entry->vme_end,
8177 PMAP_OPTIONS_REMOVE);
1c79356b
A
8178 }
8179 }
8180
fe8ab488
A
8181 if (entry->iokit_acct) {
8182 /* alternate accounting */
ecc0ceb4 8183 DTRACE_VM4(vm_map_iokit_unmapped_region,
0a7de745
A
8184 vm_map_t, map,
8185 vm_map_offset_t, entry->vme_start,
8186 vm_map_offset_t, entry->vme_end,
8187 int, VME_ALIAS(entry));
fe8ab488 8188 vm_map_iokit_unmapped_region(map,
0a7de745
A
8189 (entry->vme_end -
8190 entry->vme_start));
fe8ab488 8191 entry->iokit_acct = FALSE;
a39ff7e2 8192 entry->use_pmap = FALSE;
fe8ab488
A
8193 }
8194
91447636
A
8195 /*
8196 * All pmap mappings for this map entry must have been
8197 * cleared by now.
8198 */
fe8ab488 8199#if DEBUG
91447636 8200 assert(vm_map_pmap_is_empty(map,
0a7de745
A
8201 entry->vme_start,
8202 entry->vme_end));
fe8ab488 8203#endif /* DEBUG */
91447636 8204
1c79356b 8205 next = entry->vme_next;
fe8ab488
A
8206
8207 if (map->pmap == kernel_pmap &&
cb323159 8208 os_ref_get_count(&map->map_refcnt) != 0 &&
fe8ab488
A
8209 entry->vme_end < end &&
8210 (next == vm_map_to_entry(map) ||
0a7de745 8211 next->vme_start != entry->vme_end)) {
fe8ab488 8212 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
8213 "hole after %p at 0x%llx\n",
8214 map,
8215 (uint64_t)start,
8216 (uint64_t)end,
8217 entry,
8218 (uint64_t)entry->vme_end);
fe8ab488
A
8219 }
8220
d9a64523
A
8221 /*
8222 * If the desired range didn't end with "entry", then there is a gap if
8223 * we wrapped around to the start of the map or if "entry" and "next"
8224 * aren't contiguous.
8225 *
8226 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8227 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8228 */
8229 if (gap_start == FIND_GAP &&
8230 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8231 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8232 gap_start = entry->vme_end;
8233 }
1c79356b
A
8234 s = next->vme_start;
8235 last_timestamp = map->timestamp;
91447636 8236
5ba3f43e
A
8237 if (entry->permanent) {
8238 /*
8239 * A permanent entry can not be removed, so leave it
8240 * in place but remove all access permissions.
8241 */
8242 entry->protection = VM_PROT_NONE;
8243 entry->max_protection = VM_PROT_NONE;
8244 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
0a7de745 8245 zap_map != VM_MAP_NULL) {
2d21ac55 8246 vm_map_size_t entry_size;
91447636
A
8247 /*
8248 * The caller wants to save the affected VM map entries
8249 * into the "zap_map". The caller will take care of
8250 * these entries.
8251 */
8252 /* unlink the entry from "map" ... */
6d2010ae 8253 vm_map_store_entry_unlink(map, entry);
91447636 8254 /* ... and add it to the end of the "zap_map" */
6d2010ae 8255 vm_map_store_entry_link(zap_map,
0a7de745
A
8256 vm_map_last_entry(zap_map),
8257 entry,
8258 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55
A
8259 entry_size = entry->vme_end - entry->vme_start;
8260 map->size -= entry_size;
8261 zap_map->size += entry_size;
8262 /* we didn't unlock the map, so no timestamp increase */
8263 last_timestamp--;
91447636
A
8264 } else {
8265 vm_map_entry_delete(map, entry);
8266 /* vm_map_entry_delete unlocks the map */
8267 vm_map_lock(map);
8268 }
8269
1c79356b
A
8270 entry = next;
8271
0a7de745 8272 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8273 break;
8274 }
d9a64523 8275 if (last_timestamp + 1 != map->timestamp) {
1c79356b 8276 /*
d9a64523
A
8277 * We are responsible for deleting everything
8278 * from the given space. If someone has interfered,
8279 * we pick up where we left off. Back fills should
8280 * be all right for anyone, except map_delete, and
1c79356b
A
8281 * we have to assume that the task has been fully
8282 * disabled before we get here
8283 */
0a7de745
A
8284 if (!vm_map_lookup_entry(map, s, &entry)) {
8285 entry = entry->vme_next;
d9a64523
A
8286
8287 /*
8288 * Nothing found for s. If we weren't already done, then there is a gap.
8289 */
0a7de745 8290 if (gap_start == FIND_GAP && s < end) {
d9a64523 8291 gap_start = s;
0a7de745 8292 }
2d21ac55 8293 s = entry->vme_start;
0a7de745 8294 } else {
2d21ac55 8295 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
0a7de745 8296 }
5ba3f43e
A
8297 /*
8298 * others can not only allocate behind us, we can
8299 * also see coalesce while we don't have the map lock
1c79356b 8300 */
d9a64523 8301 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8302 break;
8303 }
1c79356b
A
8304 }
8305 last_timestamp = map->timestamp;
8306 }
8307
0a7de745 8308 if (map->wait_for_space) {
1c79356b 8309 thread_wakeup((event_t) map);
0a7de745 8310 }
1c79356b
A
8311 /*
8312 * wake up anybody waiting on entries that we have already deleted.
8313 */
0a7de745 8314 if (need_wakeup) {
1c79356b 8315 vm_map_entry_wakeup(map);
0a7de745 8316 }
1c79356b 8317
d9a64523
A
8318 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8319 DTRACE_VM3(kern_vm_deallocate_gap,
8320 vm_map_offset_t, gap_start,
8321 vm_map_offset_t, save_start,
8322 vm_map_offset_t, save_end);
8323 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
d9a64523
A
8324 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8325 }
8326 }
8327
1c79356b
A
8328 return KERN_SUCCESS;
8329}
8330
ea3f0419
A
8331
8332/*
8333 * vm_map_terminate:
8334 *
8335 * Clean out a task's map.
8336 */
8337kern_return_t
8338vm_map_terminate(
8339 vm_map_t map)
8340{
8341 vm_map_lock(map);
8342 map->terminated = TRUE;
8343 vm_map_unlock(map);
8344
8345 return vm_map_remove(map,
8346 map->min_offset,
8347 map->max_offset,
8348 /*
8349 * Final cleanup:
8350 * + no unnesting
8351 * + remove immutable mappings
8352 * + allow gaps in range
8353 */
8354 (VM_MAP_REMOVE_NO_UNNESTING |
8355 VM_MAP_REMOVE_IMMUTABLE |
8356 VM_MAP_REMOVE_GAPS_OK));
8357}
8358
1c79356b
A
8359/*
8360 * vm_map_remove:
8361 *
8362 * Remove the given address range from the target map.
8363 * This is the exported form of vm_map_delete.
8364 */
8365kern_return_t
8366vm_map_remove(
0a7de745
A
8367 vm_map_t map,
8368 vm_map_offset_t start,
8369 vm_map_offset_t end,
8370 boolean_t flags)
1c79356b 8371{
0a7de745 8372 kern_return_t result;
9bccf70c 8373
1c79356b
A
8374 vm_map_lock(map);
8375 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
8376 /*
8377 * For the zone_map, the kernel controls the allocation/freeing of memory.
8378 * Any free to the zone_map should be within the bounds of the map and
8379 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8380 * free to the zone_map into a no-op, there is a problem and we should
8381 * panic.
8382 */
0a7de745 8383 if ((map == zone_map) && (start == end)) {
39236c6e 8384 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
0a7de745 8385 }
91447636 8386 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 8387 vm_map_unlock(map);
91447636 8388
0a7de745 8389 return result;
1c79356b
A
8390}
8391
39037602
A
8392/*
8393 * vm_map_remove_locked:
8394 *
8395 * Remove the given address range from the target locked map.
8396 * This is the exported form of vm_map_delete.
8397 */
8398kern_return_t
8399vm_map_remove_locked(
0a7de745
A
8400 vm_map_t map,
8401 vm_map_offset_t start,
8402 vm_map_offset_t end,
8403 boolean_t flags)
39037602 8404{
0a7de745 8405 kern_return_t result;
39037602
A
8406
8407 VM_MAP_RANGE_CHECK(map, start, end);
8408 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
0a7de745 8409 return result;
39037602
A
8410}
8411
1c79356b 8412
d9a64523
A
8413/*
8414 * Routine: vm_map_copy_allocate
8415 *
8416 * Description:
8417 * Allocates and initializes a map copy object.
8418 */
8419static vm_map_copy_t
8420vm_map_copy_allocate(void)
8421{
8422 vm_map_copy_t new_copy;
8423
8424 new_copy = zalloc(vm_map_copy_zone);
0a7de745 8425 bzero(new_copy, sizeof(*new_copy));
d9a64523
A
8426 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8427 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8428 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8429 return new_copy;
8430}
8431
1c79356b
A
8432/*
8433 * Routine: vm_map_copy_discard
8434 *
8435 * Description:
8436 * Dispose of a map copy object (returned by
8437 * vm_map_copyin).
8438 */
8439void
8440vm_map_copy_discard(
0a7de745 8441 vm_map_copy_t copy)
1c79356b 8442{
0a7de745 8443 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8444 return;
0a7de745 8445 }
1c79356b
A
8446
8447 switch (copy->type) {
8448 case VM_MAP_COPY_ENTRY_LIST:
8449 while (vm_map_copy_first_entry(copy) !=
0a7de745
A
8450 vm_map_copy_to_entry(copy)) {
8451 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
1c79356b
A
8452
8453 vm_map_copy_entry_unlink(copy, entry);
39236c6e 8454 if (entry->is_sub_map) {
3e170ce0 8455 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 8456 } else {
3e170ce0 8457 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 8458 }
1c79356b
A
8459 vm_map_copy_entry_dispose(copy, entry);
8460 }
8461 break;
0a7de745 8462 case VM_MAP_COPY_OBJECT:
1c79356b
A
8463 vm_object_deallocate(copy->cpy_object);
8464 break;
1c79356b
A
8465 case VM_MAP_COPY_KERNEL_BUFFER:
8466
8467 /*
8468 * The vm_map_copy_t and possibly the data buffer were
8469 * allocated by a single call to kalloc(), i.e. the
8470 * vm_map_copy_t was not allocated out of the zone.
8471 */
0a7de745 8472 if (copy->size > msg_ool_size_small || copy->offset) {
3e170ce0 8473 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
8474 (long long)copy->size, (long long)copy->offset);
8475 }
3e170ce0 8476 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
8477 return;
8478 }
91447636 8479 zfree(vm_map_copy_zone, copy);
1c79356b
A
8480}
8481
8482/*
8483 * Routine: vm_map_copy_copy
8484 *
8485 * Description:
8486 * Move the information in a map copy object to
8487 * a new map copy object, leaving the old one
8488 * empty.
8489 *
8490 * This is used by kernel routines that need
8491 * to look at out-of-line data (in copyin form)
8492 * before deciding whether to return SUCCESS.
8493 * If the routine returns FAILURE, the original
8494 * copy object will be deallocated; therefore,
8495 * these routines must make a copy of the copy
8496 * object and leave the original empty so that
8497 * deallocation will not fail.
8498 */
8499vm_map_copy_t
8500vm_map_copy_copy(
0a7de745 8501 vm_map_copy_t copy)
1c79356b 8502{
0a7de745 8503 vm_map_copy_t new_copy;
1c79356b 8504
0a7de745 8505 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8506 return VM_MAP_COPY_NULL;
0a7de745 8507 }
1c79356b
A
8508
8509 /*
8510 * Allocate a new copy object, and copy the information
8511 * from the old one into it.
8512 */
8513
8514 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8515 *new_copy = *copy;
8516
8517 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8518 /*
8519 * The links in the entry chain must be
8520 * changed to point to the new copy object.
8521 */
8522 vm_map_copy_first_entry(copy)->vme_prev
0a7de745 8523 = vm_map_copy_to_entry(new_copy);
1c79356b 8524 vm_map_copy_last_entry(copy)->vme_next
0a7de745 8525 = vm_map_copy_to_entry(new_copy);
1c79356b
A
8526 }
8527
8528 /*
8529 * Change the old copy object into one that contains
8530 * nothing to be deallocated.
8531 */
8532 copy->type = VM_MAP_COPY_OBJECT;
8533 copy->cpy_object = VM_OBJECT_NULL;
8534
8535 /*
8536 * Return the new object.
8537 */
8538 return new_copy;
8539}
8540
91447636 8541static kern_return_t
1c79356b 8542vm_map_overwrite_submap_recurse(
0a7de745
A
8543 vm_map_t dst_map,
8544 vm_map_offset_t dst_addr,
8545 vm_map_size_t dst_size)
1c79356b 8546{
0a7de745
A
8547 vm_map_offset_t dst_end;
8548 vm_map_entry_t tmp_entry;
8549 vm_map_entry_t entry;
8550 kern_return_t result;
8551 boolean_t encountered_sub_map = FALSE;
1c79356b
A
8552
8553
8554
8555 /*
8556 * Verify that the destination is all writeable
8557 * initially. We have to trunc the destination
8558 * address and round the copy size or we'll end up
8559 * splitting entries in strange ways.
8560 */
8561
39236c6e 8562 dst_end = vm_map_round_page(dst_addr + dst_size,
0a7de745 8563 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 8564 vm_map_lock(dst_map);
1c79356b
A
8565
8566start_pass_1:
1c79356b
A
8567 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8568 vm_map_unlock(dst_map);
0a7de745 8569 return KERN_INVALID_ADDRESS;
1c79356b
A
8570 }
8571
39236c6e 8572 vm_map_clip_start(dst_map,
0a7de745
A
8573 tmp_entry,
8574 vm_map_trunc_page(dst_addr,
8575 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
8576 if (tmp_entry->is_sub_map) {
8577 /* clipping did unnest if needed */
8578 assert(!tmp_entry->use_pmap);
8579 }
1c79356b
A
8580
8581 for (entry = tmp_entry;;) {
0a7de745 8582 vm_map_entry_t next;
1c79356b
A
8583
8584 next = entry->vme_next;
0a7de745
A
8585 while (entry->is_sub_map) {
8586 vm_map_offset_t sub_start;
8587 vm_map_offset_t sub_end;
8588 vm_map_offset_t local_end;
1c79356b
A
8589
8590 if (entry->in_transition) {
2d21ac55
A
8591 /*
8592 * Say that we are waiting, and wait for entry.
8593 */
0a7de745
A
8594 entry->needs_wakeup = TRUE;
8595 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8596
8597 goto start_pass_1;
8598 }
8599
8600 encountered_sub_map = TRUE;
3e170ce0 8601 sub_start = VME_OFFSET(entry);
1c79356b 8602
0a7de745 8603 if (entry->vme_end < dst_end) {
1c79356b 8604 sub_end = entry->vme_end;
0a7de745 8605 } else {
1c79356b 8606 sub_end = dst_end;
0a7de745 8607 }
1c79356b 8608 sub_end -= entry->vme_start;
3e170ce0 8609 sub_end += VME_OFFSET(entry);
1c79356b
A
8610 local_end = entry->vme_end;
8611 vm_map_unlock(dst_map);
5ba3f43e 8612
1c79356b 8613 result = vm_map_overwrite_submap_recurse(
3e170ce0 8614 VME_SUBMAP(entry),
2d21ac55
A
8615 sub_start,
8616 sub_end - sub_start);
1c79356b 8617
0a7de745 8618 if (result != KERN_SUCCESS) {
1c79356b 8619 return result;
0a7de745
A
8620 }
8621 if (dst_end <= entry->vme_end) {
1c79356b 8622 return KERN_SUCCESS;
0a7de745 8623 }
1c79356b 8624 vm_map_lock(dst_map);
0a7de745
A
8625 if (!vm_map_lookup_entry(dst_map, local_end,
8626 &tmp_entry)) {
1c79356b 8627 vm_map_unlock(dst_map);
0a7de745 8628 return KERN_INVALID_ADDRESS;
1c79356b
A
8629 }
8630 entry = tmp_entry;
8631 next = entry->vme_next;
8632 }
8633
0a7de745 8634 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8635 vm_map_unlock(dst_map);
0a7de745 8636 return KERN_PROTECTION_FAILURE;
1c79356b
A
8637 }
8638
8639 /*
8640 * If the entry is in transition, we must wait
8641 * for it to exit that state. Anything could happen
8642 * when we unlock the map, so start over.
8643 */
0a7de745
A
8644 if (entry->in_transition) {
8645 /*
8646 * Say that we are waiting, and wait for entry.
8647 */
8648 entry->needs_wakeup = TRUE;
8649 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8650
8651 goto start_pass_1;
8652 }
8653
8654/*
8655 * our range is contained completely within this map entry
8656 */
8657 if (dst_end <= entry->vme_end) {
8658 vm_map_unlock(dst_map);
8659 return KERN_SUCCESS;
8660 }
8661/*
8662 * check that range specified is contiguous region
8663 */
8664 if ((next == vm_map_to_entry(dst_map)) ||
8665 (next->vme_start != entry->vme_end)) {
8666 vm_map_unlock(dst_map);
0a7de745 8667 return KERN_INVALID_ADDRESS;
1c79356b
A
8668 }
8669
8670 /*
8671 * Check for permanent objects in the destination.
8672 */
3e170ce0
A
8673 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8674 ((!VME_OBJECT(entry)->internal) ||
0a7de745
A
8675 (VME_OBJECT(entry)->true_share))) {
8676 if (encountered_sub_map) {
1c79356b 8677 vm_map_unlock(dst_map);
0a7de745 8678 return KERN_FAILURE;
1c79356b
A
8679 }
8680 }
8681
8682
8683 entry = next;
8684 }/* for */
8685 vm_map_unlock(dst_map);
0a7de745 8686 return KERN_SUCCESS;
1c79356b
A
8687}
8688
8689/*
8690 * Routine: vm_map_copy_overwrite
8691 *
8692 * Description:
8693 * Copy the memory described by the map copy
8694 * object (copy; returned by vm_map_copyin) onto
8695 * the specified destination region (dst_map, dst_addr).
8696 * The destination must be writeable.
8697 *
8698 * Unlike vm_map_copyout, this routine actually
8699 * writes over previously-mapped memory. If the
8700 * previous mapping was to a permanent (user-supplied)
8701 * memory object, it is preserved.
8702 *
8703 * The attributes (protection and inheritance) of the
8704 * destination region are preserved.
8705 *
8706 * If successful, consumes the copy object.
8707 * Otherwise, the caller is responsible for it.
8708 *
8709 * Implementation notes:
8710 * To overwrite aligned temporary virtual memory, it is
8711 * sufficient to remove the previous mapping and insert
8712 * the new copy. This replacement is done either on
8713 * the whole region (if no permanent virtual memory
8714 * objects are embedded in the destination region) or
8715 * in individual map entries.
8716 *
8717 * To overwrite permanent virtual memory, it is necessary
8718 * to copy each page, as the external memory management
8719 * interface currently does not provide any optimizations.
8720 *
8721 * Unaligned memory also has to be copied. It is possible
8722 * to use 'vm_trickery' to copy the aligned data. This is
8723 * not done but not hard to implement.
8724 *
8725 * Once a page of permanent memory has been overwritten,
8726 * it is impossible to interrupt this function; otherwise,
8727 * the call would be neither atomic nor location-independent.
8728 * The kernel-state portion of a user thread must be
8729 * interruptible.
8730 *
8731 * It may be expensive to forward all requests that might
8732 * overwrite permanent memory (vm_write, vm_copy) to
8733 * uninterruptible kernel threads. This routine may be
8734 * called by interruptible threads; however, success is
8735 * not guaranteed -- if the request cannot be performed
8736 * atomically and interruptibly, an error indication is
8737 * returned.
8738 */
8739
91447636 8740static kern_return_t
1c79356b 8741vm_map_copy_overwrite_nested(
0a7de745
A
8742 vm_map_t dst_map,
8743 vm_map_address_t dst_addr,
8744 vm_map_copy_t copy,
8745 boolean_t interruptible,
8746 pmap_t pmap,
8747 boolean_t discard_on_success)
1c79356b 8748{
0a7de745
A
8749 vm_map_offset_t dst_end;
8750 vm_map_entry_t tmp_entry;
8751 vm_map_entry_t entry;
8752 kern_return_t kr;
8753 boolean_t aligned = TRUE;
8754 boolean_t contains_permanent_objects = FALSE;
8755 boolean_t encountered_sub_map = FALSE;
8756 vm_map_offset_t base_addr;
8757 vm_map_size_t copy_size;
8758 vm_map_size_t total_size;
1c79356b
A
8759
8760
8761 /*
8762 * Check for null copy object.
8763 */
8764
0a7de745
A
8765 if (copy == VM_MAP_COPY_NULL) {
8766 return KERN_SUCCESS;
8767 }
1c79356b
A
8768
8769 /*
8770 * Check for special kernel buffer allocated
8771 * by new_ipc_kmsg_copyin.
8772 */
8773
8774 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0a7de745
A
8775 return vm_map_copyout_kernel_buffer(
8776 dst_map, &dst_addr,
8777 copy, copy->size, TRUE, discard_on_success);
1c79356b
A
8778 }
8779
8780 /*
8781 * Only works for entry lists at the moment. Will
8782 * support page lists later.
8783 */
8784
8785 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8786
8787 if (copy->size == 0) {
0a7de745 8788 if (discard_on_success) {
6d2010ae 8789 vm_map_copy_discard(copy);
0a7de745
A
8790 }
8791 return KERN_SUCCESS;
1c79356b
A
8792 }
8793
8794 /*
8795 * Verify that the destination is all writeable
8796 * initially. We have to trunc the destination
8797 * address and round the copy size or we'll end up
8798 * splitting entries in strange ways.
8799 */
8800
39236c6e 8801 if (!VM_MAP_PAGE_ALIGNED(copy->size,
0a7de745 8802 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8803 !VM_MAP_PAGE_ALIGNED(copy->offset,
0a7de745 8804 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8805 !VM_MAP_PAGE_ALIGNED(dst_addr,
0a7de745 8806 VM_MAP_PAGE_MASK(dst_map))) {
1c79356b 8807 aligned = FALSE;
39236c6e 8808 dst_end = vm_map_round_page(dst_addr + copy->size,
0a7de745 8809 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8810 } else {
8811 dst_end = dst_addr + copy->size;
8812 }
8813
1c79356b 8814 vm_map_lock(dst_map);
9bccf70c 8815
91447636
A
8816 /* LP64todo - remove this check when vm_map_commpage64()
8817 * no longer has to stuff in a map_entry for the commpage
8818 * above the map's max_offset.
8819 */
8820 if (dst_addr >= dst_map->max_offset) {
8821 vm_map_unlock(dst_map);
0a7de745 8822 return KERN_INVALID_ADDRESS;
91447636 8823 }
5ba3f43e 8824
9bccf70c 8825start_pass_1:
1c79356b
A
8826 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8827 vm_map_unlock(dst_map);
0a7de745 8828 return KERN_INVALID_ADDRESS;
1c79356b 8829 }
39236c6e 8830 vm_map_clip_start(dst_map,
0a7de745
A
8831 tmp_entry,
8832 vm_map_trunc_page(dst_addr,
8833 VM_MAP_PAGE_MASK(dst_map)));
1c79356b 8834 for (entry = tmp_entry;;) {
0a7de745 8835 vm_map_entry_t next = entry->vme_next;
1c79356b 8836
0a7de745
A
8837 while (entry->is_sub_map) {
8838 vm_map_offset_t sub_start;
8839 vm_map_offset_t sub_end;
8840 vm_map_offset_t local_end;
1c79356b 8841
0a7de745 8842 if (entry->in_transition) {
2d21ac55
A
8843 /*
8844 * Say that we are waiting, and wait for entry.
8845 */
0a7de745
A
8846 entry->needs_wakeup = TRUE;
8847 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8848
8849 goto start_pass_1;
8850 }
8851
8852 local_end = entry->vme_end;
0a7de745 8853 if (!(entry->needs_copy)) {
1c79356b
A
8854 /* if needs_copy we are a COW submap */
8855 /* in such a case we just replace so */
8856 /* there is no need for the follow- */
8857 /* ing check. */
8858 encountered_sub_map = TRUE;
3e170ce0 8859 sub_start = VME_OFFSET(entry);
1c79356b 8860
0a7de745 8861 if (entry->vme_end < dst_end) {
1c79356b 8862 sub_end = entry->vme_end;
0a7de745 8863 } else {
1c79356b 8864 sub_end = dst_end;
0a7de745 8865 }
1c79356b 8866 sub_end -= entry->vme_start;
3e170ce0 8867 sub_end += VME_OFFSET(entry);
1c79356b 8868 vm_map_unlock(dst_map);
5ba3f43e 8869
1c79356b 8870 kr = vm_map_overwrite_submap_recurse(
3e170ce0 8871 VME_SUBMAP(entry),
1c79356b
A
8872 sub_start,
8873 sub_end - sub_start);
0a7de745 8874 if (kr != KERN_SUCCESS) {
1c79356b 8875 return kr;
0a7de745 8876 }
1c79356b
A
8877 vm_map_lock(dst_map);
8878 }
8879
0a7de745 8880 if (dst_end <= entry->vme_end) {
1c79356b 8881 goto start_overwrite;
0a7de745
A
8882 }
8883 if (!vm_map_lookup_entry(dst_map, local_end,
8884 &entry)) {
1c79356b 8885 vm_map_unlock(dst_map);
0a7de745 8886 return KERN_INVALID_ADDRESS;
1c79356b
A
8887 }
8888 next = entry->vme_next;
8889 }
8890
0a7de745 8891 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8892 vm_map_unlock(dst_map);
0a7de745 8893 return KERN_PROTECTION_FAILURE;
1c79356b
A
8894 }
8895
8896 /*
8897 * If the entry is in transition, we must wait
8898 * for it to exit that state. Anything could happen
8899 * when we unlock the map, so start over.
8900 */
0a7de745
A
8901 if (entry->in_transition) {
8902 /*
8903 * Say that we are waiting, and wait for entry.
8904 */
8905 entry->needs_wakeup = TRUE;
8906 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8907
8908 goto start_pass_1;
8909 }
8910
8911/*
8912 * our range is contained completely within this map entry
8913 */
0a7de745 8914 if (dst_end <= entry->vme_end) {
1c79356b 8915 break;
0a7de745 8916 }
1c79356b
A
8917/*
8918 * check that range specified is contiguous region
8919 */
8920 if ((next == vm_map_to_entry(dst_map)) ||
8921 (next->vme_start != entry->vme_end)) {
8922 vm_map_unlock(dst_map);
0a7de745 8923 return KERN_INVALID_ADDRESS;
1c79356b
A
8924 }
8925
8926
8927 /*
8928 * Check for permanent objects in the destination.
8929 */
3e170ce0
A
8930 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8931 ((!VME_OBJECT(entry)->internal) ||
0a7de745 8932 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
8933 contains_permanent_objects = TRUE;
8934 }
8935
8936 entry = next;
8937 }/* for */
8938
8939start_overwrite:
8940 /*
8941 * If there are permanent objects in the destination, then
8942 * the copy cannot be interrupted.
8943 */
8944
8945 if (interruptible && contains_permanent_objects) {
8946 vm_map_unlock(dst_map);
0a7de745 8947 return KERN_FAILURE; /* XXX */
1c79356b
A
8948 }
8949
8950 /*
0a7de745 8951 *
1c79356b
A
8952 * Make a second pass, overwriting the data
8953 * At the beginning of each loop iteration,
8954 * the next entry to be overwritten is "tmp_entry"
8955 * (initially, the value returned from the lookup above),
8956 * and the starting address expected in that entry
8957 * is "start".
8958 */
8959
8960 total_size = copy->size;
0a7de745 8961 if (encountered_sub_map) {
1c79356b
A
8962 copy_size = 0;
8963 /* re-calculate tmp_entry since we've had the map */
8964 /* unlocked */
8965 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8966 vm_map_unlock(dst_map);
0a7de745 8967 return KERN_INVALID_ADDRESS;
1c79356b
A
8968 }
8969 } else {
8970 copy_size = copy->size;
8971 }
5ba3f43e 8972
1c79356b 8973 base_addr = dst_addr;
0a7de745 8974 while (TRUE) {
1c79356b
A
8975 /* deconstruct the copy object and do in parts */
8976 /* only in sub_map, interruptable case */
0a7de745
A
8977 vm_map_entry_t copy_entry;
8978 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8979 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8980 int nentries;
8981 int remaining_entries = 0;
8982 vm_map_offset_t new_offset = 0;
5ba3f43e 8983
1c79356b 8984 for (entry = tmp_entry; copy_size == 0;) {
0a7de745 8985 vm_map_entry_t next;
1c79356b
A
8986
8987 next = entry->vme_next;
8988
8989 /* tmp_entry and base address are moved along */
8990 /* each time we encounter a sub-map. Otherwise */
8991 /* entry can outpase tmp_entry, and the copy_size */
8992 /* may reflect the distance between them */
8993 /* if the current entry is found to be in transition */
8994 /* we will start over at the beginning or the last */
8995 /* encounter of a submap as dictated by base_addr */
8996 /* we will zero copy_size accordingly. */
8997 if (entry->in_transition) {
0a7de745
A
8998 /*
8999 * Say that we are waiting, and wait for entry.
9000 */
9001 entry->needs_wakeup = TRUE;
9002 vm_map_entry_wait(dst_map, THREAD_UNINT);
9003
9004 if (!vm_map_lookup_entry(dst_map, base_addr,
9005 &tmp_entry)) {
1c79356b 9006 vm_map_unlock(dst_map);
0a7de745 9007 return KERN_INVALID_ADDRESS;
1c79356b
A
9008 }
9009 copy_size = 0;
9010 entry = tmp_entry;
9011 continue;
9012 }
5ba3f43e 9013 if (entry->is_sub_map) {
0a7de745
A
9014 vm_map_offset_t sub_start;
9015 vm_map_offset_t sub_end;
9016 vm_map_offset_t local_end;
1c79356b 9017
0a7de745 9018 if (entry->needs_copy) {
1c79356b
A
9019 /* if this is a COW submap */
9020 /* just back the range with a */
9021 /* anonymous entry */
0a7de745 9022 if (entry->vme_end < dst_end) {
1c79356b 9023 sub_end = entry->vme_end;
0a7de745 9024 } else {
1c79356b 9025 sub_end = dst_end;
0a7de745
A
9026 }
9027 if (entry->vme_start < base_addr) {
1c79356b 9028 sub_start = base_addr;
0a7de745 9029 } else {
1c79356b 9030 sub_start = entry->vme_start;
0a7de745 9031 }
1c79356b
A
9032 vm_map_clip_end(
9033 dst_map, entry, sub_end);
9034 vm_map_clip_start(
9035 dst_map, entry, sub_start);
2d21ac55 9036 assert(!entry->use_pmap);
a39ff7e2
A
9037 assert(!entry->iokit_acct);
9038 entry->use_pmap = TRUE;
1c79356b
A
9039 entry->is_sub_map = FALSE;
9040 vm_map_deallocate(
3e170ce0 9041 VME_SUBMAP(entry));
cb323159 9042 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5ba3f43e 9043 VME_OFFSET_SET(entry, 0);
1c79356b
A
9044 entry->is_shared = FALSE;
9045 entry->needs_copy = FALSE;
5ba3f43e 9046 entry->protection = VM_PROT_DEFAULT;
1c79356b
A
9047 entry->max_protection = VM_PROT_ALL;
9048 entry->wired_count = 0;
9049 entry->user_wired_count = 0;
0a7de745
A
9050 if (entry->inheritance
9051 == VM_INHERIT_SHARE) {
2d21ac55 9052 entry->inheritance = VM_INHERIT_COPY;
0a7de745 9053 }
1c79356b
A
9054 continue;
9055 }
9056 /* first take care of any non-sub_map */
9057 /* entries to send */
0a7de745 9058 if (base_addr < entry->vme_start) {
1c79356b 9059 /* stuff to send */
5ba3f43e 9060 copy_size =
0a7de745 9061 entry->vme_start - base_addr;
1c79356b
A
9062 break;
9063 }
3e170ce0 9064 sub_start = VME_OFFSET(entry);
1c79356b 9065
0a7de745 9066 if (entry->vme_end < dst_end) {
1c79356b 9067 sub_end = entry->vme_end;
0a7de745 9068 } else {
1c79356b 9069 sub_end = dst_end;
0a7de745 9070 }
1c79356b 9071 sub_end -= entry->vme_start;
3e170ce0 9072 sub_end += VME_OFFSET(entry);
1c79356b
A
9073 local_end = entry->vme_end;
9074 vm_map_unlock(dst_map);
9075 copy_size = sub_end - sub_start;
9076
9077 /* adjust the copy object */
9078 if (total_size > copy_size) {
0a7de745
A
9079 vm_map_size_t local_size = 0;
9080 vm_map_size_t entry_size;
1c79356b 9081
2d21ac55
A
9082 nentries = 1;
9083 new_offset = copy->offset;
9084 copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
9085 while (copy_entry !=
9086 vm_map_copy_to_entry(copy)) {
5ba3f43e 9087 entry_size = copy_entry->vme_end -
0a7de745
A
9088 copy_entry->vme_start;
9089 if ((local_size < copy_size) &&
9090 ((local_size + entry_size)
2d21ac55 9091 >= copy_size)) {
5ba3f43e 9092 vm_map_copy_clip_end(copy,
0a7de745
A
9093 copy_entry,
9094 copy_entry->vme_start +
9095 (copy_size - local_size));
5ba3f43e 9096 entry_size = copy_entry->vme_end -
0a7de745 9097 copy_entry->vme_start;
2d21ac55
A
9098 local_size += entry_size;
9099 new_offset += entry_size;
9100 }
0a7de745 9101 if (local_size >= copy_size) {
2d21ac55 9102 next_copy = copy_entry->vme_next;
5ba3f43e 9103 copy_entry->vme_next =
0a7de745 9104 vm_map_copy_to_entry(copy);
5ba3f43e 9105 previous_prev =
0a7de745 9106 copy->cpy_hdr.links.prev;
2d21ac55
A
9107 copy->cpy_hdr.links.prev = copy_entry;
9108 copy->size = copy_size;
5ba3f43e 9109 remaining_entries =
0a7de745 9110 copy->cpy_hdr.nentries;
2d21ac55
A
9111 remaining_entries -= nentries;
9112 copy->cpy_hdr.nentries = nentries;
9113 break;
9114 } else {
9115 local_size += entry_size;
9116 new_offset += entry_size;
9117 nentries++;
9118 }
9119 copy_entry = copy_entry->vme_next;
9120 }
1c79356b 9121 }
5ba3f43e 9122
0a7de745 9123 if ((entry->use_pmap) && (pmap == NULL)) {
1c79356b 9124 kr = vm_map_copy_overwrite_nested(
3e170ce0 9125 VME_SUBMAP(entry),
1c79356b
A
9126 sub_start,
9127 copy,
5ba3f43e 9128 interruptible,
3e170ce0 9129 VME_SUBMAP(entry)->pmap,
6d2010ae 9130 TRUE);
1c79356b
A
9131 } else if (pmap != NULL) {
9132 kr = vm_map_copy_overwrite_nested(
3e170ce0 9133 VME_SUBMAP(entry),
1c79356b
A
9134 sub_start,
9135 copy,
6d2010ae
A
9136 interruptible, pmap,
9137 TRUE);
1c79356b
A
9138 } else {
9139 kr = vm_map_copy_overwrite_nested(
3e170ce0 9140 VME_SUBMAP(entry),
1c79356b
A
9141 sub_start,
9142 copy,
9143 interruptible,
6d2010ae
A
9144 dst_map->pmap,
9145 TRUE);
1c79356b 9146 }
0a7de745
A
9147 if (kr != KERN_SUCCESS) {
9148 if (next_copy != NULL) {
5ba3f43e 9149 copy->cpy_hdr.nentries +=
0a7de745 9150 remaining_entries;
5ba3f43e 9151 copy->cpy_hdr.links.prev->vme_next =
0a7de745 9152 next_copy;
5ba3f43e 9153 copy->cpy_hdr.links.prev
0a7de745 9154 = previous_prev;
2d21ac55 9155 copy->size = total_size;
1c79356b
A
9156 }
9157 return kr;
9158 }
9159 if (dst_end <= local_end) {
0a7de745 9160 return KERN_SUCCESS;
1c79356b
A
9161 }
9162 /* otherwise copy no longer exists, it was */
9163 /* destroyed after successful copy_overwrite */
d9a64523 9164 copy = vm_map_copy_allocate();
1c79356b
A
9165 copy->type = VM_MAP_COPY_ENTRY_LIST;
9166 copy->offset = new_offset;
9167
e2d2fc5c
A
9168 /*
9169 * XXX FBDP
9170 * this does not seem to deal with
9171 * the VM map store (R&B tree)
9172 */
9173
1c79356b
A
9174 total_size -= copy_size;
9175 copy_size = 0;
9176 /* put back remainder of copy in container */
0a7de745 9177 if (next_copy != NULL) {
2d21ac55
A
9178 copy->cpy_hdr.nentries = remaining_entries;
9179 copy->cpy_hdr.links.next = next_copy;
9180 copy->cpy_hdr.links.prev = previous_prev;
9181 copy->size = total_size;
5ba3f43e 9182 next_copy->vme_prev =
0a7de745 9183 vm_map_copy_to_entry(copy);
2d21ac55 9184 next_copy = NULL;
1c79356b
A
9185 }
9186 base_addr = local_end;
9187 vm_map_lock(dst_map);
0a7de745
A
9188 if (!vm_map_lookup_entry(dst_map,
9189 local_end, &tmp_entry)) {
1c79356b 9190 vm_map_unlock(dst_map);
0a7de745 9191 return KERN_INVALID_ADDRESS;
1c79356b
A
9192 }
9193 entry = tmp_entry;
9194 continue;
5ba3f43e 9195 }
1c79356b
A
9196 if (dst_end <= entry->vme_end) {
9197 copy_size = dst_end - base_addr;
9198 break;
9199 }
9200
9201 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 9202 (next->vme_start != entry->vme_end)) {
1c79356b 9203 vm_map_unlock(dst_map);
0a7de745 9204 return KERN_INVALID_ADDRESS;
1c79356b
A
9205 }
9206
9207 entry = next;
9208 }/* for */
9209
9210 next_copy = NULL;
9211 nentries = 1;
9212
9213 /* adjust the copy object */
9214 if (total_size > copy_size) {
0a7de745
A
9215 vm_map_size_t local_size = 0;
9216 vm_map_size_t entry_size;
1c79356b
A
9217
9218 new_offset = copy->offset;
9219 copy_entry = vm_map_copy_first_entry(copy);
0a7de745 9220 while (copy_entry != vm_map_copy_to_entry(copy)) {
5ba3f43e 9221 entry_size = copy_entry->vme_end -
0a7de745
A
9222 copy_entry->vme_start;
9223 if ((local_size < copy_size) &&
9224 ((local_size + entry_size)
2d21ac55 9225 >= copy_size)) {
5ba3f43e 9226 vm_map_copy_clip_end(copy, copy_entry,
0a7de745
A
9227 copy_entry->vme_start +
9228 (copy_size - local_size));
5ba3f43e 9229 entry_size = copy_entry->vme_end -
0a7de745 9230 copy_entry->vme_start;
1c79356b
A
9231 local_size += entry_size;
9232 new_offset += entry_size;
9233 }
0a7de745 9234 if (local_size >= copy_size) {
1c79356b 9235 next_copy = copy_entry->vme_next;
5ba3f43e 9236 copy_entry->vme_next =
0a7de745 9237 vm_map_copy_to_entry(copy);
5ba3f43e 9238 previous_prev =
0a7de745 9239 copy->cpy_hdr.links.prev;
1c79356b
A
9240 copy->cpy_hdr.links.prev = copy_entry;
9241 copy->size = copy_size;
5ba3f43e 9242 remaining_entries =
0a7de745 9243 copy->cpy_hdr.nentries;
1c79356b
A
9244 remaining_entries -= nentries;
9245 copy->cpy_hdr.nentries = nentries;
9246 break;
9247 } else {
9248 local_size += entry_size;
9249 new_offset += entry_size;
9250 nentries++;
9251 }
9252 copy_entry = copy_entry->vme_next;
9253 }
9254 }
9255
9256 if (aligned) {
0a7de745 9257 pmap_t local_pmap;
1c79356b 9258
0a7de745 9259 if (pmap) {
1c79356b 9260 local_pmap = pmap;
0a7de745 9261 } else {
1c79356b 9262 local_pmap = dst_map->pmap;
0a7de745 9263 }
1c79356b 9264
5ba3f43e 9265 if ((kr = vm_map_copy_overwrite_aligned(
0a7de745
A
9266 dst_map, tmp_entry, copy,
9267 base_addr, local_pmap)) != KERN_SUCCESS) {
9268 if (next_copy != NULL) {
5ba3f43e 9269 copy->cpy_hdr.nentries +=
0a7de745
A
9270 remaining_entries;
9271 copy->cpy_hdr.links.prev->vme_next =
9272 next_copy;
9273 copy->cpy_hdr.links.prev =
9274 previous_prev;
1c79356b
A
9275 copy->size += copy_size;
9276 }
9277 return kr;
9278 }
9279 vm_map_unlock(dst_map);
9280 } else {
2d21ac55
A
9281 /*
9282 * Performance gain:
9283 *
9284 * if the copy and dst address are misaligned but the same
9285 * offset within the page we can copy_not_aligned the
9286 * misaligned parts and copy aligned the rest. If they are
9287 * aligned but len is unaligned we simply need to copy
9288 * the end bit unaligned. We'll need to split the misaligned
9289 * bits of the region in this case !
9290 */
9291 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
9292 kr = vm_map_copy_overwrite_unaligned(
9293 dst_map,
9294 tmp_entry,
9295 copy,
9296 base_addr,
9297 discard_on_success);
9298 if (kr != KERN_SUCCESS) {
0a7de745 9299 if (next_copy != NULL) {
1c79356b 9300 copy->cpy_hdr.nentries +=
0a7de745
A
9301 remaining_entries;
9302 copy->cpy_hdr.links.prev->vme_next =
9303 next_copy;
9304 copy->cpy_hdr.links.prev =
9305 previous_prev;
1c79356b
A
9306 copy->size += copy_size;
9307 }
9308 return kr;
9309 }
9310 }
9311 total_size -= copy_size;
0a7de745 9312 if (total_size == 0) {
1c79356b 9313 break;
0a7de745 9314 }
1c79356b
A
9315 base_addr += copy_size;
9316 copy_size = 0;
9317 copy->offset = new_offset;
0a7de745 9318 if (next_copy != NULL) {
1c79356b
A
9319 copy->cpy_hdr.nentries = remaining_entries;
9320 copy->cpy_hdr.links.next = next_copy;
9321 copy->cpy_hdr.links.prev = previous_prev;
9322 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9323 copy->size = total_size;
9324 }
9325 vm_map_lock(dst_map);
0a7de745 9326 while (TRUE) {
5ba3f43e 9327 if (!vm_map_lookup_entry(dst_map,
0a7de745 9328 base_addr, &tmp_entry)) {
1c79356b 9329 vm_map_unlock(dst_map);
0a7de745 9330 return KERN_INVALID_ADDRESS;
1c79356b 9331 }
0a7de745
A
9332 if (tmp_entry->in_transition) {
9333 entry->needs_wakeup = TRUE;
9334 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
9335 } else {
9336 break;
9337 }
9338 }
39236c6e 9339 vm_map_clip_start(dst_map,
0a7de745
A
9340 tmp_entry,
9341 vm_map_trunc_page(base_addr,
9342 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
9343
9344 entry = tmp_entry;
9345 } /* while */
9346
9347 /*
9348 * Throw away the vm_map_copy object
9349 */
0a7de745 9350 if (discard_on_success) {
6d2010ae 9351 vm_map_copy_discard(copy);
0a7de745 9352 }
1c79356b 9353
0a7de745 9354 return KERN_SUCCESS;
1c79356b
A
9355}/* vm_map_copy_overwrite */
9356
9357kern_return_t
9358vm_map_copy_overwrite(
0a7de745
A
9359 vm_map_t dst_map,
9360 vm_map_offset_t dst_addr,
9361 vm_map_copy_t copy,
9362 boolean_t interruptible)
1c79356b 9363{
0a7de745
A
9364 vm_map_size_t head_size, tail_size;
9365 vm_map_copy_t head_copy, tail_copy;
9366 vm_map_offset_t head_addr, tail_addr;
9367 vm_map_entry_t entry;
9368 kern_return_t kr;
9369 vm_map_offset_t effective_page_mask, effective_page_size;
6d2010ae
A
9370
9371 head_size = 0;
9372 tail_size = 0;
9373 head_copy = NULL;
9374 tail_copy = NULL;
9375 head_addr = 0;
9376 tail_addr = 0;
9377
9378 if (interruptible ||
9379 copy == VM_MAP_COPY_NULL ||
9380 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9381 /*
9382 * We can't split the "copy" map if we're interruptible
9383 * or if we don't have a "copy" map...
9384 */
0a7de745 9385blunt_copy:
6d2010ae 9386 return vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9387 dst_addr,
9388 copy,
9389 interruptible,
9390 (pmap_t) NULL,
9391 TRUE);
6d2010ae
A
9392 }
9393
5ba3f43e
A
9394 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9395 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
0a7de745 9396 effective_page_mask);
5ba3f43e
A
9397 effective_page_size = effective_page_mask + 1;
9398
9399 if (copy->size < 3 * effective_page_size) {
6d2010ae
A
9400 /*
9401 * Too small to bother with optimizing...
9402 */
9403 goto blunt_copy;
9404 }
9405
5ba3f43e
A
9406 if ((dst_addr & effective_page_mask) !=
9407 (copy->offset & effective_page_mask)) {
6d2010ae
A
9408 /*
9409 * Incompatible mis-alignment of source and destination...
9410 */
9411 goto blunt_copy;
9412 }
9413
9414 /*
9415 * Proper alignment or identical mis-alignment at the beginning.
9416 * Let's try and do a small unaligned copy first (if needed)
9417 * and then an aligned copy for the rest.
9418 */
5ba3f43e 9419 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
6d2010ae 9420 head_addr = dst_addr;
5ba3f43e 9421 head_size = (effective_page_size -
0a7de745 9422 (copy->offset & effective_page_mask));
5ba3f43e 9423 head_size = MIN(head_size, copy->size);
6d2010ae 9424 }
5ba3f43e 9425 if (!vm_map_page_aligned(copy->offset + copy->size,
0a7de745 9426 effective_page_mask)) {
6d2010ae
A
9427 /*
9428 * Mis-alignment at the end.
9429 * Do an aligned copy up to the last page and
9430 * then an unaligned copy for the remaining bytes.
9431 */
39236c6e 9432 tail_size = ((copy->offset + copy->size) &
0a7de745 9433 effective_page_mask);
5ba3f43e 9434 tail_size = MIN(tail_size, copy->size);
6d2010ae 9435 tail_addr = dst_addr + copy->size - tail_size;
5ba3f43e 9436 assert(tail_addr >= head_addr + head_size);
6d2010ae 9437 }
5ba3f43e 9438 assert(head_size + tail_size <= copy->size);
6d2010ae
A
9439
9440 if (head_size + tail_size == copy->size) {
9441 /*
9442 * It's all unaligned, no optimization possible...
9443 */
9444 goto blunt_copy;
9445 }
9446
9447 /*
9448 * Can't optimize if there are any submaps in the
9449 * destination due to the way we free the "copy" map
9450 * progressively in vm_map_copy_overwrite_nested()
9451 * in that case.
9452 */
9453 vm_map_lock_read(dst_map);
0a7de745 9454 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6d2010ae
A
9455 vm_map_unlock_read(dst_map);
9456 goto blunt_copy;
9457 }
9458 for (;
0a7de745
A
9459 (entry != vm_map_copy_to_entry(copy) &&
9460 entry->vme_start < dst_addr + copy->size);
9461 entry = entry->vme_next) {
6d2010ae
A
9462 if (entry->is_sub_map) {
9463 vm_map_unlock_read(dst_map);
9464 goto blunt_copy;
9465 }
9466 }
9467 vm_map_unlock_read(dst_map);
9468
9469 if (head_size) {
9470 /*
9471 * Unaligned copy of the first "head_size" bytes, to reach
9472 * a page boundary.
9473 */
5ba3f43e 9474
6d2010ae
A
9475 /*
9476 * Extract "head_copy" out of "copy".
9477 */
d9a64523 9478 head_copy = vm_map_copy_allocate();
6d2010ae 9479 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9480 head_copy->cpy_hdr.entries_pageable =
0a7de745 9481 copy->cpy_hdr.entries_pageable;
6d2010ae
A
9482 vm_map_store_init(&head_copy->cpy_hdr);
9483
5ba3f43e
A
9484 entry = vm_map_copy_first_entry(copy);
9485 if (entry->vme_end < copy->offset + head_size) {
9486 head_size = entry->vme_end - copy->offset;
9487 }
9488
6d2010ae
A
9489 head_copy->offset = copy->offset;
9490 head_copy->size = head_size;
6d2010ae
A
9491 copy->offset += head_size;
9492 copy->size -= head_size;
9493
6d2010ae
A
9494 vm_map_copy_clip_end(copy, entry, copy->offset);
9495 vm_map_copy_entry_unlink(copy, entry);
9496 vm_map_copy_entry_link(head_copy,
0a7de745
A
9497 vm_map_copy_to_entry(head_copy),
9498 entry);
6d2010ae
A
9499
9500 /*
9501 * Do the unaligned copy.
9502 */
9503 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9504 head_addr,
9505 head_copy,
9506 interruptible,
9507 (pmap_t) NULL,
9508 FALSE);
9509 if (kr != KERN_SUCCESS) {
6d2010ae 9510 goto done;
0a7de745 9511 }
6d2010ae
A
9512 }
9513
9514 if (tail_size) {
9515 /*
9516 * Extract "tail_copy" out of "copy".
9517 */
d9a64523 9518 tail_copy = vm_map_copy_allocate();
6d2010ae 9519 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9520 tail_copy->cpy_hdr.entries_pageable =
0a7de745 9521 copy->cpy_hdr.entries_pageable;
6d2010ae
A
9522 vm_map_store_init(&tail_copy->cpy_hdr);
9523
9524 tail_copy->offset = copy->offset + copy->size - tail_size;
9525 tail_copy->size = tail_size;
9526
9527 copy->size -= tail_size;
9528
9529 entry = vm_map_copy_last_entry(copy);
9530 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9531 entry = vm_map_copy_last_entry(copy);
9532 vm_map_copy_entry_unlink(copy, entry);
9533 vm_map_copy_entry_link(tail_copy,
0a7de745
A
9534 vm_map_copy_last_entry(tail_copy),
9535 entry);
6d2010ae
A
9536 }
9537
9538 /*
9539 * Copy most (or possibly all) of the data.
9540 */
9541 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9542 dst_addr + head_size,
9543 copy,
9544 interruptible,
9545 (pmap_t) NULL,
9546 FALSE);
6d2010ae
A
9547 if (kr != KERN_SUCCESS) {
9548 goto done;
9549 }
9550
9551 if (tail_size) {
9552 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9553 tail_addr,
9554 tail_copy,
9555 interruptible,
9556 (pmap_t) NULL,
9557 FALSE);
6d2010ae
A
9558 }
9559
9560done:
9561 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9562 if (kr == KERN_SUCCESS) {
9563 /*
9564 * Discard all the copy maps.
9565 */
9566 if (head_copy) {
9567 vm_map_copy_discard(head_copy);
9568 head_copy = NULL;
9569 }
9570 vm_map_copy_discard(copy);
9571 if (tail_copy) {
9572 vm_map_copy_discard(tail_copy);
9573 tail_copy = NULL;
9574 }
9575 } else {
9576 /*
9577 * Re-assemble the original copy map.
9578 */
9579 if (head_copy) {
9580 entry = vm_map_copy_first_entry(head_copy);
9581 vm_map_copy_entry_unlink(head_copy, entry);
9582 vm_map_copy_entry_link(copy,
0a7de745
A
9583 vm_map_copy_to_entry(copy),
9584 entry);
6d2010ae
A
9585 copy->offset -= head_size;
9586 copy->size += head_size;
9587 vm_map_copy_discard(head_copy);
9588 head_copy = NULL;
9589 }
9590 if (tail_copy) {
9591 entry = vm_map_copy_last_entry(tail_copy);
9592 vm_map_copy_entry_unlink(tail_copy, entry);
9593 vm_map_copy_entry_link(copy,
0a7de745
A
9594 vm_map_copy_last_entry(copy),
9595 entry);
6d2010ae
A
9596 copy->size += tail_size;
9597 vm_map_copy_discard(tail_copy);
9598 tail_copy = NULL;
9599 }
9600 }
9601 return kr;
1c79356b
A
9602}
9603
9604
9605/*
91447636 9606 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
9607 *
9608 * Decription:
9609 * Physically copy unaligned data
9610 *
9611 * Implementation:
9612 * Unaligned parts of pages have to be physically copied. We use
9613 * a modified form of vm_fault_copy (which understands none-aligned
9614 * page offsets and sizes) to do the copy. We attempt to copy as
9615 * much memory in one go as possibly, however vm_fault_copy copies
9616 * within 1 memory object so we have to find the smaller of "amount left"
9617 * "source object data size" and "target object data size". With
9618 * unaligned data we don't need to split regions, therefore the source
9619 * (copy) object should be one map entry, the target range may be split
9620 * over multiple map entries however. In any event we are pessimistic
9621 * about these assumptions.
9622 *
9623 * Assumptions:
9624 * dst_map is locked on entry and is return locked on success,
9625 * unlocked on error.
9626 */
9627
91447636 9628static kern_return_t
1c79356b 9629vm_map_copy_overwrite_unaligned(
0a7de745
A
9630 vm_map_t dst_map,
9631 vm_map_entry_t entry,
9632 vm_map_copy_t copy,
9633 vm_map_offset_t start,
9634 boolean_t discard_on_success)
1c79356b 9635{
0a7de745
A
9636 vm_map_entry_t copy_entry;
9637 vm_map_entry_t copy_entry_next;
9638 vm_map_version_t version;
9639 vm_object_t dst_object;
9640 vm_object_offset_t dst_offset;
9641 vm_object_offset_t src_offset;
9642 vm_object_offset_t entry_offset;
9643 vm_map_offset_t entry_end;
9644 vm_map_size_t src_size,
9645 dst_size,
9646 copy_size,
9647 amount_left;
9648 kern_return_t kr = KERN_SUCCESS;
1c79356b 9649
5ba3f43e 9650
39236c6e
A
9651 copy_entry = vm_map_copy_first_entry(copy);
9652
1c79356b
A
9653 vm_map_lock_write_to_read(dst_map);
9654
91447636 9655 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
9656 amount_left = copy->size;
9657/*
9658 * unaligned so we never clipped this entry, we need the offset into
9659 * the vm_object not just the data.
5ba3f43e 9660 */
1c79356b 9661 while (amount_left > 0) {
1c79356b
A
9662 if (entry == vm_map_to_entry(dst_map)) {
9663 vm_map_unlock_read(dst_map);
9664 return KERN_INVALID_ADDRESS;
9665 }
9666
9667 /* "start" must be within the current map entry */
0a7de745 9668 assert((start >= entry->vme_start) && (start < entry->vme_end));
1c79356b
A
9669
9670 dst_offset = start - entry->vme_start;
9671
9672 dst_size = entry->vme_end - start;
9673
9674 src_size = copy_entry->vme_end -
0a7de745 9675 (copy_entry->vme_start + src_offset);
1c79356b
A
9676
9677 if (dst_size < src_size) {
9678/*
9679 * we can only copy dst_size bytes before
9680 * we have to get the next destination entry
9681 */
9682 copy_size = dst_size;
9683 } else {
9684/*
9685 * we can only copy src_size bytes before
9686 * we have to get the next source copy entry
9687 */
9688 copy_size = src_size;
9689 }
9690
9691 if (copy_size > amount_left) {
9692 copy_size = amount_left;
9693 }
9694/*
9695 * Entry needs copy, create a shadow shadow object for
9696 * Copy on write region.
9697 */
9698 if (entry->needs_copy &&
0a7de745 9699 ((entry->protection & VM_PROT_WRITE) != 0)) {
1c79356b
A
9700 if (vm_map_lock_read_to_write(dst_map)) {
9701 vm_map_lock_read(dst_map);
9702 goto RetryLookup;
9703 }
3e170ce0 9704 VME_OBJECT_SHADOW(entry,
0a7de745
A
9705 (vm_map_size_t)(entry->vme_end
9706 - entry->vme_start));
1c79356b
A
9707 entry->needs_copy = FALSE;
9708 vm_map_lock_write_to_read(dst_map);
9709 }
3e170ce0 9710 dst_object = VME_OBJECT(entry);
1c79356b
A
9711/*
9712 * unlike with the virtual (aligned) copy we're going
9713 * to fault on it therefore we need a target object.
9714 */
0a7de745 9715 if (dst_object == VM_OBJECT_NULL) {
1c79356b
A
9716 if (vm_map_lock_read_to_write(dst_map)) {
9717 vm_map_lock_read(dst_map);
9718 goto RetryLookup;
9719 }
91447636 9720 dst_object = vm_object_allocate((vm_map_size_t)
0a7de745 9721 entry->vme_end - entry->vme_start);
cb323159 9722 VME_OBJECT_SET(entry, dst_object);
3e170ce0 9723 VME_OFFSET_SET(entry, 0);
fe8ab488 9724 assert(entry->use_pmap);
1c79356b
A
9725 vm_map_lock_write_to_read(dst_map);
9726 }
9727/*
9728 * Take an object reference and unlock map. The "entry" may
9729 * disappear or change when the map is unlocked.
9730 */
9731 vm_object_reference(dst_object);
9732 version.main_timestamp = dst_map->timestamp;
3e170ce0 9733 entry_offset = VME_OFFSET(entry);
1c79356b
A
9734 entry_end = entry->vme_end;
9735 vm_map_unlock_read(dst_map);
9736/*
9737 * Copy as much as possible in one pass
9738 */
9739 kr = vm_fault_copy(
3e170ce0
A
9740 VME_OBJECT(copy_entry),
9741 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
9742 &copy_size,
9743 dst_object,
9744 entry_offset + dst_offset,
9745 dst_map,
9746 &version,
9747 THREAD_UNINT );
9748
9749 start += copy_size;
9750 src_offset += copy_size;
9751 amount_left -= copy_size;
9752/*
9753 * Release the object reference
9754 */
9755 vm_object_deallocate(dst_object);
9756/*
9757 * If a hard error occurred, return it now
9758 */
0a7de745 9759 if (kr != KERN_SUCCESS) {
1c79356b 9760 return kr;
0a7de745 9761 }
1c79356b
A
9762
9763 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
0a7de745 9764 || amount_left == 0) {
1c79356b
A
9765/*
9766 * all done with this copy entry, dispose.
9767 */
39236c6e
A
9768 copy_entry_next = copy_entry->vme_next;
9769
9770 if (discard_on_success) {
9771 vm_map_copy_entry_unlink(copy, copy_entry);
9772 assert(!copy_entry->is_sub_map);
3e170ce0 9773 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
9774 vm_map_copy_entry_dispose(copy, copy_entry);
9775 }
1c79356b 9776
39236c6e
A
9777 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9778 amount_left) {
1c79356b
A
9779/*
9780 * not finished copying but run out of source
9781 */
9782 return KERN_INVALID_ADDRESS;
9783 }
39236c6e
A
9784
9785 copy_entry = copy_entry_next;
9786
1c79356b
A
9787 src_offset = 0;
9788 }
9789
0a7de745 9790 if (amount_left == 0) {
1c79356b 9791 return KERN_SUCCESS;
0a7de745 9792 }
1c79356b
A
9793
9794 vm_map_lock_read(dst_map);
9795 if (version.main_timestamp == dst_map->timestamp) {
9796 if (start == entry_end) {
9797/*
9798 * destination region is split. Use the version
9799 * information to avoid a lookup in the normal
9800 * case.
9801 */
9802 entry = entry->vme_next;
9803/*
9804 * should be contiguous. Fail if we encounter
9805 * a hole in the destination.
9806 */
9807 if (start != entry->vme_start) {
9808 vm_map_unlock_read(dst_map);
0a7de745 9809 return KERN_INVALID_ADDRESS;
1c79356b
A
9810 }
9811 }
9812 } else {
9813/*
9814 * Map version check failed.
9815 * we must lookup the entry because somebody
9816 * might have changed the map behind our backs.
9817 */
0a7de745
A
9818RetryLookup:
9819 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
1c79356b 9820 vm_map_unlock_read(dst_map);
0a7de745 9821 return KERN_INVALID_ADDRESS;
1c79356b
A
9822 }
9823 }
9824 }/* while */
9825
1c79356b
A
9826 return KERN_SUCCESS;
9827}/* vm_map_copy_overwrite_unaligned */
9828
9829/*
91447636 9830 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
9831 *
9832 * Description:
9833 * Does all the vm_trickery possible for whole pages.
9834 *
9835 * Implementation:
9836 *
9837 * If there are no permanent objects in the destination,
9838 * and the source and destination map entry zones match,
9839 * and the destination map entry is not shared,
9840 * then the map entries can be deleted and replaced
9841 * with those from the copy. The following code is the
9842 * basic idea of what to do, but there are lots of annoying
9843 * little details about getting protection and inheritance
9844 * right. Should add protection, inheritance, and sharing checks
9845 * to the above pass and make sure that no wiring is involved.
9846 */
9847
e2d2fc5c
A
/*
 * Counters incremented by vm_map_copy_overwrite_aligned() each time it
 * leaves its optimized path (via "goto slow_copy") because of the source:
 *   - src_not_internal:  the source's shadow chain ends in a non-internal
 *     object while overwriting a mapping tagged as malloc() memory;
 *   - src_not_symmetric: an object in that chain is "true_share" or does
 *     not use MEMORY_OBJECT_COPY_SYMMETRIC (only when !CONFIG_EMBEDDED);
 *   - src_large:         a small copy out of a large source object
 *     (only when !CONFIG_EMBEDDED).
 * They are only ever incremented in the code visible here.
 */
9848int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9849int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9850int vm_map_copy_overwrite_aligned_src_large = 0;
9851
91447636 9852static kern_return_t
1c79356b 9853vm_map_copy_overwrite_aligned(
0a7de745
A
9854 vm_map_t dst_map,
9855 vm_map_entry_t tmp_entry,
9856 vm_map_copy_t copy,
9857 vm_map_offset_t start,
9858 __unused pmap_t pmap)
1c79356b 9859{
0a7de745
A
9860 vm_object_t object;
9861 vm_map_entry_t copy_entry;
9862 vm_map_size_t copy_size;
9863 vm_map_size_t size;
9864 vm_map_entry_t entry;
5ba3f43e 9865
1c79356b 9866 while ((copy_entry = vm_map_copy_first_entry(copy))
0a7de745 9867 != vm_map_copy_to_entry(copy)) {
1c79356b 9868 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5ba3f43e 9869
1c79356b 9870 entry = tmp_entry;
fe8ab488
A
9871 if (entry->is_sub_map) {
9872 /* unnested when clipped earlier */
9873 assert(!entry->use_pmap);
9874 }
1c79356b
A
9875 if (entry == vm_map_to_entry(dst_map)) {
9876 vm_map_unlock(dst_map);
9877 return KERN_INVALID_ADDRESS;
9878 }
9879 size = (entry->vme_end - entry->vme_start);
9880 /*
9881 * Make sure that no holes popped up in the
9882 * address map, and that the protection is
9883 * still valid, in case the map was unlocked
9884 * earlier.
9885 */
9886
9887 if ((entry->vme_start != start) || ((entry->is_sub_map)
0a7de745 9888 && !entry->needs_copy)) {
1c79356b 9889 vm_map_unlock(dst_map);
0a7de745 9890 return KERN_INVALID_ADDRESS;
1c79356b
A
9891 }
9892 assert(entry != vm_map_to_entry(dst_map));
9893
9894 /*
9895 * Check protection again
9896 */
9897
0a7de745 9898 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 9899 vm_map_unlock(dst_map);
0a7de745 9900 return KERN_PROTECTION_FAILURE;
1c79356b
A
9901 }
9902
9903 /*
9904 * Adjust to source size first
9905 */
9906
9907 if (copy_size < size) {
fe8ab488
A
9908 if (entry->map_aligned &&
9909 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
0a7de745 9910 VM_MAP_PAGE_MASK(dst_map))) {
fe8ab488
A
9911 /* no longer map-aligned */
9912 entry->map_aligned = FALSE;
9913 }
1c79356b
A
9914 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9915 size = copy_size;
9916 }
9917
9918 /*
9919 * Adjust to destination size
9920 */
9921
9922 if (size < copy_size) {
9923 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 9924 copy_entry->vme_start + size);
1c79356b
A
9925 copy_size = size;
9926 }
9927
9928 assert((entry->vme_end - entry->vme_start) == size);
9929 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9930 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9931
9932 /*
9933 * If the destination contains temporary unshared memory,
9934 * we can perform the copy by throwing it away and
9935 * installing the source data.
9936 */
9937
3e170ce0 9938 object = VME_OBJECT(entry);
5ba3f43e 9939 if ((!entry->is_shared &&
0a7de745
A
9940 ((object == VM_OBJECT_NULL) ||
9941 (object->internal && !object->true_share))) ||
1c79356b 9942 entry->needs_copy) {
0a7de745
A
9943 vm_object_t old_object = VME_OBJECT(entry);
9944 vm_object_offset_t old_offset = VME_OFFSET(entry);
9945 vm_object_offset_t offset;
1c79356b
A
9946
9947 /*
9948 * Ensure that the source and destination aren't
9949 * identical
9950 */
3e170ce0
A
9951 if (old_object == VME_OBJECT(copy_entry) &&
9952 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
9953 vm_map_copy_entry_unlink(copy, copy_entry);
9954 vm_map_copy_entry_dispose(copy, copy_entry);
9955
0a7de745 9956 if (old_object != VM_OBJECT_NULL) {
1c79356b 9957 vm_object_deallocate(old_object);
0a7de745 9958 }
1c79356b
A
9959
9960 start = tmp_entry->vme_end;
9961 tmp_entry = tmp_entry->vme_next;
9962 continue;
9963 }
9964
5ba3f43e 9965#if !CONFIG_EMBEDDED
0a7de745
A
9966#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9967#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
9968 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9969 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
9970 copy_size <= __TRADEOFF1_COPY_SIZE) {
9971 /*
9972 * Virtual vs. Physical copy tradeoff #1.
9973 *
9974 * Copying only a few pages out of a large
9975 * object: do a physical copy instead of
9976 * a virtual copy, to avoid possibly keeping
9977 * the entire large object alive because of
9978 * those few copy-on-write pages.
9979 */
9980 vm_map_copy_overwrite_aligned_src_large++;
9981 goto slow_copy;
9982 }
5ba3f43e 9983#endif /* !CONFIG_EMBEDDED */
e2d2fc5c 9984
3e170ce0
A
9985 if ((dst_map->pmap != kernel_pmap) &&
9986 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
0a7de745 9987 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
ebb1b9f4
A
9988 vm_object_t new_object, new_shadow;
9989
9990 /*
9991 * We're about to map something over a mapping
9992 * established by malloc()...
9993 */
3e170ce0 9994 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
9995 if (new_object != VM_OBJECT_NULL) {
9996 vm_object_lock_shared(new_object);
9997 }
9998 while (new_object != VM_OBJECT_NULL &&
5ba3f43e 9999#if !CONFIG_EMBEDDED
0a7de745
A
10000 !new_object->true_share &&
10001 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
5ba3f43e 10002#endif /* !CONFIG_EMBEDDED */
0a7de745 10003 new_object->internal) {
ebb1b9f4
A
10004 new_shadow = new_object->shadow;
10005 if (new_shadow == VM_OBJECT_NULL) {
10006 break;
10007 }
10008 vm_object_lock_shared(new_shadow);
10009 vm_object_unlock(new_object);
10010 new_object = new_shadow;
10011 }
10012 if (new_object != VM_OBJECT_NULL) {
10013 if (!new_object->internal) {
10014 /*
10015 * The new mapping is backed
10016 * by an external object. We
10017 * don't want malloc'ed memory
10018 * to be replaced with such a
10019 * non-anonymous mapping, so
10020 * let's go off the optimized
10021 * path...
10022 */
e2d2fc5c 10023 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
10024 vm_object_unlock(new_object);
10025 goto slow_copy;
10026 }
5ba3f43e 10027#if !CONFIG_EMBEDDED
e2d2fc5c
A
10028 if (new_object->true_share ||
10029 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10030 /*
10031 * Same if there's a "true_share"
10032 * object in the shadow chain, or
10033 * an object with a non-default
10034 * (SYMMETRIC) copy strategy.
10035 */
10036 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10037 vm_object_unlock(new_object);
10038 goto slow_copy;
10039 }
5ba3f43e 10040#endif /* !CONFIG_EMBEDDED */
ebb1b9f4
A
10041 vm_object_unlock(new_object);
10042 }
10043 /*
10044 * The new mapping is still backed by
10045 * anonymous (internal) memory, so it's
10046 * OK to substitute it for the original
10047 * malloc() mapping.
10048 */
10049 }
10050
1c79356b 10051 if (old_object != VM_OBJECT_NULL) {
0a7de745
A
10052 if (entry->is_sub_map) {
10053 if (entry->use_pmap) {
0c530ab8 10054#ifndef NO_NESTED_PMAP
5ba3f43e 10055 pmap_unnest(dst_map->pmap,
0a7de745
A
10056 (addr64_t)entry->vme_start,
10057 entry->vme_end - entry->vme_start);
10058#endif /* NO_NESTED_PMAP */
10059 if (dst_map->mapped_in_other_pmaps) {
9bccf70c
A
10060 /* clean up parent */
10061 /* map/maps */
2d21ac55
A
10062 vm_map_submap_pmap_clean(
10063 dst_map, entry->vme_start,
10064 entry->vme_end,
3e170ce0
A
10065 VME_SUBMAP(entry),
10066 VME_OFFSET(entry));
9bccf70c
A
10067 }
10068 } else {
10069 vm_map_submap_pmap_clean(
5ba3f43e 10070 dst_map, entry->vme_start,
9bccf70c 10071 entry->vme_end,
3e170ce0
A
10072 VME_SUBMAP(entry),
10073 VME_OFFSET(entry));
9bccf70c 10074 }
0a7de745
A
10075 vm_map_deallocate(VME_SUBMAP(entry));
10076 } else {
10077 if (dst_map->mapped_in_other_pmaps) {
39236c6e 10078 vm_object_pmap_protect_options(
3e170ce0
A
10079 VME_OBJECT(entry),
10080 VME_OFFSET(entry),
5ba3f43e 10081 entry->vme_end
2d21ac55 10082 - entry->vme_start,
9bccf70c
A
10083 PMAP_NULL,
10084 entry->vme_start,
39236c6e
A
10085 VM_PROT_NONE,
10086 PMAP_OPTIONS_REMOVE);
9bccf70c 10087 } else {
39236c6e 10088 pmap_remove_options(
5ba3f43e
A
10089 dst_map->pmap,
10090 (addr64_t)(entry->vme_start),
39236c6e
A
10091 (addr64_t)(entry->vme_end),
10092 PMAP_OPTIONS_REMOVE);
9bccf70c 10093 }
1c79356b 10094 vm_object_deallocate(old_object);
0a7de745 10095 }
1c79356b
A
10096 }
10097
a39ff7e2
A
10098 if (entry->iokit_acct) {
10099 /* keep using iokit accounting */
10100 entry->use_pmap = FALSE;
10101 } else {
10102 /* use pmap accounting */
10103 entry->use_pmap = TRUE;
10104 }
1c79356b 10105 entry->is_sub_map = FALSE;
3e170ce0
A
10106 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10107 object = VME_OBJECT(entry);
1c79356b
A
10108 entry->needs_copy = copy_entry->needs_copy;
10109 entry->wired_count = 0;
10110 entry->user_wired_count = 0;
3e170ce0 10111 offset = VME_OFFSET(copy_entry);
5ba3f43e 10112 VME_OFFSET_SET(entry, offset);
1c79356b
A
10113
10114 vm_map_copy_entry_unlink(copy, copy_entry);
10115 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 10116
1c79356b 10117 /*
2d21ac55 10118 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
10119 * this optimization only saved on average 2 us per page if ALL
10120 * the pages in the source were currently mapped
10121 * and ALL the pages in the dest were touched, if there were fewer
10122 * than 2/3 of the pages touched, this optimization actually cost more cycles
2d21ac55 10123 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
1c79356b
A
10124 */
10125
1c79356b
A
10126 /*
10127 * Set up for the next iteration. The map
10128 * has not been unlocked, so the next
10129 * address should be at the end of this
10130 * entry, and the next map entry should be
10131 * the one following it.
10132 */
10133
10134 start = tmp_entry->vme_end;
10135 tmp_entry = tmp_entry->vme_next;
10136 } else {
0a7de745
A
10137 vm_map_version_t version;
10138 vm_object_t dst_object;
10139 vm_object_offset_t dst_offset;
10140 kern_return_t r;
1c79356b 10141
0a7de745 10142slow_copy:
e2d2fc5c 10143 if (entry->needs_copy) {
3e170ce0 10144 VME_OBJECT_SHADOW(entry,
0a7de745
A
10145 (entry->vme_end -
10146 entry->vme_start));
e2d2fc5c
A
10147 entry->needs_copy = FALSE;
10148 }
10149
3e170ce0
A
10150 dst_object = VME_OBJECT(entry);
10151 dst_offset = VME_OFFSET(entry);
ebb1b9f4 10152
1c79356b
A
10153 /*
10154 * Take an object reference, and record
10155 * the map version information so that the
10156 * map can be safely unlocked.
10157 */
10158
ebb1b9f4
A
10159 if (dst_object == VM_OBJECT_NULL) {
10160 /*
10161 * We would usually have just taken the
10162 * optimized path above if the destination
10163 * object has not been allocated yet. But we
10164 * now disable that optimization if the copy
10165 * entry's object is not backed by anonymous
10166 * memory to avoid replacing malloc'ed
10167 * (i.e. re-usable) anonymous memory with a
10168 * not-so-anonymous mapping.
10169 * So we have to handle this case here and
10170 * allocate a new VM object for this map entry.
10171 */
10172 dst_object = vm_object_allocate(
10173 entry->vme_end - entry->vme_start);
10174 dst_offset = 0;
3e170ce0
A
10175 VME_OBJECT_SET(entry, dst_object);
10176 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 10177 assert(entry->use_pmap);
ebb1b9f4
A
10178 }
10179
1c79356b
A
10180 vm_object_reference(dst_object);
10181
9bccf70c
A
10182 /* account for unlock bumping up timestamp */
10183 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
10184
10185 vm_map_unlock(dst_map);
10186
10187 /*
10188 * Copy as much as possible in one pass
10189 */
10190
10191 copy_size = size;
10192 r = vm_fault_copy(
3e170ce0
A
10193 VME_OBJECT(copy_entry),
10194 VME_OFFSET(copy_entry),
2d21ac55
A
10195 &copy_size,
10196 dst_object,
10197 dst_offset,
10198 dst_map,
10199 &version,
10200 THREAD_UNINT );
1c79356b
A
10201
10202 /*
10203 * Release the object reference
10204 */
10205
10206 vm_object_deallocate(dst_object);
10207
10208 /*
10209 * If a hard error occurred, return it now
10210 */
10211
0a7de745
A
10212 if (r != KERN_SUCCESS) {
10213 return r;
10214 }
1c79356b
A
10215
10216 if (copy_size != 0) {
10217 /*
10218 * Dispose of the copied region
10219 */
10220
10221 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 10222 copy_entry->vme_start + copy_size);
1c79356b 10223 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 10224 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
10225 vm_map_copy_entry_dispose(copy, copy_entry);
10226 }
10227
10228 /*
10229 * Pick up in the destination map where we left off.
10230 *
10231 * Use the version information to avoid a lookup
10232 * in the normal case.
10233 */
10234
10235 start += copy_size;
10236 vm_map_lock(dst_map);
e2d2fc5c
A
10237 if (version.main_timestamp == dst_map->timestamp &&
10238 copy_size != 0) {
1c79356b
A
10239 /* We can safely use saved tmp_entry value */
10240
fe8ab488
A
10241 if (tmp_entry->map_aligned &&
10242 !VM_MAP_PAGE_ALIGNED(
10243 start,
10244 VM_MAP_PAGE_MASK(dst_map))) {
10245 /* no longer map-aligned */
10246 tmp_entry->map_aligned = FALSE;
10247 }
1c79356b
A
10248 vm_map_clip_end(dst_map, tmp_entry, start);
10249 tmp_entry = tmp_entry->vme_next;
10250 } else {
10251 /* Must do lookup of tmp_entry */
10252
10253 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10254 vm_map_unlock(dst_map);
0a7de745 10255 return KERN_INVALID_ADDRESS;
1c79356b 10256 }
fe8ab488
A
10257 if (tmp_entry->map_aligned &&
10258 !VM_MAP_PAGE_ALIGNED(
10259 start,
10260 VM_MAP_PAGE_MASK(dst_map))) {
10261 /* no longer map-aligned */
10262 tmp_entry->map_aligned = FALSE;
10263 }
1c79356b
A
10264 vm_map_clip_start(dst_map, tmp_entry, start);
10265 }
10266 }
10267 }/* while */
10268
0a7de745 10269 return KERN_SUCCESS;
1c79356b
A
10270}/* vm_map_copy_overwrite_aligned */
10271
10272/*
91447636 10273 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
10274 *
10275 * Description:
10276 * Copy in data to a kernel buffer from space in the
91447636 10277 * source map. The original space may be optionally
1c79356b
A
10278 * deallocated.
10279 *
10280 * If successful, returns a new copy object.
10281 */
91447636 10282static kern_return_t
1c79356b 10283vm_map_copyin_kernel_buffer(
0a7de745
A
10284 vm_map_t src_map,
10285 vm_map_offset_t src_addr,
10286 vm_map_size_t len,
10287 boolean_t src_destroy,
10288 vm_map_copy_t *copy_result)
1c79356b 10289{
91447636 10290 kern_return_t kr;
1c79356b 10291 vm_map_copy_t copy;
b0d623f7
A
10292 vm_size_t kalloc_size;
10293
0a7de745 10294 if (len > msg_ool_size_small) {
3e170ce0 10295 return KERN_INVALID_ARGUMENT;
0a7de745 10296 }
1c79356b 10297
3e170ce0
A
10298 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10299
10300 copy = (vm_map_copy_t)kalloc(kalloc_size);
0a7de745 10301 if (copy == VM_MAP_COPY_NULL) {
1c79356b 10302 return KERN_RESOURCE_SHORTAGE;
0a7de745 10303 }
1c79356b
A
10304 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10305 copy->size = len;
10306 copy->offset = 0;
1c79356b 10307
3e170ce0 10308 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
10309 if (kr != KERN_SUCCESS) {
10310 kfree(copy, kalloc_size);
10311 return kr;
1c79356b
A
10312 }
10313 if (src_destroy) {
39236c6e
A
10314 (void) vm_map_remove(
10315 src_map,
10316 vm_map_trunc_page(src_addr,
0a7de745 10317 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10318 vm_map_round_page(src_addr + len,
0a7de745 10319 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10320 (VM_MAP_REMOVE_INTERRUPTIBLE |
0a7de745
A
10321 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10322 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
1c79356b
A
10323 }
10324 *copy_result = copy;
10325 return KERN_SUCCESS;
10326}
10327
10328/*
91447636 10329 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
10330 *
10331 * Description:
10332 * Copy out data from a kernel buffer into space in the
10333 * destination map. The space may be otpionally dynamically
10334 * allocated.
10335 *
10336 * If successful, consumes the copy object.
10337 * Otherwise, the caller is responsible for it.
10338 */
91447636
A
10339static int vm_map_copyout_kernel_buffer_failures = 0;
10340static kern_return_t
1c79356b 10341vm_map_copyout_kernel_buffer(
0a7de745
A
10342 vm_map_t map,
10343 vm_map_address_t *addr, /* IN/OUT */
10344 vm_map_copy_t copy,
10345 vm_map_size_t copy_size,
10346 boolean_t overwrite,
10347 boolean_t consume_on_success)
1c79356b
A
10348{
10349 kern_return_t kr = KERN_SUCCESS;
91447636 10350 thread_t thread = current_thread();
1c79356b 10351
39037602
A
10352 assert(copy->size == copy_size);
10353
3e170ce0
A
10354 /*
10355 * check for corrupted vm_map_copy structure
10356 */
0a7de745 10357 if (copy_size > msg_ool_size_small || copy->offset) {
3e170ce0 10358 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
10359 (long long)copy->size, (long long)copy->offset);
10360 }
3e170ce0 10361
1c79356b 10362 if (!overwrite) {
1c79356b
A
10363 /*
10364 * Allocate space in the target map for the data
10365 */
10366 *addr = 0;
5ba3f43e 10367 kr = vm_map_enter(map,
0a7de745
A
10368 addr,
10369 vm_map_round_page(copy_size,
10370 VM_MAP_PAGE_MASK(map)),
10371 (vm_map_offset_t) 0,
10372 VM_FLAGS_ANYWHERE,
10373 VM_MAP_KERNEL_FLAGS_NONE,
10374 VM_KERN_MEMORY_NONE,
10375 VM_OBJECT_NULL,
10376 (vm_object_offset_t) 0,
10377 FALSE,
10378 VM_PROT_DEFAULT,
10379 VM_PROT_ALL,
10380 VM_INHERIT_DEFAULT);
10381 if (kr != KERN_SUCCESS) {
91447636 10382 return kr;
0a7de745 10383 }
5ba3f43e
A
10384#if KASAN
10385 if (map->pmap == kernel_pmap) {
10386 kasan_notify_address(*addr, copy->size);
10387 }
10388#endif
1c79356b
A
10389 }
10390
10391 /*
10392 * Copyout the data from the kernel buffer to the target map.
5ba3f43e 10393 */
91447636 10394 if (thread->map == map) {
1c79356b
A
10395 /*
10396 * If the target map is the current map, just do
10397 * the copy.
10398 */
39037602
A
10399 assert((vm_size_t)copy_size == copy_size);
10400 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 10401 kr = KERN_INVALID_ADDRESS;
1c79356b 10402 }
0a7de745 10403 } else {
1c79356b
A
10404 vm_map_t oldmap;
10405
10406 /*
10407 * If the target map is another map, assume the
10408 * target's address space identity for the duration
10409 * of the copy.
10410 */
10411 vm_map_reference(map);
10412 oldmap = vm_map_switch(map);
10413
39037602
A
10414 assert((vm_size_t)copy_size == copy_size);
10415 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
10416 vm_map_copyout_kernel_buffer_failures++;
10417 kr = KERN_INVALID_ADDRESS;
1c79356b 10418 }
5ba3f43e 10419
1c79356b
A
10420 (void) vm_map_switch(oldmap);
10421 vm_map_deallocate(map);
10422 }
10423
91447636
A
10424 if (kr != KERN_SUCCESS) {
10425 /* the copy failed, clean up */
10426 if (!overwrite) {
10427 /*
10428 * Deallocate the space we allocated in the target map.
10429 */
39236c6e
A
10430 (void) vm_map_remove(
10431 map,
10432 vm_map_trunc_page(*addr,
0a7de745 10433 VM_MAP_PAGE_MASK(map)),
39236c6e 10434 vm_map_round_page((*addr +
0a7de745
A
10435 vm_map_round_page(copy_size,
10436 VM_MAP_PAGE_MASK(map))),
10437 VM_MAP_PAGE_MASK(map)),
d9a64523 10438 VM_MAP_REMOVE_NO_FLAGS);
91447636
A
10439 *addr = 0;
10440 }
10441 } else {
10442 /* copy was successful, dicard the copy structure */
39236c6e 10443 if (consume_on_success) {
39037602 10444 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 10445 }
91447636 10446 }
1c79356b 10447
91447636 10448 return kr;
1c79356b 10449}
5ba3f43e 10450
1c79356b 10451/*
0a7de745 10452 * Routine: vm_map_copy_insert [internal use only]
5ba3f43e 10453 *
1c79356b
A
10454 * Description:
10455 * Link a copy chain ("copy") into a map at the
10456 * specified location (after "where").
10457 * Side effects:
10458 * The copy chain is destroyed.
1c79356b 10459 */
d9a64523
A
10460static void
10461vm_map_copy_insert(
0a7de745
A
10462 vm_map_t map,
10463 vm_map_entry_t after_where,
10464 vm_map_copy_t copy)
d9a64523 10465{
0a7de745 10466 vm_map_entry_t entry;
d9a64523
A
10467
10468 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10469 entry = vm_map_copy_first_entry(copy);
10470 vm_map_copy_entry_unlink(copy, entry);
10471 vm_map_store_entry_link(map, after_where, entry,
0a7de745 10472 VM_MAP_KERNEL_FLAGS_NONE);
d9a64523
A
10473 after_where = entry;
10474 }
10475 zfree(vm_map_copy_zone, copy);
10476}
1c79356b 10477
39236c6e
A
10478void
10479vm_map_copy_remap(
0a7de745
A
10480 vm_map_t map,
10481 vm_map_entry_t where,
10482 vm_map_copy_t copy,
10483 vm_map_offset_t adjustment,
10484 vm_prot_t cur_prot,
10485 vm_prot_t max_prot,
10486 vm_inherit_t inheritance)
39236c6e 10487{
0a7de745 10488 vm_map_entry_t copy_entry, new_entry;
39236c6e
A
10489
10490 for (copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
10491 copy_entry != vm_map_copy_to_entry(copy);
10492 copy_entry = copy_entry->vme_next) {
39236c6e
A
10493 /* get a new VM map entry for the map */
10494 new_entry = vm_map_entry_create(map,
0a7de745 10495 !map->hdr.entries_pageable);
39236c6e
A
10496 /* copy the "copy entry" to the new entry */
10497 vm_map_entry_copy(new_entry, copy_entry);
10498 /* adjust "start" and "end" */
10499 new_entry->vme_start += adjustment;
10500 new_entry->vme_end += adjustment;
10501 /* clear some attributes */
10502 new_entry->inheritance = inheritance;
10503 new_entry->protection = cur_prot;
10504 new_entry->max_protection = max_prot;
10505 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10506 /* take an extra reference on the entry's "object" */
10507 if (new_entry->is_sub_map) {
fe8ab488 10508 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
10509 vm_map_lock(VME_SUBMAP(new_entry));
10510 vm_map_reference(VME_SUBMAP(new_entry));
10511 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 10512 } else {
3e170ce0 10513 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
10514 }
10515 /* insert the new entry in the map */
d9a64523 10516 vm_map_store_entry_link(map, where, new_entry,
0a7de745 10517 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
10518 /* continue inserting the "copy entries" after the new entry */
10519 where = new_entry;
10520 }
10521}
10522
2dced7af 10523
39037602
A
10524/*
10525 * Returns true if *size matches (or is in the range of) copy->size.
10526 * Upon returning true, the *size field is updated with the actual size of the
10527 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10528 */
2dced7af
A
10529boolean_t
10530vm_map_copy_validate_size(
0a7de745
A
10531 vm_map_t dst_map,
10532 vm_map_copy_t copy,
10533 vm_map_size_t *size)
2dced7af 10534{
0a7de745 10535 if (copy == VM_MAP_COPY_NULL) {
2dced7af 10536 return FALSE;
0a7de745 10537 }
39037602
A
10538 vm_map_size_t copy_sz = copy->size;
10539 vm_map_size_t sz = *size;
2dced7af
A
10540 switch (copy->type) {
10541 case VM_MAP_COPY_OBJECT:
10542 case VM_MAP_COPY_KERNEL_BUFFER:
0a7de745 10543 if (sz == copy_sz) {
2dced7af 10544 return TRUE;
0a7de745 10545 }
2dced7af
A
10546 break;
10547 case VM_MAP_COPY_ENTRY_LIST:
10548 /*
10549 * potential page-size rounding prevents us from exactly
10550 * validating this flavor of vm_map_copy, but we can at least
10551 * assert that it's within a range.
10552 */
39037602
A
10553 if (copy_sz >= sz &&
10554 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10555 *size = copy_sz;
2dced7af 10556 return TRUE;
39037602 10557 }
2dced7af
A
10558 break;
10559 default:
10560 break;
10561 }
10562 return FALSE;
10563}
10564
39037602
A
10565/*
10566 * Routine: vm_map_copyout_size
10567 *
10568 * Description:
10569 * Copy out a copy chain ("copy") into newly-allocated
10570 * space in the destination map. Uses a prevalidated
10571 * size for the copy object (vm_map_copy_validate_size).
10572 *
10573 * If successful, consumes the copy object.
10574 * Otherwise, the caller is responsible for it.
10575 */
10576kern_return_t
10577vm_map_copyout_size(
0a7de745
A
10578 vm_map_t dst_map,
10579 vm_map_address_t *dst_addr, /* OUT */
10580 vm_map_copy_t copy,
10581 vm_map_size_t copy_size)
39037602
A
10582{
10583 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
0a7de745
A
10584 TRUE, /* consume_on_success */
10585 VM_PROT_DEFAULT,
10586 VM_PROT_ALL,
10587 VM_INHERIT_DEFAULT);
39037602 10588}
2dced7af 10589
1c79356b
A
10590/*
10591 * Routine: vm_map_copyout
10592 *
10593 * Description:
10594 * Copy out a copy chain ("copy") into newly-allocated
10595 * space in the destination map.
10596 *
10597 * If successful, consumes the copy object.
10598 * Otherwise, the caller is responsible for it.
10599 */
10600kern_return_t
10601vm_map_copyout(
0a7de745
A
10602 vm_map_t dst_map,
10603 vm_map_address_t *dst_addr, /* OUT */
10604 vm_map_copy_t copy)
39236c6e 10605{
39037602 10606 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
0a7de745
A
10607 TRUE, /* consume_on_success */
10608 VM_PROT_DEFAULT,
10609 VM_PROT_ALL,
10610 VM_INHERIT_DEFAULT);
39236c6e
A
10611}
10612
10613kern_return_t
10614vm_map_copyout_internal(
0a7de745
A
10615 vm_map_t dst_map,
10616 vm_map_address_t *dst_addr, /* OUT */
10617 vm_map_copy_t copy,
10618 vm_map_size_t copy_size,
10619 boolean_t consume_on_success,
10620 vm_prot_t cur_protection,
10621 vm_prot_t max_protection,
10622 vm_inherit_t inheritance)
1c79356b 10623{
0a7de745
A
10624 vm_map_size_t size;
10625 vm_map_size_t adjustment;
10626 vm_map_offset_t start;
10627 vm_object_offset_t vm_copy_start;
10628 vm_map_entry_t last;
10629 vm_map_entry_t entry;
10630 vm_map_entry_t hole_entry;
1c79356b
A
10631
10632 /*
10633 * Check for null copy object.
10634 */
10635
10636 if (copy == VM_MAP_COPY_NULL) {
10637 *dst_addr = 0;
0a7de745 10638 return KERN_SUCCESS;
1c79356b
A
10639 }
10640
39037602
A
10641 if (copy->size != copy_size) {
10642 *dst_addr = 0;
10643 return KERN_FAILURE;
10644 }
10645
1c79356b
A
10646 /*
10647 * Check for special copy object, created
10648 * by vm_map_copyin_object.
10649 */
10650
10651 if (copy->type == VM_MAP_COPY_OBJECT) {
0a7de745
A
10652 vm_object_t object = copy->cpy_object;
10653 kern_return_t kr;
10654 vm_object_offset_t offset;
1c79356b 10655
91447636 10656 offset = vm_object_trunc_page(copy->offset);
39037602 10657 size = vm_map_round_page((copy_size +
0a7de745
A
10658 (vm_map_size_t)(copy->offset -
10659 offset)),
10660 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
10661 *dst_addr = 0;
10662 kr = vm_map_enter(dst_map, dst_addr, size,
0a7de745
A
10663 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10664 VM_MAP_KERNEL_FLAGS_NONE,
10665 VM_KERN_MEMORY_NONE,
10666 object, offset, FALSE,
10667 VM_PROT_DEFAULT, VM_PROT_ALL,
10668 VM_INHERIT_DEFAULT);
10669 if (kr != KERN_SUCCESS) {
10670 return kr;
10671 }
1c79356b 10672 /* Account for non-pagealigned copy object */
91447636 10673 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
0a7de745 10674 if (consume_on_success) {
39236c6e 10675 zfree(vm_map_copy_zone, copy);
0a7de745
A
10676 }
10677 return KERN_SUCCESS;
1c79356b
A
10678 }
10679
10680 /*
10681 * Check for special kernel buffer allocated
10682 * by new_ipc_kmsg_copyin.
10683 */
10684
10685 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602 10686 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
0a7de745
A
10687 copy, copy_size, FALSE,
10688 consume_on_success);
1c79356b
A
10689 }
10690
39236c6e 10691
1c79356b
A
10692 /*
10693 * Find space for the data
10694 */
10695
39236c6e 10696 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
0a7de745 10697 VM_MAP_COPY_PAGE_MASK(copy));
39037602 10698 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
0a7de745
A
10699 VM_MAP_COPY_PAGE_MASK(copy))
10700 - vm_copy_start;
1c79356b 10701
39236c6e 10702
0a7de745 10703StartAgain:;
1c79356b
A
10704
10705 vm_map_lock(dst_map);
0a7de745 10706 if (dst_map->disable_vmentry_reuse == TRUE) {
6d2010ae
A
10707 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10708 last = entry;
10709 } else {
3e170ce0 10710 if (dst_map->holelistenabled) {
d9a64523 10711 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
3e170ce0
A
10712
10713 if (hole_entry == NULL) {
10714 /*
10715 * No more space in the map?
10716 */
10717 vm_map_unlock(dst_map);
0a7de745 10718 return KERN_NO_SPACE;
3e170ce0
A
10719 }
10720
10721 last = hole_entry;
10722 start = last->vme_start;
10723 } else {
10724 assert(first_free_is_valid(dst_map));
10725 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
0a7de745 10726 vm_map_min(dst_map) : last->vme_end;
3e170ce0 10727 }
39236c6e 10728 start = vm_map_round_page(start,
0a7de745 10729 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 10730 }
1c79356b
A
10731
10732 while (TRUE) {
0a7de745
A
10733 vm_map_entry_t next = last->vme_next;
10734 vm_map_offset_t end = start + size;
1c79356b
A
10735
10736 if ((end > dst_map->max_offset) || (end < start)) {
10737 if (dst_map->wait_for_space) {
10738 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10739 assert_wait((event_t) dst_map,
0a7de745 10740 THREAD_INTERRUPTIBLE);
1c79356b 10741 vm_map_unlock(dst_map);
91447636 10742 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
10743 goto StartAgain;
10744 }
10745 }
10746 vm_map_unlock(dst_map);
0a7de745 10747 return KERN_NO_SPACE;
1c79356b
A
10748 }
10749
3e170ce0 10750 if (dst_map->holelistenabled) {
0a7de745 10751 if (last->vme_end >= end) {
3e170ce0 10752 break;
0a7de745 10753 }
3e170ce0
A
10754 } else {
10755 /*
10756 * If there are no more entries, we must win.
10757 *
10758 * OR
10759 *
10760 * If there is another entry, it must be
10761 * after the end of the potential new region.
10762 */
10763
0a7de745 10764 if (next == vm_map_to_entry(dst_map)) {
3e170ce0 10765 break;
0a7de745 10766 }
3e170ce0 10767
0a7de745 10768 if (next->vme_start >= end) {
3e170ce0 10769 break;
0a7de745 10770 }
3e170ce0 10771 }
1c79356b
A
10772
10773 last = next;
3e170ce0
A
10774
10775 if (dst_map->holelistenabled) {
d9a64523 10776 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
3e170ce0
A
10777 /*
10778 * Wrapped around
10779 */
10780 vm_map_unlock(dst_map);
0a7de745 10781 return KERN_NO_SPACE;
3e170ce0
A
10782 }
10783 start = last->vme_start;
10784 } else {
10785 start = last->vme_end;
10786 }
39236c6e 10787 start = vm_map_round_page(start,
0a7de745 10788 VM_MAP_PAGE_MASK(dst_map));
39236c6e
A
10789 }
10790
3e170ce0
A
10791 if (dst_map->holelistenabled) {
10792 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10793 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10794 }
10795 }
10796
10797
39236c6e 10798 adjustment = start - vm_copy_start;
0a7de745 10799 if (!consume_on_success) {
39236c6e
A
10800 /*
10801 * We're not allowed to consume "copy", so we'll have to
10802 * copy its map entries into the destination map below.
10803 * No need to re-allocate map entries from the correct
10804 * (pageable or not) zone, since we'll get new map entries
10805 * during the transfer.
10806 * We'll also adjust the map entries's "start" and "end"
10807 * during the transfer, to keep "copy"'s entries consistent
10808 * with its "offset".
10809 */
10810 goto after_adjustments;
1c79356b
A
10811 }
10812
10813 /*
10814 * Since we're going to just drop the map
10815 * entries from the copy into the destination
10816 * map, they must come from the same pool.
10817 */
10818
10819 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
10820 /*
10821 * Mismatches occur when dealing with the default
10822 * pager.
10823 */
0a7de745
A
10824 zone_t old_zone;
10825 vm_map_entry_t next, new;
2d21ac55
A
10826
10827 /*
10828 * Find the zone that the copies were allocated from
10829 */
7ddcb079 10830
2d21ac55
A
10831 entry = vm_map_copy_first_entry(copy);
10832
10833 /*
10834 * Reinitialize the copy so that vm_map_copy_entry_link
10835 * will work.
10836 */
6d2010ae 10837 vm_map_store_copy_reset(copy, entry);
2d21ac55 10838 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
10839
10840 /*
10841 * Copy each entry.
10842 */
10843 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 10844 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 10845 vm_map_entry_copy_full(new, entry);
cb323159 10846 new->vme_no_copy_on_read = FALSE;
fe8ab488
A
10847 assert(!new->iokit_acct);
10848 if (new->is_sub_map) {
10849 /* clr address space specifics */
10850 new->use_pmap = FALSE;
10851 }
2d21ac55 10852 vm_map_copy_entry_link(copy,
0a7de745
A
10853 vm_map_copy_last_entry(copy),
10854 new);
2d21ac55 10855 next = entry->vme_next;
7ddcb079 10856 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
10857 zfree(old_zone, entry);
10858 entry = next;
10859 }
1c79356b
A
10860 }
10861
10862 /*
10863 * Adjust the addresses in the copy chain, and
10864 * reset the region attributes.
10865 */
10866
1c79356b 10867 for (entry = vm_map_copy_first_entry(copy);
0a7de745
A
10868 entry != vm_map_copy_to_entry(copy);
10869 entry = entry->vme_next) {
39236c6e
A
10870 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10871 /*
10872 * We're injecting this copy entry into a map that
10873 * has the standard page alignment, so clear
10874 * "map_aligned" (which might have been inherited
10875 * from the original map entry).
10876 */
10877 entry->map_aligned = FALSE;
10878 }
10879
1c79356b
A
10880 entry->vme_start += adjustment;
10881 entry->vme_end += adjustment;
10882
39236c6e
A
10883 if (entry->map_aligned) {
10884 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
0a7de745 10885 VM_MAP_PAGE_MASK(dst_map)));
39236c6e 10886 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
0a7de745 10887 VM_MAP_PAGE_MASK(dst_map)));
39236c6e
A
10888 }
10889
1c79356b
A
10890 entry->inheritance = VM_INHERIT_DEFAULT;
10891 entry->protection = VM_PROT_DEFAULT;
10892 entry->max_protection = VM_PROT_ALL;
10893 entry->behavior = VM_BEHAVIOR_DEFAULT;
10894
10895 /*
10896 * If the entry is now wired,
10897 * map the pages into the destination map.
10898 */
10899 if (entry->wired_count != 0) {
39037602 10900 vm_map_offset_t va;
0a7de745 10901 vm_object_offset_t offset;
39037602 10902 vm_object_t object;
2d21ac55 10903 vm_prot_t prot;
0a7de745 10904 int type_of_fault;
1c79356b 10905
3e170ce0
A
10906 object = VME_OBJECT(entry);
10907 offset = VME_OFFSET(entry);
2d21ac55 10908 va = entry->vme_start;
1c79356b 10909
2d21ac55 10910 pmap_pageable(dst_map->pmap,
0a7de745
A
10911 entry->vme_start,
10912 entry->vme_end,
10913 TRUE);
1c79356b 10914
2d21ac55 10915 while (va < entry->vme_end) {
0a7de745 10916 vm_page_t m;
d9a64523 10917 struct vm_object_fault_info fault_info = {};
1c79356b 10918
2d21ac55
A
10919 /*
10920 * Look up the page in the object.
10921 * Assert that the page will be found in the
10922 * top object:
10923 * either
10924 * the object was newly created by
10925 * vm_object_copy_slowly, and has
10926 * copies of all of the pages from
10927 * the source object
10928 * or
10929 * the object was moved from the old
10930 * map entry; because the old map
10931 * entry was wired, all of the pages
10932 * were in the top-level object.
10933 * (XXX not true if we wire pages for
10934 * reading)
10935 */
10936 vm_object_lock(object);
91447636 10937
2d21ac55 10938 m = vm_page_lookup(object, offset);
b0d623f7 10939 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
0a7de745 10940 m->vmp_absent) {
2d21ac55 10941 panic("vm_map_copyout: wiring %p", m);
0a7de745 10942 }
1c79356b 10943
2d21ac55 10944 prot = entry->protection;
1c79356b 10945
3e170ce0 10946 if (override_nx(dst_map, VME_ALIAS(entry)) &&
0a7de745
A
10947 prot) {
10948 prot |= VM_PROT_EXECUTE;
10949 }
1c79356b 10950
2d21ac55 10951 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 10952
d9a64523
A
10953 fault_info.user_tag = VME_ALIAS(entry);
10954 fault_info.pmap_options = 0;
10955 if (entry->iokit_acct ||
10956 (!entry->is_sub_map && !entry->use_pmap)) {
10957 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10958 }
10959
10960 vm_fault_enter(m,
0a7de745
A
10961 dst_map->pmap,
10962 va,
10963 prot,
10964 prot,
10965 VM_PAGE_WIRED(m),
10966 FALSE, /* change_wiring */
10967 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10968 &fault_info,
10969 NULL, /* need_retry */
10970 &type_of_fault);
1c79356b 10971
2d21ac55 10972 vm_object_unlock(object);
1c79356b 10973
2d21ac55
A
10974 offset += PAGE_SIZE_64;
10975 va += PAGE_SIZE;
1c79356b
A
10976 }
10977 }
10978 }
10979
39236c6e
A
10980after_adjustments:
10981
1c79356b
A
10982 /*
10983 * Correct the page alignment for the result
10984 */
10985
10986 *dst_addr = start + (copy->offset - vm_copy_start);
10987
5ba3f43e
A
10988#if KASAN
10989 kasan_notify_address(*dst_addr, size);
10990#endif
10991
1c79356b
A
10992 /*
10993 * Update the hints and the map size
10994 */
10995
39236c6e
A
10996 if (consume_on_success) {
10997 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10998 } else {
10999 SAVE_HINT_MAP_WRITE(dst_map, last);
11000 }
1c79356b
A
11001
11002 dst_map->size += size;
11003
11004 /*
11005 * Link in the copy
11006 */
11007
39236c6e
A
11008 if (consume_on_success) {
11009 vm_map_copy_insert(dst_map, last, copy);
11010 } else {
11011 vm_map_copy_remap(dst_map, last, copy, adjustment,
0a7de745
A
11012 cur_protection, max_protection,
11013 inheritance);
39236c6e 11014 }
1c79356b
A
11015
11016 vm_map_unlock(dst_map);
11017
11018 /*
11019 * XXX If wiring_required, call vm_map_pageable
11020 */
11021
0a7de745 11022 return KERN_SUCCESS;
1c79356b
A
11023}
11024
1c79356b
A
11025/*
11026 * Routine: vm_map_copyin
11027 *
11028 * Description:
2d21ac55
A
11029 * see vm_map_copyin_common. Exported via Unsupported.exports.
11030 *
11031 */
11032
11033#undef vm_map_copyin
11034
11035kern_return_t
11036vm_map_copyin(
0a7de745
A
11037 vm_map_t src_map,
11038 vm_map_address_t src_addr,
11039 vm_map_size_t len,
11040 boolean_t src_destroy,
11041 vm_map_copy_t *copy_result) /* OUT */
2d21ac55 11042{
0a7de745
A
11043 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11044 FALSE, copy_result, FALSE);
2d21ac55
A
11045}
11046
11047/*
11048 * Routine: vm_map_copyin_common
11049 *
11050 * Description:
1c79356b
A
11051 * Copy the specified region (src_addr, len) from the
11052 * source address space (src_map), possibly removing
11053 * the region from the source address space (src_destroy).
11054 *
11055 * Returns:
11056 * A vm_map_copy_t object (copy_result), suitable for
11057 * insertion into another address space (using vm_map_copyout),
11058 * copying over another address space region (using
11059 * vm_map_copy_overwrite). If the copy is unused, it
11060 * should be destroyed (using vm_map_copy_discard).
11061 *
11062 * In/out conditions:
11063 * The source map should not be locked on entry.
11064 */
11065
11066typedef struct submap_map {
0a7de745
A
11067 vm_map_t parent_map;
11068 vm_map_offset_t base_start;
11069 vm_map_offset_t base_end;
11070 vm_map_size_t base_len;
1c79356b
A
11071 struct submap_map *next;
11072} submap_map_t;
11073
11074kern_return_t
11075vm_map_copyin_common(
0a7de745 11076 vm_map_t src_map,
91447636 11077 vm_map_address_t src_addr,
0a7de745
A
11078 vm_map_size_t len,
11079 boolean_t src_destroy,
11080 __unused boolean_t src_volatile,
11081 vm_map_copy_t *copy_result, /* OUT */
11082 boolean_t use_maxprot)
4bd07ac2
A
11083{
11084 int flags;
11085
11086 flags = 0;
11087 if (src_destroy) {
11088 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11089 }
11090 if (use_maxprot) {
11091 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11092 }
11093 return vm_map_copyin_internal(src_map,
0a7de745
A
11094 src_addr,
11095 len,
11096 flags,
11097 copy_result);
4bd07ac2
A
11098}
11099kern_return_t
11100vm_map_copyin_internal(
0a7de745 11101 vm_map_t src_map,
4bd07ac2 11102 vm_map_address_t src_addr,
0a7de745
A
11103 vm_map_size_t len,
11104 int flags,
11105 vm_map_copy_t *copy_result) /* OUT */
1c79356b 11106{
0a7de745
A
11107 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11108 * in multi-level lookup, this
11109 * entry contains the actual
11110 * vm_object/offset.
11111 */
11112 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11113
11114 vm_map_offset_t src_start; /* Start of current entry --
11115 * where copy is taking place now
11116 */
11117 vm_map_offset_t src_end; /* End of entire region to be
11118 * copied */
2d21ac55 11119 vm_map_offset_t src_base;
0a7de745
A
11120 vm_map_t base_map = src_map;
11121 boolean_t map_share = FALSE;
11122 submap_map_t *parent_maps = NULL;
1c79356b 11123
0a7de745 11124 vm_map_copy_t copy; /* Resulting copy */
fe8ab488 11125 vm_map_address_t copy_addr;
0a7de745
A
11126 vm_map_size_t copy_size;
11127 boolean_t src_destroy;
11128 boolean_t use_maxprot;
11129 boolean_t preserve_purgeable;
11130 boolean_t entry_was_shared;
11131 vm_map_entry_t saved_src_entry;
4bd07ac2
A
11132
11133 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11134 return KERN_INVALID_ARGUMENT;
11135 }
5ba3f43e 11136
4bd07ac2
A
11137 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11138 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602 11139 preserve_purgeable =
0a7de745 11140 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
11141
11142 /*
11143 * Check for copies of zero bytes.
11144 */
11145
11146 if (len == 0) {
11147 *copy_result = VM_MAP_COPY_NULL;
0a7de745 11148 return KERN_SUCCESS;
1c79356b
A
11149 }
11150
4a249263
A
11151 /*
11152 * Check that the end address doesn't overflow
11153 */
11154 src_end = src_addr + len;
0a7de745 11155 if (src_end < src_addr) {
4a249263 11156 return KERN_INVALID_ADDRESS;
0a7de745 11157 }
4a249263 11158
39037602
A
11159 /*
11160 * Compute (page aligned) start and end of region
11161 */
11162 src_start = vm_map_trunc_page(src_addr,
0a7de745 11163 VM_MAP_PAGE_MASK(src_map));
39037602 11164 src_end = vm_map_round_page(src_end,
0a7de745 11165 VM_MAP_PAGE_MASK(src_map));
39037602 11166
1c79356b
A
11167 /*
11168 * If the copy is sufficiently small, use a kernel buffer instead
11169 * of making a virtual copy. The theory being that the cost of
11170 * setting up VM (and taking C-O-W faults) dominates the copy costs
11171 * for small regions.
11172 */
4bd07ac2
A
11173 if ((len < msg_ool_size_small) &&
11174 !use_maxprot &&
39037602
A
11175 !preserve_purgeable &&
11176 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11177 /*
11178 * Since the "msg_ool_size_small" threshold was increased and
11179 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11180 * address space limits, we revert to doing a virtual copy if the
11181 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11182 * of the commpage would now fail when it used to work.
11183 */
11184 (src_start >= vm_map_min(src_map) &&
0a7de745
A
11185 src_start < vm_map_max(src_map) &&
11186 src_end >= vm_map_min(src_map) &&
11187 src_end < vm_map_max(src_map))) {
2d21ac55 11188 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
0a7de745
A
11189 src_destroy, copy_result);
11190 }
1c79356b 11191
1c79356b
A
11192 /*
11193 * Allocate a header element for the list.
11194 *
5ba3f43e 11195 * Use the start and end in the header to
1c79356b
A
11196 * remember the endpoints prior to rounding.
11197 */
11198
d9a64523 11199 copy = vm_map_copy_allocate();
1c79356b 11200 copy->type = VM_MAP_COPY_ENTRY_LIST;
1c79356b 11201 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
11202#if 00
11203 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11204#else
11205 /*
11206 * The copy entries can be broken down for a variety of reasons,
11207 * so we can't guarantee that they will remain map-aligned...
11208 * Will need to adjust the first copy_entry's "vme_start" and
11209 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11210 * rather than the original map's alignment.
11211 */
11212 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11213#endif
1c79356b 11214
0a7de745 11215 vm_map_store_init( &(copy->cpy_hdr));
6d2010ae 11216
1c79356b
A
11217 copy->offset = src_addr;
11218 copy->size = len;
5ba3f43e 11219
7ddcb079 11220 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11221
0a7de745
A
11222#define RETURN(x) \
11223 MACRO_BEGIN \
11224 vm_map_unlock(src_map); \
11225 if(src_map != base_map) \
11226 vm_map_deallocate(src_map); \
11227 if (new_entry != VM_MAP_ENTRY_NULL) \
11228 vm_map_copy_entry_dispose(copy,new_entry); \
11229 vm_map_copy_discard(copy); \
11230 { \
11231 submap_map_t *_ptr; \
11232 \
11233 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11234 parent_maps=parent_maps->next; \
11235 if (_ptr->parent_map != base_map) \
11236 vm_map_deallocate(_ptr->parent_map); \
11237 kfree(_ptr, sizeof(submap_map_t)); \
11238 } \
11239 } \
11240 MACRO_RETURN(x); \
1c79356b
A
11241 MACRO_END
11242
11243 /*
11244 * Find the beginning of the region.
11245 */
11246
0a7de745 11247 vm_map_lock(src_map);
1c79356b 11248
fe8ab488
A
11249 /*
11250 * Lookup the original "src_addr" rather than the truncated
11251 * "src_start", in case "src_start" falls in a non-map-aligned
11252 * map entry *before* the map entry that contains "src_addr"...
11253 */
0a7de745 11254 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
1c79356b 11255 RETURN(KERN_INVALID_ADDRESS);
0a7de745
A
11256 }
11257 if (!tmp_entry->is_sub_map) {
fe8ab488
A
11258 /*
11259 * ... but clip to the map-rounded "src_start" rather than
11260 * "src_addr" to preserve map-alignment. We'll adjust the
11261 * first copy entry at the end, if needed.
11262 */
1c79356b
A
11263 vm_map_clip_start(src_map, tmp_entry, src_start);
11264 }
fe8ab488
A
11265 if (src_start < tmp_entry->vme_start) {
11266 /*
11267 * Move "src_start" up to the start of the
11268 * first map entry to copy.
11269 */
11270 src_start = tmp_entry->vme_start;
11271 }
1c79356b
A
11272 /* set for later submap fix-up */
11273 copy_addr = src_start;
11274
11275 /*
11276 * Go through entries until we get to the end.
11277 */
11278
11279 while (TRUE) {
0a7de745
A
11280 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11281 vm_map_size_t src_size; /* Size of source
11282 * map entry (in both
11283 * maps)
11284 */
11285
11286 vm_object_t src_object; /* Object to copy */
11287 vm_object_offset_t src_offset;
11288
11289 boolean_t src_needs_copy; /* Should source map
11290 * be made read-only
11291 * for copy-on-write?
11292 */
11293
11294 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11295
11296 boolean_t was_wired; /* Was source wired? */
11297 vm_map_version_t version; /* Version before locks
11298 * dropped to make copy
11299 */
11300 kern_return_t result; /* Return value from
11301 * copy_strategically.
11302 */
11303 while (tmp_entry->is_sub_map) {
91447636 11304 vm_map_size_t submap_len;
1c79356b
A
11305 submap_map_t *ptr;
11306
11307 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11308 ptr->next = parent_maps;
11309 parent_maps = ptr;
11310 ptr->parent_map = src_map;
11311 ptr->base_start = src_start;
11312 ptr->base_end = src_end;
11313 submap_len = tmp_entry->vme_end - src_start;
0a7de745
A
11314 if (submap_len > (src_end - src_start)) {
11315 submap_len = src_end - src_start;
11316 }
2d21ac55 11317 ptr->base_len = submap_len;
5ba3f43e 11318
1c79356b 11319 src_start -= tmp_entry->vme_start;
3e170ce0 11320 src_start += VME_OFFSET(tmp_entry);
1c79356b 11321 src_end = src_start + submap_len;
3e170ce0 11322 src_map = VME_SUBMAP(tmp_entry);
1c79356b 11323 vm_map_lock(src_map);
9bccf70c
A
11324 /* keep an outstanding reference for all maps in */
11325 /* the parents tree except the base map */
11326 vm_map_reference(src_map);
1c79356b
A
11327 vm_map_unlock(ptr->parent_map);
11328 if (!vm_map_lookup_entry(
0a7de745 11329 src_map, src_start, &tmp_entry)) {
1c79356b 11330 RETURN(KERN_INVALID_ADDRESS);
0a7de745 11331 }
1c79356b 11332 map_share = TRUE;
0a7de745 11333 if (!tmp_entry->is_sub_map) {
2d21ac55 11334 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11335 }
1c79356b
A
11336 src_entry = tmp_entry;
11337 }
2d21ac55
A
11338 /* we are now in the lowest level submap... */
11339
5ba3f43e 11340 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
3e170ce0 11341 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
11342 /* This is not, supported for now.In future */
11343 /* we will need to detect the phys_contig */
11344 /* condition and then upgrade copy_slowly */
11345 /* to do physical copy from the device mem */
11346 /* based object. We can piggy-back off of */
11347 /* the was wired boolean to set-up the */
11348 /* proper handling */
0b4e3aa0
A
11349 RETURN(KERN_PROTECTION_FAILURE);
11350 }
1c79356b 11351 /*
5ba3f43e 11352 * Create a new address map entry to hold the result.
1c79356b
A
11353 * Fill in the fields from the appropriate source entries.
11354 * We must unlock the source map to do this if we need
11355 * to allocate a map entry.
11356 */
11357 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
11358 version.main_timestamp = src_map->timestamp;
11359 vm_map_unlock(src_map);
1c79356b 11360
7ddcb079 11361 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11362
2d21ac55
A
11363 vm_map_lock(src_map);
11364 if ((version.main_timestamp + 1) != src_map->timestamp) {
11365 if (!vm_map_lookup_entry(src_map, src_start,
0a7de745 11366 &tmp_entry)) {
2d21ac55
A
11367 RETURN(KERN_INVALID_ADDRESS);
11368 }
0a7de745 11369 if (!tmp_entry->is_sub_map) {
2d21ac55 11370 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11371 }
2d21ac55 11372 continue; /* restart w/ new tmp_entry */
1c79356b 11373 }
1c79356b
A
11374 }
11375
11376 /*
11377 * Verify that the region can be read.
11378 */
11379 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
0a7de745
A
11380 !use_maxprot) ||
11381 (src_entry->max_protection & VM_PROT_READ) == 0) {
1c79356b 11382 RETURN(KERN_PROTECTION_FAILURE);
0a7de745 11383 }
1c79356b
A
11384
11385 /*
11386 * Clip against the endpoints of the entire region.
11387 */
11388
11389 vm_map_clip_end(src_map, src_entry, src_end);
11390
11391 src_size = src_entry->vme_end - src_start;
3e170ce0
A
11392 src_object = VME_OBJECT(src_entry);
11393 src_offset = VME_OFFSET(src_entry);
1c79356b
A
11394 was_wired = (src_entry->wired_count != 0);
11395
11396 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
11397 if (new_entry->is_sub_map) {
11398 /* clr address space specifics */
11399 new_entry->use_pmap = FALSE;
a39ff7e2
A
11400 } else {
11401 /*
11402 * We're dealing with a copy-on-write operation,
11403 * so the resulting mapping should not inherit the
11404 * original mapping's accounting settings.
11405 * "iokit_acct" should have been cleared in
11406 * vm_map_entry_copy().
11407 * "use_pmap" should be reset to its default (TRUE)
11408 * so that the new mapping gets accounted for in
11409 * the task's memory footprint.
11410 */
11411 assert(!new_entry->iokit_acct);
11412 new_entry->use_pmap = TRUE;
fe8ab488 11413 }
1c79356b
A
11414
11415 /*
11416 * Attempt non-blocking copy-on-write optimizations.
11417 */
11418
4ba76501
A
11419 /*
11420 * If we are destroying the source, and the object
11421 * is internal, we could move the object reference
11422 * from the source to the copy. The copy is
11423 * copy-on-write only if the source is.
11424 * We make another reference to the object, because
11425 * destroying the source entry will deallocate it.
11426 *
11427 * This memory transfer has to be atomic, (to prevent
11428 * the VM object from being shared or copied while
11429 * it's being moved here), so we could only do this
11430 * if we won't have to unlock the VM map until the
11431 * original mapping has been fully removed.
11432 */
1c79356b 11433
0a7de745 11434RestartCopy:
55e303ae 11435 if ((src_object == VM_OBJECT_NULL ||
0a7de745 11436 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
2d21ac55 11437 vm_object_copy_quickly(
cb323159 11438 VME_OBJECT_PTR(new_entry),
2d21ac55
A
11439 src_offset,
11440 src_size,
11441 &src_needs_copy,
11442 &new_entry_needs_copy)) {
1c79356b
A
11443 new_entry->needs_copy = new_entry_needs_copy;
11444
11445 /*
11446 * Handle copy-on-write obligations
11447 */
11448
11449 if (src_needs_copy && !tmp_entry->needs_copy) {
0a7de745 11450 vm_prot_t prot;
0c530ab8
A
11451
11452 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11453
3e170ce0 11454 if (override_nx(src_map, VME_ALIAS(src_entry))
0a7de745
A
11455 && prot) {
11456 prot |= VM_PROT_EXECUTE;
11457 }
2d21ac55 11458
55e303ae
A
11459 vm_object_pmap_protect(
11460 src_object,
11461 src_offset,
11462 src_size,
0a7de745
A
11463 (src_entry->is_shared ?
11464 PMAP_NULL
11465 : src_map->pmap),
55e303ae 11466 src_entry->vme_start,
0c530ab8
A
11467 prot);
11468
3e170ce0 11469 assert(tmp_entry->wired_count == 0);
55e303ae 11470 tmp_entry->needs_copy = TRUE;
1c79356b
A
11471 }
11472
11473 /*
11474 * The map has never been unlocked, so it's safe
11475 * to move to the next entry rather than doing
11476 * another lookup.
11477 */
11478
11479 goto CopySuccessful;
11480 }
11481
5ba3f43e
A
11482 entry_was_shared = tmp_entry->is_shared;
11483
1c79356b
A
11484 /*
11485 * Take an object reference, so that we may
11486 * release the map lock(s).
11487 */
11488
11489 assert(src_object != VM_OBJECT_NULL);
11490 vm_object_reference(src_object);
11491
11492 /*
11493 * Record the timestamp for later verification.
11494 * Unlock the map.
11495 */
11496
11497 version.main_timestamp = src_map->timestamp;
0a7de745 11498 vm_map_unlock(src_map); /* Increments timestamp once! */
5ba3f43e
A
11499 saved_src_entry = src_entry;
11500 tmp_entry = VM_MAP_ENTRY_NULL;
11501 src_entry = VM_MAP_ENTRY_NULL;
1c79356b
A
11502
11503 /*
11504 * Perform the copy
11505 */
11506
11507 if (was_wired) {
0a7de745 11508CopySlowly:
1c79356b
A
11509 vm_object_lock(src_object);
11510 result = vm_object_copy_slowly(
2d21ac55
A
11511 src_object,
11512 src_offset,
11513 src_size,
11514 THREAD_UNINT,
cb323159 11515 VME_OBJECT_PTR(new_entry));
3e170ce0 11516 VME_OFFSET_SET(new_entry, 0);
1c79356b 11517 new_entry->needs_copy = FALSE;
0a7de745
A
11518 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11519 (entry_was_shared || map_share)) {
11520 vm_object_t new_object;
55e303ae 11521
2d21ac55 11522 vm_object_lock_shared(src_object);
55e303ae 11523 new_object = vm_object_copy_delayed(
2d21ac55 11524 src_object,
5ba3f43e 11525 src_offset,
2d21ac55
A
11526 src_size,
11527 TRUE);
0a7de745
A
11528 if (new_object == VM_OBJECT_NULL) {
11529 goto CopySlowly;
11530 }
55e303ae 11531
3e170ce0
A
11532 VME_OBJECT_SET(new_entry, new_object);
11533 assert(new_entry->wired_count == 0);
55e303ae 11534 new_entry->needs_copy = TRUE;
fe8ab488
A
11535 assert(!new_entry->iokit_acct);
11536 assert(new_object->purgable == VM_PURGABLE_DENY);
a39ff7e2 11537 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
55e303ae 11538 result = KERN_SUCCESS;
1c79356b 11539 } else {
3e170ce0
A
11540 vm_object_offset_t new_offset;
11541 new_offset = VME_OFFSET(new_entry);
1c79356b 11542 result = vm_object_copy_strategically(src_object,
0a7de745
A
11543 src_offset,
11544 src_size,
cb323159 11545 VME_OBJECT_PTR(new_entry),
0a7de745
A
11546 &new_offset,
11547 &new_entry_needs_copy);
3e170ce0
A
11548 if (new_offset != VME_OFFSET(new_entry)) {
11549 VME_OFFSET_SET(new_entry, new_offset);
11550 }
1c79356b
A
11551
11552 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
11553 }
11554
39037602
A
11555 if (result == KERN_SUCCESS &&
11556 preserve_purgeable &&
11557 src_object->purgable != VM_PURGABLE_DENY) {
0a7de745 11558 vm_object_t new_object;
39037602
A
11559
11560 new_object = VME_OBJECT(new_entry);
11561 assert(new_object != src_object);
11562 vm_object_lock(new_object);
11563 assert(new_object->ref_count == 1);
11564 assert(new_object->shadow == VM_OBJECT_NULL);
11565 assert(new_object->copy == VM_OBJECT_NULL);
d9a64523 11566 assert(new_object->vo_owner == NULL);
39037602
A
11567
11568 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11569 new_object->true_share = TRUE;
11570 /* start as non-volatile with no owner... */
11571 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11572 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11573 /* ... and move to src_object's purgeable state */
11574 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11575 int state;
11576 state = src_object->purgable;
11577 vm_object_purgable_control(
11578 new_object,
5ba3f43e 11579 VM_PURGABLE_SET_STATE_FROM_KERNEL,
39037602
A
11580 &state);
11581 }
11582 vm_object_unlock(new_object);
11583 new_object = VM_OBJECT_NULL;
a39ff7e2
A
11584 /* no pmap accounting for purgeable objects */
11585 new_entry->use_pmap = FALSE;
39037602
A
11586 }
11587
1c79356b
A
11588 if (result != KERN_SUCCESS &&
11589 result != KERN_MEMORY_RESTART_COPY) {
11590 vm_map_lock(src_map);
11591 RETURN(result);
11592 }
11593
11594 /*
11595 * Throw away the extra reference
11596 */
11597
11598 vm_object_deallocate(src_object);
11599
11600 /*
11601 * Verify that the map has not substantially
11602 * changed while the copy was being made.
11603 */
11604
9bccf70c 11605 vm_map_lock(src_map);
1c79356b 11606
5ba3f43e
A
11607 if ((version.main_timestamp + 1) == src_map->timestamp) {
11608 /* src_map hasn't changed: src_entry is still valid */
11609 src_entry = saved_src_entry;
1c79356b 11610 goto VerificationSuccessful;
5ba3f43e 11611 }
1c79356b
A
11612
11613 /*
11614 * Simple version comparison failed.
11615 *
11616 * Retry the lookup and verify that the
11617 * same object/offset are still present.
11618 *
11619 * [Note: a memory manager that colludes with
11620 * the calling task can detect that we have
11621 * cheated. While the map was unlocked, the
11622 * mapping could have been changed and restored.]
11623 */
11624
11625 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 11626 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
11627 vm_object_deallocate(VME_OBJECT(new_entry));
11628 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
a39ff7e2
A
11629 /* reset accounting state */
11630 new_entry->iokit_acct = FALSE;
fe8ab488
A
11631 new_entry->use_pmap = TRUE;
11632 }
1c79356b
A
11633 RETURN(KERN_INVALID_ADDRESS);
11634 }
11635
11636 src_entry = tmp_entry;
11637 vm_map_clip_start(src_map, src_entry, src_start);
11638
91447636 11639 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
0a7de745
A
11640 !use_maxprot) ||
11641 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
1c79356b 11642 goto VerificationFailed;
0a7de745 11643 }
1c79356b 11644
39236c6e 11645 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
11646 /*
11647 * This entry might have been shortened
11648 * (vm_map_clip_end) or been replaced with
11649 * an entry that ends closer to "src_start"
11650 * than before.
11651 * Adjust "new_entry" accordingly; copying
11652 * less memory would be correct but we also
11653 * redo the copy (see below) if the new entry
11654 * no longer points at the same object/offset.
11655 */
39236c6e 11656 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
0a7de745 11657 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e
A
11658 new_entry->vme_end = src_entry->vme_end;
11659 src_size = new_entry->vme_end - src_start;
39037602
A
11660 } else if (src_entry->vme_end > new_entry->vme_end) {
11661 /*
11662 * This entry might have been extended
11663 * (vm_map_entry_simplify() or coalesce)
11664 * or been replaced with an entry that ends farther
5ba3f43e 11665 * from "src_start" than before.
39037602
A
11666 *
11667 * We've called vm_object_copy_*() only on
11668 * the previous <start:end> range, so we can't
11669 * just extend new_entry. We have to re-do
11670 * the copy based on the new entry as if it was
11671 * pointing at a different object/offset (see
11672 * "Verification failed" below).
11673 */
39236c6e 11674 }
1c79356b 11675
3e170ce0 11676 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
11677 (VME_OFFSET(src_entry) != src_offset) ||
11678 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
11679 /*
11680 * Verification failed.
11681 *
11682 * Start over with this top-level entry.
11683 */
11684
0a7de745 11685VerificationFailed: ;
1c79356b 11686
3e170ce0 11687 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
11688 tmp_entry = src_entry;
11689 continue;
11690 }
11691
11692 /*
11693 * Verification succeeded.
11694 */
11695
0a7de745 11696VerificationSuccessful:;
1c79356b 11697
0a7de745 11698 if (result == KERN_MEMORY_RESTART_COPY) {
1c79356b 11699 goto RestartCopy;
0a7de745 11700 }
1c79356b
A
11701
11702 /*
11703 * Copy succeeded.
11704 */
11705
0a7de745 11706CopySuccessful: ;
1c79356b
A
11707
11708 /*
11709 * Link in the new copy entry.
11710 */
11711
11712 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
0a7de745 11713 new_entry);
5ba3f43e 11714
1c79356b
A
11715 /*
11716 * Determine whether the entire region
11717 * has been copied.
11718 */
2d21ac55 11719 src_base = src_start;
1c79356b
A
11720 src_start = new_entry->vme_end;
11721 new_entry = VM_MAP_ENTRY_NULL;
11722 while ((src_start >= src_end) && (src_end != 0)) {
0a7de745 11723 submap_map_t *ptr;
fe8ab488
A
11724
11725 if (src_map == base_map) {
11726 /* back to the top */
1c79356b 11727 break;
fe8ab488
A
11728 }
11729
11730 ptr = parent_maps;
11731 assert(ptr != NULL);
11732 parent_maps = parent_maps->next;
11733
11734 /* fix up the damage we did in that submap */
11735 vm_map_simplify_range(src_map,
0a7de745
A
11736 src_base,
11737 src_end);
fe8ab488
A
11738
11739 vm_map_unlock(src_map);
11740 vm_map_deallocate(src_map);
11741 vm_map_lock(ptr->parent_map);
11742 src_map = ptr->parent_map;
11743 src_base = ptr->base_start;
11744 src_start = ptr->base_start + ptr->base_len;
11745 src_end = ptr->base_end;
11746 if (!vm_map_lookup_entry(src_map,
0a7de745
A
11747 src_start,
11748 &tmp_entry) &&
fe8ab488
A
11749 (src_end > src_start)) {
11750 RETURN(KERN_INVALID_ADDRESS);
11751 }
11752 kfree(ptr, sizeof(submap_map_t));
0a7de745 11753 if (parent_maps == NULL) {
fe8ab488 11754 map_share = FALSE;
0a7de745 11755 }
fe8ab488
A
11756 src_entry = tmp_entry->vme_prev;
11757 }
11758
11759 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11760 (src_start >= src_addr + len) &&
11761 (src_addr + len != 0)) {
11762 /*
11763 * Stop copying now, even though we haven't reached
11764 * "src_end". We'll adjust the end of the last copy
11765 * entry at the end, if needed.
11766 *
11767 * If src_map's aligment is different from the
11768 * system's page-alignment, there could be
11769 * extra non-map-aligned map entries between
11770 * the original (non-rounded) "src_addr + len"
11771 * and the rounded "src_end".
11772 * We do not want to copy those map entries since
11773 * they're not part of the copied range.
11774 */
11775 break;
1c79356b 11776 }
fe8ab488 11777
0a7de745 11778 if ((src_start >= src_end) && (src_end != 0)) {
1c79356b 11779 break;
0a7de745 11780 }
1c79356b
A
11781
11782 /*
11783 * Verify that there are no gaps in the region
11784 */
11785
11786 tmp_entry = src_entry->vme_next;
fe8ab488 11787 if ((tmp_entry->vme_start != src_start) ||
39236c6e 11788 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 11789 RETURN(KERN_INVALID_ADDRESS);
39236c6e 11790 }
1c79356b
A
11791 }
11792
11793 /*
11794 * If the source should be destroyed, do it now, since the
5ba3f43e 11795 * copy was successful.
1c79356b
A
11796 */
11797 if (src_destroy) {
39236c6e
A
11798 (void) vm_map_delete(
11799 src_map,
11800 vm_map_trunc_page(src_addr,
0a7de745 11801 VM_MAP_PAGE_MASK(src_map)),
39236c6e
A
11802 src_end,
11803 ((src_map == kernel_map) ?
0a7de745
A
11804 VM_MAP_REMOVE_KUNWIRE :
11805 VM_MAP_REMOVE_NO_FLAGS),
39236c6e 11806 VM_MAP_NULL);
2d21ac55
A
11807 } else {
11808 /* fix up the damage we did in the base map */
39236c6e
A
11809 vm_map_simplify_range(
11810 src_map,
11811 vm_map_trunc_page(src_addr,
0a7de745 11812 VM_MAP_PAGE_MASK(src_map)),
39236c6e 11813 vm_map_round_page(src_end,
0a7de745 11814 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
11815 }
11816
11817 vm_map_unlock(src_map);
5ba3f43e 11818 tmp_entry = VM_MAP_ENTRY_NULL;
1c79356b 11819
39236c6e 11820 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488 11821 vm_map_offset_t original_start, original_offset, original_end;
5ba3f43e 11822
39236c6e
A
11823 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11824
11825 /* adjust alignment of first copy_entry's "vme_start" */
11826 tmp_entry = vm_map_copy_first_entry(copy);
11827 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11828 vm_map_offset_t adjustment;
fe8ab488
A
11829
11830 original_start = tmp_entry->vme_start;
3e170ce0 11831 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
11832
11833 /* map-align the start of the first copy entry... */
11834 adjustment = (tmp_entry->vme_start -
0a7de745
A
11835 vm_map_trunc_page(
11836 tmp_entry->vme_start,
11837 VM_MAP_PAGE_MASK(src_map)));
fe8ab488 11838 tmp_entry->vme_start -= adjustment;
3e170ce0 11839 VME_OFFSET_SET(tmp_entry,
0a7de745 11840 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
11841 copy_addr -= adjustment;
11842 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11843 /* ... adjust for mis-aligned start of copy range */
39236c6e 11844 adjustment =
0a7de745
A
11845 (vm_map_trunc_page(copy->offset,
11846 PAGE_MASK) -
11847 vm_map_trunc_page(copy->offset,
11848 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11849 if (adjustment) {
11850 assert(page_aligned(adjustment));
11851 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11852 tmp_entry->vme_start += adjustment;
3e170ce0 11853 VME_OFFSET_SET(tmp_entry,
0a7de745
A
11854 (VME_OFFSET(tmp_entry) +
11855 adjustment));
39236c6e
A
11856 copy_addr += adjustment;
11857 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11858 }
fe8ab488
A
11859
11860 /*
11861 * Assert that the adjustments haven't exposed
11862 * more than was originally copied...
11863 */
11864 assert(tmp_entry->vme_start >= original_start);
3e170ce0 11865 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
11866 /*
11867 * ... and that it did not adjust outside of a
11868 * a single 16K page.
11869 */
11870 assert(vm_map_trunc_page(tmp_entry->vme_start,
0a7de745
A
11871 VM_MAP_PAGE_MASK(src_map)) ==
11872 vm_map_trunc_page(original_start,
11873 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11874 }
11875
11876 /* adjust alignment of last copy_entry's "vme_end" */
11877 tmp_entry = vm_map_copy_last_entry(copy);
11878 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11879 vm_map_offset_t adjustment;
fe8ab488
A
11880
11881 original_end = tmp_entry->vme_end;
11882
11883 /* map-align the end of the last copy entry... */
11884 tmp_entry->vme_end =
0a7de745
A
11885 vm_map_round_page(tmp_entry->vme_end,
11886 VM_MAP_PAGE_MASK(src_map));
fe8ab488 11887 /* ... adjust for mis-aligned end of copy range */
39236c6e 11888 adjustment =
0a7de745
A
11889 (vm_map_round_page((copy->offset +
11890 copy->size),
11891 VM_MAP_PAGE_MASK(src_map)) -
11892 vm_map_round_page((copy->offset +
11893 copy->size),
11894 PAGE_MASK));
39236c6e
A
11895 if (adjustment) {
11896 assert(page_aligned(adjustment));
11897 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11898 tmp_entry->vme_end -= adjustment;
11899 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11900 }
fe8ab488
A
11901
11902 /*
11903 * Assert that the adjustments haven't exposed
11904 * more than was originally copied...
11905 */
11906 assert(tmp_entry->vme_end <= original_end);
11907 /*
11908 * ... and that it did not adjust outside of a
11909 * a single 16K page.
11910 */
11911 assert(vm_map_round_page(tmp_entry->vme_end,
0a7de745
A
11912 VM_MAP_PAGE_MASK(src_map)) ==
11913 vm_map_round_page(original_end,
11914 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11915 }
11916 }
11917
1c79356b
A
11918 /* Fix-up start and end points in copy. This is necessary */
11919 /* when the various entries in the copy object were picked */
11920 /* up from different sub-maps */
11921
11922 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 11923 copy_size = 0; /* compute actual size */
1c79356b 11924 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e 11925 assert(VM_MAP_PAGE_ALIGNED(
0a7de745
A
11926 copy_addr + (tmp_entry->vme_end -
11927 tmp_entry->vme_start),
11928 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e 11929 assert(VM_MAP_PAGE_ALIGNED(
0a7de745
A
11930 copy_addr,
11931 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e
A
11932
11933 /*
11934 * The copy_entries will be injected directly into the
11935 * destination map and might not be "map aligned" there...
11936 */
11937 tmp_entry->map_aligned = FALSE;
11938
5ba3f43e 11939 tmp_entry->vme_end = copy_addr +
0a7de745 11940 (tmp_entry->vme_end - tmp_entry->vme_start);
1c79356b 11941 tmp_entry->vme_start = copy_addr;
e2d2fc5c 11942 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 11943 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 11944 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
11945 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11946 }
11947
fe8ab488
A
11948 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11949 copy_size < copy->size) {
11950 /*
11951 * The actual size of the VM map copy is smaller than what
11952 * was requested by the caller. This must be because some
11953 * PAGE_SIZE-sized pages are missing at the end of the last
11954 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11955 * The caller might not have been aware of those missing
11956 * pages and might not want to be aware of it, which is
11957 * fine as long as they don't try to access (and crash on)
11958 * those missing pages.
11959 * Let's adjust the size of the "copy", to avoid failing
11960 * in vm_map_copyout() or vm_map_copy_overwrite().
11961 */
11962 assert(vm_map_round_page(copy_size,
0a7de745
A
11963 VM_MAP_PAGE_MASK(src_map)) ==
11964 vm_map_round_page(copy->size,
11965 VM_MAP_PAGE_MASK(src_map)));
fe8ab488
A
11966 copy->size = copy_size;
11967 }
11968
1c79356b 11969 *copy_result = copy;
0a7de745 11970 return KERN_SUCCESS;
1c79356b 11971
0a7de745 11972#undef RETURN
1c79356b
A
11973}
11974
39236c6e
A
11975kern_return_t
11976vm_map_copy_extract(
0a7de745
A
11977 vm_map_t src_map,
11978 vm_map_address_t src_addr,
11979 vm_map_size_t len,
11980 vm_map_copy_t *copy_result, /* OUT */
11981 vm_prot_t *cur_prot, /* OUT */
11982 vm_prot_t *max_prot)
39236c6e 11983{
0a7de745
A
11984 vm_map_offset_t src_start, src_end;
11985 vm_map_copy_t copy;
11986 kern_return_t kr;
39236c6e
A
11987
11988 /*
11989 * Check for copies of zero bytes.
11990 */
11991
11992 if (len == 0) {
11993 *copy_result = VM_MAP_COPY_NULL;
0a7de745 11994 return KERN_SUCCESS;
39236c6e
A
11995 }
11996
11997 /*
11998 * Check that the end address doesn't overflow
11999 */
12000 src_end = src_addr + len;
0a7de745 12001 if (src_end < src_addr) {
39236c6e 12002 return KERN_INVALID_ADDRESS;
0a7de745 12003 }
39236c6e
A
12004
12005 /*
12006 * Compute (page aligned) start and end of region
12007 */
12008 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
12009 src_end = vm_map_round_page(src_end, PAGE_MASK);
12010
12011 /*
12012 * Allocate a header element for the list.
12013 *
5ba3f43e 12014 * Use the start and end in the header to
39236c6e
A
12015 * remember the endpoints prior to rounding.
12016 */
12017
d9a64523 12018 copy = vm_map_copy_allocate();
39236c6e 12019 copy->type = VM_MAP_COPY_ENTRY_LIST;
39236c6e
A
12020 copy->cpy_hdr.entries_pageable = TRUE;
12021
12022 vm_map_store_init(&copy->cpy_hdr);
12023
12024 copy->offset = 0;
12025 copy->size = len;
12026
12027 kr = vm_map_remap_extract(src_map,
0a7de745
A
12028 src_addr,
12029 len,
12030 FALSE, /* copy */
12031 &copy->cpy_hdr,
12032 cur_prot,
12033 max_prot,
12034 VM_INHERIT_SHARE,
12035 TRUE, /* pageable */
12036 FALSE, /* same_map */
12037 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
12038 if (kr != KERN_SUCCESS) {
12039 vm_map_copy_discard(copy);
12040 return kr;
12041 }
12042
12043 *copy_result = copy;
12044 return KERN_SUCCESS;
12045}
12046
1c79356b
A
12047/*
12048 * vm_map_copyin_object:
12049 *
12050 * Create a copy object from an object.
12051 * Our caller donates an object reference.
12052 */
12053
12054kern_return_t
12055vm_map_copyin_object(
0a7de745
A
12056 vm_object_t object,
12057 vm_object_offset_t offset, /* offset of region in object */
12058 vm_object_size_t size, /* size of region in object */
12059 vm_map_copy_t *copy_result) /* OUT */
1c79356b 12060{
0a7de745 12061 vm_map_copy_t copy; /* Resulting copy */
1c79356b
A
12062
12063 /*
12064 * We drop the object into a special copy object
12065 * that contains the object directly.
12066 */
12067
d9a64523 12068 copy = vm_map_copy_allocate();
1c79356b
A
12069 copy->type = VM_MAP_COPY_OBJECT;
12070 copy->cpy_object = object;
1c79356b
A
12071 copy->offset = offset;
12072 copy->size = size;
12073
12074 *copy_result = copy;
0a7de745 12075 return KERN_SUCCESS;
1c79356b
A
12076}
12077
91447636 12078static void
1c79356b 12079vm_map_fork_share(
0a7de745
A
12080 vm_map_t old_map,
12081 vm_map_entry_t old_entry,
12082 vm_map_t new_map)
1c79356b 12083{
0a7de745
A
12084 vm_object_t object;
12085 vm_map_entry_t new_entry;
1c79356b
A
12086
12087 /*
12088 * New sharing code. New map entry
12089 * references original object. Internal
12090 * objects use asynchronous copy algorithm for
12091 * future copies. First make sure we have
12092 * the right object. If we need a shadow,
12093 * or someone else already has one, then
12094 * make a new shadow and share it.
12095 */
5ba3f43e 12096
3e170ce0 12097 object = VME_OBJECT(old_entry);
1c79356b
A
12098 if (old_entry->is_sub_map) {
12099 assert(old_entry->wired_count == 0);
0c530ab8 12100#ifndef NO_NESTED_PMAP
0a7de745
A
12101 if (old_entry->use_pmap) {
12102 kern_return_t result;
91447636 12103
5ba3f43e 12104 result = pmap_nest(new_map->pmap,
0a7de745
A
12105 (VME_SUBMAP(old_entry))->pmap,
12106 (addr64_t)old_entry->vme_start,
12107 (addr64_t)old_entry->vme_start,
12108 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12109 if (result) {
1c79356b 12110 panic("vm_map_fork_share: pmap_nest failed!");
0a7de745 12111 }
1c79356b 12112 }
0a7de745 12113#endif /* NO_NESTED_PMAP */
1c79356b 12114 } else if (object == VM_OBJECT_NULL) {
91447636 12115 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
0a7de745 12116 old_entry->vme_start));
3e170ce0
A
12117 VME_OFFSET_SET(old_entry, 0);
12118 VME_OBJECT_SET(old_entry, object);
fe8ab488 12119 old_entry->use_pmap = TRUE;
a39ff7e2 12120// assert(!old_entry->needs_copy);
1c79356b 12121 } else if (object->copy_strategy !=
0a7de745 12122 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
12123 /*
12124 * We are already using an asymmetric
12125 * copy, and therefore we already have
12126 * the right object.
12127 */
5ba3f43e 12128
0a7de745
A
12129 assert(!old_entry->needs_copy);
12130 } else if (old_entry->needs_copy || /* case 1 */
12131 object->shadowed || /* case 2 */
12132 (!object->true_share && /* case 3 */
12133 !old_entry->is_shared &&
12134 (object->vo_size >
12135 (vm_map_size_t)(old_entry->vme_end -
12136 old_entry->vme_start)))) {
1c79356b
A
12137 /*
12138 * We need to create a shadow.
12139 * There are three cases here.
12140 * In the first case, we need to
12141 * complete a deferred symmetrical
12142 * copy that we participated in.
12143 * In the second and third cases,
12144 * we need to create the shadow so
12145 * that changes that we make to the
12146 * object do not interfere with
12147 * any symmetrical copies which
12148 * have occured (case 2) or which
12149 * might occur (case 3).
12150 *
12151 * The first case is when we had
12152 * deferred shadow object creation
12153 * via the entry->needs_copy mechanism.
12154 * This mechanism only works when
12155 * only one entry points to the source
12156 * object, and we are about to create
12157 * a second entry pointing to the
12158 * same object. The problem is that
12159 * there is no way of mapping from
12160 * an object to the entries pointing
12161 * to it. (Deferred shadow creation
12162 * works with one entry because occurs
12163 * at fault time, and we walk from the
12164 * entry to the object when handling
12165 * the fault.)
12166 *
12167 * The second case is when the object
12168 * to be shared has already been copied
12169 * with a symmetric copy, but we point
12170 * directly to the object without
12171 * needs_copy set in our entry. (This
12172 * can happen because different ranges
12173 * of an object can be pointed to by
12174 * different entries. In particular,
12175 * a single entry pointing to an object
12176 * can be split by a call to vm_inherit,
12177 * which, combined with task_create, can
12178 * result in the different entries
12179 * having different needs_copy values.)
12180 * The shadowed flag in the object allows
12181 * us to detect this case. The problem
12182 * with this case is that if this object
12183 * has or will have shadows, then we
12184 * must not perform an asymmetric copy
12185 * of this object, since such a copy
12186 * allows the object to be changed, which
12187 * will break the previous symmetrical
12188 * copies (which rely upon the object
12189 * not changing). In a sense, the shadowed
12190 * flag says "don't change this object".
12191 * We fix this by creating a shadow
12192 * object for this object, and sharing
12193 * that. This works because we are free
12194 * to change the shadow object (and thus
12195 * to use an asymmetric copy strategy);
12196 * this is also semantically correct,
12197 * since this object is temporary, and
12198 * therefore a copy of the object is
12199 * as good as the object itself. (This
12200 * is not true for permanent objects,
12201 * since the pager needs to see changes,
12202 * which won't happen if the changes
12203 * are made to a copy.)
12204 *
12205 * The third case is when the object
12206 * to be shared has parts sticking
12207 * outside of the entry we're working
12208 * with, and thus may in the future
12209 * be subject to a symmetrical copy.
12210 * (This is a preemptive version of
12211 * case 2.)
12212 */
3e170ce0 12213 VME_OBJECT_SHADOW(old_entry,
0a7de745
A
12214 (vm_map_size_t) (old_entry->vme_end -
12215 old_entry->vme_start));
5ba3f43e 12216
1c79356b
A
12217 /*
12218 * If we're making a shadow for other than
12219 * copy on write reasons, then we have
12220 * to remove write permission.
12221 */
12222
1c79356b
A
12223 if (!old_entry->needs_copy &&
12224 (old_entry->protection & VM_PROT_WRITE)) {
0a7de745 12225 vm_prot_t prot;
0c530ab8 12226
5ba3f43e
A
12227 assert(!pmap_has_prot_policy(old_entry->protection));
12228
0c530ab8 12229 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12230
5ba3f43e
A
12231 assert(!pmap_has_prot_policy(prot));
12232
0a7de745
A
12233 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12234 prot |= VM_PROT_EXECUTE;
12235 }
2d21ac55 12236
5ba3f43e 12237
316670eb 12238 if (old_map->mapped_in_other_pmaps) {
9bccf70c 12239 vm_object_pmap_protect(
3e170ce0
A
12240 VME_OBJECT(old_entry),
12241 VME_OFFSET(old_entry),
9bccf70c 12242 (old_entry->vme_end -
0a7de745 12243 old_entry->vme_start),
9bccf70c
A
12244 PMAP_NULL,
12245 old_entry->vme_start,
0c530ab8 12246 prot);
1c79356b 12247 } else {
9bccf70c 12248 pmap_protect(old_map->pmap,
0a7de745
A
12249 old_entry->vme_start,
12250 old_entry->vme_end,
12251 prot);
1c79356b
A
12252 }
12253 }
5ba3f43e 12254
1c79356b 12255 old_entry->needs_copy = FALSE;
3e170ce0 12256 object = VME_OBJECT(old_entry);
1c79356b 12257 }
6d2010ae 12258
5ba3f43e 12259
1c79356b
A
12260 /*
12261 * If object was using a symmetric copy strategy,
12262 * change its copy strategy to the default
12263 * asymmetric copy strategy, which is copy_delay
12264 * in the non-norma case and copy_call in the
12265 * norma case. Bump the reference count for the
12266 * new entry.
12267 */
5ba3f43e 12268
0a7de745 12269 if (old_entry->is_sub_map) {
3e170ce0
A
12270 vm_map_lock(VME_SUBMAP(old_entry));
12271 vm_map_reference(VME_SUBMAP(old_entry));
12272 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
12273 } else {
12274 vm_object_lock(object);
2d21ac55 12275 vm_object_reference_locked(object);
1c79356b
A
12276 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12277 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12278 }
12279 vm_object_unlock(object);
12280 }
5ba3f43e 12281
1c79356b
A
12282 /*
12283 * Clone the entry, using object ref from above.
12284 * Mark both entries as shared.
12285 */
5ba3f43e 12286
7ddcb079 12287 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
0a7de745 12288 * map or descendants */
1c79356b
A
12289 vm_map_entry_copy(new_entry, old_entry);
12290 old_entry->is_shared = TRUE;
12291 new_entry->is_shared = TRUE;
39037602 12292
a39ff7e2
A
12293 /*
12294 * We're dealing with a shared mapping, so the resulting mapping
12295 * should inherit some of the original mapping's accounting settings.
12296 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12297 * "use_pmap" should stay the same as before (if it hasn't been reset
12298 * to TRUE when we cleared "iokit_acct").
12299 */
12300 assert(!new_entry->iokit_acct);
12301
39037602
A
12302 /*
12303 * If old entry's inheritence is VM_INHERIT_NONE,
12304 * the new entry is for corpse fork, remove the
12305 * write permission from the new entry.
12306 */
12307 if (old_entry->inheritance == VM_INHERIT_NONE) {
39037602
A
12308 new_entry->protection &= ~VM_PROT_WRITE;
12309 new_entry->max_protection &= ~VM_PROT_WRITE;
12310 }
5ba3f43e 12311
1c79356b
A
12312 /*
12313 * Insert the entry into the new map -- we
12314 * know we're inserting at the end of the new
12315 * map.
12316 */
5ba3f43e 12317
d9a64523 12318 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
0a7de745 12319 VM_MAP_KERNEL_FLAGS_NONE);
5ba3f43e 12320
1c79356b
A
12321 /*
12322 * Update the physical map
12323 */
5ba3f43e 12324
1c79356b
A
12325 if (old_entry->is_sub_map) {
12326 /* Bill Angell pmap support goes here */
12327 } else {
12328 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
0a7de745
A
12329 old_entry->vme_end - old_entry->vme_start,
12330 old_entry->vme_start);
1c79356b
A
12331 }
12332}
12333
91447636 12334static boolean_t
1c79356b 12335vm_map_fork_copy(
0a7de745
A
12336 vm_map_t old_map,
12337 vm_map_entry_t *old_entry_p,
12338 vm_map_t new_map,
12339 int vm_map_copyin_flags)
1c79356b
A
12340{
12341 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
12342 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12343 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
12344 vm_map_copy_t copy;
12345 vm_map_entry_t last = vm_map_last_entry(new_map);
12346
12347 vm_map_unlock(old_map);
12348 /*
12349 * Use maxprot version of copyin because we
12350 * care about whether this memory can ever
12351 * be accessed, not just whether it's accessible
12352 * right now.
12353 */
39037602
A
12354 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12355 if (vm_map_copyin_internal(old_map, start, entry_size,
0a7de745 12356 vm_map_copyin_flags, &copy)
1c79356b
A
12357 != KERN_SUCCESS) {
12358 /*
12359 * The map might have changed while it
12360 * was unlocked, check it again. Skip
12361 * any blank space or permanently
12362 * unreadable region.
12363 */
12364 vm_map_lock(old_map);
12365 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 12366 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
12367 last = last->vme_next;
12368 }
12369 *old_entry_p = last;
12370
12371 /*
12372 * XXX For some error returns, want to
12373 * XXX skip to the next element. Note
12374 * that INVALID_ADDRESS and
12375 * PROTECTION_FAILURE are handled above.
12376 */
5ba3f43e 12377
1c79356b
A
12378 return FALSE;
12379 }
5ba3f43e 12380
1c79356b
A
12381 /*
12382 * Insert the copy into the new map
12383 */
5ba3f43e 12384
1c79356b 12385 vm_map_copy_insert(new_map, last, copy);
5ba3f43e 12386
1c79356b
A
12387 /*
12388 * Pick up the traversal at the end of
12389 * the copied region.
12390 */
5ba3f43e 12391
1c79356b
A
12392 vm_map_lock(old_map);
12393 start += entry_size;
0a7de745 12394 if (!vm_map_lookup_entry(old_map, start, &last)) {
1c79356b
A
12395 last = last->vme_next;
12396 } else {
2d21ac55
A
12397 if (last->vme_start == start) {
12398 /*
12399 * No need to clip here and we don't
12400 * want to cause any unnecessary
12401 * unnesting...
12402 */
12403 } else {
12404 vm_map_clip_start(old_map, last, start);
12405 }
1c79356b
A
12406 }
12407 *old_entry_p = last;
12408
12409 return TRUE;
12410}
12411
12412/*
12413 * vm_map_fork:
12414 *
12415 * Create and return a new map based on the old
12416 * map, according to the inheritance values on the
39037602 12417 * regions in that map and the options.
1c79356b
A
12418 *
12419 * The source map must not be locked.
12420 */
12421vm_map_t
12422vm_map_fork(
0a7de745
A
12423 ledger_t ledger,
12424 vm_map_t old_map,
12425 int options)
1c79356b 12426{
0a7de745
A
12427 pmap_t new_pmap;
12428 vm_map_t new_map;
12429 vm_map_entry_t old_entry;
12430 vm_map_size_t new_size = 0, entry_size;
12431 vm_map_entry_t new_entry;
12432 boolean_t src_needs_copy;
12433 boolean_t new_entry_needs_copy;
12434 boolean_t pmap_is64bit;
12435 int vm_map_copyin_flags;
12436 vm_inherit_t old_entry_inheritance;
12437 int map_create_options;
12438 kern_return_t footprint_collect_kr;
39037602
A
12439
12440 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
0a7de745
A
12441 VM_MAP_FORK_PRESERVE_PURGEABLE |
12442 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
39037602
A
12443 /* unsupported option */
12444 return VM_MAP_NULL;
12445 }
1c79356b 12446
3e170ce0 12447 pmap_is64bit =
b0d623f7 12448#if defined(__i386__) || defined(__x86_64__)
0a7de745 12449 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
5ba3f43e 12450#elif defined(__arm64__)
0a7de745 12451 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
5ba3f43e 12452#elif defined(__arm__)
0a7de745 12453 FALSE;
b0d623f7 12454#else
316670eb 12455#error Unknown architecture.
b0d623f7 12456#endif
3e170ce0 12457
cb323159
A
12458 unsigned int pmap_flags = 0;
12459 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12460#if defined(HAS_APPLE_PAC)
12461 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12462#endif
12463 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
2d21ac55 12464
1c79356b
A
12465 vm_map_reference_swap(old_map);
12466 vm_map_lock(old_map);
12467
d9a64523
A
12468 map_create_options = 0;
12469 if (old_map->hdr.entries_pageable) {
12470 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12471 }
12472 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12473 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12474 footprint_collect_kr = KERN_SUCCESS;
12475 }
12476 new_map = vm_map_create_options(new_pmap,
0a7de745
A
12477 old_map->min_offset,
12478 old_map->max_offset,
12479 map_create_options);
5ba3f43e 12480 vm_map_lock(new_map);
39037602 12481 vm_commit_pagezero_status(new_map);
39236c6e
A
12482 /* inherit the parent map's page size */
12483 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 12484 for (
2d21ac55
A
12485 old_entry = vm_map_first_entry(old_map);
12486 old_entry != vm_map_to_entry(old_map);
12487 ) {
1c79356b
A
12488 entry_size = old_entry->vme_end - old_entry->vme_start;
12489
d9a64523
A
12490 old_entry_inheritance = old_entry->inheritance;
12491 /*
12492 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12493 * share VM_INHERIT_NONE entries that are not backed by a
12494 * device pager.
12495 */
12496 if (old_entry_inheritance == VM_INHERIT_NONE &&
12497 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12498 !(!old_entry->is_sub_map &&
0a7de745
A
12499 VME_OBJECT(old_entry) != NULL &&
12500 VME_OBJECT(old_entry)->pager != NULL &&
12501 is_device_pager_ops(
12502 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
d9a64523
A
12503 old_entry_inheritance = VM_INHERIT_SHARE;
12504 }
12505
12506 if (old_entry_inheritance != VM_INHERIT_NONE &&
12507 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12508 footprint_collect_kr == KERN_SUCCESS) {
39037602 12509 /*
d9a64523
A
12510 * The corpse won't have old_map->pmap to query
12511 * footprint information, so collect that data now
12512 * and store it in new_map->vmmap_corpse_footprint
12513 * for later autopsy.
39037602 12514 */
d9a64523 12515 footprint_collect_kr =
0a7de745
A
12516 vm_map_corpse_footprint_collect(old_map,
12517 old_entry,
12518 new_map);
d9a64523
A
12519 }
12520
12521 switch (old_entry_inheritance) {
12522 case VM_INHERIT_NONE:
12523 break;
1c79356b
A
12524
12525 case VM_INHERIT_SHARE:
12526 vm_map_fork_share(old_map, old_entry, new_map);
12527 new_size += entry_size;
12528 break;
12529
12530 case VM_INHERIT_COPY:
12531
12532 /*
12533 * Inline the copy_quickly case;
12534 * upon failure, fall back on call
12535 * to vm_map_fork_copy.
12536 */
12537
0a7de745 12538 if (old_entry->is_sub_map) {
1c79356b 12539 break;
0a7de745 12540 }
9bccf70c 12541 if ((old_entry->wired_count != 0) ||
3e170ce0 12542 ((VME_OBJECT(old_entry) != NULL) &&
0a7de745 12543 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
12544 goto slow_vm_map_fork_copy;
12545 }
12546
7ddcb079 12547 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 12548 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
12549 if (new_entry->is_sub_map) {
12550 /* clear address space specifics */
12551 new_entry->use_pmap = FALSE;
a39ff7e2
A
12552 } else {
12553 /*
12554 * We're dealing with a copy-on-write operation,
12555 * so the resulting mapping should not inherit
12556 * the original mapping's accounting settings.
12557 * "iokit_acct" should have been cleared in
12558 * vm_map_entry_copy().
12559 * "use_pmap" should be reset to its default
12560 * (TRUE) so that the new mapping gets
12561 * accounted for in the task's memory footprint.
12562 */
12563 assert(!new_entry->iokit_acct);
12564 new_entry->use_pmap = TRUE;
fe8ab488 12565 }
1c79356b 12566
0a7de745 12567 if (!vm_object_copy_quickly(
cb323159 12568 VME_OBJECT_PTR(new_entry),
3e170ce0 12569 VME_OFFSET(old_entry),
2d21ac55 12570 (old_entry->vme_end -
0a7de745 12571 old_entry->vme_start),
2d21ac55
A
12572 &src_needs_copy,
12573 &new_entry_needs_copy)) {
1c79356b
A
12574 vm_map_entry_dispose(new_map, new_entry);
12575 goto slow_vm_map_fork_copy;
12576 }
12577
12578 /*
12579 * Handle copy-on-write obligations
12580 */
5ba3f43e 12581
1c79356b 12582 if (src_needs_copy && !old_entry->needs_copy) {
0a7de745 12583 vm_prot_t prot;
0c530ab8 12584
5ba3f43e
A
12585 assert(!pmap_has_prot_policy(old_entry->protection));
12586
0c530ab8 12587 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12588
3e170ce0 12589 if (override_nx(old_map, VME_ALIAS(old_entry))
0a7de745
A
12590 && prot) {
12591 prot |= VM_PROT_EXECUTE;
12592 }
2d21ac55 12593
5ba3f43e
A
12594 assert(!pmap_has_prot_policy(prot));
12595
1c79356b 12596 vm_object_pmap_protect(
3e170ce0
A
12597 VME_OBJECT(old_entry),
12598 VME_OFFSET(old_entry),
1c79356b 12599 (old_entry->vme_end -
0a7de745 12600 old_entry->vme_start),
5ba3f43e 12601 ((old_entry->is_shared
0a7de745
A
12602 || old_map->mapped_in_other_pmaps)
12603 ? PMAP_NULL :
12604 old_map->pmap),
1c79356b 12605 old_entry->vme_start,
0c530ab8 12606 prot);
1c79356b 12607
3e170ce0 12608 assert(old_entry->wired_count == 0);
1c79356b
A
12609 old_entry->needs_copy = TRUE;
12610 }
12611 new_entry->needs_copy = new_entry_needs_copy;
5ba3f43e 12612
1c79356b
A
12613 /*
12614 * Insert the entry at the end
12615 * of the map.
12616 */
5ba3f43e 12617
d9a64523 12618 vm_map_store_entry_link(new_map,
0a7de745
A
12619 vm_map_last_entry(new_map),
12620 new_entry,
12621 VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
12622 new_size += entry_size;
12623 break;
12624
0a7de745 12625slow_vm_map_fork_copy:
39037602
A
12626 vm_map_copyin_flags = 0;
12627 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12628 vm_map_copyin_flags |=
0a7de745 12629 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
39037602
A
12630 }
12631 if (vm_map_fork_copy(old_map,
0a7de745
A
12632 &old_entry,
12633 new_map,
12634 vm_map_copyin_flags)) {
1c79356b
A
12635 new_size += entry_size;
12636 }
12637 continue;
12638 }
12639 old_entry = old_entry->vme_next;
12640 }
12641
5ba3f43e
A
12642#if defined(__arm64__)
12643 pmap_insert_sharedpage(new_map->pmap);
12644#endif
fe8ab488 12645
1c79356b 12646 new_map->size = new_size;
d9a64523
A
12647
12648 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12649 vm_map_corpse_footprint_collect_done(new_map);
12650 }
12651
5ba3f43e 12652 vm_map_unlock(new_map);
1c79356b
A
12653 vm_map_unlock(old_map);
12654 vm_map_deallocate(old_map);
12655
0a7de745 12656 return new_map;
1c79356b
A
12657}
12658
2d21ac55
A
12659/*
12660 * vm_map_exec:
12661 *
0a7de745 12662 * Setup the "new_map" with the proper execution environment according
2d21ac55
A
12663 * to the type of executable (platform, 64bit, chroot environment).
12664 * Map the comm page and shared region, etc...
12665 */
12666kern_return_t
12667vm_map_exec(
0a7de745
A
12668 vm_map_t new_map,
12669 task_t task,
12670 boolean_t is64bit,
12671 void *fsroot,
12672 cpu_type_t cpu,
12673 cpu_subtype_t cpu_subtype)
2d21ac55
A
12674{
12675 SHARED_REGION_TRACE_DEBUG(
d9a64523 12676 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
0a7de745
A
12677 (void *)VM_KERNEL_ADDRPERM(current_task()),
12678 (void *)VM_KERNEL_ADDRPERM(new_map),
12679 (void *)VM_KERNEL_ADDRPERM(task),
12680 (void *)VM_KERNEL_ADDRPERM(fsroot),
12681 cpu,
12682 cpu_subtype));
39037602 12683 (void) vm_commpage_enter(new_map, task, is64bit);
d9a64523 12684 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
2d21ac55 12685 SHARED_REGION_TRACE_DEBUG(
d9a64523 12686 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
0a7de745
A
12687 (void *)VM_KERNEL_ADDRPERM(current_task()),
12688 (void *)VM_KERNEL_ADDRPERM(new_map),
12689 (void *)VM_KERNEL_ADDRPERM(task),
12690 (void *)VM_KERNEL_ADDRPERM(fsroot),
12691 cpu,
12692 cpu_subtype));
2d21ac55
A
12693 return KERN_SUCCESS;
12694}
1c79356b
A
12695
12696/*
12697 * vm_map_lookup_locked:
12698 *
12699 * Finds the VM object, offset, and
12700 * protection for a given virtual address in the
12701 * specified map, assuming a page fault of the
12702 * type specified.
12703 *
12704 * Returns the (object, offset, protection) for
12705 * this address, whether it is wired down, and whether
12706 * this map has the only reference to the data in question.
12707 * In order to later verify this lookup, a "version"
12708 * is returned.
12709 *
12710 * The map MUST be locked by the caller and WILL be
12711 * locked on exit. In order to guarantee the
12712 * existence of the returned object, it is returned
12713 * locked.
12714 *
12715 * If a lookup is requested with "write protection"
12716 * specified, the map may be changed to perform virtual
12717 * copying operations, although the data referenced will
12718 * remain the same.
12719 */
12720kern_return_t
12721vm_map_lookup_locked(
0a7de745
A
12722 vm_map_t *var_map, /* IN/OUT */
12723 vm_map_offset_t vaddr,
12724 vm_prot_t fault_type,
12725 int object_lock_type,
12726 vm_map_version_t *out_version, /* OUT */
12727 vm_object_t *object, /* OUT */
12728 vm_object_offset_t *offset, /* OUT */
12729 vm_prot_t *out_prot, /* OUT */
12730 boolean_t *wired, /* OUT */
12731 vm_object_fault_info_t fault_info, /* OUT */
12732 vm_map_t *real_map)
1c79356b 12733{
0a7de745
A
12734 vm_map_entry_t entry;
12735 vm_map_t map = *var_map;
12736 vm_map_t old_map = *var_map;
12737 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12738 vm_map_offset_t cow_parent_vaddr = 0;
12739 vm_map_offset_t old_start = 0;
12740 vm_map_offset_t old_end = 0;
12741 vm_prot_t prot;
12742 boolean_t mask_protections;
12743 boolean_t force_copy;
12744 vm_prot_t original_fault_type;
6d2010ae
A
12745
12746 /*
12747 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12748 * as a mask against the mapping's actual protections, not as an
12749 * absolute value.
12750 */
12751 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
12752 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12753 fault_type &= VM_PROT_ALL;
6d2010ae 12754 original_fault_type = fault_type;
1c79356b 12755
91447636 12756 *real_map = map;
6d2010ae
A
12757
12758RetryLookup:
12759 fault_type = original_fault_type;
1c79356b
A
12760
12761 /*
12762 * If the map has an interesting hint, try it before calling
12763 * full blown lookup routine.
12764 */
1c79356b 12765 entry = map->hint;
1c79356b
A
12766
12767 if ((entry == vm_map_to_entry(map)) ||
12768 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
0a7de745 12769 vm_map_entry_t tmp_entry;
1c79356b
A
12770
12771 /*
12772 * Entry was either not a valid hint, or the vaddr
12773 * was not contained in the entry, so do a full lookup.
12774 */
12775 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
0a7de745 12776 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
1c79356b 12777 vm_map_unlock(cow_sub_map_parent);
0a7de745
A
12778 }
12779 if ((*real_map != map)
12780 && (*real_map != cow_sub_map_parent)) {
91447636 12781 vm_map_unlock(*real_map);
0a7de745 12782 }
1c79356b
A
12783 return KERN_INVALID_ADDRESS;
12784 }
12785
12786 entry = tmp_entry;
12787 }
0a7de745 12788 if (map == old_map) {
1c79356b
A
12789 old_start = entry->vme_start;
12790 old_end = entry->vme_end;
12791 }
12792
12793 /*
12794 * Handle submaps. Drop lock on upper map, submap is
12795 * returned locked.
12796 */
12797
12798submap_recurse:
12799 if (entry->is_sub_map) {
0a7de745
A
12800 vm_map_offset_t local_vaddr;
12801 vm_map_offset_t end_delta;
12802 vm_map_offset_t start_delta;
12803 vm_map_entry_t submap_entry;
12804 vm_prot_t subentry_protection;
12805 vm_prot_t subentry_max_protection;
cb323159 12806 boolean_t subentry_no_copy_on_read;
0a7de745 12807 boolean_t mapped_needs_copy = FALSE;
1c79356b
A
12808
12809 local_vaddr = vaddr;
12810
39037602 12811 if ((entry->use_pmap &&
0a7de745
A
12812 !((fault_type & VM_PROT_WRITE) ||
12813 force_copy))) {
91447636 12814 /* if real_map equals map we unlock below */
5ba3f43e 12815 if ((*real_map != map) &&
0a7de745 12816 (*real_map != cow_sub_map_parent)) {
91447636 12817 vm_map_unlock(*real_map);
0a7de745 12818 }
3e170ce0 12819 *real_map = VME_SUBMAP(entry);
1c79356b
A
12820 }
12821
0a7de745
A
12822 if (entry->needs_copy &&
12823 ((fault_type & VM_PROT_WRITE) ||
39037602 12824 force_copy)) {
1c79356b
A
12825 if (!mapped_needs_copy) {
12826 if (vm_map_lock_read_to_write(map)) {
12827 vm_map_lock_read(map);
99c3a104 12828 *real_map = map;
1c79356b
A
12829 goto RetryLookup;
12830 }
3e170ce0
A
12831 vm_map_lock_read(VME_SUBMAP(entry));
12832 *var_map = VME_SUBMAP(entry);
1c79356b
A
12833 cow_sub_map_parent = map;
12834 /* reset base to map before cow object */
12835 /* this is the map which will accept */
12836 /* the new cow object */
12837 old_start = entry->vme_start;
12838 old_end = entry->vme_end;
12839 cow_parent_vaddr = vaddr;
12840 mapped_needs_copy = TRUE;
12841 } else {
3e170ce0
A
12842 vm_map_lock_read(VME_SUBMAP(entry));
12843 *var_map = VME_SUBMAP(entry);
0a7de745
A
12844 if ((cow_sub_map_parent != map) &&
12845 (*real_map != map)) {
1c79356b 12846 vm_map_unlock(map);
0a7de745 12847 }
1c79356b
A
12848 }
12849 } else {
3e170ce0 12850 vm_map_lock_read(VME_SUBMAP(entry));
5ba3f43e 12851 *var_map = VME_SUBMAP(entry);
1c79356b
A
12852 /* leave map locked if it is a target */
12853 /* cow sub_map above otherwise, just */
12854 /* follow the maps down to the object */
12855 /* here we unlock knowing we are not */
12856 /* revisiting the map. */
0a7de745 12857 if ((*real_map != map) && (map != cow_sub_map_parent)) {
1c79356b 12858 vm_map_unlock_read(map);
0a7de745 12859 }
1c79356b
A
12860 }
12861
99c3a104 12862 map = *var_map;
1c79356b
A
12863
12864 /* calculate the offset in the submap for vaddr */
3e170ce0 12865 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 12866
0a7de745
A
12867RetrySubMap:
12868 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12869 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
1c79356b
A
12870 vm_map_unlock(cow_sub_map_parent);
12871 }
0a7de745
A
12872 if ((*real_map != map)
12873 && (*real_map != cow_sub_map_parent)) {
91447636 12874 vm_map_unlock(*real_map);
1c79356b 12875 }
91447636 12876 *real_map = map;
1c79356b
A
12877 return KERN_INVALID_ADDRESS;
12878 }
2d21ac55 12879
1c79356b
A
12880 /* find the attenuated shadow of the underlying object */
12881 /* on our target map */
12882
12883 /* in english the submap object may extend beyond the */
12884 /* region mapped by the entry or, may only fill a portion */
12885 /* of it. For our purposes, we only care if the object */
12886 /* doesn't fill. In this case the area which will */
12887 /* ultimately be clipped in the top map will only need */
12888 /* to be as big as the portion of the underlying entry */
12889 /* which is mapped */
3e170ce0 12890 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
0a7de745 12891 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b 12892
5ba3f43e 12893 end_delta =
0a7de745
A
12894 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12895 submap_entry->vme_end ?
12896 0 : (VME_OFFSET(entry) +
12897 (old_end - old_start))
12898 - submap_entry->vme_end;
1c79356b
A
12899
12900 old_start += start_delta;
12901 old_end -= end_delta;
12902
0a7de745 12903 if (submap_entry->is_sub_map) {
1c79356b
A
12904 entry = submap_entry;
12905 vaddr = local_vaddr;
12906 goto submap_recurse;
12907 }
12908
39037602 12909 if (((fault_type & VM_PROT_WRITE) ||
0a7de745 12910 force_copy)
39037602 12911 && cow_sub_map_parent) {
0a7de745 12912 vm_object_t sub_object, copy_object;
2d21ac55 12913 vm_object_offset_t copy_offset;
0a7de745
A
12914 vm_map_offset_t local_start;
12915 vm_map_offset_t local_end;
12916 boolean_t copied_slowly = FALSE;
1c79356b
A
12917
12918 if (vm_map_lock_read_to_write(map)) {
12919 vm_map_lock_read(map);
12920 old_start -= start_delta;
12921 old_end += end_delta;
12922 goto RetrySubMap;
12923 }
0b4e3aa0
A
12924
12925
3e170ce0 12926 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
12927 if (sub_object == VM_OBJECT_NULL) {
12928 sub_object =
0a7de745
A
12929 vm_object_allocate(
12930 (vm_map_size_t)
12931 (submap_entry->vme_end -
12932 submap_entry->vme_start));
3e170ce0
A
12933 VME_OBJECT_SET(submap_entry, sub_object);
12934 VME_OFFSET_SET(submap_entry, 0);
a39ff7e2
A
12935 assert(!submap_entry->is_sub_map);
12936 assert(submap_entry->use_pmap);
1c79356b 12937 }
5ba3f43e 12938 local_start = local_vaddr -
0a7de745 12939 (cow_parent_vaddr - old_start);
5ba3f43e 12940 local_end = local_vaddr +
0a7de745 12941 (old_end - cow_parent_vaddr);
1c79356b
A
12942 vm_map_clip_start(map, submap_entry, local_start);
12943 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
12944 if (submap_entry->is_sub_map) {
12945 /* unnesting was done when clipping */
12946 assert(!submap_entry->use_pmap);
12947 }
1c79356b
A
12948
12949 /* This is the COW case, lets connect */
12950 /* an entry in our space to the underlying */
12951 /* object in the submap, bypassing the */
12952 /* submap. */
0b4e3aa0
A
12953
12954
0a7de745
A
12955 if (submap_entry->wired_count != 0 ||
12956 (sub_object->copy_strategy ==
4a3eedf9 12957 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
12958 vm_object_lock(sub_object);
12959 vm_object_copy_slowly(sub_object,
0a7de745
A
12960 VME_OFFSET(submap_entry),
12961 (submap_entry->vme_end -
12962 submap_entry->vme_start),
12963 FALSE,
12964 &copy_object);
2d21ac55 12965 copied_slowly = TRUE;
0b4e3aa0 12966 } else {
0b4e3aa0 12967 /* set up shadow object */
2d21ac55 12968 copy_object = sub_object;
39037602
A
12969 vm_object_lock(sub_object);
12970 vm_object_reference_locked(sub_object);
2d21ac55 12971 sub_object->shadowed = TRUE;
39037602
A
12972 vm_object_unlock(sub_object);
12973
3e170ce0 12974 assert(submap_entry->wired_count == 0);
0b4e3aa0 12975 submap_entry->needs_copy = TRUE;
0c530ab8 12976
5ba3f43e
A
12977 prot = submap_entry->protection;
12978 assert(!pmap_has_prot_policy(prot));
12979 prot = prot & ~VM_PROT_WRITE;
12980 assert(!pmap_has_prot_policy(prot));
2d21ac55 12981
3e170ce0 12982 if (override_nx(old_map,
0a7de745
A
12983 VME_ALIAS(submap_entry))
12984 && prot) {
12985 prot |= VM_PROT_EXECUTE;
12986 }
2d21ac55 12987
0b4e3aa0 12988 vm_object_pmap_protect(
2d21ac55 12989 sub_object,
3e170ce0 12990 VME_OFFSET(submap_entry),
5ba3f43e 12991 submap_entry->vme_end -
2d21ac55 12992 submap_entry->vme_start,
5ba3f43e 12993 (submap_entry->is_shared
0a7de745 12994 || map->mapped_in_other_pmaps) ?
2d21ac55 12995 PMAP_NULL : map->pmap,
1c79356b 12996 submap_entry->vme_start,
0c530ab8 12997 prot);
0b4e3aa0 12998 }
5ba3f43e 12999
2d21ac55
A
13000 /*
13001 * Adjust the fault offset to the submap entry.
13002 */
13003 copy_offset = (local_vaddr -
0a7de745
A
13004 submap_entry->vme_start +
13005 VME_OFFSET(submap_entry));
1c79356b
A
13006
13007 /* This works differently than the */
13008 /* normal submap case. We go back */
13009 /* to the parent of the cow map and*/
13010 /* clip out the target portion of */
13011 /* the sub_map, substituting the */
13012 /* new copy object, */
13013
5ba3f43e
A
13014 subentry_protection = submap_entry->protection;
13015 subentry_max_protection = submap_entry->max_protection;
cb323159 13016 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
1c79356b 13017 vm_map_unlock(map);
5ba3f43e
A
13018 submap_entry = NULL; /* not valid after map unlock */
13019
1c79356b
A
13020 local_start = old_start;
13021 local_end = old_end;
13022 map = cow_sub_map_parent;
13023 *var_map = cow_sub_map_parent;
13024 vaddr = cow_parent_vaddr;
13025 cow_sub_map_parent = NULL;
13026
0a7de745
A
13027 if (!vm_map_lookup_entry(map,
13028 vaddr, &entry)) {
2d21ac55
A
13029 vm_object_deallocate(
13030 copy_object);
13031 vm_map_lock_write_to_read(map);
13032 return KERN_INVALID_ADDRESS;
13033 }
5ba3f43e 13034
2d21ac55
A
13035 /* clip out the portion of space */
13036 /* mapped by the sub map which */
13037 /* corresponds to the underlying */
13038 /* object */
13039
13040 /*
13041 * Clip (and unnest) the smallest nested chunk
13042 * possible around the faulting address...
13043 */
13044 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13045 local_end = local_start + pmap_nesting_size_min;
13046 /*
13047 * ... but don't go beyond the "old_start" to "old_end"
13048 * range, to avoid spanning over another VM region
13049 * with a possibly different VM object and/or offset.
13050 */
13051 if (local_start < old_start) {
13052 local_start = old_start;
13053 }
13054 if (local_end > old_end) {
13055 local_end = old_end;
13056 }
13057 /*
13058 * Adjust copy_offset to the start of the range.
13059 */
13060 copy_offset -= (vaddr - local_start);
13061
1c79356b
A
13062 vm_map_clip_start(map, entry, local_start);
13063 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
13064 if (entry->is_sub_map) {
13065 /* unnesting was done when clipping */
13066 assert(!entry->use_pmap);
13067 }
1c79356b
A
13068
13069 /* substitute copy object for */
13070 /* shared map entry */
3e170ce0 13071 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 13072 assert(!entry->iokit_acct);
1c79356b 13073 entry->is_sub_map = FALSE;
fe8ab488 13074 entry->use_pmap = TRUE;
3e170ce0 13075 VME_OBJECT_SET(entry, copy_object);
1c79356b 13076
2d21ac55 13077 /* propagate the submap entry's protections */
d9a64523
A
13078 if (entry->protection != VM_PROT_READ) {
13079 /*
13080 * Someone has already altered the top entry's
13081 * protections via vm_protect(VM_PROT_COPY).
13082 * Respect these new values and ignore the
13083 * submap entry's protections.
13084 */
13085 } else {
13086 /*
13087 * Regular copy-on-write: propagate the submap
13088 * entry's protections to the top map entry.
13089 */
13090 entry->protection |= subentry_protection;
13091 }
5ba3f43e 13092 entry->max_protection |= subentry_max_protection;
cb323159
A
13093 /* propagate no_copy_on_read */
13094 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
5ba3f43e 13095
d9a64523
A
13096 if ((entry->protection & VM_PROT_WRITE) &&
13097 (entry->protection & VM_PROT_EXECUTE) &&
13098#if !CONFIG_EMBEDDED
13099 map != kernel_map &&
13100 cs_process_enforcement(NULL) &&
13101#endif /* !CONFIG_EMBEDDED */
13102 !(entry->used_for_jit)) {
13103 DTRACE_VM3(cs_wx,
0a7de745
A
13104 uint64_t, (uint64_t)entry->vme_start,
13105 uint64_t, (uint64_t)entry->vme_end,
13106 vm_prot_t, entry->protection);
d9a64523 13107 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
13108 proc_selfpid(),
13109 (current_task()->bsd_info
13110 ? proc_name_address(current_task()->bsd_info)
13111 : "?"),
13112 __FUNCTION__);
d9a64523 13113 entry->protection &= ~VM_PROT_EXECUTE;
5ba3f43e 13114 }
2d21ac55 13115
0a7de745 13116 if (copied_slowly) {
3e170ce0 13117 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
13118 entry->needs_copy = FALSE;
13119 entry->is_shared = FALSE;
13120 } else {
3e170ce0
A
13121 VME_OFFSET_SET(entry, copy_offset);
13122 assert(entry->wired_count == 0);
0b4e3aa0 13123 entry->needs_copy = TRUE;
0a7de745 13124 if (entry->inheritance == VM_INHERIT_SHARE) {
0b4e3aa0 13125 entry->inheritance = VM_INHERIT_COPY;
0a7de745
A
13126 }
13127 if (map != old_map) {
0b4e3aa0 13128 entry->is_shared = TRUE;
0a7de745 13129 }
0b4e3aa0 13130 }
0a7de745 13131 if (entry->inheritance == VM_INHERIT_SHARE) {
0b4e3aa0 13132 entry->inheritance = VM_INHERIT_COPY;
0a7de745 13133 }
1c79356b
A
13134
13135 vm_map_lock_write_to_read(map);
13136 } else {
0a7de745
A
13137 if ((cow_sub_map_parent)
13138 && (cow_sub_map_parent != *real_map)
13139 && (cow_sub_map_parent != map)) {
1c79356b
A
13140 vm_map_unlock(cow_sub_map_parent);
13141 }
13142 entry = submap_entry;
13143 vaddr = local_vaddr;
13144 }
13145 }
5ba3f43e 13146
1c79356b
A
13147 /*
13148 * Check whether this task is allowed to have
13149 * this page.
13150 */
2d21ac55 13151
6601e61a 13152 prot = entry->protection;
0c530ab8 13153
3e170ce0 13154 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0a7de745 13155 /*
2d21ac55 13156 * HACK -- if not a stack, then allow execution
0c530ab8 13157 */
0a7de745 13158 prot |= VM_PROT_EXECUTE;
2d21ac55
A
13159 }
13160
6d2010ae
A
13161 if (mask_protections) {
13162 fault_type &= prot;
13163 if (fault_type == VM_PROT_NONE) {
13164 goto protection_failure;
13165 }
13166 }
39037602 13167 if (((fault_type & prot) != fault_type)
5ba3f43e
A
13168#if __arm64__
13169 /* prefetch abort in execute-only page */
13170 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13171#endif
39037602 13172 ) {
0a7de745 13173protection_failure:
2d21ac55
A
13174 if (*real_map != map) {
13175 vm_map_unlock(*real_map);
0c530ab8
A
13176 }
13177 *real_map = map;
13178
0a7de745
A
13179 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13180 log_stack_execution_failure((addr64_t)vaddr, prot);
13181 }
0c530ab8 13182
2d21ac55 13183 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 13184 return KERN_PROTECTION_FAILURE;
1c79356b
A
13185 }
13186
13187 /*
13188 * If this page is not pageable, we have to get
13189 * it for all possible accesses.
13190 */
13191
91447636 13192 *wired = (entry->wired_count != 0);
0a7de745
A
13193 if (*wired) {
13194 fault_type = prot;
13195 }
1c79356b
A
13196
13197 /*
13198 * If the entry was copy-on-write, we either ...
13199 */
13200
13201 if (entry->needs_copy) {
0a7de745 13202 /*
1c79356b
A
13203 * If we want to write the page, we may as well
13204 * handle that now since we've got the map locked.
13205 *
13206 * If we don't need to write the page, we just
13207 * demote the permissions allowed.
13208 */
13209
fe8ab488 13210 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
13211 /*
13212 * Make a new object, and place it in the
13213 * object chain. Note that no new references
13214 * have appeared -- one just moved from the
13215 * map to the new object.
13216 */
13217
13218 if (vm_map_lock_read_to_write(map)) {
13219 vm_map_lock_read(map);
13220 goto RetryLookup;
13221 }
39037602
A
13222
13223 if (VME_OBJECT(entry)->shadowed == FALSE) {
13224 vm_object_lock(VME_OBJECT(entry));
13225 VME_OBJECT(entry)->shadowed = TRUE;
13226 vm_object_unlock(VME_OBJECT(entry));
13227 }
3e170ce0 13228 VME_OBJECT_SHADOW(entry,
0a7de745
A
13229 (vm_map_size_t) (entry->vme_end -
13230 entry->vme_start));
1c79356b 13231 entry->needs_copy = FALSE;
39037602 13232
1c79356b
A
13233 vm_map_lock_write_to_read(map);
13234 }
39037602 13235 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
13236 /*
13237 * We're attempting to read a copy-on-write
13238 * page -- don't allow writes.
13239 */
13240
13241 prot &= (~VM_PROT_WRITE);
13242 }
13243 }
13244
13245 /*
13246 * Create an object if necessary.
13247 */
3e170ce0 13248 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
13249 if (vm_map_lock_read_to_write(map)) {
13250 vm_map_lock_read(map);
13251 goto RetryLookup;
13252 }
13253
3e170ce0 13254 VME_OBJECT_SET(entry,
0a7de745
A
13255 vm_object_allocate(
13256 (vm_map_size_t)(entry->vme_end -
13257 entry->vme_start)));
3e170ce0 13258 VME_OFFSET_SET(entry, 0);
a39ff7e2 13259 assert(entry->use_pmap);
1c79356b
A
13260 vm_map_lock_write_to_read(map);
13261 }
13262
13263 /*
13264 * Return the object/offset from this entry. If the entry
13265 * was copy-on-write or empty, it has been fixed up. Also
13266 * return the protection.
13267 */
13268
0a7de745
A
13269 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13270 *object = VME_OBJECT(entry);
1c79356b 13271 *out_prot = prot;
94ff46dc 13272 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
2d21ac55
A
13273
13274 if (fault_info) {
13275 fault_info->interruptible = THREAD_UNINT; /* for now... */
13276 /* ... the caller will change "interruptible" if needed */
0a7de745 13277 fault_info->cluster_size = 0;
3e170ce0 13278 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
13279 fault_info->pmap_options = 0;
13280 if (entry->iokit_acct ||
13281 (!entry->is_sub_map && !entry->use_pmap)) {
13282 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13283 }
0a7de745 13284 fault_info->behavior = entry->behavior;
3e170ce0
A
13285 fault_info->lo_offset = VME_OFFSET(entry);
13286 fault_info->hi_offset =
0a7de745 13287 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 13288 fault_info->no_cache = entry->no_cache;
b0d623f7 13289 fault_info->stealth = FALSE;
6d2010ae 13290 fault_info->io_sync = FALSE;
3e170ce0
A
13291 if (entry->used_for_jit ||
13292 entry->vme_resilient_codesign) {
13293 fault_info->cs_bypass = TRUE;
13294 } else {
13295 fault_info->cs_bypass = FALSE;
13296 }
d9a64523
A
13297 fault_info->pmap_cs_associated = FALSE;
13298#if CONFIG_PMAP_CS
13299 if (entry->pmap_cs_associated) {
13300 /*
13301 * The pmap layer will validate this page
13302 * before allowing it to be executed from.
13303 */
13304 fault_info->pmap_cs_associated = TRUE;
13305 }
13306#endif /* CONFIG_PMAP_CS */
0b4c1975 13307 fault_info->mark_zf_absent = FALSE;
316670eb 13308 fault_info->batch_pmap_op = FALSE;
cb323159
A
13309 fault_info->resilient_media = entry->vme_resilient_media;
13310 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
2d21ac55 13311 }
1c79356b
A
13312
13313 /*
13314 * Lock the object to prevent it from disappearing
13315 */
0a7de745
A
13316 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13317 vm_object_lock(*object);
13318 } else {
13319 vm_object_lock_shared(*object);
13320 }
5ba3f43e 13321
1c79356b
A
13322 /*
13323 * Save the version number
13324 */
13325
13326 out_version->main_timestamp = map->timestamp;
13327
13328 return KERN_SUCCESS;
13329}
13330
13331
13332/*
13333 * vm_map_verify:
13334 *
13335 * Verifies that the map in question has not changed
5ba3f43e
A
13336 * since the given version. The map has to be locked
13337 * ("shared" mode is fine) before calling this function
13338 * and it will be returned locked too.
1c79356b
A
13339 */
13340boolean_t
13341vm_map_verify(
0a7de745
A
13342 vm_map_t map,
13343 vm_map_version_t *version) /* REF */
1c79356b 13344{
0a7de745 13345 boolean_t result;
1c79356b 13346
5ba3f43e 13347 vm_map_lock_assert_held(map);
1c79356b
A
13348 result = (map->timestamp == version->main_timestamp);
13349
0a7de745 13350 return result;
1c79356b
A
13351}
13352
91447636
A
13353/*
 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 * Goes away after regular vm_region_recurse function migrates to
 * 64 bits
 * vm_map_region_recurse_64 (vm_region_recurse): A form of vm_region
 * which follows the submaps in a target map
 *
 * Reports the VM region that contains "*address" or, when that
 * address is not mapped, the closest region found after it, descending
 * into sub-maps for at most "*nesting_depth" levels.
 *
 * Parameters:
 *   map            top-level map to inspect; VM_MAP_NULL is rejected.
 *   address        IN: address to look up.
 *                  OUT: start of the region being reported.
 *   size           OUT: size of the region being reported.
 *   nesting_depth  IN: maximum sub-map depth to descend to.
 *                  OUT: depth at which the reported region was found.
 *   submap_info    OUT: filled as a "short" info structure when "*count"
 *                  is below VM_REGION_SUBMAP_INFO_V0_COUNT_64, as a
 *                  full info structure otherwise.
 *   count          IN: capacity of "submap_info"; must be at least
 *                  VM_REGION_SUBMAP_SHORT_INFO_COUNT_64.
 *                  OUT: count matching the layout that was selected.
 *
 * Returns:
 *   KERN_SUCCESS           a region (or the fake footprint region) is
 *                          described by the OUT parameters.
 *   KERN_INVALID_ARGUMENT  "map" is VM_MAP_NULL or "*count" is too small.
 *   KERN_INVALID_ADDRESS   no region to report at or after "*address".
 *
 * Maps are locked (for reading) and unlocked only while "not_in_kdp"
 * is set.
 */
13361
13362kern_return_t
13363vm_map_region_recurse_64(
0a7de745
A
13364 vm_map_t map,
13365 vm_map_offset_t *address, /* IN/OUT */
13366 vm_map_size_t *size, /* OUT */
13367 natural_t *nesting_depth, /* IN/OUT */
13368 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13369 mach_msg_type_number_t *count) /* IN/OUT */
91447636 13370{
0a7de745
A
13371 mach_msg_type_number_t original_count;
13372 vm_region_extended_info_data_t extended;
13373 vm_map_entry_t tmp_entry;
13374 vm_map_offset_t user_address;
13375 unsigned int user_max_depth;
91447636
A
13376
13377 /*
13378 * "curr_entry" is the VM map entry preceding or including the
13379 * address we're looking for.
13380 * "curr_map" is the map or sub-map containing "curr_entry".
5ba3f43e 13381 * "curr_address" is the equivalent of the top map's "user_address"
6d2010ae 13382 * in the current map.
91447636
A
13383 * "curr_offset" is the cumulated offset of "curr_map" in the
13384 * target task's address space.
13385 * "curr_depth" is the depth of "curr_map" in the chain of
13386 * sub-maps.
5ba3f43e 13387 *
6d2010ae
A
13388 * "curr_max_below" and "curr_max_above" limit the range (around
13389 * "curr_address") we should take into account in the current (sub)map.
13390 * They limit the range to what's visible through the map entries
13391 * we've traversed from the top map to the current map.
0a7de745 13392 *
91447636 13393 */
0a7de745
A
13394 vm_map_entry_t curr_entry;
13395 vm_map_address_t curr_address;
13396 vm_map_offset_t curr_offset;
13397 vm_map_t curr_map;
13398 unsigned int curr_depth;
13399 vm_map_offset_t curr_max_below, curr_max_above;
13400 vm_map_offset_t curr_skip;
91447636
A
13401
13402 /*
13403 * "next_" is the same as "curr_" but for the VM region immediately
13404 * after the address we're looking for. We need to keep track of this
13405 * too because we want to return info about that region if the
13406 * address we're looking for is not mapped.
13407 */
0a7de745
A
13408 vm_map_entry_t next_entry;
13409 vm_map_offset_t next_offset;
13410 vm_map_offset_t next_address;
13411 vm_map_t next_map;
13412 unsigned int next_depth;
13413 vm_map_offset_t next_max_below, next_max_above;
13414 vm_map_offset_t next_skip;
13415
13416 boolean_t look_for_pages;
2d21ac55 13417 vm_region_submap_short_info_64_t short_info;
0a7de745 13418 boolean_t do_region_footprint;
2d21ac55 13419
91447636
A
13420 if (map == VM_MAP_NULL) {
13421 /* no address space to work on */
13422 return KERN_INVALID_ARGUMENT;
13423 }
13424
5ba3f43e 13425
39236c6e
A
13426 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13427 /*
13428 * "info" structure is not big enough and
13429 * would overflow
13430 */
13431 return KERN_INVALID_ARGUMENT;
13432 }
5ba3f43e 13433
a39ff7e2 13434 do_region_footprint = task_self_region_footprint();
39236c6e 13435 original_count = *count;
5ba3f43e 13436
39236c6e
A
/*
 * Choose the output layout from the caller's buffer size: the
 * "short" info below the V0 count, the full info otherwise, with
 * "*count" raised to the V1 or V2 count when the buffer allows it.
 * Exactly one of "short_info" / "submap_info" is non-NULL afterwards.
 */
13437 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13438 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13439 look_for_pages = FALSE;
13440 short_info = (vm_region_submap_short_info_64_t) submap_info;
13441 submap_info = NULL;
2d21ac55
A
13442 } else {
13443 look_for_pages = TRUE;
39236c6e 13444 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 13445 short_info = NULL;
5ba3f43e 13446
39236c6e
A
13447 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13448 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13449 }
cb323159
A
13450 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13451 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13452 }
91447636 13453 }
5ba3f43e 13454
91447636
A
13455 user_address = *address;
13456 user_max_depth = *nesting_depth;
5ba3f43e 13457
3e170ce0
A
13458 if (not_in_kdp) {
13459 vm_map_lock_read(map);
13460 }
13461
13462recurse_again:
91447636
A
13463 curr_entry = NULL;
13464 curr_map = map;
6d2010ae 13465 curr_address = user_address;
91447636 13466 curr_offset = 0;
6d2010ae 13467 curr_skip = 0;
91447636 13468 curr_depth = 0;
6d2010ae
A
13469 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13470 curr_max_below = curr_address;
91447636
A
13471
13472 next_entry = NULL;
13473 next_map = NULL;
6d2010ae 13474 next_address = 0;
91447636 13475 next_offset = 0;
6d2010ae 13476 next_skip = 0;
91447636 13477 next_depth = 0;
6d2010ae
A
13478 next_max_above = (vm_map_offset_t) -1;
13479 next_max_below = (vm_map_offset_t) -1;
91447636 13480
91447636
A
13481 for (;;) {
13482 if (vm_map_lookup_entry(curr_map,
0a7de745
A
13483 curr_address,
13484 &tmp_entry)) {
91447636
A
13485 /* tmp_entry contains the address we're looking for */
13486 curr_entry = tmp_entry;
13487 } else {
6d2010ae 13488 vm_map_offset_t skip;
91447636
A
13489 /*
13490 * The address is not mapped. "tmp_entry" is the
13491 * map entry preceding the address. We want the next
13492 * one, if it exists.
13493 */
13494 curr_entry = tmp_entry->vme_next;
6d2010ae 13495
91447636 13496 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae 13497 (curr_entry->vme_start >=
0a7de745 13498 curr_address + curr_max_above)) {
91447636
A
13499 /* no next entry at this level: stop looking */
13500 if (not_in_kdp) {
13501 vm_map_unlock_read(curr_map);
13502 }
13503 curr_entry = NULL;
13504 curr_map = NULL;
3e170ce0 13505 curr_skip = 0;
91447636
A
13506 curr_offset = 0;
13507 curr_depth = 0;
6d2010ae
A
13508 curr_max_above = 0;
13509 curr_max_below = 0;
91447636
A
13510 break;
13511 }
6d2010ae
A
13512
13513 /* adjust current address and offset */
13514 skip = curr_entry->vme_start - curr_address;
13515 curr_address = curr_entry->vme_start;
3e170ce0 13516 curr_skip += skip;
6d2010ae
A
13517 curr_offset += skip;
13518 curr_max_above -= skip;
13519 curr_max_below = 0;
91447636
A
13520 }
13521
13522 /*
13523 * Is the next entry at this level closer to the address (or
13524 * deeper in the submap chain) than the one we had
13525 * so far ?
13526 */
13527 tmp_entry = curr_entry->vme_next;
13528 if (tmp_entry == vm_map_to_entry(curr_map)) {
13529 /* no next entry at this level */
6d2010ae 13530 } else if (tmp_entry->vme_start >=
0a7de745 13531 curr_address + curr_max_above) {
91447636
A
13532 /*
13533 * tmp_entry is beyond the scope of what we mapped of
13534 * this submap in the upper level: ignore it.
13535 */
13536 } else if ((next_entry == NULL) ||
0a7de745
A
13537 (tmp_entry->vme_start + curr_offset <=
13538 next_entry->vme_start + next_offset)) {
91447636
A
13539 /*
13540 * We didn't have a "next_entry" or this one is
13541 * closer to the address we're looking for:
13542 * use this "tmp_entry" as the new "next_entry".
13543 */
13544 if (next_entry != NULL) {
13545 /* unlock the last "next_map" */
13546 if (next_map != curr_map && not_in_kdp) {
13547 vm_map_unlock_read(next_map);
13548 }
13549 }
13550 next_entry = tmp_entry;
13551 next_map = curr_map;
91447636 13552 next_depth = curr_depth;
6d2010ae
A
13553 next_address = next_entry->vme_start;
13554 next_skip = curr_skip;
3e170ce0 13555 next_skip += (next_address - curr_address);
6d2010ae
A
13556 next_offset = curr_offset;
13557 next_offset += (next_address - curr_address);
13558 next_max_above = MIN(next_max_above, curr_max_above);
13559 next_max_above = MIN(next_max_above,
0a7de745 13560 next_entry->vme_end - next_address);
6d2010ae
A
13561 next_max_below = MIN(next_max_below, curr_max_below);
13562 next_max_below = MIN(next_max_below,
0a7de745 13563 next_address - next_entry->vme_start);
91447636
A
13564 }
13565
6d2010ae
A
13566 /*
13567 * "curr_max_{above,below}" allow us to keep track of the
13568 * portion of the submap that is actually mapped at this level:
13569 * the rest of that submap is irrelevant to us, since it's not
13570 * mapped here.
13571 * The relevant portion of the map starts at
3e170ce0 13572 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
13573 */
13574 curr_max_above = MIN(curr_max_above,
0a7de745 13575 curr_entry->vme_end - curr_address);
6d2010ae 13576 curr_max_below = MIN(curr_max_below,
0a7de745 13577 curr_address - curr_entry->vme_start);
6d2010ae 13578
91447636
A
13579 if (!curr_entry->is_sub_map ||
13580 curr_depth >= user_max_depth) {
13581 /*
13582 * We hit a leaf map or we reached the maximum depth
13583 * we could, so stop looking. Keep the current map
13584 * locked.
13585 */
13586 break;
13587 }
13588
13589 /*
13590 * Get down to the next submap level.
13591 */
13592
13593 /*
13594 * Lock the next level and unlock the current level,
13595 * unless we need to keep it locked to access the "next_entry"
13596 * later.
13597 */
13598 if (not_in_kdp) {
3e170ce0 13599 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
13600 }
13601 if (curr_map == next_map) {
13602 /* keep "next_map" locked in case we need it */
13603 } else {
13604 /* release this map */
0a7de745 13605 if (not_in_kdp) {
b0d623f7 13606 vm_map_unlock_read(curr_map);
0a7de745 13607 }
91447636
A
13608 }
13609
13610 /*
13611 * Adjust the offset. "curr_entry" maps the submap
13612 * at relative address "curr_entry->vme_start" in the
3e170ce0 13613 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
13614 * bytes of the submap.
13615 * "curr_offset" always represents the offset of a virtual
13616 * address in the curr_map relative to the absolute address
13617 * space (i.e. the top-level VM map).
13618 */
13619 curr_offset +=
0a7de745 13620 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 13621 curr_address = user_address + curr_offset;
91447636 13622 /* switch to the submap */
3e170ce0 13623 curr_map = VME_SUBMAP(curr_entry);
91447636 13624 curr_depth++;
91447636
A
13625 curr_entry = NULL;
13626 }
13627
a39ff7e2
A
13628// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13629// so probably should be a real 32b ID vs. ptr.
13630// Current users just check for equality
13631
91447636
A
13632 if (curr_entry == NULL) {
13633 /* no VM region contains the address... */
a39ff7e2
A
13634
13635 if (do_region_footprint && /* we want footprint numbers */
39037602
A
13636 next_entry == NULL && /* & there are no more regions */
13637 /* & we haven't already provided our fake region: */
a39ff7e2 13638 user_address <= vm_map_last_entry(map)->vme_end) {
cb323159
A
13639 ledger_amount_t ledger_resident, ledger_compressed;
13640
39037602
A
13641 /*
13642 * Add a fake memory region to account for
cb323159
A
13643 * purgeable and/or ledger-tagged memory that
13644 * counts towards this task's memory footprint,
13645 * i.e. the resident/compressed pages of non-volatile
13646 * objects owned by that task.
39037602 13647 */
cb323159
A
13648 task_ledgers_footprint(map->pmap->ledger,
13649 &ledger_resident,
13650 &ledger_compressed);
13651 if (ledger_resident + ledger_compressed == 0) {
39037602 13652 /* no purgeable memory usage to report */
a39ff7e2 13653 return KERN_INVALID_ADDRESS;
39037602
A
13654 }
13655 /* fake region to show nonvolatile footprint */
a39ff7e2
A
13656 if (look_for_pages) {
13657 submap_info->protection = VM_PROT_DEFAULT;
13658 submap_info->max_protection = VM_PROT_DEFAULT;
13659 submap_info->inheritance = VM_INHERIT_DEFAULT;
13660 submap_info->offset = 0;
13661 submap_info->user_tag = -1;
cb323159 13662 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
a39ff7e2 13663 submap_info->pages_shared_now_private = 0;
cb323159 13664 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
a39ff7e2
A
13665 submap_info->pages_dirtied = submap_info->pages_resident;
13666 submap_info->ref_count = 1;
13667 submap_info->shadow_depth = 0;
13668 submap_info->external_pager = 0;
13669 submap_info->share_mode = SM_PRIVATE;
13670 submap_info->is_submap = 0;
13671 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13672 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13673 submap_info->user_wired_count = 0;
13674 submap_info->pages_reusable = 0;
13675 } else {
13676 short_info->user_tag = -1;
13677 short_info->offset = 0;
13678 short_info->protection = VM_PROT_DEFAULT;
13679 short_info->inheritance = VM_INHERIT_DEFAULT;
13680 short_info->max_protection = VM_PROT_DEFAULT;
13681 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13682 short_info->user_wired_count = 0;
13683 short_info->is_submap = 0;
13684 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13685 short_info->external_pager = 0;
13686 short_info->shadow_depth = 0;
13687 short_info->share_mode = SM_PRIVATE;
13688 short_info->ref_count = 1;
13689 }
39037602 13690 *nesting_depth = 0;
cb323159 13691 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
a39ff7e2
A
13692// *address = user_address;
13693 *address = vm_map_last_entry(map)->vme_end;
39037602
A
13694 return KERN_SUCCESS;
13695 }
a39ff7e2 13696
91447636
A
13697 if (next_entry == NULL) {
13698 /* ... and no VM region follows it either */
13699 return KERN_INVALID_ADDRESS;
13700 }
13701 /* ... gather info about the next VM region */
13702 curr_entry = next_entry;
0a7de745 13703 curr_map = next_map; /* still locked ... */
6d2010ae
A
13704 curr_address = next_address;
13705 curr_skip = next_skip;
91447636
A
13706 curr_offset = next_offset;
13707 curr_depth = next_depth;
6d2010ae
A
13708 curr_max_above = next_max_above;
13709 curr_max_below = next_max_below;
91447636
A
13710 } else {
13711 /* we won't need "next_entry" after all */
13712 if (next_entry != NULL) {
13713 /* release "next_map" */
13714 if (next_map != curr_map && not_in_kdp) {
13715 vm_map_unlock_read(next_map);
13716 }
13717 }
13718 }
13719 next_entry = NULL;
13720 next_map = NULL;
13721 next_offset = 0;
6d2010ae 13722 next_skip = 0;
91447636 13723 next_depth = 0;
6d2010ae
A
13724 next_max_below = -1;
13725 next_max_above = -1;
91447636 13726
3e170ce0
A
13727 if (curr_entry->is_sub_map &&
13728 curr_depth < user_max_depth) {
13729 /*
13730 * We're not as deep as we could be: we must have
13731 * gone back up after not finding anything mapped
13732 * below the original top-level map entry's.
13733 * Let's move "curr_address" forward and recurse again.
13734 */
/*
 * NOTE(review): the restart looks "user_address" up in the
 * top-level "map" again while the lock taken on "curr_map" is
 * still held; this presumably relies on "curr_map" being "map"
 * at this point -- confirm.
 */
13735 user_address = curr_address;
13736 goto recurse_again;
13737 }
13738
91447636 13739 *nesting_depth = curr_depth;
6d2010ae
A
13740 *size = curr_max_above + curr_max_below;
13741 *address = user_address + curr_skip - curr_max_below;
91447636 13742
b0d623f7
A
13743// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13744// so probably should be a real 32b ID vs. ptr.
13745// Current users just check for equality
0a7de745 13746#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 13747
2d21ac55 13748 if (look_for_pages) {
3e170ce0 13749 submap_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 13750 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
13751 submap_info->protection = curr_entry->protection;
13752 submap_info->inheritance = curr_entry->inheritance;
13753 submap_info->max_protection = curr_entry->max_protection;
13754 submap_info->behavior = curr_entry->behavior;
13755 submap_info->user_wired_count = curr_entry->user_wired_count;
13756 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 13757 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 13758 } else {
3e170ce0 13759 short_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 13760 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
13761 short_info->protection = curr_entry->protection;
13762 short_info->inheritance = curr_entry->inheritance;
13763 short_info->max_protection = curr_entry->max_protection;
13764 short_info->behavior = curr_entry->behavior;
13765 short_info->user_wired_count = curr_entry->user_wired_count;
13766 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 13767 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 13768 }
91447636
A
13769
13770 extended.pages_resident = 0;
13771 extended.pages_swapped_out = 0;
13772 extended.pages_shared_now_private = 0;
13773 extended.pages_dirtied = 0;
39236c6e 13774 extended.pages_reusable = 0;
91447636
A
13775 extended.external_pager = 0;
13776 extended.shadow_depth = 0;
3e170ce0
A
13777 extended.share_mode = SM_EMPTY;
13778 extended.ref_count = 0;
91447636
A
13779
13780 if (not_in_kdp) {
13781 if (!curr_entry->is_sub_map) {
6d2010ae
A
13782 vm_map_offset_t range_start, range_end;
13783 range_start = MAX((curr_address - curr_max_below),
0a7de745 13784 curr_entry->vme_start);
6d2010ae 13785 range_end = MIN((curr_address + curr_max_above),
0a7de745 13786 curr_entry->vme_end);
91447636 13787 vm_map_region_walk(curr_map,
0a7de745
A
13788 range_start,
13789 curr_entry,
13790 (VME_OFFSET(curr_entry) +
13791 (range_start -
13792 curr_entry->vme_start)),
13793 range_end - range_start,
13794 &extended,
13795 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
13796 if (extended.external_pager &&
13797 extended.ref_count == 2 &&
13798 extended.share_mode == SM_SHARED) {
2d21ac55 13799 extended.share_mode = SM_PRIVATE;
91447636 13800 }
91447636
A
13801 } else {
13802 if (curr_entry->use_pmap) {
2d21ac55 13803 extended.share_mode = SM_TRUESHARED;
91447636 13804 } else {
2d21ac55 13805 extended.share_mode = SM_PRIVATE;
91447636 13806 }
cb323159 13807 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
91447636
A
13808 }
13809 }
13810
2d21ac55
A
13811 if (look_for_pages) {
13812 submap_info->pages_resident = extended.pages_resident;
13813 submap_info->pages_swapped_out = extended.pages_swapped_out;
13814 submap_info->pages_shared_now_private =
0a7de745 13815 extended.pages_shared_now_private;
2d21ac55
A
13816 submap_info->pages_dirtied = extended.pages_dirtied;
13817 submap_info->external_pager = extended.external_pager;
13818 submap_info->shadow_depth = extended.shadow_depth;
13819 submap_info->share_mode = extended.share_mode;
13820 submap_info->ref_count = extended.ref_count;
5ba3f43e 13821
39236c6e
A
13822 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13823 submap_info->pages_reusable = extended.pages_reusable;
13824 }
cb323159
A
13825 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
/*
 * NOTE(review): the (vm_object_id_t) cast applies to the
 * "!= NULL" comparison only, not to the whole conditional
 * expression.
 */
13826 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13827 }
2d21ac55
A
13828 } else {
13829 short_info->external_pager = extended.external_pager;
13830 short_info->shadow_depth = extended.shadow_depth;
13831 short_info->share_mode = extended.share_mode;
13832 short_info->ref_count = extended.ref_count;
13833 }
91447636
A
13834
13835 if (not_in_kdp) {
13836 vm_map_unlock_read(curr_map);
13837 }
13838
13839 return KERN_SUCCESS;
13840}
13841
1c79356b
A
13842/*
13843 * vm_region:
13844 *
13845 * User call to obtain information about a region in
13846 * a task's address map. Currently, only one flavor is
13847 * supported.
13848 *
13849 * XXX The reserved and behavior fields cannot be filled
13850 * in until the vm merge from the IK is completed, and
13851 * vm_reserve is implemented.
1c79356b
A
13852 */
13853
13854kern_return_t
91447636 13855vm_map_region(
0a7de745
A
13856 vm_map_t map,
13857 vm_map_offset_t *address, /* IN/OUT */
13858 vm_map_size_t *size, /* OUT */
13859 vm_region_flavor_t flavor, /* IN */
13860 vm_region_info_t info, /* OUT */
13861 mach_msg_type_number_t *count, /* IN/OUT */
13862 mach_port_t *object_name) /* OUT */
1c79356b 13863{
0a7de745
A
13864 vm_map_entry_t tmp_entry;
13865 vm_map_entry_t entry;
13866 vm_map_offset_t start;
1c79356b 13867
0a7de745
A
13868 if (map == VM_MAP_NULL) {
13869 return KERN_INVALID_ARGUMENT;
13870 }
1c79356b
A
13871
13872 switch (flavor) {
1c79356b 13873 case VM_REGION_BASIC_INFO:
2d21ac55 13874 /* legacy for old 32-bit objects info */
1c79356b 13875 {
0a7de745 13876 vm_region_basic_info_t basic;
91447636 13877
0a7de745
A
13878 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13879 return KERN_INVALID_ARGUMENT;
13880 }
1c79356b 13881
2d21ac55
A
13882 basic = (vm_region_basic_info_t) info;
13883 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 13884
2d21ac55 13885 vm_map_lock_read(map);
1c79356b 13886
2d21ac55
A
13887 start = *address;
13888 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13889 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13890 vm_map_unlock_read(map);
0a7de745 13891 return KERN_INVALID_ADDRESS;
2d21ac55
A
13892 }
13893 } else {
13894 entry = tmp_entry;
1c79356b 13895 }
1c79356b 13896
2d21ac55 13897 start = entry->vme_start;
1c79356b 13898
3e170ce0 13899 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
13900 basic->protection = entry->protection;
13901 basic->inheritance = entry->inheritance;
13902 basic->max_protection = entry->max_protection;
13903 basic->behavior = entry->behavior;
13904 basic->user_wired_count = entry->user_wired_count;
13905 basic->reserved = entry->is_sub_map;
13906 *address = start;
13907 *size = (entry->vme_end - start);
91447636 13908
0a7de745
A
13909 if (object_name) {
13910 *object_name = IP_NULL;
13911 }
2d21ac55
A
13912 if (entry->is_sub_map) {
13913 basic->shared = FALSE;
13914 } else {
13915 basic->shared = entry->is_shared;
13916 }
91447636 13917
2d21ac55 13918 vm_map_unlock_read(map);
0a7de745 13919 return KERN_SUCCESS;
91447636
A
13920 }
13921
13922 case VM_REGION_BASIC_INFO_64:
13923 {
0a7de745 13924 vm_region_basic_info_64_t basic;
91447636 13925
0a7de745
A
13926 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13927 return KERN_INVALID_ARGUMENT;
13928 }
2d21ac55
A
13929
13930 basic = (vm_region_basic_info_64_t) info;
13931 *count = VM_REGION_BASIC_INFO_COUNT_64;
13932
13933 vm_map_lock_read(map);
13934
13935 start = *address;
13936 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13937 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13938 vm_map_unlock_read(map);
0a7de745 13939 return KERN_INVALID_ADDRESS;
2d21ac55
A
13940 }
13941 } else {
13942 entry = tmp_entry;
13943 }
91447636 13944
2d21ac55 13945 start = entry->vme_start;
91447636 13946
3e170ce0 13947 basic->offset = VME_OFFSET(entry);
2d21ac55
A
13948 basic->protection = entry->protection;
13949 basic->inheritance = entry->inheritance;
13950 basic->max_protection = entry->max_protection;
13951 basic->behavior = entry->behavior;
13952 basic->user_wired_count = entry->user_wired_count;
13953 basic->reserved = entry->is_sub_map;
13954 *address = start;
13955 *size = (entry->vme_end - start);
91447636 13956
0a7de745
A
13957 if (object_name) {
13958 *object_name = IP_NULL;
13959 }
2d21ac55
A
13960 if (entry->is_sub_map) {
13961 basic->shared = FALSE;
13962 } else {
13963 basic->shared = entry->is_shared;
91447636 13964 }
2d21ac55
A
13965
13966 vm_map_unlock_read(map);
0a7de745 13967 return KERN_SUCCESS;
1c79356b
A
13968 }
13969 case VM_REGION_EXTENDED_INFO:
0a7de745
A
13970 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13971 return KERN_INVALID_ARGUMENT;
13972 }
13973 /*fallthru*/
39236c6e 13974 case VM_REGION_EXTENDED_INFO__legacy:
0a7de745 13975 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
39236c6e 13976 return KERN_INVALID_ARGUMENT;
0a7de745 13977 }
39236c6e 13978
0a7de745
A
13979 {
13980 vm_region_extended_info_t extended;
13981 mach_msg_type_number_t original_count;
1c79356b 13982
0a7de745 13983 extended = (vm_region_extended_info_t) info;
1c79356b 13984
0a7de745 13985 vm_map_lock_read(map);
1c79356b 13986
0a7de745
A
13987 start = *address;
13988 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13989 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13990 vm_map_unlock_read(map);
13991 return KERN_INVALID_ADDRESS;
13992 }
13993 } else {
13994 entry = tmp_entry;
13995 }
13996 start = entry->vme_start;
1c79356b 13997
0a7de745
A
13998 extended->protection = entry->protection;
13999 extended->user_tag = VME_ALIAS(entry);
14000 extended->pages_resident = 0;
14001 extended->pages_swapped_out = 0;
14002 extended->pages_shared_now_private = 0;
14003 extended->pages_dirtied = 0;
14004 extended->external_pager = 0;
14005 extended->shadow_depth = 0;
14006
14007 original_count = *count;
14008 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14009 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14010 } else {
14011 extended->pages_reusable = 0;
14012 *count = VM_REGION_EXTENDED_INFO_COUNT;
14013 }
39236c6e 14014
0a7de745 14015 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 14016
0a7de745
A
14017 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14018 extended->share_mode = SM_PRIVATE;
14019 }
1c79356b 14020
0a7de745
A
14021 if (object_name) {
14022 *object_name = IP_NULL;
14023 }
14024 *address = start;
14025 *size = (entry->vme_end - start);
1c79356b 14026
0a7de745
A
14027 vm_map_unlock_read(map);
14028 return KERN_SUCCESS;
14029 }
1c79356b 14030 case VM_REGION_TOP_INFO:
5ba3f43e 14031 {
0a7de745 14032 vm_region_top_info_t top;
1c79356b 14033
0a7de745
A
14034 if (*count < VM_REGION_TOP_INFO_COUNT) {
14035 return KERN_INVALID_ARGUMENT;
14036 }
1c79356b 14037
2d21ac55
A
14038 top = (vm_region_top_info_t) info;
14039 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 14040
2d21ac55 14041 vm_map_lock_read(map);
1c79356b 14042
2d21ac55
A
14043 start = *address;
14044 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14045 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14046 vm_map_unlock_read(map);
0a7de745 14047 return KERN_INVALID_ADDRESS;
2d21ac55
A
14048 }
14049 } else {
14050 entry = tmp_entry;
2d21ac55
A
14051 }
14052 start = entry->vme_start;
1c79356b 14053
2d21ac55
A
14054 top->private_pages_resident = 0;
14055 top->shared_pages_resident = 0;
1c79356b 14056
2d21ac55 14057 vm_map_region_top_walk(entry, top);
1c79356b 14058
0a7de745 14059 if (object_name) {
2d21ac55 14060 *object_name = IP_NULL;
0a7de745 14061 }
2d21ac55
A
14062 *address = start;
14063 *size = (entry->vme_end - start);
1c79356b 14064
2d21ac55 14065 vm_map_unlock_read(map);
0a7de745 14066 return KERN_SUCCESS;
1c79356b
A
14067 }
14068 default:
0a7de745 14069 return KERN_INVALID_ARGUMENT;
1c79356b
A
14070 }
14071}
14072
0a7de745
A
/*
 * Number of pages of "obj" to report as resident for a mapping that is
 * "entry_size" pages long: the object's wired pages if the whole object
 * is marked reusable, otherwise its resident pages minus the reusable
 * ones -- never more than "entry_size".
 * Note: "obj" is evaluated more than once.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    (!(obj)->all_reusable ?                                     \
	    (obj)->resident_page_count - (obj)->reusable_page_count :   \
	    (obj)->wired_page_count))
2d21ac55 14078
0c530ab8 14079void
91447636 14080vm_map_region_top_walk(
0a7de745 14081 vm_map_entry_t entry,
91447636 14082 vm_region_top_info_t top)
1c79356b 14083{
3e170ce0 14084 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
14085 top->share_mode = SM_EMPTY;
14086 top->ref_count = 0;
14087 top->obj_id = 0;
14088 return;
1c79356b 14089 }
2d21ac55 14090
91447636 14091 {
0a7de745
A
14092 struct vm_object *obj, *tmp_obj;
14093 int ref_count;
14094 uint32_t entry_size;
1c79356b 14095
b0d623f7 14096 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 14097
3e170ce0 14098 obj = VME_OBJECT(entry);
1c79356b 14099
2d21ac55
A
14100 vm_object_lock(obj);
14101
0a7de745 14102 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14103 ref_count--;
0a7de745 14104 }
2d21ac55 14105
b0d623f7 14106 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55 14107 if (obj->shadow) {
0a7de745 14108 if (ref_count == 1) {
b0d623f7 14109 top->private_pages_resident =
0a7de745
A
14110 OBJ_RESIDENT_COUNT(obj, entry_size);
14111 } else {
b0d623f7 14112 top->shared_pages_resident =
0a7de745
A
14113 OBJ_RESIDENT_COUNT(obj, entry_size);
14114 }
2d21ac55
A
14115 top->ref_count = ref_count;
14116 top->share_mode = SM_COW;
5ba3f43e 14117
2d21ac55
A
14118 while ((tmp_obj = obj->shadow)) {
14119 vm_object_lock(tmp_obj);
14120 vm_object_unlock(obj);
14121 obj = tmp_obj;
1c79356b 14122
0a7de745 14123 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14124 ref_count--;
0a7de745 14125 }
1c79356b 14126
b0d623f7
A
14127 assert(obj->reusable_page_count <= obj->resident_page_count);
14128 top->shared_pages_resident +=
0a7de745 14129 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14130 top->ref_count += ref_count - 1;
14131 }
1c79356b 14132 } else {
6d2010ae
A
14133 if (entry->superpage_size) {
14134 top->share_mode = SM_LARGE_PAGE;
14135 top->shared_pages_resident = 0;
14136 top->private_pages_resident = entry_size;
14137 } else if (entry->needs_copy) {
2d21ac55 14138 top->share_mode = SM_COW;
b0d623f7 14139 top->shared_pages_resident =
0a7de745 14140 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14141 } else {
14142 if (ref_count == 1 ||
cb323159 14143 (ref_count == 2 && obj->named)) {
2d21ac55 14144 top->share_mode = SM_PRIVATE;
0a7de745
A
14145 top->private_pages_resident =
14146 OBJ_RESIDENT_COUNT(obj,
14147 entry_size);
2d21ac55
A
14148 } else {
14149 top->share_mode = SM_SHARED;
b0d623f7 14150 top->shared_pages_resident =
0a7de745
A
14151 OBJ_RESIDENT_COUNT(obj,
14152 entry_size);
2d21ac55
A
14153 }
14154 }
14155 top->ref_count = ref_count;
1c79356b 14156 }
b0d623f7 14157 /* XXX K64: obj_id will be truncated */
39236c6e 14158 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 14159
2d21ac55 14160 vm_object_unlock(obj);
1c79356b 14161 }
91447636
A
14162}
14163
0c530ab8 14164void
91447636 14165vm_map_region_walk(
0a7de745
A
14166 vm_map_t map,
14167 vm_map_offset_t va,
14168 vm_map_entry_t entry,
14169 vm_object_offset_t offset,
14170 vm_object_size_t range,
14171 vm_region_extended_info_t extended,
14172 boolean_t look_for_pages,
39236c6e 14173 mach_msg_type_number_t count)
91447636 14174{
0a7de745 14175 struct vm_object *obj, *tmp_obj;
39037602
A
14176 vm_map_offset_t last_offset;
14177 int i;
14178 int ref_count;
0a7de745
A
14179 struct vm_object *shadow_object;
14180 int shadow_depth;
14181 boolean_t do_region_footprint;
a39ff7e2
A
14182
14183 do_region_footprint = task_self_region_footprint();
91447636 14184
3e170ce0 14185 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 14186 (entry->is_sub_map) ||
3e170ce0 14187 (VME_OBJECT(entry)->phys_contiguous &&
0a7de745 14188 !entry->superpage_size)) {
2d21ac55
A
14189 extended->share_mode = SM_EMPTY;
14190 extended->ref_count = 0;
14191 return;
1c79356b 14192 }
6d2010ae
A
14193
14194 if (entry->superpage_size) {
14195 extended->shadow_depth = 0;
14196 extended->share_mode = SM_LARGE_PAGE;
14197 extended->ref_count = 1;
14198 extended->external_pager = 0;
14199 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14200 extended->shadow_depth = 0;
14201 return;
14202 }
14203
39037602 14204 obj = VME_OBJECT(entry);
2d21ac55 14205
39037602 14206 vm_object_lock(obj);
2d21ac55 14207
0a7de745 14208 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14209 ref_count--;
0a7de745 14210 }
2d21ac55 14211
39037602
A
14212 if (look_for_pages) {
14213 for (last_offset = offset + range;
0a7de745
A
14214 offset < last_offset;
14215 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
a39ff7e2
A
14216 if (do_region_footprint) {
14217 int disp;
14218
14219 disp = 0;
d9a64523
A
14220 if (map->has_corpse_footprint) {
14221 /*
14222 * Query the page info data we saved
14223 * while forking the corpse.
14224 */
14225 vm_map_corpse_footprint_query_page_info(
14226 map,
14227 va,
14228 &disp);
14229 } else {
14230 /*
14231 * Query the pmap.
14232 */
14233 pmap_query_page_info(map->pmap,
0a7de745
A
14234 va,
14235 &disp);
d9a64523 14236 }
a39ff7e2 14237 if (disp & PMAP_QUERY_PAGE_PRESENT) {
d9a64523
A
14238 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14239 extended->pages_resident++;
14240 }
a39ff7e2
A
14241 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14242 extended->pages_reusable++;
14243 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
0a7de745 14244 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
a39ff7e2
A
14245 /* alternate accounting */
14246 } else {
14247 extended->pages_dirtied++;
14248 }
14249 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14250 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14251 /* alternate accounting */
14252 } else {
14253 extended->pages_swapped_out++;
14254 }
14255 }
14256 /* deal with alternate accounting */
d9a64523
A
14257 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14258 /* && not tagged as no-footprint? */
14259 VM_OBJECT_OWNER(obj) != NULL &&
14260 VM_OBJECT_OWNER(obj)->map == map) {
14261 if ((((va
0a7de745
A
14262 - entry->vme_start
14263 + VME_OFFSET(entry))
14264 / PAGE_SIZE) <
14265 (obj->resident_page_count +
14266 vm_compressor_pager_get_count(obj->pager)))) {
d9a64523
A
14267 /*
14268 * Non-volatile purgeable object owned
14269 * by this task: report the first
14270 * "#resident + #compressed" pages as
14271 * "resident" (to show that they
14272 * contribute to the footprint) but not
14273 * "dirty" (to avoid double-counting
14274 * with the fake "non-volatile" region
14275 * we'll report at the end of the
14276 * address space to account for all
14277 * (mapped or not) non-volatile memory
14278 * owned by this task.
14279 */
14280 extended->pages_resident++;
14281 }
14282 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
0a7de745
A
14283 obj->purgable == VM_PURGABLE_EMPTY) &&
14284 /* && not tagged as no-footprint? */
14285 VM_OBJECT_OWNER(obj) != NULL &&
14286 VM_OBJECT_OWNER(obj)->map == map) {
d9a64523 14287 if ((((va
0a7de745
A
14288 - entry->vme_start
14289 + VME_OFFSET(entry))
14290 / PAGE_SIZE) <
14291 obj->wired_page_count)) {
d9a64523
A
14292 /*
14293 * Volatile|empty purgeable object owned
14294 * by this task: report the first
14295 * "#wired" pages as "resident" (to
14296 * show that they contribute to the
14297 * footprint) but not "dirty" (to avoid
14298 * double-counting with the fake
14299 * "non-volatile" region we'll report
14300 * at the end of the address space to
14301 * account for all (mapped or not)
14302 * non-volatile memory owned by this
14303 * task.
14304 */
14305 extended->pages_resident++;
14306 }
14307 } else if (obj->purgable != VM_PURGABLE_DENY) {
a39ff7e2
A
14308 /*
14309 * Pages from purgeable objects
0a7de745 14310 * will be reported as dirty
a39ff7e2
A
14311 * appropriately in an extra
14312 * fake memory region at the end of
14313 * the address space.
14314 */
39037602 14315 } else if (entry->iokit_acct) {
a39ff7e2
A
14316 /*
14317 * IOKit mappings are considered
14318 * as fully dirty for footprint's
14319 * sake.
14320 */
39037602 14321 extended->pages_dirtied++;
2d21ac55 14322 }
39037602 14323 continue;
2d21ac55 14324 }
a39ff7e2 14325
39037602 14326 vm_map_region_look_for_page(map, va, obj,
0a7de745
A
14327 offset, ref_count,
14328 0, extended, count);
2d21ac55 14329 }
a39ff7e2
A
14330
14331 if (do_region_footprint) {
39037602
A
14332 goto collect_object_info;
14333 }
39037602 14334 } else {
0a7de745 14335collect_object_info:
39037602
A
14336 shadow_object = obj->shadow;
14337 shadow_depth = 0;
2d21ac55 14338
cb323159 14339 if (!(obj->internal)) {
39037602 14340 extended->external_pager = 1;
0a7de745 14341 }
39037602
A
14342
14343 if (shadow_object != VM_OBJECT_NULL) {
14344 vm_object_lock(shadow_object);
14345 for (;
0a7de745
A
14346 shadow_object != VM_OBJECT_NULL;
14347 shadow_depth++) {
14348 vm_object_t next_shadow;
39037602 14349
cb323159 14350 if (!(shadow_object->internal)) {
39037602 14351 extended->external_pager = 1;
0a7de745 14352 }
39037602
A
14353
14354 next_shadow = shadow_object->shadow;
14355 if (next_shadow) {
14356 vm_object_lock(next_shadow);
14357 }
14358 vm_object_unlock(shadow_object);
14359 shadow_object = next_shadow;
2d21ac55 14360 }
91447636 14361 }
39037602
A
14362 extended->shadow_depth = shadow_depth;
14363 }
1c79356b 14364
0a7de745 14365 if (extended->shadow_depth || entry->needs_copy) {
39037602 14366 extended->share_mode = SM_COW;
0a7de745
A
14367 } else {
14368 if (ref_count == 1) {
39037602 14369 extended->share_mode = SM_PRIVATE;
0a7de745
A
14370 } else {
14371 if (obj->true_share) {
39037602 14372 extended->share_mode = SM_TRUESHARED;
0a7de745 14373 } else {
39037602 14374 extended->share_mode = SM_SHARED;
0a7de745 14375 }
2d21ac55 14376 }
39037602
A
14377 }
14378 extended->ref_count = ref_count - extended->shadow_depth;
5ba3f43e 14379
39037602 14380 for (i = 0; i < extended->shadow_depth; i++) {
0a7de745 14381 if ((tmp_obj = obj->shadow) == 0) {
39037602 14382 break;
0a7de745 14383 }
39037602 14384 vm_object_lock(tmp_obj);
2d21ac55 14385 vm_object_unlock(obj);
1c79356b 14386
0a7de745 14387 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
39037602 14388 ref_count--;
0a7de745 14389 }
39037602
A
14390
14391 extended->ref_count += ref_count;
14392 obj = tmp_obj;
14393 }
14394 vm_object_unlock(obj);
91447636 14395
39037602 14396 if (extended->share_mode == SM_SHARED) {
0a7de745
A
14397 vm_map_entry_t cur;
14398 vm_map_entry_t last;
39037602 14399 int my_refs;
91447636 14400
39037602
A
14401 obj = VME_OBJECT(entry);
14402 last = vm_map_to_entry(map);
14403 my_refs = 0;
91447636 14404
0a7de745 14405 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14406 ref_count--;
0a7de745
A
14407 }
14408 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
39037602 14409 my_refs += vm_map_region_count_obj_refs(cur, obj);
0a7de745 14410 }
39037602 14411
0a7de745 14412 if (my_refs == ref_count) {
39037602 14413 extended->share_mode = SM_PRIVATE_ALIASED;
0a7de745 14414 } else if (my_refs > 1) {
39037602 14415 extended->share_mode = SM_SHARED_ALIASED;
0a7de745 14416 }
91447636 14417 }
1c79356b
A
14418}
14419
1c79356b 14420
91447636
A
14421/* object is locked on entry and locked on return */
14422
14423
14424static void
14425vm_map_region_look_for_page(
0a7de745
A
14426 __unused vm_map_t map,
14427 __unused vm_map_offset_t va,
14428 vm_object_t object,
14429 vm_object_offset_t offset,
14430 int max_refcnt,
14431 int depth,
14432 vm_region_extended_info_t extended,
39236c6e 14433 mach_msg_type_number_t count)
1c79356b 14434{
0a7de745
A
14435 vm_page_t p;
14436 vm_object_t shadow;
14437 int ref_count;
14438 vm_object_t caller_object;
39037602 14439
91447636
A
14440 shadow = object->shadow;
14441 caller_object = object;
1c79356b 14442
5ba3f43e 14443
91447636 14444 while (TRUE) {
cb323159 14445 if (!(object->internal)) {
2d21ac55 14446 extended->external_pager = 1;
0a7de745 14447 }
1c79356b 14448
91447636 14449 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
0a7de745
A
14450 if (shadow && (max_refcnt == 1)) {
14451 extended->pages_shared_now_private++;
14452 }
1c79356b 14453
d9a64523 14454 if (!p->vmp_fictitious &&
0a7de745
A
14455 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14456 extended->pages_dirtied++;
14457 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
d9a64523 14458 if (p->vmp_reusable || object->all_reusable) {
39236c6e
A
14459 extended->pages_reusable++;
14460 }
14461 }
1c79356b 14462
39236c6e 14463 extended->pages_resident++;
91447636 14464
0a7de745 14465 if (object != caller_object) {
2d21ac55 14466 vm_object_unlock(object);
0a7de745 14467 }
91447636
A
14468
14469 return;
1c79356b 14470 }
39236c6e
A
14471 if (object->internal &&
14472 object->alive &&
14473 !object->terminating &&
14474 object->pager_ready) {
39037602
A
14475 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14476 == VM_EXTERNAL_STATE_EXISTS) {
14477 /* the pager has that page */
14478 extended->pages_swapped_out++;
0a7de745 14479 if (object != caller_object) {
39037602 14480 vm_object_unlock(object);
0a7de745 14481 }
39037602 14482 return;
2d21ac55 14483 }
1c79356b 14484 }
2d21ac55 14485
91447636 14486 if (shadow) {
2d21ac55 14487 vm_object_lock(shadow);
1c79356b 14488
0a7de745
A
14489 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14490 ref_count--;
14491 }
1c79356b 14492
0a7de745
A
14493 if (++depth > extended->shadow_depth) {
14494 extended->shadow_depth = depth;
14495 }
1c79356b 14496
0a7de745
A
14497 if (ref_count > max_refcnt) {
14498 max_refcnt = ref_count;
14499 }
5ba3f43e 14500
0a7de745 14501 if (object != caller_object) {
2d21ac55 14502 vm_object_unlock(object);
0a7de745 14503 }
91447636 14504
6d2010ae 14505 offset = offset + object->vo_shadow_offset;
91447636
A
14506 object = shadow;
14507 shadow = object->shadow;
14508 continue;
1c79356b 14509 }
0a7de745 14510 if (object != caller_object) {
2d21ac55 14511 vm_object_unlock(object);
0a7de745 14512 }
91447636
A
14513 break;
14514 }
14515}
1c79356b 14516
91447636
A
14517static int
14518vm_map_region_count_obj_refs(
0a7de745 14519 vm_map_entry_t entry,
91447636
A
14520 vm_object_t object)
14521{
0a7de745 14522 int ref_count;
39037602
A
14523 vm_object_t chk_obj;
14524 vm_object_t tmp_obj;
1c79356b 14525
0a7de745
A
14526 if (VME_OBJECT(entry) == 0) {
14527 return 0;
14528 }
1c79356b 14529
0a7de745
A
14530 if (entry->is_sub_map) {
14531 return 0;
14532 } else {
2d21ac55 14533 ref_count = 0;
1c79356b 14534
3e170ce0 14535 chk_obj = VME_OBJECT(entry);
2d21ac55 14536 vm_object_lock(chk_obj);
1c79356b 14537
2d21ac55 14538 while (chk_obj) {
0a7de745 14539 if (chk_obj == object) {
2d21ac55 14540 ref_count++;
0a7de745 14541 }
2d21ac55 14542 tmp_obj = chk_obj->shadow;
0a7de745 14543 if (tmp_obj) {
2d21ac55 14544 vm_object_lock(tmp_obj);
0a7de745 14545 }
2d21ac55 14546 vm_object_unlock(chk_obj);
1c79356b 14547
2d21ac55
A
14548 chk_obj = tmp_obj;
14549 }
1c79356b 14550 }
0a7de745 14551 return ref_count;
1c79356b
A
14552}
14553
14554
14555/*
91447636
A
14556 * Routine: vm_map_simplify
14557 *
14558 * Description:
14559 * Attempt to simplify the map representation in
14560 * the vicinity of the given starting address.
14561 * Note:
14562 * This routine is intended primarily to keep the
14563 * kernel maps more compact -- they generally don't
14564 * benefit from the "expand a map entry" technology
14565 * at allocation time because the adjacent entry
14566 * is often wired down.
1c79356b 14567 */
91447636
A
14568void
14569vm_map_simplify_entry(
0a7de745
A
14570 vm_map_t map,
14571 vm_map_entry_t this_entry)
1c79356b 14572{
0a7de745 14573 vm_map_entry_t prev_entry;
1c79356b 14574
91447636 14575 counter(c_vm_map_simplify_entry_called++);
1c79356b 14576
91447636 14577 prev_entry = this_entry->vme_prev;
1c79356b 14578
91447636 14579 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 14580 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 14581
91447636 14582 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 14583
2d21ac55 14584 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
14585 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14586 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
0a7de745
A
14587 prev_entry->vme_start))
14588 == VME_OFFSET(this_entry)) &&
1c79356b 14589
fe8ab488
A
14590 (prev_entry->behavior == this_entry->behavior) &&
14591 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
14592 (prev_entry->protection == this_entry->protection) &&
14593 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
14594 (prev_entry->inheritance == this_entry->inheritance) &&
14595 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 14596 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 14597 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
14598 (prev_entry->permanent == this_entry->permanent) &&
14599 (prev_entry->map_aligned == this_entry->map_aligned) &&
14600 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14601 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
d9a64523 14602 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
fe8ab488
A
14603 /* from_reserved_zone: OK if that field doesn't match */
14604 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0 14605 (prev_entry->vme_resilient_codesign ==
0a7de745 14606 this_entry->vme_resilient_codesign) &&
3e170ce0 14607 (prev_entry->vme_resilient_media ==
0a7de745 14608 this_entry->vme_resilient_media) &&
cb323159 14609 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
fe8ab488 14610
91447636
A
14611 (prev_entry->wired_count == this_entry->wired_count) &&
14612 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 14613
39037602 14614 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
14615 (prev_entry->in_transition == FALSE) &&
14616 (this_entry->in_transition == FALSE) &&
14617 (prev_entry->needs_wakeup == FALSE) &&
14618 (this_entry->needs_wakeup == FALSE) &&
14619 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
14620 (this_entry->is_shared == FALSE) &&
14621 (prev_entry->superpage_size == FALSE) &&
14622 (this_entry->superpage_size == FALSE)
0a7de745 14623 ) {
316670eb 14624 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 14625 assert(prev_entry->vme_start < this_entry->vme_end);
0a7de745 14626 if (prev_entry->map_aligned) {
39236c6e 14627 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
0a7de745
A
14628 VM_MAP_PAGE_MASK(map)));
14629 }
91447636 14630 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
14631 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14632
14633 if (map->holelistenabled) {
14634 vm_map_store_update_first_free(map, this_entry, TRUE);
14635 }
14636
2d21ac55 14637 if (prev_entry->is_sub_map) {
3e170ce0 14638 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 14639 } else {
3e170ce0 14640 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 14641 }
91447636 14642 vm_map_entry_dispose(map, prev_entry);
0c530ab8 14643 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 14644 counter(c_vm_map_simplified++);
1c79356b 14645 }
91447636 14646}
1c79356b 14647
91447636
A
14648void
14649vm_map_simplify(
0a7de745
A
14650 vm_map_t map,
14651 vm_map_offset_t start)
91447636 14652{
0a7de745 14653 vm_map_entry_t this_entry;
1c79356b 14654
91447636
A
14655 vm_map_lock(map);
14656 if (vm_map_lookup_entry(map, start, &this_entry)) {
14657 vm_map_simplify_entry(map, this_entry);
14658 vm_map_simplify_entry(map, this_entry->vme_next);
14659 }
14660 counter(c_vm_map_simplify_called++);
14661 vm_map_unlock(map);
14662}
1c79356b 14663
91447636
A
14664static void
14665vm_map_simplify_range(
0a7de745
A
14666 vm_map_t map,
14667 vm_map_offset_t start,
14668 vm_map_offset_t end)
91447636 14669{
0a7de745 14670 vm_map_entry_t entry;
1c79356b 14671
91447636
A
14672 /*
14673 * The map should be locked (for "write") by the caller.
14674 */
1c79356b 14675
91447636
A
14676 if (start >= end) {
14677 /* invalid address range */
14678 return;
14679 }
1c79356b 14680
39236c6e 14681 start = vm_map_trunc_page(start,
0a7de745 14682 VM_MAP_PAGE_MASK(map));
39236c6e 14683 end = vm_map_round_page(end,
0a7de745 14684 VM_MAP_PAGE_MASK(map));
2d21ac55 14685
91447636
A
14686 if (!vm_map_lookup_entry(map, start, &entry)) {
14687 /* "start" is not mapped and "entry" ends before "start" */
14688 if (entry == vm_map_to_entry(map)) {
14689 /* start with first entry in the map */
14690 entry = vm_map_first_entry(map);
14691 } else {
14692 /* start with next entry */
14693 entry = entry->vme_next;
14694 }
14695 }
5ba3f43e 14696
91447636 14697 while (entry != vm_map_to_entry(map) &&
0a7de745 14698 entry->vme_start <= end) {
91447636
A
14699 /* try and coalesce "entry" with its previous entry */
14700 vm_map_simplify_entry(map, entry);
14701 entry = entry->vme_next;
14702 }
14703}
1c79356b 14704
1c79356b 14705
91447636
A
14706/*
14707 * Routine: vm_map_machine_attribute
14708 * Purpose:
14709 * Provide machine-specific attributes to mappings,
14710 * such as cachability etc. for machines that provide
14711 * them. NUMA architectures and machines with big/strange
14712 * caches will use this.
14713 * Note:
14714 * Responsibilities for locking and checking are handled here,
14715 * everything else in the pmap module. If any non-volatile
14716 * information must be kept, the pmap module should handle
14717 * it itself. [This assumes that attributes do not
14718 * need to be inherited, which seems ok to me]
14719 */
14720kern_return_t
14721vm_map_machine_attribute(
0a7de745
A
14722 vm_map_t map,
14723 vm_map_offset_t start,
14724 vm_map_offset_t end,
14725 vm_machine_attribute_t attribute,
14726 vm_machine_attribute_val_t* value) /* IN/OUT */
91447636 14727{
0a7de745 14728 kern_return_t ret;
91447636
A
14729 vm_map_size_t sync_size;
14730 vm_map_entry_t entry;
5ba3f43e 14731
0a7de745 14732 if (start < vm_map_min(map) || end > vm_map_max(map)) {
91447636 14733 return KERN_INVALID_ADDRESS;
0a7de745 14734 }
1c79356b 14735
91447636
A
14736 /* Figure how much memory we need to flush (in page increments) */
14737 sync_size = end - start;
1c79356b 14738
91447636 14739 vm_map_lock(map);
5ba3f43e
A
14740
14741 if (attribute != MATTR_CACHE) {
91447636
A
14742 /* If we don't have to find physical addresses, we */
14743 /* don't have to do an explicit traversal here. */
0a7de745
A
14744 ret = pmap_attribute(map->pmap, start, end - start,
14745 attribute, value);
91447636
A
14746 vm_map_unlock(map);
14747 return ret;
14748 }
1c79356b 14749
0a7de745 14750 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 14751
0a7de745 14752 while (sync_size) {
91447636 14753 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745
A
14754 vm_map_size_t sub_size;
14755 if ((entry->vme_end - start) > sync_size) {
91447636
A
14756 sub_size = sync_size;
14757 sync_size = 0;
14758 } else {
14759 sub_size = entry->vme_end - start;
2d21ac55 14760 sync_size -= sub_size;
91447636 14761 }
0a7de745 14762 if (entry->is_sub_map) {
91447636
A
14763 vm_map_offset_t sub_start;
14764 vm_map_offset_t sub_end;
1c79356b 14765
5ba3f43e 14766 sub_start = (start - entry->vme_start)
0a7de745 14767 + VME_OFFSET(entry);
91447636
A
14768 sub_end = sub_start + sub_size;
14769 vm_map_machine_attribute(
5ba3f43e 14770 VME_SUBMAP(entry),
91447636
A
14771 sub_start,
14772 sub_end,
14773 attribute, value);
14774 } else {
3e170ce0 14775 if (VME_OBJECT(entry)) {
0a7de745
A
14776 vm_page_t m;
14777 vm_object_t object;
14778 vm_object_t base_object;
14779 vm_object_t last_object;
14780 vm_object_offset_t offset;
14781 vm_object_offset_t base_offset;
14782 vm_map_size_t range;
91447636
A
14783 range = sub_size;
14784 offset = (start - entry->vme_start)
0a7de745 14785 + VME_OFFSET(entry);
91447636 14786 base_offset = offset;
3e170ce0 14787 object = VME_OBJECT(entry);
91447636
A
14788 base_object = object;
14789 last_object = NULL;
1c79356b 14790
91447636 14791 vm_object_lock(object);
1c79356b 14792
91447636
A
14793 while (range) {
14794 m = vm_page_lookup(
14795 object, offset);
1c79356b 14796
d9a64523 14797 if (m && !m->vmp_fictitious) {
0a7de745
A
14798 ret =
14799 pmap_attribute_cache_sync(
14800 VM_PAGE_GET_PHYS_PAGE(m),
14801 PAGE_SIZE,
14802 attribute, value);
91447636 14803 } else if (object->shadow) {
0a7de745 14804 offset = offset + object->vo_shadow_offset;
91447636
A
14805 last_object = object;
14806 object = object->shadow;
14807 vm_object_lock(last_object->shadow);
14808 vm_object_unlock(last_object);
14809 continue;
14810 }
14811 range -= PAGE_SIZE;
1c79356b 14812
91447636 14813 if (base_object != object) {
0a7de745 14814 vm_object_unlock(object);
91447636
A
14815 vm_object_lock(base_object);
14816 object = base_object;
14817 }
14818 /* Bump to the next page */
14819 base_offset += PAGE_SIZE;
14820 offset = base_offset;
14821 }
14822 vm_object_unlock(object);
14823 }
14824 }
14825 start += sub_size;
14826 } else {
14827 vm_map_unlock(map);
14828 return KERN_FAILURE;
14829 }
1c79356b 14830 }
e5568f75 14831
91447636 14832 vm_map_unlock(map);
e5568f75 14833
91447636
A
14834 return ret;
14835}
e5568f75 14836
91447636
A
14837/*
14838 * vm_map_behavior_set:
14839 *
14840 * Sets the paging reference behavior of the specified address
14841 * range in the target map. Paging reference behavior affects
5ba3f43e 14842 * how pagein operations resulting from faults on the map will be
91447636
A
14843 * clustered.
14844 */
5ba3f43e 14845kern_return_t
91447636 14846vm_map_behavior_set(
0a7de745
A
14847 vm_map_t map,
14848 vm_map_offset_t start,
14849 vm_map_offset_t end,
14850 vm_behavior_t new_behavior)
91447636 14851{
0a7de745
A
14852 vm_map_entry_t entry;
14853 vm_map_entry_t temp_entry;
e5568f75 14854
6d2010ae
A
14855 if (start > end ||
14856 start < vm_map_min(map) ||
14857 end > vm_map_max(map)) {
14858 return KERN_NO_SPACE;
14859 }
14860
91447636 14861 switch (new_behavior) {
b0d623f7
A
14862 /*
14863 * This first block of behaviors all set a persistent state on the specified
14864 * memory range. All we have to do here is to record the desired behavior
14865 * in the vm_map_entry_t's.
14866 */
14867
91447636
A
14868 case VM_BEHAVIOR_DEFAULT:
14869 case VM_BEHAVIOR_RANDOM:
14870 case VM_BEHAVIOR_SEQUENTIAL:
14871 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
14872 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14873 vm_map_lock(map);
5ba3f43e 14874
b0d623f7
A
14875 /*
14876 * The entire address range must be valid for the map.
0a7de745 14877 * Note that vm_map_range_check() does a
b0d623f7
A
14878 * vm_map_lookup_entry() internally and returns the
14879 * entry containing the start of the address range if
14880 * the entire range is valid.
14881 */
14882 if (vm_map_range_check(map, start, end, &temp_entry)) {
14883 entry = temp_entry;
14884 vm_map_clip_start(map, entry, start);
0a7de745 14885 } else {
b0d623f7 14886 vm_map_unlock(map);
0a7de745 14887 return KERN_INVALID_ADDRESS;
b0d623f7 14888 }
5ba3f43e 14889
b0d623f7
A
14890 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14891 vm_map_clip_end(map, entry, end);
fe8ab488
A
14892 if (entry->is_sub_map) {
14893 assert(!entry->use_pmap);
14894 }
5ba3f43e 14895
0a7de745 14896 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
b0d623f7
A
14897 entry->zero_wired_pages = TRUE;
14898 } else {
14899 entry->behavior = new_behavior;
14900 }
14901 entry = entry->vme_next;
14902 }
5ba3f43e 14903
b0d623f7 14904 vm_map_unlock(map);
91447636 14905 break;
b0d623f7
A
14906
14907 /*
14908 * The rest of these are different from the above in that they cause
5ba3f43e 14909 * an immediate action to take place as opposed to setting a behavior that
b0d623f7
A
14910 * affects future actions.
14911 */
14912
91447636 14913 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
14914 return vm_map_willneed(map, start, end);
14915
91447636 14916 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
14917 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14918
14919 case VM_BEHAVIOR_FREE:
14920 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14921
14922 case VM_BEHAVIOR_REUSABLE:
14923 return vm_map_reusable_pages(map, start, end);
14924
14925 case VM_BEHAVIOR_REUSE:
14926 return vm_map_reuse_pages(map, start, end);
14927
14928 case VM_BEHAVIOR_CAN_REUSE:
14929 return vm_map_can_reuse(map, start, end);
14930
3e170ce0
A
14931#if MACH_ASSERT
14932 case VM_BEHAVIOR_PAGEOUT:
14933 return vm_map_pageout(map, start, end);
14934#endif /* MACH_ASSERT */
14935
1c79356b 14936 default:
0a7de745 14937 return KERN_INVALID_ARGUMENT;
1c79356b 14938 }
1c79356b 14939
0a7de745 14940 return KERN_SUCCESS;
b0d623f7
A
14941}
14942
14943
14944/*
14945 * Internals for madvise(MADV_WILLNEED) system call.
14946 *
cb323159
A
14947 * The implementation is to do:-
14948 * a) read-ahead if the mapping corresponds to a mapped regular file
14949 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
b0d623f7
A
14950 */
14951
14952
14953static kern_return_t
14954vm_map_willneed(
0a7de745
A
14955 vm_map_t map,
14956 vm_map_offset_t start,
14957 vm_map_offset_t end
14958 )
b0d623f7 14959{
0a7de745
A
14960 vm_map_entry_t entry;
14961 vm_object_t object;
14962 memory_object_t pager;
14963 struct vm_object_fault_info fault_info = {};
14964 kern_return_t kr;
14965 vm_object_size_t len;
14966 vm_object_offset_t offset;
14967
14968 fault_info.interruptible = THREAD_UNINT; /* ignored value */
b0d623f7 14969 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
0a7de745 14970 fault_info.stealth = TRUE;
b0d623f7
A
14971
14972 /*
14973 * The MADV_WILLNEED operation doesn't require any changes to the
14974 * vm_map_entry_t's, so the read lock is sufficient.
14975 */
14976
14977 vm_map_lock_read(map);
14978
14979 /*
14980 * The madvise semantics require that the address range be fully
14981 * allocated with no holes. Otherwise, we're required to return
14982 * an error.
14983 */
14984
0a7de745 14985 if (!vm_map_range_check(map, start, end, &entry)) {
6d2010ae
A
14986 vm_map_unlock_read(map);
14987 return KERN_INVALID_ADDRESS;
14988 }
b0d623f7 14989
6d2010ae
A
14990 /*
14991 * Examine each vm_map_entry_t in the range.
14992 */
0a7de745 14993 for (; entry != vm_map_to_entry(map) && start < end;) {
b0d623f7 14994 /*
6d2010ae
A
14995 * The first time through, the start address could be anywhere
14996 * within the vm_map_entry we found. So adjust the offset to
14997 * correspond. After that, the offset will always be zero to
14998 * correspond to the beginning of the current vm_map_entry.
b0d623f7 14999 */
3e170ce0 15000 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 15001
6d2010ae
A
15002 /*
15003 * Set the length so we don't go beyond the end of the
15004 * map_entry or beyond the end of the range we were given.
15005 * This range could span also multiple map entries all of which
15006 * map different files, so make sure we only do the right amount
15007 * of I/O for each object. Note that it's possible for there
15008 * to be multiple map entries all referring to the same object
15009 * but with different page permissions, but it's not worth
15010 * trying to optimize that case.
15011 */
15012 len = MIN(entry->vme_end - start, end - start);
b0d623f7 15013
6d2010ae
A
15014 if ((vm_size_t) len != len) {
15015 /* 32-bit overflow */
15016 len = (vm_size_t) (0 - PAGE_SIZE);
15017 }
15018 fault_info.cluster_size = (vm_size_t) len;
5ba3f43e 15019 fault_info.lo_offset = offset;
6d2010ae 15020 fault_info.hi_offset = offset + len;
3e170ce0 15021 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
15022 fault_info.pmap_options = 0;
15023 if (entry->iokit_acct ||
15024 (!entry->is_sub_map && !entry->use_pmap)) {
15025 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15026 }
b0d623f7 15027
6d2010ae 15028 /*
cb323159
A
15029 * If the entry is a submap OR there's no read permission
15030 * to this mapping, then just skip it.
6d2010ae 15031 */
cb323159 15032 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
6d2010ae
A
15033 entry = entry->vme_next;
15034 start = entry->vme_start;
15035 continue;
15036 }
b0d623f7 15037
cb323159 15038 object = VME_OBJECT(entry);
b0d623f7 15039
cb323159
A
15040 if (object == NULL ||
15041 (object && object->internal)) {
15042 /*
15043 * Memory range backed by anonymous memory.
15044 */
15045 vm_size_t region_size = 0, effective_page_size = 0;
15046 vm_map_offset_t addr = 0, effective_page_mask = 0;
b0d623f7 15047
cb323159
A
15048 region_size = len;
15049 addr = start;
b0d623f7 15050
cb323159
A
15051 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15052 effective_page_size = effective_page_mask + 1;
b0d623f7 15053
cb323159 15054 vm_map_unlock_read(map);
b0d623f7 15055
cb323159
A
15056 while (region_size) {
15057 vm_pre_fault(
15058 vm_map_trunc_page(addr, effective_page_mask),
15059 VM_PROT_READ | VM_PROT_WRITE);
15060
15061 region_size -= effective_page_size;
15062 addr += effective_page_size;
15063 }
15064 } else {
15065 /*
15066 * Find the file object backing this map entry. If there is
15067 * none, then we simply ignore the "will need" advice for this
15068 * entry and go on to the next one.
15069 */
15070 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15071 entry = entry->vme_next;
15072 start = entry->vme_start;
15073 continue;
15074 }
15075
15076 vm_object_paging_begin(object);
15077 pager = object->pager;
15078 vm_object_unlock(object);
15079
15080 /*
15081 * The data_request() could take a long time, so let's
15082 * release the map lock to avoid blocking other threads.
15083 */
15084 vm_map_unlock_read(map);
15085
15086 /*
15087 * Get the data from the object asynchronously.
15088 *
15089 * Note that memory_object_data_request() places limits on the
15090 * amount of I/O it will do. Regardless of the len we
15091 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15092 * silently truncates the len to that size. This isn't
15093 * necessarily bad since madvise shouldn't really be used to
15094 * page in unlimited amounts of data. Other Unix variants
15095 * limit the willneed case as well. If this turns out to be an
15096 * issue for developers, then we can always adjust the policy
15097 * here and still be backwards compatible since this is all
15098 * just "advice".
15099 */
15100 kr = memory_object_data_request(
15101 pager,
15102 offset + object->paging_offset,
15103 0, /* ignored */
15104 VM_PROT_READ,
15105 (memory_object_fault_info_t)&fault_info);
15106
15107 vm_object_lock(object);
15108 vm_object_paging_end(object);
15109 vm_object_unlock(object);
15110
15111 /*
15112 * If we couldn't do the I/O for some reason, just give up on
15113 * the madvise. We still return success to the user since
15114 * madvise isn't supposed to fail when the advice can't be
15115 * taken.
15116 */
15117
15118 if (kr != KERN_SUCCESS) {
15119 return KERN_SUCCESS;
15120 }
6d2010ae 15121 }
b0d623f7 15122
6d2010ae
A
15123 start += len;
15124 if (start >= end) {
15125 /* done */
15126 return KERN_SUCCESS;
15127 }
b0d623f7 15128
6d2010ae
A
15129 /* look up next entry */
15130 vm_map_lock_read(map);
0a7de745 15131 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 15132 /*
6d2010ae 15133 * There's a new hole in the address range.
b0d623f7 15134 */
6d2010ae
A
15135 vm_map_unlock_read(map);
15136 return KERN_INVALID_ADDRESS;
b0d623f7 15137 }
6d2010ae 15138 }
b0d623f7
A
15139
15140 vm_map_unlock_read(map);
6d2010ae 15141 return KERN_SUCCESS;
b0d623f7
A
15142}
15143
15144static boolean_t
15145vm_map_entry_is_reusable(
15146 vm_map_entry_t entry)
15147{
3e170ce0
A
15148 /* Only user map entries */
15149
b0d623f7
A
15150 vm_object_t object;
15151
2dced7af
A
15152 if (entry->is_sub_map) {
15153 return FALSE;
15154 }
15155
3e170ce0 15156 switch (VME_ALIAS(entry)) {
39236c6e
A
15157 case VM_MEMORY_MALLOC:
15158 case VM_MEMORY_MALLOC_SMALL:
15159 case VM_MEMORY_MALLOC_LARGE:
15160 case VM_MEMORY_REALLOC:
15161 case VM_MEMORY_MALLOC_TINY:
15162 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15163 case VM_MEMORY_MALLOC_LARGE_REUSED:
15164 /*
15165 * This is a malloc() memory region: check if it's still
15166 * in its original state and can be re-used for more
15167 * malloc() allocations.
15168 */
15169 break;
15170 default:
15171 /*
15172 * Not a malloc() memory region: let the caller decide if
15173 * it's re-usable.
15174 */
15175 return TRUE;
15176 }
15177
d9a64523 15178 if (/*entry->is_shared ||*/
0a7de745
A
15179 entry->is_sub_map ||
15180 entry->in_transition ||
15181 entry->protection != VM_PROT_DEFAULT ||
15182 entry->max_protection != VM_PROT_ALL ||
15183 entry->inheritance != VM_INHERIT_DEFAULT ||
15184 entry->no_cache ||
15185 entry->permanent ||
15186 entry->superpage_size != FALSE ||
15187 entry->zero_wired_pages ||
15188 entry->wired_count != 0 ||
15189 entry->user_wired_count != 0) {
b0d623f7 15190 return FALSE;
91447636 15191 }
b0d623f7 15192
3e170ce0 15193 object = VME_OBJECT(entry);
b0d623f7
A
15194 if (object == VM_OBJECT_NULL) {
15195 return TRUE;
15196 }
316670eb
A
15197 if (
15198#if 0
15199 /*
15200 * Let's proceed even if the VM object is potentially
15201 * shared.
15202 * We check for this later when processing the actual
15203 * VM pages, so the contents will be safe if shared.
5ba3f43e 15204 *
316670eb
A
15205 * But we can still mark this memory region as "reusable" to
15206 * acknowledge that the caller did let us know that the memory
15207 * could be re-used and should not be penalized for holding
15208 * on to it. This allows its "resident size" to not include
15209 * the reusable range.
15210 */
0a7de745 15211 object->ref_count == 1 &&
316670eb 15212#endif
0a7de745
A
15213 object->wired_page_count == 0 &&
15214 object->copy == VM_OBJECT_NULL &&
15215 object->shadow == VM_OBJECT_NULL &&
15216 object->internal &&
15217 object->purgable == VM_PURGABLE_DENY &&
15218 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15219 !object->true_share &&
15220 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15221 !object->code_signed) {
b0d623f7 15222 return TRUE;
1c79356b 15223 }
b0d623f7 15224 return FALSE;
b0d623f7 15225}
1c79356b 15226
b0d623f7
A
15227static kern_return_t
15228vm_map_reuse_pages(
0a7de745
A
15229 vm_map_t map,
15230 vm_map_offset_t start,
15231 vm_map_offset_t end)
b0d623f7 15232{
0a7de745
A
15233 vm_map_entry_t entry;
15234 vm_object_t object;
15235 vm_object_offset_t start_offset, end_offset;
b0d623f7
A
15236
15237 /*
15238 * The MADV_REUSE operation doesn't require any changes to the
15239 * vm_map_entry_t's, so the read lock is sufficient.
15240 */
0b4e3aa0 15241
b0d623f7 15242 vm_map_lock_read(map);
0a7de745 15243 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 15244
b0d623f7
A
15245 /*
15246 * The madvise semantics require that the address range be fully
15247 * allocated with no holes. Otherwise, we're required to return
15248 * an error.
15249 */
15250
15251 if (!vm_map_range_check(map, start, end, &entry)) {
15252 vm_map_unlock_read(map);
15253 vm_page_stats_reusable.reuse_pages_failure++;
15254 return KERN_INVALID_ADDRESS;
1c79356b 15255 }
91447636 15256
b0d623f7
A
15257 /*
15258 * Examine each vm_map_entry_t in the range.
15259 */
15260 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15261 entry = entry->vme_next) {
b0d623f7
A
15262 /*
15263 * Sanity check on the VM map entry.
15264 */
0a7de745 15265 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15266 vm_map_unlock_read(map);
15267 vm_page_stats_reusable.reuse_pages_failure++;
15268 return KERN_INVALID_ADDRESS;
15269 }
15270
15271 /*
15272 * The first time through, the start address could be anywhere
15273 * within the vm_map_entry we found. So adjust the offset to
15274 * correspond.
15275 */
15276 if (entry->vme_start < start) {
15277 start_offset = start - entry->vme_start;
15278 } else {
15279 start_offset = 0;
15280 }
15281 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15282 start_offset += VME_OFFSET(entry);
15283 end_offset += VME_OFFSET(entry);
b0d623f7 15284
2dced7af 15285 assert(!entry->is_sub_map);
3e170ce0 15286 object = VME_OBJECT(entry);
b0d623f7
A
15287 if (object != VM_OBJECT_NULL) {
15288 vm_object_lock(object);
15289 vm_object_reuse_pages(object, start_offset, end_offset,
0a7de745 15290 TRUE);
b0d623f7
A
15291 vm_object_unlock(object);
15292 }
15293
3e170ce0 15294 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
15295 /*
15296 * XXX
15297 * We do not hold the VM map exclusively here.
15298 * The "alias" field is not that critical, so it's
15299 * safe to update it here, as long as it is the only
15300 * one that can be modified while holding the VM map
15301 * "shared".
15302 */
3e170ce0 15303 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
15304 }
15305 }
5ba3f43e 15306
b0d623f7
A
15307 vm_map_unlock_read(map);
15308 vm_page_stats_reusable.reuse_pages_success++;
15309 return KERN_SUCCESS;
1c79356b
A
15310}
15311
1c79356b 15312
b0d623f7
A
15313static kern_return_t
15314vm_map_reusable_pages(
0a7de745
A
15315 vm_map_t map,
15316 vm_map_offset_t start,
15317 vm_map_offset_t end)
b0d623f7 15318{
0a7de745
A
15319 vm_map_entry_t entry;
15320 vm_object_t object;
15321 vm_object_offset_t start_offset, end_offset;
15322 vm_map_offset_t pmap_offset;
b0d623f7
A
15323
15324 /*
15325 * The MADV_REUSABLE operation doesn't require any changes to the
15326 * vm_map_entry_t's, so the read lock is sufficient.
15327 */
15328
15329 vm_map_lock_read(map);
0a7de745 15330 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15331
15332 /*
15333 * The madvise semantics require that the address range be fully
15334 * allocated with no holes. Otherwise, we're required to return
15335 * an error.
15336 */
15337
15338 if (!vm_map_range_check(map, start, end, &entry)) {
15339 vm_map_unlock_read(map);
15340 vm_page_stats_reusable.reusable_pages_failure++;
15341 return KERN_INVALID_ADDRESS;
15342 }
15343
15344 /*
15345 * Examine each vm_map_entry_t in the range.
15346 */
15347 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15348 entry = entry->vme_next) {
b0d623f7
A
15349 int kill_pages = 0;
15350
15351 /*
15352 * Sanity check on the VM map entry.
15353 */
0a7de745 15354 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15355 vm_map_unlock_read(map);
15356 vm_page_stats_reusable.reusable_pages_failure++;
15357 return KERN_INVALID_ADDRESS;
15358 }
15359
0a7de745 15360 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
39037602
A
15361 /* not writable: can't discard contents */
15362 vm_map_unlock_read(map);
15363 vm_page_stats_reusable.reusable_nonwritable++;
15364 vm_page_stats_reusable.reusable_pages_failure++;
15365 return KERN_PROTECTION_FAILURE;
15366 }
15367
b0d623f7
A
15368 /*
15369 * The first time through, the start address could be anywhere
15370 * within the vm_map_entry we found. So adjust the offset to
15371 * correspond.
15372 */
15373 if (entry->vme_start < start) {
15374 start_offset = start - entry->vme_start;
3e170ce0 15375 pmap_offset = start;
b0d623f7
A
15376 } else {
15377 start_offset = 0;
3e170ce0 15378 pmap_offset = entry->vme_start;
b0d623f7
A
15379 }
15380 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15381 start_offset += VME_OFFSET(entry);
15382 end_offset += VME_OFFSET(entry);
b0d623f7 15383
2dced7af 15384 assert(!entry->is_sub_map);
3e170ce0 15385 object = VME_OBJECT(entry);
0a7de745 15386 if (object == VM_OBJECT_NULL) {
b0d623f7 15387 continue;
0a7de745 15388 }
b0d623f7
A
15389
15390
15391 vm_object_lock(object);
39037602 15392 if (((object->ref_count == 1) ||
0a7de745
A
15393 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15394 object->copy == VM_OBJECT_NULL)) &&
39037602 15395 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
15396 /*
15397 * "iokit_acct" entries are billed for their virtual size
15398 * (rather than for their resident pages only), so they
15399 * wouldn't benefit from making pages reusable, and it
15400 * would be hard to keep track of pages that are both
39037602
A
15401 * "iokit_acct" and "reusable" in the pmap stats and
15402 * ledgers.
fe8ab488
A
15403 */
15404 !(entry->iokit_acct ||
0a7de745 15405 (!entry->is_sub_map && !entry->use_pmap))) {
39037602
A
15406 if (object->ref_count != 1) {
15407 vm_page_stats_reusable.reusable_shared++;
15408 }
b0d623f7 15409 kill_pages = 1;
39037602 15410 } else {
b0d623f7 15411 kill_pages = -1;
39037602 15412 }
b0d623f7
A
15413 if (kill_pages != -1) {
15414 vm_object_deactivate_pages(object,
0a7de745
A
15415 start_offset,
15416 end_offset - start_offset,
15417 kill_pages,
15418 TRUE /*reusable_pages*/,
15419 map->pmap,
15420 pmap_offset);
b0d623f7
A
15421 } else {
15422 vm_page_stats_reusable.reusable_pages_shared++;
15423 }
15424 vm_object_unlock(object);
15425
3e170ce0
A
15426 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15427 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
15428 /*
15429 * XXX
15430 * We do not hold the VM map exclusively here.
15431 * The "alias" field is not that critical, so it's
15432 * safe to update it here, as long as it is the only
15433 * one that can be modified while holding the VM map
15434 * "shared".
15435 */
3e170ce0 15436 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
15437 }
15438 }
5ba3f43e 15439
b0d623f7
A
15440 vm_map_unlock_read(map);
15441 vm_page_stats_reusable.reusable_pages_success++;
15442 return KERN_SUCCESS;
15443}
15444
15445
15446static kern_return_t
15447vm_map_can_reuse(
0a7de745
A
15448 vm_map_t map,
15449 vm_map_offset_t start,
15450 vm_map_offset_t end)
b0d623f7 15451{
0a7de745 15452 vm_map_entry_t entry;
b0d623f7
A
15453
15454 /*
15455 * The MADV_REUSABLE operation doesn't require any changes to the
15456 * vm_map_entry_t's, so the read lock is sufficient.
15457 */
15458
15459 vm_map_lock_read(map);
0a7de745 15460 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15461
15462 /*
15463 * The madvise semantics require that the address range be fully
15464 * allocated with no holes. Otherwise, we're required to return
15465 * an error.
15466 */
15467
15468 if (!vm_map_range_check(map, start, end, &entry)) {
15469 vm_map_unlock_read(map);
15470 vm_page_stats_reusable.can_reuse_failure++;
15471 return KERN_INVALID_ADDRESS;
15472 }
15473
15474 /*
15475 * Examine each vm_map_entry_t in the range.
15476 */
15477 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15478 entry = entry->vme_next) {
b0d623f7
A
15479 /*
15480 * Sanity check on the VM map entry.
15481 */
0a7de745 15482 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15483 vm_map_unlock_read(map);
15484 vm_page_stats_reusable.can_reuse_failure++;
15485 return KERN_INVALID_ADDRESS;
15486 }
15487 }
5ba3f43e 15488
b0d623f7
A
15489 vm_map_unlock_read(map);
15490 vm_page_stats_reusable.can_reuse_success++;
15491 return KERN_SUCCESS;
15492}
15493
15494
3e170ce0
A
#if MACH_ASSERT
/*
 * vm_map_pageout:
 *
 * Back end of VM_BEHAVIOR_PAGEOUT (MACH_ASSERT kernels only).
 *
 * Calls vm_object_pageout() on the internal VM object backing each map
 * entry that overlaps [start, end).  For a submap entry, the range it
 * maps is validated in the submap and only the object of the first
 * submap entry covering that range is considered; nested submaps are
 * not descended into.
 *
 * Returns KERN_INVALID_ADDRESS when the range (or a submap's range) is
 * not fully valid; KERN_SUCCESS otherwise.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  cur;

	/*
	 * No vm_map_entry_t needs to be restructured for MADV_PAGEOUT,
	 * so a shared (read) lock on the map is enough.
	 */
	vm_map_lock_read(map);

	/*
	 * madvise requires the whole range to be allocated, without
	 * holes; report an error otherwise.
	 */
	if (!vm_map_range_check(map, start, end, &cur)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/* Walk the entries overlapping the range. */
	for (; cur != vm_map_to_entry(map) && cur->vme_start < end;
	    cur = cur->vme_next) {
		vm_object_t     target;
		vm_map_t        nested;
		vm_map_offset_t nested_start;
		vm_map_offset_t nested_end;
		vm_map_entry_t  nested_entry;

		if (!cur->is_sub_map) {
			/* Plain entry: page out its internal object, if any. */
			target = VME_OBJECT(cur);
			if (target != VM_OBJECT_NULL && target->internal) {
				vm_object_pageout(target);
			}
			continue;
		}

		/* Submap entry: work out the range it maps in the submap. */
		nested = VME_SUBMAP(cur);
		nested_start = VME_OFFSET(cur);
		nested_end = nested_start + (cur->vme_end -
		    cur->vme_start);

		vm_map_lock_read(nested);

		if (!vm_map_range_check(nested,
		    nested_start,
		    nested_end,
		    &nested_entry)) {
			vm_map_unlock_read(nested);
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * Only the entry returned by the range check is looked at,
		 * and it is ignored if it is itself a submap or is not
		 * backed by an internal object.
		 */
		target = VME_OBJECT(nested_entry);
		if (!nested_entry->is_sub_map &&
		    target != VM_OBJECT_NULL &&
		    target->internal) {
			vm_object_pageout(target);
		}

		vm_map_unlock_read(nested);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
15584
15585
1c79356b 15586/*
91447636
A
15587 * Routine: vm_map_entry_insert
15588 *
d9a64523 15589 * Description: This routine inserts a new vm_entry in a locked map.
1c79356b 15590 */
91447636
A
15591vm_map_entry_t
15592vm_map_entry_insert(
0a7de745
A
15593 vm_map_t map,
15594 vm_map_entry_t insp_entry,
15595 vm_map_offset_t start,
15596 vm_map_offset_t end,
15597 vm_object_t object,
15598 vm_object_offset_t offset,
15599 boolean_t needs_copy,
15600 boolean_t is_shared,
15601 boolean_t in_transition,
15602 vm_prot_t cur_protection,
15603 vm_prot_t max_protection,
15604 vm_behavior_t behavior,
15605 vm_inherit_t inheritance,
15606 unsigned wired_count,
15607 boolean_t no_cache,
15608 boolean_t permanent,
cb323159 15609 boolean_t no_copy_on_read,
0a7de745
A
15610 unsigned int superpage_size,
15611 boolean_t clear_map_aligned,
15612 boolean_t is_submap,
15613 boolean_t used_for_jit,
15614 int alias)
1c79356b 15615{
0a7de745 15616 vm_map_entry_t new_entry;
1c79356b 15617
91447636 15618 assert(insp_entry != (vm_map_entry_t)0);
d9a64523 15619 vm_map_lock_assert_exclusive(map);
1c79356b 15620
a39ff7e2 15621#if DEVELOPMENT || DEBUG
0a7de745 15622 vm_object_offset_t end_offset = 0;
a39ff7e2
A
15623 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15624#endif /* DEVELOPMENT || DEBUG */
15625
7ddcb079 15626 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 15627
39236c6e
A
15628 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15629 new_entry->map_aligned = TRUE;
15630 } else {
15631 new_entry->map_aligned = FALSE;
15632 }
15633 if (clear_map_aligned &&
0a7de745
A
15634 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15635 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
15636 new_entry->map_aligned = FALSE;
15637 }
15638
91447636
A
15639 new_entry->vme_start = start;
15640 new_entry->vme_end = end;
15641 assert(page_aligned(new_entry->vme_start));
15642 assert(page_aligned(new_entry->vme_end));
39236c6e 15643 if (new_entry->map_aligned) {
fe8ab488 15644 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 15645 VM_MAP_PAGE_MASK(map)));
39236c6e 15646 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 15647 VM_MAP_PAGE_MASK(map)));
39236c6e 15648 }
e2d2fc5c 15649 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 15650
3e170ce0
A
15651 VME_OBJECT_SET(new_entry, object);
15652 VME_OFFSET_SET(new_entry, offset);
91447636 15653 new_entry->is_shared = is_shared;
fe8ab488 15654 new_entry->is_sub_map = is_submap;
91447636
A
15655 new_entry->needs_copy = needs_copy;
15656 new_entry->in_transition = in_transition;
15657 new_entry->needs_wakeup = FALSE;
15658 new_entry->inheritance = inheritance;
15659 new_entry->protection = cur_protection;
15660 new_entry->max_protection = max_protection;
15661 new_entry->behavior = behavior;
15662 new_entry->wired_count = wired_count;
15663 new_entry->user_wired_count = 0;
fe8ab488
A
15664 if (is_submap) {
15665 /*
15666 * submap: "use_pmap" means "nested".
15667 * default: false.
15668 */
15669 new_entry->use_pmap = FALSE;
15670 } else {
15671 /*
15672 * object: "use_pmap" means "use pmap accounting" for footprint.
15673 * default: true.
15674 */
15675 new_entry->use_pmap = TRUE;
15676 }
5ba3f43e 15677 VME_ALIAS_SET(new_entry, alias);
b0d623f7 15678 new_entry->zero_wired_pages = FALSE;
2d21ac55 15679 new_entry->no_cache = no_cache;
b0d623f7 15680 new_entry->permanent = permanent;
0a7de745 15681 if (superpage_size) {
39236c6e 15682 new_entry->superpage_size = TRUE;
0a7de745 15683 } else {
39236c6e 15684 new_entry->superpage_size = FALSE;
0a7de745
A
15685 }
15686 if (used_for_jit) {
d9a64523
A
15687#if CONFIG_EMBEDDED
15688 if (!(map->jit_entry_exists))
15689#endif /* CONFIG_EMBEDDED */
15690 {
5ba3f43e
A
15691 new_entry->used_for_jit = TRUE;
15692 map->jit_entry_exists = TRUE;
5ba3f43e
A
15693 }
15694 } else {
15695 new_entry->used_for_jit = FALSE;
15696 }
d9a64523 15697 new_entry->pmap_cs_associated = FALSE;
fe8ab488 15698 new_entry->iokit_acct = FALSE;
3e170ce0
A
15699 new_entry->vme_resilient_codesign = FALSE;
15700 new_entry->vme_resilient_media = FALSE;
39037602 15701 new_entry->vme_atomic = FALSE;
cb323159 15702 new_entry->vme_no_copy_on_read = no_copy_on_read;
1c79356b 15703
91447636
A
15704 /*
15705 * Insert the new entry into the list.
15706 */
1c79356b 15707
d9a64523 15708 vm_map_store_entry_link(map, insp_entry, new_entry,
0a7de745 15709 VM_MAP_KERNEL_FLAGS_NONE);
91447636
A
15710 map->size += end - start;
15711
15712 /*
15713 * Update the free space hint and the lookup hint.
15714 */
15715
0c530ab8 15716 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 15717 return new_entry;
1c79356b
A
15718}
15719
15720/*
91447636
A
15721 * Routine: vm_map_remap_extract
15722 *
15723 * Description: This routine returns a vm_entry list from a map.
1c79356b 15724 */
91447636
A
15725static kern_return_t
15726vm_map_remap_extract(
0a7de745
A
15727 vm_map_t map,
15728 vm_map_offset_t addr,
15729 vm_map_size_t size,
15730 boolean_t copy,
15731 struct vm_map_header *map_header,
15732 vm_prot_t *cur_protection,
15733 vm_prot_t *max_protection,
91447636 15734 /* What, no behavior? */
0a7de745
A
15735 vm_inherit_t inheritance,
15736 boolean_t pageable,
15737 boolean_t same_map,
15738 vm_map_kernel_flags_t vmk_flags)
1c79356b 15739{
0a7de745
A
15740 kern_return_t result;
15741 vm_map_size_t mapped_size;
15742 vm_map_size_t tmp_size;
15743 vm_map_entry_t src_entry; /* result of last map lookup */
15744 vm_map_entry_t new_entry;
15745 vm_object_offset_t offset;
15746 vm_map_offset_t map_address;
15747 vm_map_offset_t src_start; /* start of entry to map */
15748 vm_map_offset_t src_end; /* end of region to be mapped */
15749 vm_object_t object;
15750 vm_map_version_t version;
15751 boolean_t src_needs_copy;
15752 boolean_t new_entry_needs_copy;
15753 vm_map_entry_t saved_src_entry;
15754 boolean_t src_entry_was_wired;
15755 vm_prot_t max_prot_for_prot_copy;
1c79356b 15756
91447636 15757 assert(map != VM_MAP_NULL);
39236c6e
A
15758 assert(size != 0);
15759 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636 15760 assert(inheritance == VM_INHERIT_NONE ||
0a7de745
A
15761 inheritance == VM_INHERIT_COPY ||
15762 inheritance == VM_INHERIT_SHARE);
1c79356b 15763
91447636
A
15764 /*
15765 * Compute start and end of region.
15766 */
39236c6e
A
15767 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15768 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15769
1c79356b 15770
91447636
A
15771 /*
15772 * Initialize map_header.
15773 */
d9a64523
A
15774 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15775 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
91447636
A
15776 map_header->nentries = 0;
15777 map_header->entries_pageable = pageable;
39236c6e 15778 map_header->page_shift = PAGE_SHIFT;
1c79356b 15779
6d2010ae
A
15780 vm_map_store_init( map_header );
15781
d9a64523
A
15782 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15783 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15784 } else {
15785 max_prot_for_prot_copy = VM_PROT_NONE;
15786 }
91447636
A
15787 *cur_protection = VM_PROT_ALL;
15788 *max_protection = VM_PROT_ALL;
1c79356b 15789
91447636
A
15790 map_address = 0;
15791 mapped_size = 0;
15792 result = KERN_SUCCESS;
1c79356b 15793
5ba3f43e 15794 /*
91447636
A
15795 * The specified source virtual space might correspond to
15796 * multiple map entries, need to loop on them.
15797 */
15798 vm_map_lock(map);
15799 while (mapped_size != size) {
0a7de745 15800 vm_map_size_t entry_size;
1c79356b 15801
91447636
A
15802 /*
15803 * Find the beginning of the region.
5ba3f43e 15804 */
0a7de745 15805 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
91447636
A
15806 result = KERN_INVALID_ADDRESS;
15807 break;
15808 }
1c79356b 15809
91447636
A
15810 if (src_start < src_entry->vme_start ||
15811 (mapped_size && src_start != src_entry->vme_start)) {
15812 result = KERN_INVALID_ADDRESS;
15813 break;
15814 }
1c79356b 15815
91447636 15816 tmp_size = size - mapped_size;
0a7de745 15817 if (src_end > src_entry->vme_end) {
91447636 15818 tmp_size -= (src_end - src_entry->vme_end);
0a7de745 15819 }
1c79356b 15820
91447636 15821 entry_size = (vm_map_size_t)(src_entry->vme_end -
0a7de745 15822 src_entry->vme_start);
1c79356b 15823
0a7de745 15824 if (src_entry->is_sub_map) {
3e170ce0 15825 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
15826 object = VM_OBJECT_NULL;
15827 } else {
3e170ce0 15828 object = VME_OBJECT(src_entry);
fe8ab488
A
15829 if (src_entry->iokit_acct) {
15830 /*
15831 * This entry uses "IOKit accounting".
15832 */
15833 } else if (object != VM_OBJECT_NULL &&
cb323159
A
15834 (object->purgable != VM_PURGABLE_DENY ||
15835 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
fe8ab488
A
15836 /*
15837 * Purgeable objects have their own accounting:
15838 * no pmap accounting for them.
15839 */
a39ff7e2 15840 assertf(!src_entry->use_pmap,
0a7de745
A
15841 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15842 map,
15843 src_entry,
15844 (uint64_t)src_entry->vme_start,
15845 (uint64_t)src_entry->vme_end,
15846 src_entry->protection,
15847 src_entry->max_protection,
15848 VME_ALIAS(src_entry));
fe8ab488
A
15849 } else {
15850 /*
15851 * Not IOKit or purgeable:
15852 * must be accounted by pmap stats.
15853 */
a39ff7e2 15854 assertf(src_entry->use_pmap,
0a7de745
A
15855 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15856 map,
15857 src_entry,
15858 (uint64_t)src_entry->vme_start,
15859 (uint64_t)src_entry->vme_end,
15860 src_entry->protection,
15861 src_entry->max_protection,
15862 VME_ALIAS(src_entry));
fe8ab488 15863 }
55e303ae 15864
91447636
A
15865 if (object == VM_OBJECT_NULL) {
15866 object = vm_object_allocate(entry_size);
3e170ce0
A
15867 VME_OFFSET_SET(src_entry, 0);
15868 VME_OBJECT_SET(src_entry, object);
a39ff7e2 15869 assert(src_entry->use_pmap);
91447636 15870 } else if (object->copy_strategy !=
0a7de745 15871 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
15872 /*
15873 * We are already using an asymmetric
15874 * copy, and therefore we already have
15875 * the right object.
15876 */
15877 assert(!src_entry->needs_copy);
15878 } else if (src_entry->needs_copy || object->shadowed ||
0a7de745
A
15879 (object->internal && !object->true_share &&
15880 !src_entry->is_shared &&
15881 object->vo_size > entry_size)) {
3e170ce0 15882 VME_OBJECT_SHADOW(src_entry, entry_size);
a39ff7e2 15883 assert(src_entry->use_pmap);
1c79356b 15884
91447636
A
15885 if (!src_entry->needs_copy &&
15886 (src_entry->protection & VM_PROT_WRITE)) {
0a7de745 15887 vm_prot_t prot;
0c530ab8 15888
5ba3f43e
A
15889 assert(!pmap_has_prot_policy(src_entry->protection));
15890
0a7de745 15891 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 15892
3e170ce0 15893 if (override_nx(map,
0a7de745
A
15894 VME_ALIAS(src_entry))
15895 && prot) {
15896 prot |= VM_PROT_EXECUTE;
15897 }
2d21ac55 15898
5ba3f43e
A
15899 assert(!pmap_has_prot_policy(prot));
15900
0a7de745 15901 if (map->mapped_in_other_pmaps) {
2d21ac55 15902 vm_object_pmap_protect(
3e170ce0
A
15903 VME_OBJECT(src_entry),
15904 VME_OFFSET(src_entry),
2d21ac55
A
15905 entry_size,
15906 PMAP_NULL,
0c530ab8 15907 src_entry->vme_start,
0c530ab8 15908 prot);
2d21ac55
A
15909 } else {
15910 pmap_protect(vm_map_pmap(map),
0a7de745
A
15911 src_entry->vme_start,
15912 src_entry->vme_end,
15913 prot);
91447636
A
15914 }
15915 }
1c79356b 15916
3e170ce0 15917 object = VME_OBJECT(src_entry);
91447636
A
15918 src_entry->needs_copy = FALSE;
15919 }
1c79356b 15920
1c79356b 15921
91447636 15922 vm_object_lock(object);
2d21ac55 15923 vm_object_reference_locked(object); /* object ref. for new entry */
5ba3f43e 15924 if (object->copy_strategy ==
2d21ac55 15925 MEMORY_OBJECT_COPY_SYMMETRIC) {
5ba3f43e 15926 object->copy_strategy =
0a7de745 15927 MEMORY_OBJECT_COPY_DELAY;
91447636
A
15928 }
15929 vm_object_unlock(object);
15930 }
1c79356b 15931
3e170ce0 15932 offset = (VME_OFFSET(src_entry) +
0a7de745 15933 (src_start - src_entry->vme_start));
1c79356b 15934
7ddcb079 15935 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 15936 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
15937 if (new_entry->is_sub_map) {
15938 /* clr address space specifics */
15939 new_entry->use_pmap = FALSE;
a39ff7e2
A
15940 } else if (copy) {
15941 /*
15942 * We're dealing with a copy-on-write operation,
15943 * so the resulting mapping should not inherit the
15944 * original mapping's accounting settings.
15945 * "use_pmap" should be reset to its default (TRUE)
15946 * so that the new mapping gets accounted for in
15947 * the task's memory footprint.
15948 */
15949 new_entry->use_pmap = TRUE;
fe8ab488 15950 }
a39ff7e2
A
15951 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15952 assert(!new_entry->iokit_acct);
1c79356b 15953
39236c6e
A
15954 new_entry->map_aligned = FALSE;
15955
91447636
A
15956 new_entry->vme_start = map_address;
15957 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 15958 assert(new_entry->vme_start < new_entry->vme_end);
5c9f4661
A
15959 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15960 /*
15961 * Remapping for vm_map_protect(VM_PROT_COPY)
15962 * to convert a read-only mapping into a
15963 * copy-on-write version of itself but
15964 * with write access:
0a7de745 15965 * keep the original inheritance and add
5c9f4661
A
15966 * VM_PROT_WRITE to the max protection.
15967 */
15968 new_entry->inheritance = src_entry->inheritance;
d9a64523 15969 new_entry->protection &= max_prot_for_prot_copy;
5c9f4661
A
15970 new_entry->max_protection |= VM_PROT_WRITE;
15971 } else {
15972 new_entry->inheritance = inheritance;
15973 }
3e170ce0 15974 VME_OFFSET_SET(new_entry, offset);
0a7de745 15975
91447636
A
15976 /*
15977 * The new region has to be copied now if required.
15978 */
0a7de745 15979RestartCopy:
91447636 15980 if (!copy) {
cb323159
A
15981 if (src_entry->used_for_jit == TRUE) {
15982 if (same_map) {
c6bf4f31
A
15983#if __APRR_SUPPORTED__
15984 /*
15985 * Disallow re-mapping of any JIT regions on APRR devices.
15986 */
15987 result = KERN_PROTECTION_FAILURE;
15988 break;
15989#endif /* __APRR_SUPPORTED__*/
cb323159 15990 } else {
d9a64523 15991#if CONFIG_EMBEDDED
cb323159
A
15992 /*
15993 * Cannot allow an entry describing a JIT
15994 * region to be shared across address spaces.
15995 */
15996 result = KERN_INVALID_ARGUMENT;
15997 break;
d9a64523 15998#endif /* CONFIG_EMBEDDED */
cb323159 15999 }
316670eb 16000 }
cb323159 16001
91447636
A
16002 src_entry->is_shared = TRUE;
16003 new_entry->is_shared = TRUE;
0a7de745 16004 if (!(new_entry->is_sub_map)) {
91447636 16005 new_entry->needs_copy = FALSE;
0a7de745 16006 }
91447636
A
16007 } else if (src_entry->is_sub_map) {
16008 /* make this a COW sub_map if not already */
3e170ce0 16009 assert(new_entry->wired_count == 0);
91447636
A
16010 new_entry->needs_copy = TRUE;
16011 object = VM_OBJECT_NULL;
16012 } else if (src_entry->wired_count == 0 &&
cb323159 16013 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
0a7de745
A
16014 VME_OFFSET(new_entry),
16015 (new_entry->vme_end -
16016 new_entry->vme_start),
16017 &src_needs_copy,
16018 &new_entry_needs_copy)) {
91447636
A
16019 new_entry->needs_copy = new_entry_needs_copy;
16020 new_entry->is_shared = FALSE;
a39ff7e2 16021 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
1c79356b 16022
91447636
A
16023 /*
16024 * Handle copy_on_write semantics.
16025 */
16026 if (src_needs_copy && !src_entry->needs_copy) {
0a7de745 16027 vm_prot_t prot;
0c530ab8 16028
5ba3f43e
A
16029 assert(!pmap_has_prot_policy(src_entry->protection));
16030
0c530ab8 16031 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 16032
3e170ce0 16033 if (override_nx(map,
0a7de745
A
16034 VME_ALIAS(src_entry))
16035 && prot) {
16036 prot |= VM_PROT_EXECUTE;
16037 }
2d21ac55 16038
5ba3f43e
A
16039 assert(!pmap_has_prot_policy(prot));
16040
91447636 16041 vm_object_pmap_protect(object,
0a7de745
A
16042 offset,
16043 entry_size,
16044 ((src_entry->is_shared
16045 || map->mapped_in_other_pmaps) ?
16046 PMAP_NULL : map->pmap),
16047 src_entry->vme_start,
16048 prot);
1c79356b 16049
3e170ce0 16050 assert(src_entry->wired_count == 0);
91447636
A
16051 src_entry->needs_copy = TRUE;
16052 }
16053 /*
16054 * Throw away the old object reference of the new entry.
16055 */
16056 vm_object_deallocate(object);
91447636
A
16057 } else {
16058 new_entry->is_shared = FALSE;
a39ff7e2
A
16059 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16060
16061 src_entry_was_wired = (src_entry->wired_count > 0);
16062 saved_src_entry = src_entry;
16063 src_entry = VM_MAP_ENTRY_NULL;
1c79356b 16064
91447636
A
16065 /*
16066 * The map can be safely unlocked since we
16067 * already hold a reference on the object.
16068 *
16069 * Record the timestamp of the map for later
16070 * verification, and unlock the map.
16071 */
16072 version.main_timestamp = map->timestamp;
0a7de745 16073 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 16074
91447636
A
16075 /*
16076 * Perform the copy.
16077 */
a39ff7e2 16078 if (src_entry_was_wired > 0) {
91447636
A
16079 vm_object_lock(object);
16080 result = vm_object_copy_slowly(
2d21ac55
A
16081 object,
16082 offset,
5ba3f43e
A
16083 (new_entry->vme_end -
16084 new_entry->vme_start),
2d21ac55 16085 THREAD_UNINT,
cb323159 16086 VME_OBJECT_PTR(new_entry));
1c79356b 16087
3e170ce0 16088 VME_OFFSET_SET(new_entry, 0);
91447636
A
16089 new_entry->needs_copy = FALSE;
16090 } else {
3e170ce0
A
16091 vm_object_offset_t new_offset;
16092
16093 new_offset = VME_OFFSET(new_entry);
91447636 16094 result = vm_object_copy_strategically(
2d21ac55
A
16095 object,
16096 offset,
5ba3f43e
A
16097 (new_entry->vme_end -
16098 new_entry->vme_start),
cb323159 16099 VME_OBJECT_PTR(new_entry),
3e170ce0 16100 &new_offset,
2d21ac55 16101 &new_entry_needs_copy);
3e170ce0
A
16102 if (new_offset != VME_OFFSET(new_entry)) {
16103 VME_OFFSET_SET(new_entry, new_offset);
16104 }
1c79356b 16105
91447636
A
16106 new_entry->needs_copy = new_entry_needs_copy;
16107 }
1c79356b 16108
91447636
A
16109 /*
16110 * Throw away the old object reference of the new entry.
16111 */
16112 vm_object_deallocate(object);
1c79356b 16113
91447636
A
16114 if (result != KERN_SUCCESS &&
16115 result != KERN_MEMORY_RESTART_COPY) {
16116 _vm_map_entry_dispose(map_header, new_entry);
39037602 16117 vm_map_lock(map);
91447636
A
16118 break;
16119 }
1c79356b 16120
91447636
A
16121 /*
16122 * Verify that the map has not substantially
16123 * changed while the copy was being made.
16124 */
1c79356b 16125
91447636
A
16126 vm_map_lock(map);
16127 if (version.main_timestamp + 1 != map->timestamp) {
16128 /*
16129 * Simple version comparison failed.
16130 *
16131 * Retry the lookup and verify that the
16132 * same object/offset are still present.
16133 */
a39ff7e2 16134 saved_src_entry = VM_MAP_ENTRY_NULL;
3e170ce0 16135 vm_object_deallocate(VME_OBJECT(new_entry));
91447636 16136 _vm_map_entry_dispose(map_header, new_entry);
0a7de745 16137 if (result == KERN_MEMORY_RESTART_COPY) {
91447636 16138 result = KERN_SUCCESS;
0a7de745 16139 }
91447636
A
16140 continue;
16141 }
a39ff7e2
A
16142 /* map hasn't changed: src_entry is still valid */
16143 src_entry = saved_src_entry;
16144 saved_src_entry = VM_MAP_ENTRY_NULL;
1c79356b 16145
91447636
A
16146 if (result == KERN_MEMORY_RESTART_COPY) {
16147 vm_object_reference(object);
16148 goto RestartCopy;
16149 }
16150 }
1c79356b 16151
6d2010ae 16152 _vm_map_store_entry_link(map_header,
0a7de745 16153 map_header->links.prev, new_entry);
1c79356b 16154
6d2010ae 16155 /*Protections for submap mapping are irrelevant here*/
0a7de745 16156 if (!src_entry->is_sub_map) {
6d2010ae
A
16157 *cur_protection &= src_entry->protection;
16158 *max_protection &= src_entry->max_protection;
16159 }
91447636
A
16160 map_address += tmp_size;
16161 mapped_size += tmp_size;
16162 src_start += tmp_size;
91447636 16163 } /* end while */
1c79356b 16164
91447636
A
16165 vm_map_unlock(map);
16166 if (result != KERN_SUCCESS) {
16167 /*
16168 * Free all allocated elements.
16169 */
16170 for (src_entry = map_header->links.next;
0a7de745
A
16171 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16172 src_entry = new_entry) {
91447636 16173 new_entry = src_entry->vme_next;
6d2010ae 16174 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 16175 if (src_entry->is_sub_map) {
3e170ce0 16176 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 16177 } else {
3e170ce0 16178 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 16179 }
91447636
A
16180 _vm_map_entry_dispose(map_header, src_entry);
16181 }
16182 }
16183 return result;
1c79356b
A
16184}
16185
16186/*
91447636 16187 * Routine: vm_remap
1c79356b 16188 *
91447636
A
16189 * Map portion of a task's address space.
16190 * Mapped region must not overlap more than
16191 * one vm memory object. Protections and
16192 * inheritance attributes remain the same
16193 * as in the original task and are out parameters.
16194 * Source and Target task can be identical
16195 * Other attributes are identical as for vm_map()
1c79356b
A
16196 */
16197kern_return_t
91447636 16198vm_map_remap(
0a7de745
A
16199 vm_map_t target_map,
16200 vm_map_address_t *address,
16201 vm_map_size_t size,
16202 vm_map_offset_t mask,
16203 int flags,
16204 vm_map_kernel_flags_t vmk_flags,
16205 vm_tag_t tag,
16206 vm_map_t src_map,
16207 vm_map_offset_t memory_address,
16208 boolean_t copy,
16209 vm_prot_t *cur_protection,
16210 vm_prot_t *max_protection,
16211 vm_inherit_t inheritance)
1c79356b 16212{
0a7de745
A
16213 kern_return_t result;
16214 vm_map_entry_t entry;
16215 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16216 vm_map_entry_t new_entry;
16217 struct vm_map_header map_header;
16218 vm_map_offset_t offset_in_mapping;
16219
16220 if (target_map == VM_MAP_NULL) {
91447636 16221 return KERN_INVALID_ARGUMENT;
0a7de745 16222 }
1c79356b 16223
91447636 16224 switch (inheritance) {
2d21ac55
A
16225 case VM_INHERIT_NONE:
16226 case VM_INHERIT_COPY:
16227 case VM_INHERIT_SHARE:
0a7de745 16228 if (size != 0 && src_map != VM_MAP_NULL) {
91447636 16229 break;
0a7de745
A
16230 }
16231 /*FALL THRU*/
2d21ac55 16232 default:
91447636
A
16233 return KERN_INVALID_ARGUMENT;
16234 }
1c79356b 16235
5ba3f43e
A
16236 /*
16237 * If the user is requesting that we return the address of the
16238 * first byte of the data (rather than the base of the page),
16239 * then we use different rounding semantics: specifically,
39236c6e
A
16240 * we assume that (memory_address, size) describes a region
16241 * all of whose pages we must cover, rather than a base to be truncated
16242 * down and a size to be added to that base. So we figure out
16243 * the highest page that the requested region includes and make
16244 * sure that the size will cover it.
5ba3f43e 16245 *
0a7de745 16246 * The key example we're worried about it is of the form:
39236c6e 16247 *
0a7de745 16248 * memory_address = 0x1ff0, size = 0x20
5ba3f43e
A
16249 *
16250 * With the old semantics, we round down the memory_address to 0x1000
39236c6e
A
16251 * and round up the size to 0x1000, resulting in our covering *only*
16252 * page 0x1000. With the new semantics, we'd realize that the region covers
5ba3f43e 16253 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
39236c6e
A
16254 * 0x1000 and page 0x2000 in the region we remap.
16255 */
16256 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16257 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16258 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16259 } else {
16260 size = vm_map_round_page(size, PAGE_MASK);
5ba3f43e
A
16261 }
16262 if (size == 0) {
16263 return KERN_INVALID_ARGUMENT;
16264 }
1c79356b 16265
cb323159
A
16266 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16267 /* must be copy-on-write to be "media resilient" */
16268 if (!copy) {
16269 return KERN_INVALID_ARGUMENT;
16270 }
16271 }
16272
91447636 16273 result = vm_map_remap_extract(src_map, memory_address,
0a7de745
A
16274 size, copy, &map_header,
16275 cur_protection,
16276 max_protection,
16277 inheritance,
16278 target_map->hdr.entries_pageable,
16279 src_map == target_map,
16280 vmk_flags);
1c79356b 16281
91447636
A
16282 if (result != KERN_SUCCESS) {
16283 return result;
16284 }
1c79356b 16285
91447636
A
16286 /*
16287 * Allocate/check a range of free virtual address
16288 * space for the target
1c79356b 16289 */
39236c6e 16290 *address = vm_map_trunc_page(*address,
0a7de745 16291 VM_MAP_PAGE_MASK(target_map));
91447636
A
16292 vm_map_lock(target_map);
16293 result = vm_map_remap_range_allocate(target_map, address, size,
0a7de745
A
16294 mask, flags, vmk_flags, tag,
16295 &insp_entry);
1c79356b 16296
91447636 16297 for (entry = map_header.links.next;
0a7de745
A
16298 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16299 entry = new_entry) {
91447636 16300 new_entry = entry->vme_next;
6d2010ae 16301 _vm_map_store_entry_unlink(&map_header, entry);
91447636 16302 if (result == KERN_SUCCESS) {
3e170ce0
A
16303 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16304 /* no codesigning -> read-only access */
3e170ce0
A
16305 entry->max_protection = VM_PROT_READ;
16306 entry->protection = VM_PROT_READ;
16307 entry->vme_resilient_codesign = TRUE;
16308 }
91447636
A
16309 entry->vme_start += *address;
16310 entry->vme_end += *address;
39236c6e 16311 assert(!entry->map_aligned);
cb323159
A
16312 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16313 !entry->is_sub_map &&
16314 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16315 VME_OBJECT(entry)->internal)) {
16316 entry->vme_resilient_media = TRUE;
16317 }
d9a64523 16318 vm_map_store_entry_link(target_map, insp_entry, entry,
0a7de745 16319 vmk_flags);
91447636
A
16320 insp_entry = entry;
16321 } else {
16322 if (!entry->is_sub_map) {
3e170ce0 16323 vm_object_deallocate(VME_OBJECT(entry));
91447636 16324 } else {
3e170ce0 16325 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 16326 }
91447636 16327 _vm_map_entry_dispose(&map_header, entry);
1c79356b 16328 }
91447636 16329 }
1c79356b 16330
3e170ce0
A
16331 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16332 *cur_protection = VM_PROT_READ;
16333 *max_protection = VM_PROT_READ;
16334 }
16335
0a7de745 16336 if (target_map->disable_vmentry_reuse == TRUE) {
39037602 16337 assert(!target_map->is_nested_map);
0a7de745 16338 if (target_map->highest_entry_end < insp_entry->vme_end) {
6d2010ae
A
16339 target_map->highest_entry_end = insp_entry->vme_end;
16340 }
16341 }
16342
91447636
A
16343 if (result == KERN_SUCCESS) {
16344 target_map->size += size;
0c530ab8 16345 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
5ba3f43e 16346
d9a64523
A
16347#if PMAP_CS
16348 if (*max_protection & VM_PROT_EXECUTE) {
16349 vm_map_address_t region_start = 0, region_size = 0;
16350 struct pmap_cs_code_directory *region_cd = NULL;
16351 vm_map_address_t base = 0;
16352 struct pmap_cs_lookup_results results = {};
16353 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16354 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16355
16356 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16357 region_size = results.region_size;
16358 region_start = results.region_start;
16359 region_cd = results.region_cd_entry;
16360 base = results.base;
16361
16362 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16363 *cur_protection = VM_PROT_READ;
16364 *max_protection = VM_PROT_READ;
16365 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
0a7de745
A
16366 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16367 page_addr, page_addr + assoc_size, *address,
16368 region_start, region_size,
16369 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16370 );
d9a64523
A
16371 }
16372 }
16373#endif
d9a64523
A
16374 }
16375 vm_map_unlock(target_map);
16376
0a7de745 16377 if (result == KERN_SUCCESS && target_map->wiring_required) {
5ba3f43e 16378 result = vm_map_wire_kernel(target_map, *address,
0a7de745
A
16379 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16380 TRUE);
16381 }
39236c6e 16382
5ba3f43e
A
16383 /*
16384 * If requested, return the address of the data pointed to by the
39236c6e
A
16385 * request, rather than the base of the resulting page.
16386 */
16387 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16388 *address += offset_in_mapping;
16389 }
16390
91447636
A
16391 return result;
16392}
1c79356b 16393
91447636
A
16394/*
16395 * Routine: vm_map_remap_range_allocate
16396 *
16397 * Description:
16398 * Allocate a range in the specified virtual address map.
16399 * returns the address and the map entry just before the allocated
16400 * range
16401 *
16402 * Map must be locked.
16403 */
1c79356b 16404
91447636
A
16405static kern_return_t
16406vm_map_remap_range_allocate(
0a7de745
A
16407 vm_map_t map,
16408 vm_map_address_t *address, /* IN/OUT */
16409 vm_map_size_t size,
16410 vm_map_offset_t mask,
16411 int flags,
16412 vm_map_kernel_flags_t vmk_flags,
5ba3f43e 16413 __unused vm_tag_t tag,
0a7de745 16414 vm_map_entry_t *map_entry) /* OUT */
91447636 16415{
0a7de745
A
16416 vm_map_entry_t entry;
16417 vm_map_offset_t start;
16418 vm_map_offset_t end;
16419 vm_map_offset_t desired_empty_end;
16420 kern_return_t kr;
16421 vm_map_entry_t hole_entry;
1c79356b 16422
0a7de745 16423StartAgain:;
1c79356b 16424
2d21ac55 16425 start = *address;
1c79356b 16426
0a7de745
A
16427 if (flags & VM_FLAGS_ANYWHERE) {
16428 if (flags & VM_FLAGS_RANDOM_ADDR) {
39037602
A
16429 /*
16430 * Get a random start address.
16431 */
16432 kr = vm_map_random_address_for_size(map, address, size);
16433 if (kr != KERN_SUCCESS) {
0a7de745 16434 return kr;
39037602
A
16435 }
16436 start = *address;
16437 }
16438
2d21ac55
A
16439 /*
16440 * Calculate the first possible address.
16441 */
1c79356b 16442
0a7de745 16443 if (start < map->min_offset) {
2d21ac55 16444 start = map->min_offset;
0a7de745
A
16445 }
16446 if (start > map->max_offset) {
16447 return KERN_NO_SPACE;
16448 }
5ba3f43e 16449
2d21ac55
A
16450 /*
16451 * Look for the first possible address;
16452 * if there's already something at this
16453 * address, we have to start after it.
16454 */
1c79356b 16455
0a7de745 16456 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 16457 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 16458 } else {
3e170ce0 16459 if (map->holelistenabled) {
d9a64523 16460 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
16461
16462 if (hole_entry == NULL) {
16463 /*
16464 * No more space in the map?
16465 */
0a7de745 16466 return KERN_NO_SPACE;
3e170ce0 16467 } else {
3e170ce0
A
16468 boolean_t found_hole = FALSE;
16469
16470 do {
16471 if (hole_entry->vme_start >= start) {
16472 start = hole_entry->vme_start;
16473 found_hole = TRUE;
16474 break;
16475 }
16476
16477 if (hole_entry->vme_end > start) {
16478 found_hole = TRUE;
16479 break;
16480 }
16481 hole_entry = hole_entry->vme_next;
d9a64523 16482 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
16483
16484 if (found_hole == FALSE) {
0a7de745 16485 return KERN_NO_SPACE;
3e170ce0
A
16486 }
16487
16488 entry = hole_entry;
16489 }
6d2010ae 16490 } else {
3e170ce0
A
16491 assert(first_free_is_valid(map));
16492 if (start == map->min_offset) {
0a7de745 16493 if ((entry = map->first_free) != vm_map_to_entry(map)) {
3e170ce0 16494 start = entry->vme_end;
0a7de745 16495 }
3e170ce0 16496 } else {
0a7de745
A
16497 vm_map_entry_t tmp_entry;
16498 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
3e170ce0 16499 start = tmp_entry->vme_end;
0a7de745 16500 }
3e170ce0
A
16501 entry = tmp_entry;
16502 }
6d2010ae 16503 }
39236c6e 16504 start = vm_map_round_page(start,
0a7de745 16505 VM_MAP_PAGE_MASK(map));
2d21ac55 16506 }
5ba3f43e 16507
2d21ac55
A
16508 /*
16509 * In any case, the "entry" always precedes
16510 * the proposed new region throughout the
16511 * loop:
16512 */
1c79356b 16513
2d21ac55 16514 while (TRUE) {
0a7de745 16515 vm_map_entry_t next;
2d21ac55
A
16516
16517 /*
16518 * Find the end of the proposed new region.
16519 * Be sure we didn't go beyond the end, or
16520 * wrap around the address.
16521 */
16522
16523 end = ((start + mask) & ~mask);
39236c6e 16524 end = vm_map_round_page(end,
0a7de745
A
16525 VM_MAP_PAGE_MASK(map));
16526 if (end < start) {
16527 return KERN_NO_SPACE;
16528 }
2d21ac55
A
16529 start = end;
16530 end += size;
16531
d9a64523
A
16532 /* We want an entire page of empty space, but don't increase the allocation size. */
16533 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16534
16535 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
2d21ac55
A
16536 if (map->wait_for_space) {
16537 if (size <= (map->max_offset -
0a7de745 16538 map->min_offset)) {
2d21ac55
A
16539 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16540 vm_map_unlock(map);
16541 thread_block(THREAD_CONTINUE_NULL);
16542 vm_map_lock(map);
16543 goto StartAgain;
16544 }
16545 }
5ba3f43e 16546
0a7de745 16547 return KERN_NO_SPACE;
2d21ac55 16548 }
1c79356b 16549
2d21ac55 16550 next = entry->vme_next;
1c79356b 16551
3e170ce0 16552 if (map->holelistenabled) {
0a7de745 16553 if (entry->vme_end >= desired_empty_end) {
3e170ce0 16554 break;
0a7de745 16555 }
3e170ce0
A
16556 } else {
16557 /*
0a7de745 16558 * If there are no more entries, we must win.
3e170ce0
A
16559 *
16560 * OR
16561 *
16562 * If there is another entry, it must be
16563 * after the end of the potential new region.
16564 */
1c79356b 16565
0a7de745 16566 if (next == vm_map_to_entry(map)) {
3e170ce0 16567 break;
0a7de745 16568 }
3e170ce0 16569
0a7de745 16570 if (next->vme_start >= desired_empty_end) {
3e170ce0 16571 break;
0a7de745 16572 }
3e170ce0 16573 }
1c79356b 16574
2d21ac55
A
16575 /*
16576 * Didn't fit -- move to the next entry.
16577 */
1c79356b 16578
2d21ac55 16579 entry = next;
3e170ce0
A
16580
16581 if (map->holelistenabled) {
d9a64523 16582 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
16583 /*
16584 * Wrapped around
16585 */
0a7de745 16586 return KERN_NO_SPACE;
3e170ce0
A
16587 }
16588 start = entry->vme_start;
16589 } else {
16590 start = entry->vme_end;
16591 }
16592 }
16593
16594 if (map->holelistenabled) {
3e170ce0
A
16595 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16596 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16597 }
2d21ac55 16598 }
3e170ce0 16599
2d21ac55
A
16600 *address = start;
16601 } else {
0a7de745 16602 vm_map_entry_t temp_entry;
5ba3f43e 16603
2d21ac55
A
16604 /*
16605 * Verify that:
16606 * the address doesn't itself violate
16607 * the mask requirement.
16608 */
1c79356b 16609
0a7de745
A
16610 if ((start & mask) != 0) {
16611 return KERN_NO_SPACE;
16612 }
1c79356b 16613
1c79356b 16614
2d21ac55
A
16615 /*
16616 * ... the address is within bounds
16617 */
1c79356b 16618
2d21ac55 16619 end = start + size;
1c79356b 16620
2d21ac55
A
16621 if ((start < map->min_offset) ||
16622 (end > map->max_offset) ||
16623 (start >= end)) {
0a7de745 16624 return KERN_INVALID_ADDRESS;
2d21ac55 16625 }
1c79356b 16626
060df5ea
A
16627 /*
16628 * If we're asked to overwrite whatever was mapped in that
16629 * range, first deallocate that range.
16630 */
16631 if (flags & VM_FLAGS_OVERWRITE) {
16632 vm_map_t zap_map;
d9a64523 16633 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
060df5ea
A
16634
16635 /*
16636 * We use a "zap_map" to avoid having to unlock
16637 * the "map" in vm_map_delete(), which would compromise
16638 * the atomicity of the "deallocate" and then "remap"
16639 * combination.
16640 */
16641 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
16642 start,
16643 end,
16644 map->hdr.entries_pageable);
060df5ea
A
16645 if (zap_map == VM_MAP_NULL) {
16646 return KERN_RESOURCE_SHORTAGE;
16647 }
39236c6e 16648 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 16649 vm_map_disable_hole_optimization(zap_map);
060df5ea 16650
d9a64523
A
16651 if (vmk_flags.vmkf_overwrite_immutable) {
16652 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16653 }
060df5ea 16654 kr = vm_map_delete(map, start, end,
0a7de745
A
16655 remove_flags,
16656 zap_map);
060df5ea
A
16657 if (kr == KERN_SUCCESS) {
16658 vm_map_destroy(zap_map,
0a7de745 16659 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
060df5ea
A
16660 zap_map = VM_MAP_NULL;
16661 }
16662 }
16663
2d21ac55
A
16664 /*
16665 * ... the starting address isn't allocated
16666 */
91447636 16667
0a7de745
A
16668 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16669 return KERN_NO_SPACE;
16670 }
91447636 16671
2d21ac55 16672 entry = temp_entry;
91447636 16673
2d21ac55
A
16674 /*
16675 * ... the next region doesn't overlap the
16676 * end point.
16677 */
1c79356b 16678
2d21ac55 16679 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745
A
16680 (entry->vme_next->vme_start < end)) {
16681 return KERN_NO_SPACE;
16682 }
2d21ac55
A
16683 }
16684 *map_entry = entry;
0a7de745 16685 return KERN_SUCCESS;
91447636 16686}
1c79356b 16687
91447636
A
16688/*
16689 * vm_map_switch:
16690 *
16691 * Set the address map for the current thread to the specified map
16692 */
1c79356b 16693
91447636
A
16694vm_map_t
16695vm_map_switch(
0a7de745 16696 vm_map_t map)
91447636 16697{
0a7de745
A
16698 int mycpu;
16699 thread_t thread = current_thread();
16700 vm_map_t oldmap = thread->map;
1c79356b 16701
91447636
A
16702 mp_disable_preemption();
16703 mycpu = cpu_number();
1c79356b 16704
91447636
A
16705 /*
16706 * Deactivate the current map and activate the requested map
16707 */
16708 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 16709
91447636 16710 mp_enable_preemption();
0a7de745 16711 return oldmap;
91447636 16712}
1c79356b 16713
1c79356b 16714
91447636
A
16715/*
16716 * Routine: vm_map_write_user
16717 *
16718 * Description:
16719 * Copy out data from a kernel space into space in the
16720 * destination map. The space must already exist in the
16721 * destination map.
16722 * NOTE: This routine should only be called by threads
16723 * which can block on a page fault. i.e. kernel mode user
16724 * threads.
16725 *
16726 */
16727kern_return_t
16728vm_map_write_user(
0a7de745
A
16729 vm_map_t map,
16730 void *src_p,
16731 vm_map_address_t dst_addr,
16732 vm_size_t size)
91447636 16733{
0a7de745 16734 kern_return_t kr = KERN_SUCCESS;
1c79356b 16735
0a7de745 16736 if (current_map() == map) {
91447636
A
16737 if (copyout(src_p, dst_addr, size)) {
16738 kr = KERN_INVALID_ADDRESS;
16739 }
16740 } else {
0a7de745 16741 vm_map_t oldmap;
1c79356b 16742
91447636
A
16743 /* take on the identity of the target map while doing */
16744 /* the transfer */
1c79356b 16745
91447636
A
16746 vm_map_reference(map);
16747 oldmap = vm_map_switch(map);
16748 if (copyout(src_p, dst_addr, size)) {
16749 kr = KERN_INVALID_ADDRESS;
1c79356b 16750 }
91447636
A
16751 vm_map_switch(oldmap);
16752 vm_map_deallocate(map);
1c79356b 16753 }
91447636 16754 return kr;
1c79356b
A
16755}
16756
16757/*
91447636
A
16758 * Routine: vm_map_read_user
16759 *
16760 * Description:
16761 * Copy in data from a user space source map into the
16762 * kernel map. The space must already exist in the
16763 * kernel map.
16764 * NOTE: This routine should only be called by threads
16765 * which can block on a page fault. i.e. kernel mode user
16766 * threads.
1c79356b 16767 *
1c79356b
A
16768 */
16769kern_return_t
91447636 16770vm_map_read_user(
0a7de745
A
16771 vm_map_t map,
16772 vm_map_address_t src_addr,
16773 void *dst_p,
16774 vm_size_t size)
1c79356b 16775{
0a7de745 16776 kern_return_t kr = KERN_SUCCESS;
1c79356b 16777
0a7de745 16778 if (current_map() == map) {
91447636
A
16779 if (copyin(src_addr, dst_p, size)) {
16780 kr = KERN_INVALID_ADDRESS;
16781 }
16782 } else {
0a7de745 16783 vm_map_t oldmap;
1c79356b 16784
91447636
A
16785 /* take on the identity of the target map while doing */
16786 /* the transfer */
16787
16788 vm_map_reference(map);
16789 oldmap = vm_map_switch(map);
16790 if (copyin(src_addr, dst_p, size)) {
16791 kr = KERN_INVALID_ADDRESS;
16792 }
16793 vm_map_switch(oldmap);
16794 vm_map_deallocate(map);
1c79356b 16795 }
91447636
A
16796 return kr;
16797}
16798
1c79356b 16799
91447636
A
16800/*
16801 * vm_map_check_protection:
16802 *
16803 * Assert that the target map allows the specified
16804 * privilege on the entire address region given.
16805 * The entire region must be allocated.
16806 */
2d21ac55
A
16807boolean_t
16808vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
0a7de745 16809 vm_map_offset_t end, vm_prot_t protection)
91447636 16810{
2d21ac55
A
16811 vm_map_entry_t entry;
16812 vm_map_entry_t tmp_entry;
1c79356b 16813
91447636 16814 vm_map_lock(map);
1c79356b 16815
0a7de745 16816 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
2d21ac55 16817 vm_map_unlock(map);
0a7de745 16818 return FALSE;
1c79356b
A
16819 }
16820
91447636
A
16821 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16822 vm_map_unlock(map);
0a7de745 16823 return FALSE;
91447636 16824 }
1c79356b 16825
91447636
A
16826 entry = tmp_entry;
16827
16828 while (start < end) {
16829 if (entry == vm_map_to_entry(map)) {
16830 vm_map_unlock(map);
0a7de745 16831 return FALSE;
1c79356b 16832 }
1c79356b 16833
91447636
A
16834 /*
16835 * No holes allowed!
16836 */
1c79356b 16837
91447636
A
16838 if (start < entry->vme_start) {
16839 vm_map_unlock(map);
0a7de745 16840 return FALSE;
91447636
A
16841 }
16842
16843 /*
16844 * Check protection associated with entry.
16845 */
16846
16847 if ((entry->protection & protection) != protection) {
16848 vm_map_unlock(map);
0a7de745 16849 return FALSE;
91447636
A
16850 }
16851
16852 /* go to next entry */
16853
16854 start = entry->vme_end;
16855 entry = entry->vme_next;
16856 }
16857 vm_map_unlock(map);
0a7de745 16858 return TRUE;
1c79356b
A
16859}
16860
1c79356b 16861kern_return_t
91447636 16862vm_map_purgable_control(
0a7de745
A
16863 vm_map_t map,
16864 vm_map_offset_t address,
16865 vm_purgable_t control,
16866 int *state)
1c79356b 16867{
0a7de745
A
16868 vm_map_entry_t entry;
16869 vm_object_t object;
16870 kern_return_t kr;
16871 boolean_t was_nonvolatile;
1c79356b 16872
1c79356b 16873 /*
91447636
A
16874 * Vet all the input parameters and current type and state of the
16875 * underlaying object. Return with an error if anything is amiss.
1c79356b 16876 */
0a7de745
A
16877 if (map == VM_MAP_NULL) {
16878 return KERN_INVALID_ARGUMENT;
16879 }
1c79356b 16880
91447636 16881 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7 16882 control != VM_PURGABLE_GET_STATE &&
5ba3f43e 16883 control != VM_PURGABLE_PURGE_ALL &&
0a7de745
A
16884 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16885 return KERN_INVALID_ARGUMENT;
16886 }
1c79356b 16887
b0d623f7
A
16888 if (control == VM_PURGABLE_PURGE_ALL) {
16889 vm_purgeable_object_purge_all();
16890 return KERN_SUCCESS;
16891 }
16892
5ba3f43e 16893 if ((control == VM_PURGABLE_SET_STATE ||
0a7de745 16894 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
b0d623f7 16895 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
0a7de745
A
16896 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16897 return KERN_INVALID_ARGUMENT;
16898 }
91447636 16899
b0d623f7 16900 vm_map_lock_read(map);
91447636
A
16901
16902 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
91447636
A
16903 /*
16904 * Must pass a valid non-submap address.
16905 */
b0d623f7 16906 vm_map_unlock_read(map);
0a7de745 16907 return KERN_INVALID_ADDRESS;
91447636
A
16908 }
16909
16910 if ((entry->protection & VM_PROT_WRITE) == 0) {
16911 /*
16912 * Can't apply purgable controls to something you can't write.
16913 */
b0d623f7 16914 vm_map_unlock_read(map);
0a7de745 16915 return KERN_PROTECTION_FAILURE;
91447636
A
16916 }
16917
3e170ce0 16918 object = VME_OBJECT(entry);
fe8ab488
A
16919 if (object == VM_OBJECT_NULL ||
16920 object->purgable == VM_PURGABLE_DENY) {
91447636 16921 /*
fe8ab488 16922 * Object must already be present and be purgeable.
91447636 16923 */
b0d623f7 16924 vm_map_unlock_read(map);
91447636
A
16925 return KERN_INVALID_ARGUMENT;
16926 }
5ba3f43e 16927
91447636
A
16928 vm_object_lock(object);
16929
39236c6e 16930#if 00
5ba3f43e 16931 if (VME_OFFSET(entry) != 0 ||
6d2010ae 16932 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
16933 /*
16934 * Can only apply purgable controls to the whole (existing)
16935 * object at once.
16936 */
b0d623f7 16937 vm_map_unlock_read(map);
91447636
A
16938 vm_object_unlock(object);
16939 return KERN_INVALID_ARGUMENT;
1c79356b 16940 }
39236c6e 16941#endif
fe8ab488
A
16942
16943 assert(!entry->is_sub_map);
16944 assert(!entry->use_pmap); /* purgeable has its own accounting */
16945
b0d623f7 16946 vm_map_unlock_read(map);
1c79356b 16947
fe8ab488
A
16948 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16949
91447636 16950 kr = vm_object_purgable_control(object, control, state);
1c79356b 16951
fe8ab488
A
16952 if (was_nonvolatile &&
16953 object->purgable != VM_PURGABLE_NONVOLATILE &&
16954 map->pmap == kernel_pmap) {
16955#if DEBUG
16956 object->vo_purgeable_volatilizer = kernel_task;
16957#endif /* DEBUG */
16958 }
16959
91447636 16960 vm_object_unlock(object);
1c79356b 16961
91447636
A
16962 return kr;
16963}
1c79356b 16964
91447636 16965kern_return_t
b0d623f7 16966vm_map_page_query_internal(
0a7de745
A
16967 vm_map_t target_map,
16968 vm_map_offset_t offset,
16969 int *disposition,
16970 int *ref_count)
91447636 16971{
0a7de745
A
16972 kern_return_t kr;
16973 vm_page_info_basic_data_t info;
16974 mach_msg_type_number_t count;
b0d623f7
A
16975
16976 count = VM_PAGE_INFO_BASIC_COUNT;
16977 kr = vm_map_page_info(target_map,
0a7de745
A
16978 offset,
16979 VM_PAGE_INFO_BASIC,
16980 (vm_page_info_t) &info,
16981 &count);
b0d623f7
A
16982 if (kr == KERN_SUCCESS) {
16983 *disposition = info.disposition;
16984 *ref_count = info.ref_count;
16985 } else {
16986 *disposition = 0;
16987 *ref_count = 0;
16988 }
2d21ac55 16989
b0d623f7
A
16990 return kr;
16991}
5ba3f43e 16992
b0d623f7
A
16993kern_return_t
16994vm_map_page_info(
0a7de745
A
16995 vm_map_t map,
16996 vm_map_offset_t offset,
16997 vm_page_info_flavor_t flavor,
16998 vm_page_info_t info,
16999 mach_msg_type_number_t *count)
5ba3f43e 17000{
0a7de745
A
17001 return vm_map_page_range_info_internal(map,
17002 offset, /* start of range */
17003 (offset + 1), /* this will get rounded in the call to the page boundary */
17004 flavor,
17005 info,
17006 count);
5ba3f43e
A
17007}
17008
/*
 *	Routine:	vm_map_page_range_info_internal
 *
 *	Description:
 *		Fill "info" with one vm_page_info_basic record per page of
 *		[start_offset, end_offset) in "map".  The range is first
 *		truncated/rounded with PAGE_MASK.  Only VM_PAGE_INFO_BASIC is
 *		accepted, and "*count" must be VM_PAGE_INFO_BASIC_COUNT or
 *		one less.
 *
 *		Addresses with no map entry and entries without a VM object
 *		produce zero-filled records.  Submap entries are handled by
 *		recursing into the submap.  For other entries the object and
 *		its shadow chain are searched for the page, either resident
 *		or known to the compressor pager.
 *
 *		When task_self_region_footprint() returns true, records are
 *		instead derived from pmap / purgeable accounting, and
 *		addresses at or past the end of the last map entry are
 *		reported as a fake region.
 *
 *	Returns:
 *		KERN_INVALID_ARGUMENT	unsupported flavor, bad "*count", or
 *					rounded end below truncated start.
 *		Otherwise "retval": KERN_SUCCESS unless overwritten by the
 *		result of a submap recursion.
 *
 *	Locking:
 *		Takes the map read lock.  It is dropped around the submap
 *		recursion and while object pages are examined, and retaken
 *		afterwards.
 */
17009kern_return_t
17010vm_map_page_range_info_internal(
0a7de745
A
17011 vm_map_t map,
17012 vm_map_offset_t start_offset,
17013 vm_map_offset_t end_offset,
17014 vm_page_info_flavor_t flavor,
17015 vm_page_info_t info,
17016 mach_msg_type_number_t *count)
b0d623f7 17017{
0a7de745
A
17018 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
17019 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
17020 vm_page_t m = VM_PAGE_NULL;
17021 kern_return_t retval = KERN_SUCCESS;
17022 int disposition = 0;
17023 int ref_count = 0;
17024 int depth = 0, info_idx = 0;
17025 vm_page_info_basic_t basic_info = 0;
17026 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
17027 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
17028 boolean_t do_region_footprint;
cb323159 17029 ledger_amount_t ledger_resident, ledger_compressed;
2d21ac55 17030
b0d623f7
A
17031 switch (flavor) {
17032 case VM_PAGE_INFO_BASIC:
17033 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
17034 /*
17035 * The "vm_page_info_basic_data" structure was not
17036 * properly padded, so allow the size to be off by
17037 * one to maintain backwards binary compatibility...
17038 */
0a7de745 17039 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
6d2010ae 17040 return KERN_INVALID_ARGUMENT;
0a7de745 17041 }
b0d623f7
A
17042 }
17043 break;
17044 default:
17045 return KERN_INVALID_ARGUMENT;
91447636 17046 }
2d21ac55 17047
a39ff7e2 17048 do_region_footprint = task_self_region_footprint();
b0d623f7
A
17049 disposition = 0;
17050 ref_count = 0;
b0d623f7 17051 depth = 0;
5ba3f43e 17052 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
b0d623f7 17053 retval = KERN_SUCCESS;
5ba3f43e
A
17054
17055 offset_in_page = start_offset & PAGE_MASK;
17056 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17057 end = vm_map_round_page(end_offset, PAGE_MASK);
17058
0a7de745
A
17059 if (end < start) {
17060 return KERN_INVALID_ARGUMENT;
17061 }
17062
17063 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
b0d623f7
A
17064
17065 vm_map_lock_read(map);
17066
cb323159
A
/*
 * Fetch the footprint ledger amounts for this map's pmap.  Only
 * "ledger_compressed" is consulted below, when faking records for
 * addresses past the last map entry.
 */
17067 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17068
5ba3f43e
A
17069 for (curr_s_offset = start; curr_s_offset < end;) {
17070 /*
17071 * New lookup needs reset of these variables.
17072 */
17073 curr_object = object = VM_OBJECT_NULL;
17074 offset_in_object = 0;
17075 ref_count = 0;
17076 depth = 0;
17077
a39ff7e2
A
17078 if (do_region_footprint &&
17079 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
a39ff7e2
A
17080 /*
17081 * Request for "footprint" info about a page beyond
17082 * the end of address space: this must be for
17083 * the fake region vm_map_region_recurse_64()
17084 * reported to account for non-volatile purgeable
17085 * memory owned by this task.
17086 */
17087 disposition = 0;
cb323159 17088
/*
 * NOTE(review): the distance past the last entry is compared with
 * "ledger_compressed" narrowed by an (unsigned) cast -- confirm that
 * the narrowing and the units of the two operands are intended.
 */
a39ff7e2 17089 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
cb323159 17090 (unsigned) ledger_compressed) {
a39ff7e2
A
17091 /*
17092 * We haven't reported all the "non-volatile
17093 * compressed" pages yet, so report this fake
17094 * page as "compressed".
17095 */
17096 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17097 } else {
17098 /*
17099 * We've reported all the non-volatile
17100 * compressed page but not all the non-volatile
17101 * pages , so report this fake page as
17102 * "resident dirty".
17103 */
17104 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17105 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17106 disposition |= VM_PAGE_QUERY_PAGE_REF;
17107 }
17108 switch (flavor) {
17109 case VM_PAGE_INFO_BASIC:
17110 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17111 basic_info->disposition = disposition;
17112 basic_info->ref_count = 1;
17113 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17114 basic_info->offset = 0;
17115 basic_info->depth = 0;
17116
17117 info_idx++;
17118 break;
17119 }
17120 curr_s_offset += PAGE_SIZE;
17121 continue;
17122 }
17123
5ba3f43e
A
17124 /*
17125 * First, find the map entry covering "curr_s_offset", going down
17126 * submaps if necessary.
17127 */
17128 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17129 /* no entry -> no object -> no page */
17130
17131 if (curr_s_offset < vm_map_min(map)) {
17132 /*
17133 * Illegal address that falls below map min.
17134 */
17135 curr_e_offset = MIN(end, vm_map_min(map));
5ba3f43e
A
17136 } else if (curr_s_offset >= vm_map_max(map)) {
17137 /*
17138 * Illegal address that falls on/after map max.
17139 */
17140 curr_e_offset = end;
5ba3f43e
A
17141 } else if (map_entry == vm_map_to_entry(map)) {
17142 /*
17143 * Hit a hole.
17144 */
17145 if (map_entry->vme_next == vm_map_to_entry(map)) {
17146 /*
17147 * Empty map.
17148 */
17149 curr_e_offset = MIN(map->max_offset, end);
17150 } else {
17151 /*
0a7de745
A
17152 * Hole at start of the map.
17153 */
5ba3f43e
A
17154 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17155 }
17156 } else {
17157 if (map_entry->vme_next == vm_map_to_entry(map)) {
17158 /*
17159 * Hole at the end of the map.
17160 */
17161 curr_e_offset = MIN(map->max_offset, end);
17162 } else {
17163 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17164 }
17165 }
17166
17167 assert(curr_e_offset >= curr_s_offset);
17168
/* zero one record per page of the unmapped span and skip past it */
17169 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17170
17171 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17172
17173 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17174
17175 curr_s_offset = curr_e_offset;
17176
17177 info_idx += num_pages;
17178
17179 continue;
b0d623f7 17180 }
5ba3f43e 17181
b0d623f7 17182 /* compute offset from this map entry's start */
5ba3f43e
A
17183 offset_in_object = curr_s_offset - map_entry->vme_start;
17184
b0d623f7 17185 /* compute offset into this map entry's object (or submap) */
5ba3f43e 17186 offset_in_object += VME_OFFSET(map_entry);
b0d623f7
A
17187
17188 if (map_entry->is_sub_map) {
5ba3f43e
A
17189 vm_map_t sub_map = VM_MAP_NULL;
17190 vm_page_info_t submap_info = 0;
17191 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17192
17193 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17194
17195 submap_s_offset = offset_in_object;
17196 submap_e_offset = submap_s_offset + range_len;
2d21ac55 17197
3e170ce0 17198 sub_map = VME_SUBMAP(map_entry);
5ba3f43e
A
17199
/*
 * Reference the submap before dropping the parent's read lock; the
 * reference is released after the recursion, once the parent lock
 * has been retaken.
 */
17200 vm_map_reference(sub_map);
b0d623f7 17201 vm_map_unlock_read(map);
2d21ac55 17202
5ba3f43e
A
17203 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17204
17205 retval = vm_map_page_range_info_internal(sub_map,
0a7de745
A
17206 submap_s_offset,
17207 submap_e_offset,
17208 VM_PAGE_INFO_BASIC,
17209 (vm_page_info_t) submap_info,
17210 count);
5ba3f43e
A
17211
/*
 * NOTE(review): a failed recursion is only caught by this assert; the
 * loop continues regardless and "retval" may be overwritten by a
 * later submap -- confirm that failure really cannot happen here.
 */
17212 assert(retval == KERN_SUCCESS);
17213
17214 vm_map_lock_read(map);
17215 vm_map_deallocate(sub_map);
17216
17217 /* Move the "info" index by the number of pages we inspected.*/
17218 info_idx += range_len >> PAGE_SHIFT;
17219
17220 /* Move our current offset by the size of the range we inspected.*/
17221 curr_s_offset += range_len;
b0d623f7 17222
b0d623f7 17223 continue;
1c79356b 17224 }
b0d623f7 17225
5ba3f43e
A
17226 object = VME_OBJECT(map_entry);
17227 if (object == VM_OBJECT_NULL) {
5ba3f43e
A
17228 /*
17229 * We don't have an object here and, hence,
17230 * no pages to inspect. We'll fill up the
17231 * info structure appropriately.
17232 */
17233
17234 curr_e_offset = MIN(map_entry->vme_end, end);
17235
17236 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17237
17238 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17239
17240 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17241
17242 curr_s_offset = curr_e_offset;
17243
17244 info_idx += num_pages;
17245
17246 continue;
17247 }
17248
a39ff7e2
A
/*
 * Footprint mode: build the record for this one page from pmap (or
 * saved corpse) data and purgeable ownership, without looking up the
 * page in the object, then advance by a single page.
 */
17249 if (do_region_footprint) {
17250 int pmap_disp;
17251
17252 disposition = 0;
17253 pmap_disp = 0;
d9a64523
A
17254 if (map->has_corpse_footprint) {
17255 /*
17256 * Query the page info data we saved
17257 * while forking the corpse.
17258 */
17259 vm_map_corpse_footprint_query_page_info(
17260 map,
17261 curr_s_offset,
17262 &pmap_disp);
17263 } else {
17264 /*
17265 * Query the pmap.
17266 */
17267 pmap_query_page_info(map->pmap,
0a7de745
A
17268 curr_s_offset,
17269 &pmap_disp);
d9a64523
A
17270 }
17271 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17272 /* && not tagged as no-footprint? */
17273 VM_OBJECT_OWNER(object) != NULL &&
17274 VM_OBJECT_OWNER(object)->map == map) {
17275 if ((((curr_s_offset
0a7de745
A
17276 - map_entry->vme_start
17277 + VME_OFFSET(map_entry))
17278 / PAGE_SIZE) <
17279 (object->resident_page_count +
17280 vm_compressor_pager_get_count(object->pager)))) {
d9a64523
A
17281 /*
17282 * Non-volatile purgeable object owned
17283 * by this task: report the first
17284 * "#resident + #compressed" pages as
17285 * "resident" (to show that they
17286 * contribute to the footprint) but not
17287 * "dirty" (to avoid double-counting
17288 * with the fake "non-volatile" region
17289 * we'll report at the end of the
17290 * address space to account for all
17291 * (mapped or not) non-volatile memory
17292 * owned by this task.
17293 */
17294 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17295 }
17296 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
0a7de745
A
17297 object->purgable == VM_PURGABLE_EMPTY) &&
17298 /* && not tagged as no-footprint? */
17299 VM_OBJECT_OWNER(object) != NULL &&
17300 VM_OBJECT_OWNER(object)->map == map) {
d9a64523 17301 if ((((curr_s_offset
0a7de745
A
17302 - map_entry->vme_start
17303 + VME_OFFSET(map_entry))
17304 / PAGE_SIZE) <
17305 object->wired_page_count)) {
d9a64523
A
17306 /*
17307 * Volatile|empty purgeable object owned
17308 * by this task: report the first
17309 * "#wired" pages as "resident" (to
17310 * show that they contribute to the
17311 * footprint) but not "dirty" (to avoid
17312 * double-counting with the fake
17313 * "non-volatile" region we'll report
17314 * at the end of the address space to
17315 * account for all (mapped or not)
17316 * non-volatile memory owned by this
17317 * task.
17318 */
17319 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17320 }
17321 } else if (map_entry->iokit_acct &&
0a7de745
A
17322 object->internal &&
17323 object->purgable == VM_PURGABLE_DENY) {
a39ff7e2
A
17324 /*
17325 * Non-purgeable IOKit memory: phys_footprint
17326 * includes the entire virtual mapping.
17327 */
17328 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17329 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17330 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17331 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
0a7de745 17332 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
a39ff7e2 17333 /* alternate accounting */
d9a64523
A
17334#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17335 if (map->pmap->footprint_was_suspended ||
17336 /*
17337 * XXX corpse does not know if original
17338 * pmap had its footprint suspended...
17339 */
17340 map->has_corpse_footprint) {
17341 /*
17342 * The assertion below can fail if dyld
17343 * suspended footprint accounting
17344 * while doing some adjustments to
17345 * this page; the mapping would say
17346 * "use pmap accounting" but the page
17347 * would be marked "alternate
17348 * accounting".
17349 */
17350 } else
17351#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
0a7de745 17352 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
a39ff7e2
A
17353 pmap_disp = 0;
17354 } else {
17355 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17356 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17357 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17358 disposition |= VM_PAGE_QUERY_PAGE_REF;
17359 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17360 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17361 } else {
17362 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17363 }
cb323159
A
17364 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17365 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17366 }
a39ff7e2
A
17367 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17368 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17369 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17370 }
17371 }
17372 switch (flavor) {
17373 case VM_PAGE_INFO_BASIC:
17374 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17375 basic_info->disposition = disposition;
17376 basic_info->ref_count = 1;
17377 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17378 basic_info->offset = 0;
17379 basic_info->depth = 0;
17380
17381 info_idx++;
17382 break;
17383 }
17384 curr_s_offset += PAGE_SIZE;
17385 continue;
17386 }
17387
5ba3f43e
A
/*
 * Reference and share-lock the object while the map is still locked,
 * so it can be examined after the map lock is released below.
 */
17388 vm_object_reference(object);
17389 /*
17390 * Shared mode -- so we can allow other readers
17391 * to grab the lock too.
17392 */
17393 vm_object_lock_shared(object);
17394
17395 curr_e_offset = MIN(map_entry->vme_end, end);
17396
b0d623f7 17397 vm_map_unlock_read(map);
b0d623f7 17398
5ba3f43e 17399 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
b0d623f7 17400
5ba3f43e 17401 curr_object = object;
2d21ac55 17402
/* one iteration per page of this entry that falls inside the range */
5ba3f43e 17403 for (; curr_s_offset < curr_e_offset;) {
5ba3f43e
A
17404 if (object == curr_object) {
17405 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17406 } else {
17407 ref_count = curr_object->ref_count;
17408 }
17409
17410 curr_offset_in_object = offset_in_object;
17411
17412 for (;;) {
17413 m = vm_page_lookup(curr_object, curr_offset_in_object);
17414
17415 if (m != VM_PAGE_NULL) {
5ba3f43e 17416 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
39037602 17417 break;
5ba3f43e
A
17418 } else {
17419 if (curr_object->internal &&
17420 curr_object->alive &&
17421 !curr_object->terminating &&
17422 curr_object->pager_ready) {
5ba3f43e
A
17423 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17424 == VM_EXTERNAL_STATE_EXISTS) {
17425 /* the pager has that page */
17426 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17427 break;
17428 }
17429 }
0a7de745 17430
5ba3f43e
A
17431 /*
17432 * Go down the VM object shadow chain until we find the page
17433 * we're looking for.
17434 */
17435
17436 if (curr_object->shadow != VM_OBJECT_NULL) {
17437 vm_object_t shadow = VM_OBJECT_NULL;
17438
17439 curr_offset_in_object += curr_object->vo_shadow_offset;
17440 shadow = curr_object->shadow;
17441
/* lock the shadow first, then release the object we came from */
17442 vm_object_lock_shared(shadow);
17443 vm_object_unlock(curr_object);
17444
17445 curr_object = shadow;
17446 depth++;
17447 continue;
17448 } else {
5ba3f43e
A
17449 break;
17450 }
2d21ac55
A
17451 }
17452 }
b0d623f7 17453
5ba3f43e
A
17454 /* The ref_count is not strictly accurate, it measures the number */
17455 /* of entities holding a ref on the object, they may not be mapping */
17456 /* the object or may not be mapping the section holding the */
17457 /* target page but its still a ball park number and though an over- */
17458 /* count, it picks up the copy-on-write cases */
2d21ac55 17459
5ba3f43e
A
17460 /* We could also get a picture of page sharing from pmap_attributes */
17461 /* but this would under count as only faulted-in mappings would */
17462 /* show up. */
2d21ac55 17463
0a7de745 17464 if ((curr_object == object) && curr_object->shadow) {
5ba3f43e 17465 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
0a7de745 17466 }
5ba3f43e 17467
0a7de745 17468 if (!curr_object->internal) {
5ba3f43e 17469 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
0a7de745 17470 }
5ba3f43e
A
17471
17472 if (m != VM_PAGE_NULL) {
d9a64523 17473 if (m->vmp_fictitious) {
5ba3f43e 17474 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
5ba3f43e 17475 } else {
0a7de745 17476 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 17477 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
0a7de745 17478 }
5ba3f43e 17479
0a7de745 17480 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 17481 disposition |= VM_PAGE_QUERY_PAGE_REF;
0a7de745 17482 }
5ba3f43e 17483
0a7de745 17484 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
5ba3f43e 17485 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
0a7de745 17486 }
5ba3f43e 17487
0a7de745 17488 if (m->vmp_cs_validated) {
5ba3f43e 17489 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
0a7de745
A
17490 }
17491 if (m->vmp_cs_tainted) {
5ba3f43e 17492 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
0a7de745
A
17493 }
17494 if (m->vmp_cs_nx) {
5ba3f43e 17495 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
0a7de745 17496 }
cb323159
A
17497 if (m->vmp_reusable || curr_object->all_reusable) {
17498 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17499 }
5ba3f43e 17500 }
91447636 17501 }
1c79356b 17502
5ba3f43e
A
17503 switch (flavor) {
17504 case VM_PAGE_INFO_BASIC:
17505 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17506 basic_info->disposition = disposition;
17507 basic_info->ref_count = ref_count;
17508 basic_info->object_id = (vm_object_id_t) (uintptr_t)
0a7de745 17509 VM_KERNEL_ADDRPERM(curr_object);
5ba3f43e 17510 basic_info->offset =
0a7de745 17511 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
5ba3f43e
A
17512 basic_info->depth = depth;
17513
17514 info_idx++;
17515 break;
17516 }
1c79356b 17517
5ba3f43e
A
17518 disposition = 0;
17519 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
b0d623f7 17520
5ba3f43e
A
17521 /*
17522 * Move to next offset in the range and in our object.
17523 */
0a7de745 17524 curr_s_offset += PAGE_SIZE;
5ba3f43e
A
17525 offset_in_object += PAGE_SIZE;
17526 curr_offset_in_object = offset_in_object;
2d21ac55 17527
/*
 * If the walk ended on a shadow, drop its lock and go back to the
 * top-level object for the next page.
 */
5ba3f43e 17528 if (curr_object != object) {
5ba3f43e 17529 vm_object_unlock(curr_object);
1c79356b 17530
5ba3f43e 17531 curr_object = object;
1c79356b 17532
5ba3f43e
A
17533 vm_object_lock_shared(curr_object);
17534 } else {
5ba3f43e
A
17535 vm_object_lock_yield_shared(curr_object);
17536 }
17537 }
593a1d5f 17538
5ba3f43e
A
/* done with this entry: drop the object lock and the reference taken above */
17539 vm_object_unlock(curr_object);
17540 vm_object_deallocate(curr_object);
b0d623f7 17541
5ba3f43e 17542 vm_map_lock_read(map);
b0d623f7 17543 }
0c530ab8 17544
5ba3f43e 17545 vm_map_unlock_read(map);
2d21ac55 17546 return retval;
91447636
A
17547}
17548
17549/*
17550 * vm_map_msync
17551 *
17552 * Synchronises the memory range specified with its backing store
17553 * image by either flushing or cleaning the contents to the appropriate
17554 * memory manager engaging in a memory object synchronize dialog with
17555 * the manager. The client doesn't return until the manager issues
17556 * m_o_s_completed message. MIG Magically converts user task parameter
17557 * to the task's address map.
17558 *
17559 * interpretation of sync_flags
17560 * VM_SYNC_INVALIDATE - discard pages, only return precious
17561 * pages to manager.
17562 *
17563 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17564 * - discard pages, write dirty or precious
17565 * pages back to memory manager.
17566 *
17567 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17568 * - write dirty or precious pages back to
17569 * the memory manager.
17570 *
17571 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17572 * is a hole in the region, and we would
17573 * have returned KERN_SUCCESS, return
17574 * KERN_INVALID_ADDRESS instead.
17575 *
17576 * NOTE
17577 * The memory object attributes have not yet been implemented, this
17578 * function will have to deal with the invalidate attribute
17579 *
17580 * RETURNS
17581 * KERN_INVALID_TASK Bad task parameter
17582 * KERN_INVALID_ARGUMENT both sync and async were specified.
17583 * KERN_SUCCESS The usual.
17584 * KERN_INVALID_ADDRESS There was a hole in the region.
17585 */
17586
17587kern_return_t
17588vm_map_msync(
0a7de745
A
17589 vm_map_t map,
17590 vm_map_address_t address,
17591 vm_map_size_t size,
17592 vm_sync_t sync_flags)
91447636 17593{
0a7de745
A
17594 vm_map_entry_t entry;
17595 vm_map_size_t amount_left;
17596 vm_object_offset_t offset;
17597 boolean_t do_sync_req;
17598 boolean_t had_hole = FALSE;
17599 vm_map_offset_t pmap_offset;
5ba3f43e 17600
91447636 17601 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
0a7de745
A
17602 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17603 return KERN_INVALID_ARGUMENT;
17604 }
1c79356b
A
17605
17606 /*
91447636 17607 * align address and size on page boundaries
1c79356b 17608 */
39236c6e 17609 size = (vm_map_round_page(address + size,
0a7de745
A
17610 VM_MAP_PAGE_MASK(map)) -
17611 vm_map_trunc_page(address,
17612 VM_MAP_PAGE_MASK(map)));
39236c6e 17613 address = vm_map_trunc_page(address,
0a7de745 17614 VM_MAP_PAGE_MASK(map));
1c79356b 17615
0a7de745
A
17616 if (map == VM_MAP_NULL) {
17617 return KERN_INVALID_TASK;
17618 }
1c79356b 17619
0a7de745
A
17620 if (size == 0) {
17621 return KERN_SUCCESS;
17622 }
1c79356b 17623
91447636 17624 amount_left = size;
1c79356b 17625
91447636 17626 while (amount_left > 0) {
0a7de745
A
17627 vm_object_size_t flush_size;
17628 vm_object_t object;
1c79356b 17629
91447636
A
17630 vm_map_lock(map);
17631 if (!vm_map_lookup_entry(map,
0a7de745
A
17632 address,
17633 &entry)) {
17634 vm_map_size_t skip;
91447636
A
17635
17636 /*
17637 * hole in the address map.
17638 */
17639 had_hole = TRUE;
17640
39037602
A
17641 if (sync_flags & VM_SYNC_KILLPAGES) {
17642 /*
17643 * For VM_SYNC_KILLPAGES, there should be
17644 * no holes in the range, since we couldn't
17645 * prevent someone else from allocating in
17646 * that hole and we wouldn't want to "kill"
17647 * their pages.
17648 */
17649 vm_map_unlock(map);
17650 break;
17651 }
17652
91447636
A
17653 /*
17654 * Check for empty map.
17655 */
17656 if (entry == vm_map_to_entry(map) &&
17657 entry->vme_next == entry) {
17658 vm_map_unlock(map);
17659 break;
17660 }
17661 /*
17662 * Check that we don't wrap and that
17663 * we have at least one real map entry.
17664 */
17665 if ((map->hdr.nentries == 0) ||
17666 (entry->vme_next->vme_start < address)) {
17667 vm_map_unlock(map);
17668 break;
17669 }
17670 /*
17671 * Move up to the next entry if needed
17672 */
17673 skip = (entry->vme_next->vme_start - address);
0a7de745 17674 if (skip >= amount_left) {
91447636 17675 amount_left = 0;
0a7de745 17676 } else {
91447636 17677 amount_left -= skip;
0a7de745 17678 }
91447636
A
17679 address = entry->vme_next->vme_start;
17680 vm_map_unlock(map);
17681 continue;
17682 }
1c79356b 17683
91447636 17684 offset = address - entry->vme_start;
3e170ce0 17685 pmap_offset = address;
1c79356b 17686
91447636
A
17687 /*
17688 * do we have more to flush than is contained in this
17689 * entry ?
17690 */
17691 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17692 flush_size = entry->vme_end -
0a7de745 17693 (entry->vme_start + offset);
91447636
A
17694 } else {
17695 flush_size = amount_left;
17696 }
17697 amount_left -= flush_size;
17698 address += flush_size;
1c79356b 17699
91447636 17700 if (entry->is_sub_map == TRUE) {
0a7de745
A
17701 vm_map_t local_map;
17702 vm_map_offset_t local_offset;
1c79356b 17703
3e170ce0
A
17704 local_map = VME_SUBMAP(entry);
17705 local_offset = VME_OFFSET(entry);
94ff46dc 17706 vm_map_reference(local_map);
91447636
A
17707 vm_map_unlock(map);
17708 if (vm_map_msync(
2d21ac55
A
17709 local_map,
17710 local_offset,
17711 flush_size,
17712 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
17713 had_hole = TRUE;
17714 }
94ff46dc 17715 vm_map_deallocate(local_map);
91447636
A
17716 continue;
17717 }
3e170ce0 17718 object = VME_OBJECT(entry);
1c79356b 17719
91447636
A
17720 /*
17721 * We can't sync this object if the object has not been
17722 * created yet
17723 */
17724 if (object == VM_OBJECT_NULL) {
17725 vm_map_unlock(map);
17726 continue;
17727 }
3e170ce0 17728 offset += VME_OFFSET(entry);
1c79356b 17729
0a7de745 17730 vm_object_lock(object);
1c79356b 17731
91447636 17732 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
0a7de745 17733 int kill_pages = 0;
b0d623f7 17734 boolean_t reusable_pages = FALSE;
91447636
A
17735
17736 if (sync_flags & VM_SYNC_KILLPAGES) {
0a7de745
A
17737 if (((object->ref_count == 1) ||
17738 ((object->copy_strategy !=
17739 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17740 (object->copy == VM_OBJECT_NULL))) &&
39037602
A
17741 (object->shadow == VM_OBJECT_NULL)) {
17742 if (object->ref_count != 1) {
17743 vm_page_stats_reusable.free_shared++;
17744 }
0a7de745 17745 kill_pages = 1;
39037602 17746 } else {
0a7de745 17747 kill_pages = -1;
39037602 17748 }
91447636 17749 }
0a7de745
A
17750 if (kill_pages != -1) {
17751 vm_object_deactivate_pages(
3e170ce0
A
17752 object,
17753 offset,
17754 (vm_object_size_t) flush_size,
17755 kill_pages,
17756 reusable_pages,
17757 map->pmap,
17758 pmap_offset);
0a7de745 17759 }
91447636
A
17760 vm_object_unlock(object);
17761 vm_map_unlock(map);
17762 continue;
1c79356b 17763 }
91447636
A
17764 /*
17765 * We can't sync this object if there isn't a pager.
17766 * Don't bother to sync internal objects, since there can't
17767 * be any "permanent" storage for these objects anyway.
17768 */
17769 if ((object->pager == MEMORY_OBJECT_NULL) ||
17770 (object->internal) || (object->private)) {
17771 vm_object_unlock(object);
17772 vm_map_unlock(map);
17773 continue;
17774 }
17775 /*
17776 * keep reference on the object until syncing is done
17777 */
2d21ac55 17778 vm_object_reference_locked(object);
91447636 17779 vm_object_unlock(object);
1c79356b 17780
91447636 17781 vm_map_unlock(map);
1c79356b 17782
91447636 17783 do_sync_req = vm_object_sync(object,
0a7de745
A
17784 offset,
17785 flush_size,
17786 sync_flags & VM_SYNC_INVALIDATE,
17787 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17788 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17789 sync_flags & VM_SYNC_SYNCHRONOUS);
2d21ac55 17790
5ba3f43e 17791 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
0a7de745 17792 /*
5ba3f43e
A
17793 * clear out the clustering and read-ahead hints
17794 */
0a7de745 17795 vm_object_lock(object);
2d21ac55 17796
5ba3f43e
A
17797 object->pages_created = 0;
17798 object->pages_used = 0;
17799 object->sequential = 0;
17800 object->last_alloc = 0;
2d21ac55 17801
2d21ac55 17802 vm_object_unlock(object);
2d21ac55 17803 }
5ba3f43e
A
17804 vm_object_deallocate(object);
17805 } /* while */
91447636
A
17806
17807 /* for proper msync() behaviour */
0a7de745
A
17808 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17809 return KERN_INVALID_ADDRESS;
17810 }
91447636 17811
0a7de745 17812 return KERN_SUCCESS;
91447636 17813}/* vm_msync */
1c79356b 17814
1c79356b 17815/*
91447636
A
17816 * Routine: convert_port_entry_to_map
17817 * Purpose:
17818 * Convert from a port specifying an entry or a task
17819 * to a map. Doesn't consume the port ref; produces a map ref,
17820 * which may be null. Unlike convert_port_to_map, the
17821 * port may be task or a named entry backed.
17822 * Conditions:
17823 * Nothing locked.
1c79356b 17824 */
1c79356b 17825
1c79356b 17826
91447636
A
17827vm_map_t
17828convert_port_entry_to_map(
0a7de745 17829 ipc_port_t port)
91447636
A
17830{
17831 vm_map_t map;
0a7de745
A
17832 vm_named_entry_t named_entry;
17833 uint32_t try_failed_count = 0;
1c79356b 17834
0a7de745
A
17835 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17836 while (TRUE) {
91447636 17837 ip_lock(port);
0a7de745
A
17838 if (ip_active(port) && (ip_kotype(port)
17839 == IKOT_NAMED_ENTRY)) {
91447636 17840 named_entry =
ea3f0419 17841 (vm_named_entry_t) ip_get_kobject(port);
b0d623f7 17842 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
0a7de745 17843 ip_unlock(port);
2d21ac55
A
17844
17845 try_failed_count++;
0a7de745
A
17846 mutex_pause(try_failed_count);
17847 continue;
17848 }
91447636 17849 named_entry->ref_count++;
b0d623f7 17850 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
17851 ip_unlock(port);
17852 if ((named_entry->is_sub_map) &&
5ba3f43e 17853 (named_entry->protection
0a7de745 17854 & VM_PROT_WRITE)) {
91447636
A
17855 map = named_entry->backing.map;
17856 } else {
17857 mach_destroy_memory_entry(port);
17858 return VM_MAP_NULL;
17859 }
17860 vm_map_reference_swap(map);
17861 mach_destroy_memory_entry(port);
17862 break;
0a7de745 17863 } else {
91447636 17864 return VM_MAP_NULL;
0a7de745 17865 }
91447636 17866 }
0a7de745 17867 } else {
91447636 17868 map = convert_port_to_map(port);
0a7de745 17869 }
1c79356b 17870
91447636
A
17871 return map;
17872}
1c79356b 17873
91447636
A
17874/*
17875 * Routine: convert_port_entry_to_object
17876 * Purpose:
17877 * Convert from a port specifying a named entry to an
17878 * object. Doesn't consume the port ref; produces a map ref,
5ba3f43e 17879 * which may be null.
91447636
A
17880 * Conditions:
17881 * Nothing locked.
17882 */
1c79356b 17883
1c79356b 17884
91447636
A
17885vm_object_t
17886convert_port_entry_to_object(
0a7de745 17887 ipc_port_t port)
91447636 17888{
0a7de745
A
17889 vm_object_t object = VM_OBJECT_NULL;
17890 vm_named_entry_t named_entry;
17891 uint32_t try_failed_count = 0;
39236c6e
A
17892
17893 if (IP_VALID(port) &&
17894 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
0a7de745 17895try_again:
39236c6e
A
17896 ip_lock(port);
17897 if (ip_active(port) &&
17898 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
ea3f0419 17899 named_entry = (vm_named_entry_t) ip_get_kobject(port);
39236c6e 17900 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 17901 ip_unlock(port);
39236c6e
A
17902 try_failed_count++;
17903 mutex_pause(try_failed_count);
0a7de745 17904 goto try_again;
39236c6e
A
17905 }
17906 named_entry->ref_count++;
17907 lck_mtx_unlock(&(named_entry)->Lock);
17908 ip_unlock(port);
17909 if (!(named_entry->is_sub_map) &&
39236c6e
A
17910 !(named_entry->is_copy) &&
17911 (named_entry->protection & VM_PROT_WRITE)) {
17912 object = named_entry->backing.object;
17913 vm_object_reference(object);
91447636 17914 }
39236c6e 17915 mach_destroy_memory_entry(port);
1c79356b 17916 }
1c79356b 17917 }
91447636
A
17918
17919 return object;
1c79356b 17920}
9bccf70c
A
17921
17922/*
91447636
A
17923 * Export routines to other components for the things we access locally through
17924 * macros.
9bccf70c 17925 */
91447636
A
17926#undef current_map
17927vm_map_t
17928current_map(void)
9bccf70c 17929{
0a7de745 17930 return current_map_fast();
9bccf70c
A
17931}
17932
17933/*
17934 * vm_map_reference:
17935 *
17936 * Most code internal to the osfmk will go through a
17937 * macro defining this. This is always here for the
17938 * use of other kernel components.
17939 */
17940#undef vm_map_reference
17941void
17942vm_map_reference(
0a7de745 17943 vm_map_t map)
9bccf70c 17944{
0a7de745 17945 if (map == VM_MAP_NULL) {
9bccf70c 17946 return;
0a7de745 17947 }
9bccf70c 17948
b0d623f7 17949 lck_mtx_lock(&map->s_lock);
0a7de745 17950#if TASK_SWAPPER
9bccf70c 17951 assert(map->res_count > 0);
cb323159 17952 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
9bccf70c
A
17953 map->res_count++;
17954#endif
cb323159 17955 os_ref_retain_locked(&map->map_refcnt);
b0d623f7 17956 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
17957}
17958
17959/*
17960 * vm_map_deallocate:
17961 *
17962 * Removes a reference from the specified map,
17963 * destroying it if no references remain.
17964 * The map should not be locked.
17965 */
17966void
17967vm_map_deallocate(
0a7de745 17968 vm_map_t map)
9bccf70c 17969{
0a7de745 17970 unsigned int ref;
9bccf70c 17971
0a7de745 17972 if (map == VM_MAP_NULL) {
9bccf70c 17973 return;
0a7de745 17974 }
9bccf70c 17975
b0d623f7 17976 lck_mtx_lock(&map->s_lock);
cb323159 17977 ref = os_ref_release_locked(&map->map_refcnt);
9bccf70c
A
17978 if (ref > 0) {
17979 vm_map_res_deallocate(map);
b0d623f7 17980 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
17981 return;
17982 }
cb323159 17983 assert(os_ref_get_count(&map->map_refcnt) == 0);
b0d623f7 17984 lck_mtx_unlock(&map->s_lock);
9bccf70c 17985
0a7de745 17986#if TASK_SWAPPER
9bccf70c
A
17987 /*
17988 * The map residence count isn't decremented here because
5ba3f43e 17989 * the vm_map_delete below will traverse the entire map,
9bccf70c
A
17990 * deleting entries, and the residence counts on objects
17991 * and sharing maps will go away then.
17992 */
17993#endif
17994
d9a64523 17995 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
0c530ab8 17996}
91447636 17997
91447636 17998
0c530ab8
A
17999void
18000vm_map_disable_NX(vm_map_t map)
18001{
0a7de745
A
18002 if (map == NULL) {
18003 return;
18004 }
18005 if (map->pmap == NULL) {
18006 return;
18007 }
0c530ab8 18008
0a7de745 18009 pmap_disable_NX(map->pmap);
0c530ab8
A
18010}
18011
6d2010ae
A
18012void
18013vm_map_disallow_data_exec(vm_map_t map)
18014{
0a7de745
A
18015 if (map == NULL) {
18016 return;
18017 }
6d2010ae 18018
0a7de745 18019 map->map_disallow_data_exec = TRUE;
6d2010ae
A
18020}
18021
0c530ab8
A
18022/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
18023 * more descriptive.
18024 */
18025void
18026vm_map_set_32bit(vm_map_t map)
18027{
5ba3f43e
A
18028#if defined(__arm__) || defined(__arm64__)
18029 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
18030#else
0c530ab8 18031 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e 18032#endif
0c530ab8
A
18033}
18034
18035
18036void
18037vm_map_set_64bit(vm_map_t map)
18038{
5ba3f43e
A
18039#if defined(__arm__) || defined(__arm64__)
18040 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18041#else
0c530ab8 18042 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 18043#endif
0c530ab8
A
18044}
18045
813fb2f6 18046/*
d9a64523 18047 * Expand the maximum size of an existing map to the maximum supported.
813fb2f6
A
18048 */
18049void
18050vm_map_set_jumbo(vm_map_t map)
18051{
5ba3f43e 18052#if defined (__arm64__)
d9a64523
A
18053 vm_map_set_max_addr(map, ~0);
18054#else /* arm64 */
18055 (void) map;
18056#endif
18057}
18058
cb323159
A
18059/*
18060 * This map has a JIT entitlement
18061 */
18062void
18063vm_map_set_jit_entitled(vm_map_t map)
18064{
18065#if defined (__arm64__)
18066 pmap_set_jit_entitled(map->pmap);
18067#else /* arm64 */
18068 (void) map;
18069#endif
18070}
18071
d9a64523
A
18072/*
18073 * Expand the maximum size of an existing map.
18074 */
18075void
18076vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18077{
18078#if defined(__arm64__)
18079 vm_map_offset_t max_supported_offset = 0;
5ba3f43e 18080 vm_map_offset_t old_max_offset = map->max_offset;
d9a64523
A
18081 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18082
18083 new_max_offset = trunc_page(new_max_offset);
18084
18085 /* The address space cannot be shrunk using this routine. */
18086 if (old_max_offset >= new_max_offset) {
18087 return;
18088 }
18089
18090 if (max_supported_offset < new_max_offset) {
18091 new_max_offset = max_supported_offset;
18092 }
18093
18094 map->max_offset = new_max_offset;
18095
18096 if (map->holes_list->prev->vme_end == old_max_offset) {
5ba3f43e
A
18097 /*
18098 * There is already a hole at the end of the map; simply make it bigger.
18099 */
18100 map->holes_list->prev->vme_end = map->max_offset;
18101 } else {
18102 /*
18103 * There is no hole at the end, so we need to create a new hole
18104 * for the new empty space we're creating.
18105 */
18106 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18107 new_hole->start = old_max_offset;
18108 new_hole->end = map->max_offset;
18109 new_hole->prev = map->holes_list->prev;
18110 new_hole->next = (struct vm_map_entry *)map->holes_list;
18111 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18112 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18113 }
d9a64523
A
18114#else
18115 (void)map;
18116 (void)new_max_offset;
5ba3f43e 18117#endif
813fb2f6
A
18118}
18119
0c530ab8 18120vm_map_offset_t
3e170ce0 18121vm_compute_max_offset(boolean_t is64)
0c530ab8 18122{
5ba3f43e 18123#if defined(__arm__) || defined(__arm64__)
0a7de745 18124 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
5ba3f43e 18125#else
0a7de745 18126 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e
A
18127#endif
18128}
18129
18130void
18131vm_map_get_max_aslr_slide_section(
0a7de745
A
18132 vm_map_t map __unused,
18133 int64_t *max_sections,
18134 int64_t *section_size)
5ba3f43e
A
18135{
18136#if defined(__arm64__)
18137 *max_sections = 3;
18138 *section_size = ARM_TT_TWIG_SIZE;
18139#else
18140 *max_sections = 1;
18141 *section_size = 0;
18142#endif
0c530ab8
A
18143}
18144
39236c6e 18145uint64_t
5ba3f43e 18146vm_map_get_max_aslr_slide_pages(vm_map_t map)
39236c6e 18147{
5ba3f43e
A
18148#if defined(__arm64__)
18149 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18150 * limited embedded address space; this is also meant to minimize pmap
18151 * memory usage on 16KB page systems.
18152 */
0a7de745 18153 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 18154#else
0a7de745 18155 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e
A
18156#endif
18157}
18158
18159uint64_t
18160vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18161{
18162#if defined(__arm64__)
18163 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18164 * of independent entropy on 16KB page systems.
18165 */
0a7de745 18166 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 18167#else
0a7de745 18168 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e 18169#endif
39236c6e
A
18170}
18171
0a7de745 18172#ifndef __arm__
0c530ab8 18173boolean_t
2d21ac55 18174vm_map_is_64bit(
0a7de745 18175 vm_map_t map)
2d21ac55
A
18176{
18177 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18178}
5ba3f43e 18179#endif
2d21ac55
A
18180
18181boolean_t
316670eb 18182vm_map_has_hard_pagezero(
0a7de745
A
18183 vm_map_t map,
18184 vm_map_offset_t pagezero_size)
0c530ab8
A
18185{
18186 /*
18187 * XXX FBDP
18188 * We should lock the VM map (for read) here but we can get away
18189 * with it for now because there can't really be any race condition:
18190 * the VM map's min_offset is changed only when the VM map is created
18191 * and when the zero page is established (when the binary gets loaded),
18192 * and this routine gets called only when the task terminates and the
18193 * VM map is being torn down, and when a new map is created via
18194 * load_machfile()/execve().
18195 */
0a7de745 18196 return map->min_offset >= pagezero_size;
0c530ab8
A
18197}
18198
316670eb
A
18199/*
18200 * Raise a VM map's maximun offset.
18201 */
18202kern_return_t
18203vm_map_raise_max_offset(
0a7de745
A
18204 vm_map_t map,
18205 vm_map_offset_t new_max_offset)
316670eb 18206{
0a7de745 18207 kern_return_t ret;
316670eb
A
18208
18209 vm_map_lock(map);
18210 ret = KERN_INVALID_ADDRESS;
18211
18212 if (new_max_offset >= map->max_offset) {
5ba3f43e 18213 if (!vm_map_is_64bit(map)) {
316670eb
A
18214 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18215 map->max_offset = new_max_offset;
18216 ret = KERN_SUCCESS;
18217 }
18218 } else {
18219 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18220 map->max_offset = new_max_offset;
18221 ret = KERN_SUCCESS;
18222 }
18223 }
18224 }
18225
18226 vm_map_unlock(map);
18227 return ret;
18228}
18229
18230
0c530ab8
A
18231/*
18232 * Raise a VM map's minimum offset.
18233 * To strictly enforce "page zero" reservation.
18234 */
18235kern_return_t
18236vm_map_raise_min_offset(
0a7de745
A
18237 vm_map_t map,
18238 vm_map_offset_t new_min_offset)
0c530ab8 18239{
0a7de745 18240 vm_map_entry_t first_entry;
0c530ab8 18241
39236c6e 18242 new_min_offset = vm_map_round_page(new_min_offset,
0a7de745 18243 VM_MAP_PAGE_MASK(map));
0c530ab8
A
18244
18245 vm_map_lock(map);
18246
18247 if (new_min_offset < map->min_offset) {
18248 /*
18249 * Can't move min_offset backwards, as that would expose
18250 * a part of the address space that was previously, and for
18251 * possibly good reasons, inaccessible.
18252 */
18253 vm_map_unlock(map);
18254 return KERN_INVALID_ADDRESS;
18255 }
3e170ce0
A
18256 if (new_min_offset >= map->max_offset) {
18257 /* can't go beyond the end of the address space */
18258 vm_map_unlock(map);
18259 return KERN_INVALID_ADDRESS;
18260 }
0c530ab8
A
18261
18262 first_entry = vm_map_first_entry(map);
18263 if (first_entry != vm_map_to_entry(map) &&
18264 first_entry->vme_start < new_min_offset) {
18265 /*
18266 * Some memory was already allocated below the new
18267 * minimun offset. It's too late to change it now...
18268 */
18269 vm_map_unlock(map);
18270 return KERN_NO_SPACE;
18271 }
18272
18273 map->min_offset = new_min_offset;
18274
3e170ce0
A
18275 assert(map->holes_list);
18276 map->holes_list->start = new_min_offset;
18277 assert(new_min_offset < map->holes_list->end);
18278
0c530ab8
A
18279 vm_map_unlock(map);
18280
18281 return KERN_SUCCESS;
18282}
2d21ac55
A
18283
18284/*
18285 * Set the limit on the maximum amount of user wired memory allowed for this map.
18286 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18287 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
18288 * don't have to reach over to the BSD data structures.
18289 */
18290
18291void
0a7de745
A
18292vm_map_set_user_wire_limit(vm_map_t map,
18293 vm_size_t limit)
2d21ac55
A
18294{
18295 map->user_wire_limit = limit;
18296}
593a1d5f 18297
b0d623f7 18298
0a7de745
A
18299void
18300vm_map_switch_protect(vm_map_t map,
18301 boolean_t val)
593a1d5f
A
18302{
18303 vm_map_lock(map);
0a7de745 18304 map->switch_protect = val;
593a1d5f 18305 vm_map_unlock(map);
b0d623f7 18306}
b7266188 18307
39236c6e
A
18308/*
18309 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18310 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18311 * bump both counters.
18312 */
18313void
18314vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18315{
18316 pmap_t pmap = vm_map_pmap(map);
18317
fe8ab488 18318 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 18319 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
18320}
18321
18322void
18323vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18324{
18325 pmap_t pmap = vm_map_pmap(map);
18326
fe8ab488 18327 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 18328 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
18329}
18330
b7266188
A
18331/* Add (generate) code signature for memory range */
18332#if CONFIG_DYNAMIC_CODE_SIGNING
0a7de745
A
18333kern_return_t
18334vm_map_sign(vm_map_t map,
18335 vm_map_offset_t start,
18336 vm_map_offset_t end)
b7266188
A
18337{
18338 vm_map_entry_t entry;
18339 vm_page_t m;
18340 vm_object_t object;
5ba3f43e 18341
b7266188
A
18342 /*
18343 * Vet all the input parameters and current type and state of the
18344 * underlaying object. Return with an error if anything is amiss.
18345 */
0a7de745
A
18346 if (map == VM_MAP_NULL) {
18347 return KERN_INVALID_ARGUMENT;
18348 }
5ba3f43e 18349
b7266188 18350 vm_map_lock_read(map);
5ba3f43e 18351
b7266188
A
18352 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18353 /*
18354 * Must pass a valid non-submap address.
18355 */
18356 vm_map_unlock_read(map);
0a7de745 18357 return KERN_INVALID_ADDRESS;
b7266188 18358 }
5ba3f43e 18359
0a7de745 18360 if ((entry->vme_start > start) || (entry->vme_end < end)) {
b7266188
A
18361 /*
18362 * Map entry doesn't cover the requested range. Not handling
18363 * this situation currently.
18364 */
18365 vm_map_unlock_read(map);
0a7de745 18366 return KERN_INVALID_ARGUMENT;
b7266188 18367 }
5ba3f43e 18368
3e170ce0 18369 object = VME_OBJECT(entry);
b7266188
A
18370 if (object == VM_OBJECT_NULL) {
18371 /*
18372 * Object must already be present or we can't sign.
18373 */
18374 vm_map_unlock_read(map);
18375 return KERN_INVALID_ARGUMENT;
18376 }
5ba3f43e 18377
b7266188
A
18378 vm_object_lock(object);
18379 vm_map_unlock_read(map);
5ba3f43e 18380
0a7de745 18381 while (start < end) {
b7266188 18382 uint32_t refmod;
5ba3f43e 18383
3e170ce0 18384 m = vm_page_lookup(object,
0a7de745
A
18385 start - entry->vme_start + VME_OFFSET(entry));
18386 if (m == VM_PAGE_NULL) {
5ba3f43e 18387 /* shoud we try to fault a page here? we can probably
b7266188
A
18388 * demand it exists and is locked for this request */
18389 vm_object_unlock(object);
18390 return KERN_FAILURE;
18391 }
18392 /* deal with special page status */
d9a64523
A
18393 if (m->vmp_busy ||
18394 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
b7266188
A
18395 vm_object_unlock(object);
18396 return KERN_FAILURE;
18397 }
5ba3f43e 18398
b7266188 18399 /* Page is OK... now "validate" it */
5ba3f43e 18400 /* This is the place where we'll call out to create a code
b7266188 18401 * directory, later */
d9a64523 18402 m->vmp_cs_validated = TRUE;
b7266188
A
18403
18404 /* The page is now "clean" for codesigning purposes. That means
5ba3f43e 18405 * we don't consider it as modified (wpmapped) anymore. But
b7266188
A
18406 * we'll disconnect the page so we note any future modification
18407 * attempts. */
d9a64523 18408 m->vmp_wpmapped = FALSE;
39037602 18409 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5ba3f43e
A
18410
18411 /* Pull the dirty status from the pmap, since we cleared the
b7266188 18412 * wpmapped bit */
d9a64523 18413 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
316670eb 18414 SET_PAGE_DIRTY(m, FALSE);
b7266188 18415 }
5ba3f43e 18416
b7266188
A
18417 /* On to the next page */
18418 start += PAGE_SIZE;
18419 }
18420 vm_object_unlock(object);
5ba3f43e 18421
b7266188
A
18422 return KERN_SUCCESS;
18423}
18424#endif
6d2010ae 18425
0a7de745
A
18426kern_return_t
18427vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
5ba3f43e 18428{
0a7de745 18429 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
fe8ab488 18430 vm_map_entry_t next_entry;
0a7de745
A
18431 kern_return_t kr = KERN_SUCCESS;
18432 vm_map_t zap_map;
fe8ab488
A
18433
18434 vm_map_lock(map);
18435
18436 /*
18437 * We use a "zap_map" to avoid having to unlock
18438 * the "map" in vm_map_delete().
18439 */
18440 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
18441 map->min_offset,
18442 map->max_offset,
18443 map->hdr.entries_pageable);
fe8ab488
A
18444
18445 if (zap_map == VM_MAP_NULL) {
18446 return KERN_RESOURCE_SHORTAGE;
18447 }
18448
5ba3f43e 18449 vm_map_set_page_shift(zap_map,
0a7de745 18450 VM_MAP_PAGE_SHIFT(map));
3e170ce0 18451 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
18452
18453 for (entry = vm_map_first_entry(map);
0a7de745
A
18454 entry != vm_map_to_entry(map);
18455 entry = next_entry) {
fe8ab488 18456 next_entry = entry->vme_next;
5ba3f43e 18457
3e170ce0
A
18458 if (VME_OBJECT(entry) &&
18459 !entry->is_sub_map &&
18460 (VME_OBJECT(entry)->internal == TRUE) &&
18461 (VME_OBJECT(entry)->ref_count == 1)) {
3e170ce0
A
18462 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18463 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488 18464
5ba3f43e 18465 (void)vm_map_delete(map,
0a7de745
A
18466 entry->vme_start,
18467 entry->vme_end,
18468 VM_MAP_REMOVE_SAVE_ENTRIES,
18469 zap_map);
fe8ab488
A
18470 }
18471 }
18472
18473 vm_map_unlock(map);
18474
0a7de745 18475 /*
fe8ab488 18476 * Get rid of the "zap_maps" and all the map entries that
0a7de745
A
18477 * they may still contain.
18478 */
18479 if (zap_map != VM_MAP_NULL) {
18480 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18481 zap_map = VM_MAP_NULL;
18482 }
fe8ab488
A
18483
18484 return kr;
18485}
18486
6d2010ae 18487
39037602
A
18488#if DEVELOPMENT || DEBUG
18489
18490int
18491vm_map_disconnect_page_mappings(
18492 vm_map_t map,
18493 boolean_t do_unnest)
6d2010ae
A
18494{
18495 vm_map_entry_t entry;
0a7de745 18496 int page_count = 0;
39037602
A
18497
18498 if (do_unnest == TRUE) {
18499#ifndef NO_NESTED_PMAP
18500 vm_map_lock(map);
18501
18502 for (entry = vm_map_first_entry(map);
0a7de745
A
18503 entry != vm_map_to_entry(map);
18504 entry = entry->vme_next) {
39037602
A
18505 if (entry->is_sub_map && entry->use_pmap) {
18506 /*
18507 * Make sure the range between the start of this entry and
18508 * the end of this entry is no longer nested, so that
18509 * we will only remove mappings from the pmap in use by this
18510 * this task
18511 */
18512 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18513 }
18514 }
18515 vm_map_unlock(map);
18516#endif
18517 }
6d2010ae 18518 vm_map_lock_read(map);
39037602
A
18519
18520 page_count = map->pmap->stats.resident_count;
18521
6d2010ae 18522 for (entry = vm_map_first_entry(map);
0a7de745
A
18523 entry != vm_map_to_entry(map);
18524 entry = entry->vme_next) {
39037602 18525 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
0a7de745 18526 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
18527 continue;
18528 }
0a7de745 18529 if (entry->is_sub_map) {
39037602 18530 assert(!entry->use_pmap);
0a7de745 18531 }
6d2010ae 18532
39037602 18533 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 18534 }
6d2010ae
A
18535 vm_map_unlock_read(map);
18536
39037602 18537 return page_count;
6d2010ae
A
18538}
18539
39037602
A
18540#endif
18541
18542
18543#if CONFIG_FREEZE
18544
18545
d9a64523 18546int c_freezer_swapout_page_count;
3e170ce0
A
18547int c_freezer_compression_count = 0;
18548AbsoluteTime c_freezer_last_yield_ts = 0;
18549
d9a64523
A
18550extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18551extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18552
18553kern_return_t
18554vm_map_freeze(
cb323159 18555 task_t task,
0a7de745
A
18556 unsigned int *purgeable_count,
18557 unsigned int *wired_count,
18558 unsigned int *clean_count,
18559 unsigned int *dirty_count,
cb323159 18560 unsigned int dirty_budget,
0a7de745
A
18561 unsigned int *shared_count,
18562 int *freezer_error_code,
18563 boolean_t eval_only)
5ba3f43e 18564{
0a7de745
A
18565 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18566 kern_return_t kr = KERN_SUCCESS;
18567 boolean_t evaluation_phase = TRUE;
18568 vm_object_t cur_shared_object = NULL;
18569 int cur_shared_obj_ref_cnt = 0;
18570 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
6d2010ae 18571
d9a64523 18572 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
6d2010ae 18573
6d2010ae
A
18574 /*
18575 * We need the exclusive lock here so that we can
18576 * block any page faults or lookups while we are
18577 * in the middle of freezing this vm map.
18578 */
cb323159
A
18579 vm_map_t map = task->map;
18580
6d2010ae
A
18581 vm_map_lock(map);
18582
39037602
A
18583 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18584
18585 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
18586 if (vm_compressor_low_on_space()) {
18587 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18588 }
18589
18590 if (vm_swap_low_on_space()) {
18591 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18592 }
18593
39037602 18594 kr = KERN_NO_SPACE;
5ba3f43e 18595 goto done;
6d2010ae 18596 }
39037602 18597
d9a64523
A
18598 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18599 /*
18600 * In-memory compressor backing the freezer. No disk.
18601 * So no need to do the evaluation phase.
18602 */
18603 evaluation_phase = FALSE;
18604
18605 if (eval_only == TRUE) {
18606 /*
18607 * We don't support 'eval_only' mode
18608 * in this non-swap config.
18609 */
18610 *freezer_error_code = FREEZER_ERROR_GENERIC;
18611 kr = KERN_INVALID_ARGUMENT;
18612 goto done;
18613 }
18614
18615 c_freezer_compression_count = 0;
18616 clock_get_uptime(&c_freezer_last_yield_ts);
18617 }
18618again:
3e170ce0 18619
6d2010ae 18620 for (entry2 = vm_map_first_entry(map);
0a7de745
A
18621 entry2 != vm_map_to_entry(map);
18622 entry2 = entry2->vme_next) {
18623 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 18624
39037602 18625 if (src_object &&
3e170ce0 18626 !entry2->is_sub_map &&
39037602 18627 !src_object->phys_contiguous) {
39236c6e 18628 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 18629
39037602 18630 if (src_object->internal == TRUE) {
39037602 18631 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
cb323159
A
18632 /*
18633 * We skip purgeable objects during evaluation phase only.
18634 * If we decide to freeze this process, we'll explicitly
18635 * purge these objects before we go around again with
18636 * 'evaluation_phase' set to FALSE.
18637 */
18638
18639 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18640 /*
18641 * We want to purge objects that may not belong to this task but are mapped
18642 * in this task alone. Since we already purged this task's purgeable memory
18643 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18644 * on this task's purgeable objects. Hence the check for only volatile objects.
18645 */
18646 if (evaluation_phase == FALSE &&
18647 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18648 (src_object->ref_count == 1)) {
18649 vm_object_lock(src_object);
18650 vm_object_purge(src_object, 0);
18651 vm_object_unlock(src_object);
18652 }
18653 continue;
18654 }
18655
39037602
A
18656 /*
18657 * Pages belonging to this object could be swapped to disk.
18658 * Make sure it's not a shared object because we could end
18659 * up just bringing it back in again.
d9a64523
A
18660 *
18661 * We try to optimize somewhat by checking for objects that are mapped
18662 * more than once within our own map. But we don't do full searches,
18663 * we just look at the entries following our current entry.
39037602 18664 */
cb323159 18665
39037602 18666 if (src_object->ref_count > 1) {
d9a64523
A
18667 if (src_object != cur_shared_object) {
18668 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18669 dirty_shared_count += obj_pages_snapshot;
18670
18671 cur_shared_object = src_object;
18672 cur_shared_obj_ref_cnt = 1;
18673 continue;
18674 } else {
18675 cur_shared_obj_ref_cnt++;
18676 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18677 /*
18678 * Fall through to below and treat this object as private.
18679 * So deduct its pages from our shared total and add it to the
18680 * private total.
18681 */
18682
18683 dirty_shared_count -= obj_pages_snapshot;
18684 dirty_private_count += obj_pages_snapshot;
18685 } else {
18686 continue;
18687 }
18688 }
18689 }
18690
18691
18692 if (src_object->ref_count == 1) {
18693 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18694 }
18695
18696 if (evaluation_phase == TRUE) {
39037602 18697 continue;
3e170ce0 18698 }
3e170ce0 18699 }
d9a64523 18700
cb323159 18701 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
d9a64523
A
18702 *wired_count += src_object->wired_page_count;
18703
3e170ce0 18704 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
18705 if (vm_compressor_low_on_space()) {
18706 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18707 }
18708
18709 if (vm_swap_low_on_space()) {
18710 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18711 }
18712
3e170ce0 18713 kr = KERN_NO_SPACE;
5ba3f43e 18714 break;
39236c6e 18715 }
cb323159
A
18716 if (paged_out_count >= dirty_budget) {
18717 break;
18718 }
18719 dirty_budget -= paged_out_count;
6d2010ae
A
18720 }
18721 }
18722 }
d9a64523 18723
ea3f0419 18724 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
d9a64523 18725 if (evaluation_phase) {
d9a64523
A
18726 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18727
18728 if (dirty_shared_count > shared_pages_threshold) {
18729 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18730 kr = KERN_FAILURE;
18731 goto done;
18732 }
18733
18734 if (dirty_shared_count &&
0a7de745 18735 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
d9a64523
A
18736 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18737 kr = KERN_FAILURE;
18738 goto done;
18739 }
18740
18741 evaluation_phase = FALSE;
18742 dirty_shared_count = dirty_private_count = 0;
0a7de745 18743
d9a64523
A
18744 c_freezer_compression_count = 0;
18745 clock_get_uptime(&c_freezer_last_yield_ts);
18746
18747 if (eval_only) {
18748 kr = KERN_SUCCESS;
18749 goto done;
18750 }
18751
cb323159
A
18752 vm_purgeable_purge_task_owned(task);
18753
d9a64523 18754 goto again;
d9a64523 18755 } else {
d9a64523 18756 kr = KERN_SUCCESS;
d9a64523
A
18757 }
18758
6d2010ae
A
18759done:
18760 vm_map_unlock(map);
5ba3f43e 18761
d9a64523
A
18762 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18763 vm_object_compressed_freezer_done();
39037602 18764
d9a64523
A
18765 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18766 /*
18767 * reset the counter tracking the # of swapped compressed pages
18768 * because we are now done with this freeze session and task.
18769 */
18770
18771 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18772 c_freezer_swapout_page_count = 0;
18773 }
3e170ce0 18774 }
6d2010ae
A
18775 return kr;
18776}
18777
6d2010ae 18778#endif
e2d2fc5c 18779
e2d2fc5c
A
18780/*
18781 * vm_map_entry_should_cow_for_true_share:
18782 *
18783 * Determines if the map entry should be clipped and setup for copy-on-write
18784 * to avoid applying "true_share" to a large VM object when only a subset is
18785 * targeted.
18786 *
18787 * For now, we target only the map entries created for the Objective C
18788 * Garbage Collector, which initially have the following properties:
18789 * - alias == VM_MEMORY_MALLOC
0a7de745
A
18790 * - wired_count == 0
18791 * - !needs_copy
e2d2fc5c 18792 * and a VM object with:
0a7de745
A
18793 * - internal
18794 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18795 * - !true_share
18796 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
18797 *
18798 * Only non-kernel map entries.
e2d2fc5c
A
18799 */
18800boolean_t
18801vm_map_entry_should_cow_for_true_share(
0a7de745 18802 vm_map_entry_t entry)
e2d2fc5c 18803{
0a7de745 18804 vm_object_t object;
e2d2fc5c
A
18805
18806 if (entry->is_sub_map) {
18807 /* entry does not point at a VM object */
18808 return FALSE;
18809 }
18810
18811 if (entry->needs_copy) {
18812 /* already set for copy_on_write: done! */
18813 return FALSE;
18814 }
18815
3e170ce0
A
18816 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18817 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 18818 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
18819 return FALSE;
18820 }
18821
18822 if (entry->wired_count) {
18823 /* wired: can't change the map entry... */
fe8ab488 18824 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
18825 return FALSE;
18826 }
18827
3e170ce0 18828 object = VME_OBJECT(entry);
e2d2fc5c
A
18829
18830 if (object == VM_OBJECT_NULL) {
18831 /* no object yet... */
18832 return FALSE;
18833 }
18834
18835 if (!object->internal) {
18836 /* not an internal object */
18837 return FALSE;
18838 }
18839
18840 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18841 /* not the default copy strategy */
18842 return FALSE;
18843 }
18844
18845 if (object->true_share) {
18846 /* already true_share: too late to avoid it */
18847 return FALSE;
18848 }
18849
3e170ce0 18850 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
18851 object->vo_size != ANON_CHUNK_SIZE) {
18852 /* ... not an object created for the ObjC Garbage Collector */
18853 return FALSE;
18854 }
18855
3e170ce0 18856 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
18857 object->vo_size != 2048 * 4096) {
18858 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
18859 return FALSE;
18860 }
18861
18862 /*
18863 * All the criteria match: we have a large object being targeted for "true_share".
18864 * To limit the adverse side-effects linked with "true_share", tell the caller to
18865 * try and avoid setting up the entire object for "true_share" by clipping the
18866 * targeted range and setting it up for copy-on-write.
18867 */
18868 return TRUE;
18869}
39236c6e 18870
5ba3f43e 18871vm_map_offset_t
39236c6e 18872vm_map_round_page_mask(
0a7de745
A
18873 vm_map_offset_t offset,
18874 vm_map_offset_t mask)
39236c6e
A
18875{
18876 return VM_MAP_ROUND_PAGE(offset, mask);
18877}
18878
5ba3f43e 18879vm_map_offset_t
39236c6e 18880vm_map_trunc_page_mask(
0a7de745
A
18881 vm_map_offset_t offset,
18882 vm_map_offset_t mask)
39236c6e
A
18883{
18884 return VM_MAP_TRUNC_PAGE(offset, mask);
18885}
18886
3e170ce0
A
18887boolean_t
18888vm_map_page_aligned(
0a7de745
A
18889 vm_map_offset_t offset,
18890 vm_map_offset_t mask)
3e170ce0
A
18891{
18892 return ((offset) & mask) == 0;
18893}
18894
39236c6e
A
18895int
18896vm_map_page_shift(
18897 vm_map_t map)
18898{
18899 return VM_MAP_PAGE_SHIFT(map);
18900}
18901
18902int
18903vm_map_page_size(
18904 vm_map_t map)
18905{
18906 return VM_MAP_PAGE_SIZE(map);
18907}
18908
3e170ce0 18909vm_map_offset_t
39236c6e
A
18910vm_map_page_mask(
18911 vm_map_t map)
18912{
18913 return VM_MAP_PAGE_MASK(map);
18914}
18915
18916kern_return_t
18917vm_map_set_page_shift(
0a7de745
A
18918 vm_map_t map,
18919 int pageshift)
39236c6e
A
18920{
18921 if (map->hdr.nentries != 0) {
18922 /* too late to change page size */
18923 return KERN_FAILURE;
18924 }
18925
18926 map->hdr.page_shift = pageshift;
18927
18928 return KERN_SUCCESS;
18929}
18930
18931kern_return_t
18932vm_map_query_volatile(
0a7de745
A
18933 vm_map_t map,
18934 mach_vm_size_t *volatile_virtual_size_p,
18935 mach_vm_size_t *volatile_resident_size_p,
18936 mach_vm_size_t *volatile_compressed_size_p,
18937 mach_vm_size_t *volatile_pmap_size_p,
18938 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e 18939{
0a7de745
A
18940 mach_vm_size_t volatile_virtual_size;
18941 mach_vm_size_t volatile_resident_count;
18942 mach_vm_size_t volatile_compressed_count;
18943 mach_vm_size_t volatile_pmap_count;
18944 mach_vm_size_t volatile_compressed_pmap_count;
18945 mach_vm_size_t resident_count;
18946 vm_map_entry_t entry;
18947 vm_object_t object;
39236c6e
A
18948
18949 /* map should be locked by caller */
18950
18951 volatile_virtual_size = 0;
18952 volatile_resident_count = 0;
3e170ce0 18953 volatile_compressed_count = 0;
39236c6e 18954 volatile_pmap_count = 0;
3e170ce0 18955 volatile_compressed_pmap_count = 0;
39236c6e
A
18956
18957 for (entry = vm_map_first_entry(map);
0a7de745
A
18958 entry != vm_map_to_entry(map);
18959 entry = entry->vme_next) {
18960 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
4bd07ac2 18961
39236c6e
A
18962 if (entry->is_sub_map) {
18963 continue;
18964 }
0a7de745 18965 if (!(entry->protection & VM_PROT_WRITE)) {
39236c6e
A
18966 continue;
18967 }
3e170ce0 18968 object = VME_OBJECT(entry);
39236c6e
A
18969 if (object == VM_OBJECT_NULL) {
18970 continue;
18971 }
3e170ce0
A
18972 if (object->purgable != VM_PURGABLE_VOLATILE &&
18973 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
18974 continue;
18975 }
3e170ce0 18976 if (VME_OFFSET(entry)) {
39236c6e
A
18977 /*
18978 * If the map entry has been split and the object now
18979 * appears several times in the VM map, we don't want
18980 * to count the object's resident_page_count more than
18981 * once. We count it only for the first one, starting
18982 * at offset 0 and ignore the other VM map entries.
18983 */
18984 continue;
18985 }
18986 resident_count = object->resident_page_count;
3e170ce0 18987 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
18988 resident_count = 0;
18989 } else {
3e170ce0 18990 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
18991 }
18992
18993 volatile_virtual_size += entry->vme_end - entry->vme_start;
18994 volatile_resident_count += resident_count;
3e170ce0
A
18995 if (object->pager) {
18996 volatile_compressed_count +=
0a7de745 18997 vm_compressor_pager_get_count(object->pager);
3e170ce0 18998 }
4bd07ac2
A
18999 pmap_compressed_bytes = 0;
19000 pmap_resident_bytes =
0a7de745
A
19001 pmap_query_resident(map->pmap,
19002 entry->vme_start,
19003 entry->vme_end,
19004 &pmap_compressed_bytes);
4bd07ac2
A
19005 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
19006 volatile_compressed_pmap_count += (pmap_compressed_bytes
0a7de745 19007 / PAGE_SIZE);
39236c6e
A
19008 }
19009
19010 /* map is still locked on return */
19011
19012 *volatile_virtual_size_p = volatile_virtual_size;
19013 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 19014 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 19015 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 19016 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
19017
19018 return KERN_SUCCESS;
19019}
fe8ab488 19020
3e170ce0
A
19021void
19022vm_map_sizes(vm_map_t map,
0a7de745
A
19023 vm_map_size_t * psize,
19024 vm_map_size_t * pfree,
19025 vm_map_size_t * plargest_free)
3e170ce0 19026{
0a7de745
A
19027 vm_map_entry_t entry;
19028 vm_map_offset_t prev;
19029 vm_map_size_t free, total_free, largest_free;
19030 boolean_t end;
19031
19032 if (!map) {
19033 *psize = *pfree = *plargest_free = 0;
19034 return;
19035 }
19036 total_free = largest_free = 0;
19037
19038 vm_map_lock_read(map);
19039 if (psize) {
19040 *psize = map->max_offset - map->min_offset;
19041 }
19042
19043 prev = map->min_offset;
19044 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19045 end = (entry == vm_map_to_entry(map));
19046
19047 if (end) {
19048 free = entry->vme_end - prev;
19049 } else {
19050 free = entry->vme_start - prev;
19051 }
19052
19053 total_free += free;
19054 if (free > largest_free) {
19055 largest_free = free;
19056 }
19057
19058 if (end) {
19059 break;
19060 }
19061 prev = entry->vme_end;
19062 }
19063 vm_map_unlock_read(map);
19064 if (pfree) {
19065 *pfree = total_free;
19066 }
19067 if (plargest_free) {
19068 *plargest_free = largest_free;
19069 }
3e170ce0
A
19070}
19071
fe8ab488
A
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 * vm_map_shadow_max:
 *	Returns the length of the longest shadow chain hanging off any
 *	VM object mapped by "map" (0 for a NULL map or when no object has
 *	a shadow).  Submap entries and entries without an object are
 *	skipped.
 *
 *	Holds the map's read lock for the scan and walks each chain
 *	hand-over-hand, taking the next object's shared lock before
 *	dropping the current one.
 */
int
vm_map_shadow_max(
	vm_map_t map)
{
	vm_map_entry_t  entry;
	vm_object_t     object, next_object;
	int             depth;
	int             deepest = 0;

	if (map == NULL) {
		return 0;
	}

	vm_map_lock_read(map);

	entry = vm_map_first_entry(map);
	while (entry != vm_map_to_entry(map)) {
		if (!entry->is_sub_map &&
		    (object = VME_OBJECT(entry)) != NULL) {
			vm_object_lock_shared(object);
			depth = 0;
			while ((next_object = object->shadow) != NULL) {
				vm_object_lock_shared(next_object);
				vm_object_unlock(object);
				object = next_object;
				depth++;
			}
			vm_object_unlock(object);

			if (depth > deepest) {
				deepest = depth;
			}
		}
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);

	return deepest;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602 19119
0a7de745
A
19120void
19121vm_commit_pagezero_status(vm_map_t lmap)
19122{
39037602
A
19123 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19124}
5ba3f43e 19125
cb323159 19126#if !CONFIG_EMBEDDED
5ba3f43e
A
19127void
19128vm_map_set_high_start(
0a7de745
A
19129 vm_map_t map,
19130 vm_map_offset_t high_start)
5ba3f43e
A
19131{
19132 map->vmmap_high_start = high_start;
19133}
cb323159 19134#endif
d9a64523
A
19135
#if PMAP_CS
/*
 * vm_map_entry_cs_associate:
 *	Asks the pmap layer to take over code-signing enforcement for the
 *	executable mapping described by "entry" in "map".
 *
 *	Nothing is done (KERN_SUCCESS) when the map has no pmap, the entry is
 *	a submap, has no VM object or is not executable, or when no
 *	code-signed object is found in the entry's shadow chain.
 *
 *	Otherwise the association is made:
 *	- as PMAP_CS_ASSOCIATE_JIT for a "used_for_jit" entry,
 *	- as PMAP_CS_ASSOCIATE_COW when vmk_flags.vmkf_remap_prot_copy is set,
 *	- with the code-signing blobs of the first code-signed object in the
 *	  shadow chain in all other cases.
 *
 *	On KERN_SUCCESS the entry is flagged "pmap_cs_associated" (and
 *	"permanent" if vm_map_executable_immutable is set).
 *	On KERN_NOT_SUPPORTED the entry is left alone.
 *	On any other result, execute permission is stripped from the entry's
 *	current and maximum protections.
 *
 *	The map must be locked exclusively.  Returns the association result.
 */
kern_return_t
vm_map_entry_cs_associate(
	vm_map_t                map,
	vm_map_entry_t          entry,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_object_t             cs_object, cs_shadow;
	vm_object_offset_t      cs_offset;
	void                    *cs_blobs;
	struct vnode            *cs_vnode;
	kern_return_t           cs_ret;

	if (map->pmap == NULL ||
	    entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
	    VME_OBJECT(entry) == VM_OBJECT_NULL ||
	    !(entry->protection & VM_PROT_EXECUTE)) {
		return KERN_SUCCESS;
	}

	vm_map_lock_assert_exclusive(map);

	if (entry->used_for_jit) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_JIT,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	} else if (vmk_flags.vmkf_remap_prot_copy) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_COW,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	} else {
		/*
		 * Walk down the shadow chain, hand-over-hand, until a
		 * code-signed object is found, accumulating the offset
		 * into that object along the way.
		 */
		cs_object = VME_OBJECT(entry);
		vm_object_lock_shared(cs_object);
		cs_offset = VME_OFFSET(entry);
		while (cs_object != VM_OBJECT_NULL &&
		    !cs_object->code_signed) {
			cs_shadow = cs_object->shadow;
			if (cs_shadow != VM_OBJECT_NULL) {
				cs_offset += cs_object->vo_shadow_offset;
				vm_object_lock_shared(cs_shadow);
			}
			vm_object_unlock(cs_object);
			cs_object = cs_shadow;
		}
		if (cs_object == VM_OBJECT_NULL) {
			/* no code-signed object: nothing to associate */
			return KERN_SUCCESS;
		}

		/* cs_object is still locked (shared) here */
		cs_offset += cs_object->paging_offset;
		cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
		cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
		    &cs_blobs);
		assert(cs_ret == KERN_SUCCESS);
		cs_ret = cs_associate_blob_with_mapping(map->pmap,
		    entry->vme_start,
		    (entry->vme_end -
		    entry->vme_start),
		    cs_offset,
		    cs_blobs);
		vm_object_unlock(cs_object);
		cs_object = VM_OBJECT_NULL;
	}

	switch (cs_ret) {
	case KERN_SUCCESS:
		DTRACE_VM2(vm_map_entry_cs_associate_success,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end);
		if (vm_map_executable_immutable) {
			/*
			 * Prevent this executable mapping from being
			 * unmapped or modified.
			 */
			entry->permanent = TRUE;
		}
		/*
		 * pmap says it will validate the code-signing validity of
		 * pages faulted in via this mapping, so this map entry
		 * should be marked so that vm_fault() bypasses code-signing
		 * validation for faults coming through this mapping.
		 */
		entry->pmap_cs_associated = TRUE;
		break;

	case KERN_NOT_SUPPORTED:
		/*
		 * pmap won't check the code-signing validity of pages
		 * faulted in via this mapping, so VM should keep doing it.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_off,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
		break;

	default:
		/*
		 * A real error: do not allow execution in this mapping.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_failure,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
		entry->protection &= ~VM_PROT_EXECUTE;
		entry->max_protection &= ~VM_PROT_EXECUTE;
		break;
	}

	return cs_ret;
}
#endif /* PMAP_CS */
19255
19256/*
19257 * FORKED CORPSE FOOTPRINT
19258 *
19259 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19260 * empty since it never ran and never got to fault in any pages.
19261 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19262 * a forked corpse would therefore return very little information.
19263 *
19264 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19265 * to vm_map_fork() to collect footprint information from the original VM map
19266 * and its pmap, and store it in the forked corpse's VM map. That information
19267 * is stored in place of the VM map's "hole list" since we'll never need to
19268 * look up holes in the corpse's map.
19269 *
19270 * The corpse's footprint info looks like this:
19271 *
19272 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19273 * as follows:
19274 * +---------------------------------------+
19275 * header-> | cf_size |
19276 * +-------------------+-------------------+
19277 * | cf_last_region | cf_last_zeroes |
19278 * +-------------------+-------------------+
19279 * region1-> | cfr_vaddr |
19280 * +-------------------+-------------------+
19281 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19282 * +---------------------------------------+
19283 * | d4 | d5 | ... |
19284 * +---------------------------------------+
19285 * | ... |
19286 * +-------------------+-------------------+
19287 * | dy | dz | na | na | cfr_vaddr... | <-region2
19288 * +-------------------+-------------------+
19289 * | cfr_vaddr (ctd) | cfr_num_pages |
19290 * +---------------------------------------+
19291 * | d0 | d1 ... |
19292 * +---------------------------------------+
19293 * ...
19294 * +---------------------------------------+
19295 * last region-> | cfr_vaddr |
19296 * +---------------------------------------+
19297 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19298 * +---------------------------------------+
19299 * ...
19300 * +---------------------------------------+
19301 * | dx | dy | dz | na | na | na | na | na |
19302 * +---------------------------------------+
19303 *
19304 * where:
0a7de745
A
19305 * cf_size: total size of the buffer (rounded to page size)
19306 * cf_last_region: offset in the buffer of the last "region" sub-header
d9a64523
A
19307 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19308 * of last region
19309 * cfr_vaddr: virtual address of the start of the covered "region"
19310 * cfr_num_pages: number of pages in the covered "region"
19311 * d*: disposition of the page at that virtual address
19312 * Regions in the buffer are word-aligned.
19313 *
19314 * We estimate the size of the buffer based on the number of memory regions
19315 * and the virtual size of the address space. While copying each memory region
19316 * during vm_map_fork(), we also collect the footprint info for that region
19317 * and store it in the buffer, packing it as much as possible (coalescing
19318 * contiguous memory regions to avoid having too many region headers and
19319 * avoiding long streaks of "zero" page dispositions by splitting footprint
19320 * "regions"), so the number of regions in the footprint buffer might not match
19321 * the number of memory regions in the address space.
19322 *
19323 * We also have to copy the original task's "nonvolatile" ledgers since that's
19324 * part of the footprint and will need to be reported to any tool asking for
19325 * the footprint information of the forked corpse.
19326 */
19327
/*
 * Global statistics about corpse footprint collection.
 * NOTE(review): "count" and "size_max" are not updated in the code shown
 * here; presumably they are maintained alongside "size_avg" when a
 * collection completes -- confirm in vm_map_corpse_footprint_collect_done().
 */
19328uint64_t vm_map_corpse_footprint_count = 0;
/* running average of footprint sizes, updated in vm_map_corpse_footprint_collect_done() */
19329uint64_t vm_map_corpse_footprint_size_avg = 0;
19330uint64_t vm_map_corpse_footprint_size_max = 0;
/* bumped each time vm_map_corpse_footprint_collect() runs out of room in the footprint buffer */
19331uint64_t vm_map_corpse_footprint_full = 0;
/* bumped each time vm_map_corpse_footprint_collect() fails to allocate the footprint buffer */
19332uint64_t vm_map_corpse_footprint_no_buf = 0;
19333
19334/*
19335 * vm_map_corpse_footprint_new_region:
0a7de745 19336 * closes the current footprint "region" and creates a new one
d9a64523
A
19337 *
19338 * Returns NULL if there's not enough space in the buffer for a new region.
19339 */
19340static struct vm_map_corpse_footprint_region *
19341vm_map_corpse_footprint_new_region(
19342 struct vm_map_corpse_footprint_header *footprint_header)
19343{
0a7de745
A
19344 uintptr_t footprint_edge;
19345 uint32_t new_region_offset;
d9a64523
A
19346 struct vm_map_corpse_footprint_region *footprint_region;
19347 struct vm_map_corpse_footprint_region *new_footprint_region;
19348
19349 footprint_edge = ((uintptr_t)footprint_header +
0a7de745 19350 footprint_header->cf_size);
d9a64523 19351 footprint_region = ((struct vm_map_corpse_footprint_region *)
0a7de745
A
19352 ((char *)footprint_header +
19353 footprint_header->cf_last_region));
19354 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19355 footprint_edge);
d9a64523
A
19356
19357 /* get rid of trailing zeroes in the last region */
19358 assert(footprint_region->cfr_num_pages >=
0a7de745 19359 footprint_header->cf_last_zeroes);
d9a64523 19360 footprint_region->cfr_num_pages -=
0a7de745 19361 footprint_header->cf_last_zeroes;
d9a64523
A
19362 footprint_header->cf_last_zeroes = 0;
19363
19364 /* reuse this region if it's now empty */
19365 if (footprint_region->cfr_num_pages == 0) {
19366 return footprint_region;
19367 }
19368
19369 /* compute offset of new region */
19370 new_region_offset = footprint_header->cf_last_region;
0a7de745 19371 new_region_offset += sizeof(*footprint_region);
d9a64523 19372 new_region_offset += footprint_region->cfr_num_pages;
0a7de745 19373 new_region_offset = roundup(new_region_offset, sizeof(int));
d9a64523
A
19374
19375 /* check if we're going over the edge */
19376 if (((uintptr_t)footprint_header +
0a7de745
A
19377 new_region_offset +
19378 sizeof(*footprint_region)) >=
d9a64523
A
19379 footprint_edge) {
19380 /* over the edge: no new region */
19381 return NULL;
19382 }
19383
19384 /* adjust offset of last region in header */
19385 footprint_header->cf_last_region = new_region_offset;
19386
19387 new_footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
19388 ((char *)footprint_header +
19389 footprint_header->cf_last_region);
d9a64523
A
19390 new_footprint_region->cfr_vaddr = 0;
19391 new_footprint_region->cfr_num_pages = 0;
19392 /* caller needs to initialize new region */
19393
19394 return new_footprint_region;
19395}
19396
19397/*
19398 * vm_map_corpse_footprint_collect:
19399 * collect footprint information for "old_entry" in "old_map" and
19400 * stores it in "new_map"'s vmmap_footprint_info.
19401 */
19402kern_return_t
19403vm_map_corpse_footprint_collect(
0a7de745
A
19404 vm_map_t old_map,
19405 vm_map_entry_t old_entry,
19406 vm_map_t new_map)
d9a64523 19407{
0a7de745
A
19408 vm_map_offset_t va;
19409 int disp;
19410 kern_return_t kr;
d9a64523
A
19411 struct vm_map_corpse_footprint_header *footprint_header;
19412 struct vm_map_corpse_footprint_region *footprint_region;
19413 struct vm_map_corpse_footprint_region *new_footprint_region;
0a7de745
A
19414 unsigned char *next_disp_p;
19415 uintptr_t footprint_edge;
19416 uint32_t num_pages_tmp;
d9a64523
A
19417
19418 va = old_entry->vme_start;
19419
19420 vm_map_lock_assert_exclusive(old_map);
19421 vm_map_lock_assert_exclusive(new_map);
19422
19423 assert(new_map->has_corpse_footprint);
19424 assert(!old_map->has_corpse_footprint);
19425 if (!new_map->has_corpse_footprint ||
19426 old_map->has_corpse_footprint) {
19427 /*
19428 * This can only transfer footprint info from a
19429 * map with a live pmap to a map with a corpse footprint.
19430 */
19431 return KERN_NOT_SUPPORTED;
19432 }
19433
19434 if (new_map->vmmap_corpse_footprint == NULL) {
0a7de745
A
19435 vm_offset_t buf;
19436 vm_size_t buf_size;
d9a64523
A
19437
19438 buf = 0;
0a7de745
A
19439 buf_size = (sizeof(*footprint_header) +
19440 (old_map->hdr.nentries
19441 *
19442 (sizeof(*footprint_region) +
19443 +3)) /* potential alignment for each region */
19444 +
19445 ((old_map->size / PAGE_SIZE)
19446 *
19447 sizeof(char))); /* disposition for each page */
d9a64523
A
19448// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19449 buf_size = round_page(buf_size);
19450
19451 /* limit buffer to 1 page to validate overflow detection */
19452// buf_size = PAGE_SIZE;
19453
19454 /* limit size to a somewhat sane amount */
19455#if CONFIG_EMBEDDED
0a7de745 19456#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
d9a64523 19457#else /* CONFIG_EMBEDDED */
0a7de745 19458#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
d9a64523
A
19459#endif /* CONFIG_EMBEDDED */
19460 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19461 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19462 }
19463
19464 /*
19465 * Allocate the pageable buffer (with a trailing guard page).
19466 * It will be zero-filled on demand.
19467 */
19468 kr = kernel_memory_allocate(kernel_map,
0a7de745
A
19469 &buf,
19470 (buf_size
19471 + PAGE_SIZE), /* trailing guard page */
19472 0, /* mask */
19473 KMA_PAGEABLE | KMA_GUARD_LAST,
19474 VM_KERN_MEMORY_DIAG);
d9a64523
A
19475 if (kr != KERN_SUCCESS) {
19476 vm_map_corpse_footprint_no_buf++;
19477 return kr;
19478 }
19479
19480 /* initialize header and 1st region */
19481 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19482 new_map->vmmap_corpse_footprint = footprint_header;
19483
19484 footprint_header->cf_size = buf_size;
19485 footprint_header->cf_last_region =
0a7de745 19486 sizeof(*footprint_header);
d9a64523
A
19487 footprint_header->cf_last_zeroes = 0;
19488
19489 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
19490 ((char *)footprint_header +
19491 footprint_header->cf_last_region);
d9a64523
A
19492 footprint_region->cfr_vaddr = 0;
19493 footprint_region->cfr_num_pages = 0;
19494 } else {
19495 /* retrieve header and last region */
19496 footprint_header = (struct vm_map_corpse_footprint_header *)
0a7de745 19497 new_map->vmmap_corpse_footprint;
d9a64523 19498 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
19499 ((char *)footprint_header +
19500 footprint_header->cf_last_region);
d9a64523
A
19501 }
19502 footprint_edge = ((uintptr_t)footprint_header +
0a7de745 19503 footprint_header->cf_size);
d9a64523
A
19504
19505 if ((footprint_region->cfr_vaddr +
0a7de745
A
19506 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19507 PAGE_SIZE))
d9a64523
A
19508 != old_entry->vme_start) {
19509 uint64_t num_pages_delta;
19510 uint32_t region_offset_delta;
19511
19512 /*
19513 * Not the next contiguous virtual address:
19514 * start a new region or store "zero" dispositions for
19515 * the missing pages?
19516 */
19517 /* size of gap in actual page dispositions */
19518 num_pages_delta = (((old_entry->vme_start -
0a7de745
A
19519 footprint_region->cfr_vaddr) / PAGE_SIZE)
19520 - footprint_region->cfr_num_pages);
d9a64523
A
19521 /* size of gap as a new footprint region header */
19522 region_offset_delta =
0a7de745
A
19523 (sizeof(*footprint_region) +
19524 roundup((footprint_region->cfr_num_pages -
19525 footprint_header->cf_last_zeroes),
19526 sizeof(int)) -
19527 (footprint_region->cfr_num_pages -
19528 footprint_header->cf_last_zeroes));
d9a64523
A
19529// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19530 if (region_offset_delta < num_pages_delta ||
19531 os_add3_overflow(footprint_region->cfr_num_pages,
0a7de745
A
19532 (uint32_t) num_pages_delta,
19533 1,
19534 &num_pages_tmp)) {
d9a64523
A
19535 /*
19536 * Storing data for this gap would take more space
19537 * than inserting a new footprint region header:
19538 * let's start a new region and save space. If it's a
19539 * tie, let's avoid using a new region, since that
19540 * would require more region hops to find the right
19541 * range during lookups.
19542 *
19543 * If the current region's cfr_num_pages would overflow
19544 * if we added "zero" page dispositions for the gap,
19545 * no choice but to start a new region.
19546 */
19547// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19548 new_footprint_region =
0a7de745 19549 vm_map_corpse_footprint_new_region(footprint_header);
d9a64523
A
19550 /* check that we're not going over the edge */
19551 if (new_footprint_region == NULL) {
19552 goto over_the_edge;
19553 }
19554 footprint_region = new_footprint_region;
19555 /* initialize new region as empty */
19556 footprint_region->cfr_vaddr = old_entry->vme_start;
19557 footprint_region->cfr_num_pages = 0;
19558 } else {
19559 /*
19560 * Store "zero" page dispositions for the missing
19561 * pages.
19562 */
19563// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19564 for (; num_pages_delta > 0; num_pages_delta--) {
19565 next_disp_p =
0a7de745
A
19566 ((unsigned char *) footprint_region +
19567 sizeof(*footprint_region) +
19568 footprint_region->cfr_num_pages);
d9a64523
A
19569 /* check that we're not going over the edge */
19570 if ((uintptr_t)next_disp_p >= footprint_edge) {
19571 goto over_the_edge;
19572 }
19573 /* store "zero" disposition for this gap page */
19574 footprint_region->cfr_num_pages++;
19575 *next_disp_p = (unsigned char) 0;
19576 footprint_header->cf_last_zeroes++;
19577 }
19578 }
19579 }
19580
19581 for (va = old_entry->vme_start;
0a7de745
A
19582 va < old_entry->vme_end;
19583 va += PAGE_SIZE) {
19584 vm_object_t object;
d9a64523
A
19585
19586 object = VME_OBJECT(old_entry);
19587 if (!old_entry->is_sub_map &&
19588 old_entry->iokit_acct &&
19589 object != VM_OBJECT_NULL &&
19590 object->internal &&
19591 object->purgable == VM_PURGABLE_DENY) {
19592 /*
19593 * Non-purgeable IOKit memory: phys_footprint
19594 * includes the entire virtual mapping.
19595 * Since the forked corpse's VM map entry will not
19596 * have "iokit_acct", pretend that this page's
19597 * disposition is "present & internal", so that it
19598 * shows up in the forked corpse's footprint.
19599 */
19600 disp = (PMAP_QUERY_PAGE_PRESENT |
0a7de745 19601 PMAP_QUERY_PAGE_INTERNAL);
d9a64523
A
19602 } else {
19603 disp = 0;
19604 pmap_query_page_info(old_map->pmap,
0a7de745
A
19605 va,
19606 &disp);
d9a64523
A
19607 }
19608
19609// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19610
19611 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19612 /*
19613 * Ignore "zero" dispositions at start of
19614 * region: just move start of region.
19615 */
19616 footprint_region->cfr_vaddr += PAGE_SIZE;
19617 continue;
19618 }
19619
19620 /* would region's cfr_num_pages overflow? */
19621 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
0a7de745 19622 &num_pages_tmp)) {
d9a64523
A
19623 /* overflow: create a new region */
19624 new_footprint_region =
0a7de745
A
19625 vm_map_corpse_footprint_new_region(
19626 footprint_header);
d9a64523
A
19627 if (new_footprint_region == NULL) {
19628 goto over_the_edge;
19629 }
19630 footprint_region = new_footprint_region;
19631 footprint_region->cfr_vaddr = va;
19632 footprint_region->cfr_num_pages = 0;
19633 }
19634
19635 next_disp_p = ((unsigned char *)footprint_region +
0a7de745
A
19636 sizeof(*footprint_region) +
19637 footprint_region->cfr_num_pages);
d9a64523
A
19638 /* check that we're not going over the edge */
19639 if ((uintptr_t)next_disp_p >= footprint_edge) {
19640 goto over_the_edge;
19641 }
19642 /* store this dispostion */
19643 *next_disp_p = (unsigned char) disp;
19644 footprint_region->cfr_num_pages++;
19645
19646 if (disp != 0) {
19647 /* non-zero disp: break the current zero streak */
19648 footprint_header->cf_last_zeroes = 0;
19649 /* done */
19650 continue;
19651 }
19652
19653 /* zero disp: add to the current streak of zeroes */
19654 footprint_header->cf_last_zeroes++;
19655 if ((footprint_header->cf_last_zeroes +
0a7de745
A
19656 roundup((footprint_region->cfr_num_pages -
19657 footprint_header->cf_last_zeroes) &
19658 (sizeof(int) - 1),
19659 sizeof(int))) <
19660 (sizeof(*footprint_header))) {
d9a64523
A
19661 /*
19662 * There are not enough trailing "zero" dispositions
19663 * (+ the extra padding we would need for the previous
19664 * region); creating a new region would not save space
19665 * at this point, so let's keep this "zero" disposition
19666 * in this region and reconsider later.
19667 */
19668 continue;
19669 }
19670 /*
19671 * Create a new region to avoid having too many consecutive
19672 * "zero" dispositions.
19673 */
19674 new_footprint_region =
0a7de745 19675 vm_map_corpse_footprint_new_region(footprint_header);
d9a64523
A
19676 if (new_footprint_region == NULL) {
19677 goto over_the_edge;
19678 }
19679 footprint_region = new_footprint_region;
19680 /* initialize the new region as empty ... */
19681 footprint_region->cfr_num_pages = 0;
19682 /* ... and skip this "zero" disp */
19683 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19684 }
19685
19686 return KERN_SUCCESS;
19687
19688over_the_edge:
19689// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19690 vm_map_corpse_footprint_full++;
19691 return KERN_RESOURCE_SHORTAGE;
19692}
19693
19694/*
19695 * vm_map_corpse_footprint_collect_done:
19696 * completes the footprint collection by getting rid of any remaining
19697 * trailing "zero" dispositions and trimming the unused part of the
19698 * kernel buffer
19699 */
19700void
19701vm_map_corpse_footprint_collect_done(
0a7de745 19702 vm_map_t new_map)
d9a64523
A
19703{
19704 struct vm_map_corpse_footprint_header *footprint_header;
19705 struct vm_map_corpse_footprint_region *footprint_region;
0a7de745
A
19706 vm_size_t buf_size, actual_size;
19707 kern_return_t kr;
d9a64523
A
19708
19709 assert(new_map->has_corpse_footprint);
19710 if (!new_map->has_corpse_footprint ||
19711 new_map->vmmap_corpse_footprint == NULL) {
19712 return;
19713 }
19714
19715 footprint_header = (struct vm_map_corpse_footprint_header *)
0a7de745 19716 new_map->vmmap_corpse_footprint;
d9a64523
A
19717 buf_size = footprint_header->cf_size;
19718
19719 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
19720 ((char *)footprint_header +
19721 footprint_header->cf_last_region);
d9a64523
A
19722
19723 /* get rid of trailing zeroes in last region */
19724 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19725 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19726 footprint_header->cf_last_zeroes = 0;
19727
19728 actual_size = (vm_size_t)(footprint_header->cf_last_region +
0a7de745
A
19729 sizeof(*footprint_region) +
19730 footprint_region->cfr_num_pages);
d9a64523
A
19731
19732// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
0a7de745
A
19733 vm_map_corpse_footprint_size_avg =
19734 (((vm_map_corpse_footprint_size_avg *
19735 vm_map_corpse_footprint_count) +
19736 actual_size) /
19737 (vm_map_corpse_footprint_count + 1));
d9a64523
A
19738 vm_map_corpse_footprint_count++;
19739 if (actual_size > vm_map_corpse_footprint_size_max) {
19740 vm_map_corpse_footprint_size_max = actual_size;
19741 }
19742
19743 actual_size = round_page(actual_size);
19744 if (buf_size > actual_size) {
19745 kr = vm_deallocate(kernel_map,
0a7de745
A
19746 ((vm_address_t)footprint_header +
19747 actual_size +
19748 PAGE_SIZE), /* trailing guard page */
19749 (buf_size - actual_size));
d9a64523 19750 assertf(kr == KERN_SUCCESS,
0a7de745
A
19751 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19752 footprint_header,
19753 (uint64_t) buf_size,
19754 (uint64_t) actual_size,
19755 kr);
d9a64523 19756 kr = vm_protect(kernel_map,
0a7de745
A
19757 ((vm_address_t)footprint_header +
19758 actual_size),
19759 PAGE_SIZE,
19760 FALSE, /* set_maximum */
19761 VM_PROT_NONE);
d9a64523 19762 assertf(kr == KERN_SUCCESS,
0a7de745
A
19763 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19764 footprint_header,
19765 (uint64_t) buf_size,
19766 (uint64_t) actual_size,
19767 kr);
d9a64523
A
19768 }
19769
19770 footprint_header->cf_size = actual_size;
19771}
19772
19773/*
19774 * vm_map_corpse_footprint_query_page_info:
19775 * retrieves the disposition of the page at virtual address "vaddr"
19776 * in the forked corpse's VM map
19777 *
19778 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19779 */
19780kern_return_t
19781vm_map_corpse_footprint_query_page_info(
0a7de745
A
19782 vm_map_t map,
19783 vm_map_offset_t va,
19784 int *disp)
d9a64523
A
19785{
19786 struct vm_map_corpse_footprint_header *footprint_header;
19787 struct vm_map_corpse_footprint_region *footprint_region;
0a7de745
A
19788 uint32_t footprint_region_offset;
19789 vm_map_offset_t region_start, region_end;
19790 int disp_idx;
19791 kern_return_t kr;
d9a64523
A
19792
19793 if (!map->has_corpse_footprint) {
19794 *disp = 0;
19795 kr = KERN_INVALID_ARGUMENT;
19796 goto done;
19797 }
19798
19799 footprint_header = map->vmmap_corpse_footprint;
19800 if (footprint_header == NULL) {
19801 *disp = 0;
19802// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19803 kr = KERN_INVALID_ARGUMENT;
19804 goto done;
19805 }
19806
19807 /* start looking at the hint ("cf_hint_region") */
19808 footprint_region_offset = footprint_header->cf_hint_region;
19809
19810lookup_again:
0a7de745 19811 if (footprint_region_offset < sizeof(*footprint_header)) {
d9a64523 19812 /* hint too low: start from 1st region */
0a7de745 19813 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
19814 }
19815 if (footprint_region_offset >= footprint_header->cf_last_region) {
19816 /* hint too high: re-start from 1st region */
0a7de745 19817 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
19818 }
19819 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745 19820 ((char *)footprint_header + footprint_region_offset);
d9a64523
A
19821 region_start = footprint_region->cfr_vaddr;
19822 region_end = (region_start +
0a7de745
A
19823 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19824 PAGE_SIZE));
d9a64523 19825 if (va < region_start &&
0a7de745 19826 footprint_region_offset != sizeof(*footprint_header)) {
d9a64523
A
19827 /* our range starts before the hint region */
19828
19829 /* reset the hint (in a racy way...) */
0a7de745 19830 footprint_header->cf_hint_region = sizeof(*footprint_header);
d9a64523 19831 /* lookup "va" again from 1st region */
0a7de745 19832 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
19833 goto lookup_again;
19834 }
19835
19836 while (va >= region_end) {
19837 if (footprint_region_offset >= footprint_header->cf_last_region) {
19838 break;
19839 }
19840 /* skip the region's header */
0a7de745 19841 footprint_region_offset += sizeof(*footprint_region);
d9a64523
A
19842 /* skip the region's page dispositions */
19843 footprint_region_offset += footprint_region->cfr_num_pages;
19844 /* align to next word boundary */
19845 footprint_region_offset =
0a7de745
A
19846 roundup(footprint_region_offset,
19847 sizeof(int));
d9a64523 19848 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745 19849 ((char *)footprint_header + footprint_region_offset);
d9a64523
A
19850 region_start = footprint_region->cfr_vaddr;
19851 region_end = (region_start +
0a7de745
A
19852 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19853 PAGE_SIZE));
d9a64523
A
19854 }
19855 if (va < region_start || va >= region_end) {
19856 /* page not found */
19857 *disp = 0;
19858// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19859 kr = KERN_SUCCESS;
19860 goto done;
19861 }
19862
19863 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19864 footprint_header->cf_hint_region = footprint_region_offset;
19865
19866 /* get page disposition for "va" in this region */
19867 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19868 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19869
19870 kr = KERN_SUCCESS;
19871done:
19872// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19873 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19874 DTRACE_VM4(footprint_query_page_info,
0a7de745
A
19875 vm_map_t, map,
19876 vm_map_offset_t, va,
19877 int, *disp,
19878 kern_return_t, kr);
d9a64523
A
19879
19880 return kr;
19881}
19882
19883
19884static void
19885vm_map_corpse_footprint_destroy(
0a7de745 19886 vm_map_t map)
d9a64523
A
19887{
19888 if (map->has_corpse_footprint &&
19889 map->vmmap_corpse_footprint != 0) {
19890 struct vm_map_corpse_footprint_header *footprint_header;
19891 vm_size_t buf_size;
19892 kern_return_t kr;
19893
19894 footprint_header = map->vmmap_corpse_footprint;
19895 buf_size = footprint_header->cf_size;
19896 kr = vm_deallocate(kernel_map,
0a7de745
A
19897 (vm_offset_t) map->vmmap_corpse_footprint,
19898 ((vm_size_t) buf_size
19899 + PAGE_SIZE)); /* trailing guard page */
d9a64523
A
19900 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19901 map->vmmap_corpse_footprint = 0;
19902 map->has_corpse_footprint = FALSE;
19903 }
19904}
19905
19906/*
19907 * vm_map_copy_footprint_ledgers:
19908 * copies any ledger that's relevant to the memory footprint of "old_task"
19909 * into the forked corpse's task ("new_task")
19910 */
19911void
19912vm_map_copy_footprint_ledgers(
0a7de745
A
19913 task_t old_task,
19914 task_t new_task)
d9a64523
A
19915{
19916 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19917 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19918 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
0a7de745
A
19919 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19920 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19921 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19922 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19923 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19924 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
cb323159
A
19925 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19926 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
0a7de745
A
19927 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19928 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
cb323159
A
19929 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19930 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19931 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19932 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19933 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19934 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
0a7de745 19935 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
d9a64523
A
19936}
19937
19938/*
19939 * vm_map_copy_ledger:
19940 * copy a single ledger from "old_task" to "new_task"
19941 */
19942void
19943vm_map_copy_ledger(
0a7de745
A
19944 task_t old_task,
19945 task_t new_task,
19946 int ledger_entry)
d9a64523 19947{
0a7de745 19948 ledger_amount_t old_balance, new_balance, delta;
d9a64523
A
19949
19950 assert(new_task->map->has_corpse_footprint);
0a7de745 19951 if (!new_task->map->has_corpse_footprint) {
d9a64523 19952 return;
0a7de745 19953 }
d9a64523
A
19954
19955 /* turn off sanity checks for the ledger we're about to mess with */
19956 ledger_disable_panic_on_negative(new_task->ledger,
0a7de745 19957 ledger_entry);
d9a64523
A
19958
19959 /* adjust "new_task" to match "old_task" */
19960 ledger_get_balance(old_task->ledger,
0a7de745
A
19961 ledger_entry,
19962 &old_balance);
d9a64523 19963 ledger_get_balance(new_task->ledger,
0a7de745
A
19964 ledger_entry,
19965 &new_balance);
d9a64523
A
19966 if (new_balance == old_balance) {
19967 /* new == old: done */
19968 } else if (new_balance > old_balance) {
19969 /* new > old ==> new -= new - old */
19970 delta = new_balance - old_balance;
19971 ledger_debit(new_task->ledger,
0a7de745
A
19972 ledger_entry,
19973 delta);
d9a64523
A
19974 } else {
19975 /* new < old ==> new += old - new */
19976 delta = old_balance - new_balance;
19977 ledger_credit(new_task->ledger,
0a7de745
A
19978 ledger_entry,
19979 delta);
d9a64523
A
19980 }
19981}
cb323159
A
19982
#if MACH_ASSERT

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

/*
 * The ledgers inspected by vm_map_pmap_check_ledgers().
 * "__APPLY" is expanded once per ledger, in the order listed here; the same
 * list drives both the layout of "pmap_ledgers_drift" and the sequence of
 * balance checks, so the two cannot get out of sync.
 */
#define PMAP_LEDGERS_FOREACH(__APPLY)                   \
	__APPLY(phys_footprint);                        \
	__APPLY(internal);                              \
	__APPLY(internal_compressed);                   \
	__APPLY(iokit_mapped);                          \
	__APPLY(alternate_accounting);                  \
	__APPLY(alternate_accounting_compressed);       \
	__APPLY(page_table);                            \
	__APPLY(purgeable_volatile);                    \
	__APPLY(purgeable_nonvolatile);                 \
	__APPLY(purgeable_volatile_compressed);         \
	__APPLY(purgeable_nonvolatile_compressed);      \
	__APPLY(tagged_nofootprint);                    \
	__APPLY(tagged_footprint);                      \
	__APPLY(tagged_nofootprint_compressed);         \
	__APPLY(tagged_footprint_compressed);           \
	__APPLY(network_volatile);                      \
	__APPLY(network_nonvolatile);                   \
	__APPLY(network_volatile_compressed);           \
	__APPLY(network_nonvolatile_compressed);        \
	__APPLY(media_nofootprint);                     \
	__APPLY(media_footprint);                       \
	__APPLY(media_nofootprint_compressed);          \
	__APPLY(media_footprint_compressed);            \
	__APPLY(graphics_nofootprint);                  \
	__APPLY(graphics_footprint);                    \
	__APPLY(graphics_nofootprint_compressed);       \
	__APPLY(graphics_footprint_compressed);         \
	__APPLY(neural_nofootprint);                    \
	__APPLY(neural_footprint);                      \
	__APPLY(neural_nofootprint_compressed);         \
	__APPLY(neural_footprint_compressed);

/*
 * Per-ledger drift counters: how many times a non-zero balance was seen
 * above ("over") or below ("under") zero, the accumulated balances and the
 * most extreme balance observed on each side.
 */
#define LEDGER_DRIFT(__LEDGER)                          \
	int             __LEDGER##_over;                \
	ledger_amount_t __LEDGER##_over_total;          \
	ledger_amount_t __LEDGER##_over_max;            \
	int             __LEDGER##_under;               \
	ledger_amount_t __LEDGER##_under_total;         \
	ledger_amount_t __LEDGER##_under_max

/* drift statistics accumulated by vm_map_pmap_check_ledgers() */
struct {
	uint64_t        num_pmaps_checked;

	PMAP_LEDGERS_FOREACH(LEDGER_DRIFT)
} pmap_ledgers_drift;

/*
 * vm_map_pmap_check_ledgers:
 *	Reads the balance of each ledger in PMAP_LEDGERS_FOREACH from "ledger".
 *	Every non-zero balance is logged with "pid" and "procname" and
 *	recorded in "pmap_ledgers_drift".  If any imbalance is deemed fatal
 *	(the ledger panics on negative, or panics are enabled and the
 *	balance is off by more than "pmap_ledgers_panic_leeway" pages), the
 *	routine panics when "pmap_ledgers_panic" is set and only logs
 *	otherwise.  "pmap" is used in those messages only.
 */
void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

/* check one ledger: log and account for any non-zero balance */
#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	                   task_ledgers.__LEDGER,                       \
	                   &bal);                                       \
	ledger_get_panic_on_negative(ledger,                            \
	                             task_ledgers.__LEDGER,             \
	                             &panic_on_negative);               \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	             pmap_ledgers_panic_leeway > 0 &&                   \
	             (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||  \
	              bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	               "\"%s\" = %lld\n",                               \
	               pid, procname, #__LEDGER, bal);                  \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else {                                                \
	                /* non-zero and not positive: negative */       \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	PMAP_LEDGERS_FOREACH(LEDGER_CHECK_BALANCE)

	if (!do_panic) {
		return;
	}

	if (pmap_ledgers_panic) {
		panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
		    pmap, pid, procname);
	} else {
		printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
		    pmap, pid, procname);
	}
}
#endif /* MACH_ASSERT */