1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5ba3f43e 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
5ba3f43e 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
5ba3f43e 17 *
2d21ac55 18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
5ba3f43e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
5ba3f43e 31/*
1c79356b 32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
5ba3f43e 35 *
1c79356b 36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
5ba3f43e 41 *
1c79356b 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5ba3f43e 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
5ba3f43e 47 *
1c79356b 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
5ba3f43e 52 *
1c79356b 53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
d9a64523 87#include <kern/exc_guard.h>
91447636 88#include <kern/kalloc.h>
1c79356b 89#include <kern/zalloc.h>
91447636
A
90
91#include <vm/cpm.h>
d9a64523 92#include <vm/vm_compressor.h>
39236c6e 93#include <vm/vm_compressor_pager.h>
1c79356b
A
94#include <vm/vm_init.h>
95#include <vm/vm_fault.h>
96#include <vm/vm_map.h>
97#include <vm/vm_object.h>
98#include <vm/vm_page.h>
b0d623f7 99#include <vm/vm_pageout.h>
d9a64523 100#include <vm/pmap.h>
1c79356b
A
101#include <vm/vm_kern.h>
102#include <ipc/ipc_port.h>
103#include <kern/sched_prim.h>
104#include <kern/misc_protos.h>
1c79356b
A
105#include <kern/xpr.h>
106
91447636
A
107#include <mach/vm_map_server.h>
108#include <mach/mach_host_server.h>
2d21ac55 109#include <vm/vm_protos.h>
b0d623f7 110#include <vm/vm_purgeable_internal.h>
91447636 111
91447636 112#include <vm/vm_protos.h>
2d21ac55 113#include <vm/vm_shared_region.h>
6d2010ae 114#include <vm/vm_map_store.h>
91447636 115
5ba3f43e
A
116#include <san/kasan.h>
117
d9a64523
A
118#include <sys/codesign.h>
119#include <libkern/section_keywords.h>
120#if DEVELOPMENT || DEBUG
121extern int proc_selfcsflags(void);
122#if CONFIG_EMBEDDED
123extern int panic_on_unsigned_execute;
124#endif /* CONFIG_EMBEDDED */
125#endif /* DEVELOPMENT || DEBUG */
126
5ba3f43e 127#if __arm64__
d9a64523
A
128extern const int fourk_binary_compatibility_unsafe;
129extern const int fourk_binary_compatibility_allow_wx;
5ba3f43e 130#endif /* __arm64__ */
39037602
A
131extern int proc_selfpid(void);
132extern char *proc_name_address(void *p);
133
134#if VM_MAP_DEBUG_APPLE_PROTECT
135int vm_map_debug_apple_protect = 0;
136#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
137#if VM_MAP_DEBUG_FOURK
138int vm_map_debug_fourk = 0;
139#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 140
d9a64523
A
141SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
142int vm_map_executable_immutable_verbose = 0;
5ba3f43e 143
0a7de745 144extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
145/* Internal prototypes
146 */
2d21ac55 147
91447636 148static void vm_map_simplify_range(
0a7de745
A
149 vm_map_t map,
150 vm_map_offset_t start,
151 vm_map_offset_t end); /* forward */
152
153static boolean_t vm_map_range_check(
154 vm_map_t map,
155 vm_map_offset_t start,
156 vm_map_offset_t end,
157 vm_map_entry_t *entry);
158
159static vm_map_entry_t _vm_map_entry_create(
160 struct vm_map_header *map_header, boolean_t map_locked);
161
162static void _vm_map_entry_dispose(
163 struct vm_map_header *map_header,
164 vm_map_entry_t entry);
165
166static void vm_map_pmap_enter(
167 vm_map_t map,
168 vm_map_offset_t addr,
169 vm_map_offset_t end_addr,
170 vm_object_t object,
171 vm_object_offset_t offset,
172 vm_prot_t protection);
173
174static void _vm_map_clip_end(
175 struct vm_map_header *map_header,
176 vm_map_entry_t entry,
177 vm_map_offset_t end);
178
179static void _vm_map_clip_start(
180 struct vm_map_header *map_header,
181 vm_map_entry_t entry,
182 vm_map_offset_t start);
183
184static void vm_map_entry_delete(
185 vm_map_t map,
186 vm_map_entry_t entry);
187
188static kern_return_t vm_map_delete(
189 vm_map_t map,
190 vm_map_offset_t start,
191 vm_map_offset_t end,
192 int flags,
193 vm_map_t zap_map);
194
195static void vm_map_copy_insert(
196 vm_map_t map,
197 vm_map_entry_t after_where,
198 vm_map_copy_t copy);
199
200static kern_return_t vm_map_copy_overwrite_unaligned(
201 vm_map_t dst_map,
202 vm_map_entry_t entry,
203 vm_map_copy_t copy,
39236c6e 204 vm_map_address_t start,
0a7de745 205 boolean_t discard_on_success);
1c79356b 206
0a7de745
A
207static kern_return_t vm_map_copy_overwrite_aligned(
208 vm_map_t dst_map,
209 vm_map_entry_t tmp_entry,
210 vm_map_copy_t copy,
2d21ac55 211 vm_map_offset_t start,
0a7de745 212 pmap_t pmap);
1c79356b 213
0a7de745
A
214static kern_return_t vm_map_copyin_kernel_buffer(
215 vm_map_t src_map,
2d21ac55 216 vm_map_address_t src_addr,
0a7de745
A
217 vm_map_size_t len,
218 boolean_t src_destroy,
219 vm_map_copy_t *copy_result); /* OUT */
220
221static kern_return_t vm_map_copyout_kernel_buffer(
222 vm_map_t map,
223 vm_map_address_t *addr, /* IN/OUT */
224 vm_map_copy_t copy,
39037602 225 vm_map_size_t copy_size,
0a7de745
A
226 boolean_t overwrite,
227 boolean_t consume_on_success);
228
229static void vm_map_fork_share(
230 vm_map_t old_map,
231 vm_map_entry_t old_entry,
232 vm_map_t new_map);
233
234static boolean_t vm_map_fork_copy(
235 vm_map_t old_map,
236 vm_map_entry_t *old_entry_p,
237 vm_map_t new_map,
238 int vm_map_copyin_flags);
239
240static kern_return_t vm_map_wire_nested(
241 vm_map_t map,
242 vm_map_offset_t start,
243 vm_map_offset_t end,
244 vm_prot_t caller_prot,
245 vm_tag_t tag,
246 boolean_t user_wire,
247 pmap_t map_pmap,
248 vm_map_offset_t pmap_addr,
249 ppnum_t *physpage_p);
250
251static kern_return_t vm_map_unwire_nested(
252 vm_map_t map,
253 vm_map_offset_t start,
254 vm_map_offset_t end,
255 boolean_t user_wire,
256 pmap_t map_pmap,
257 vm_map_offset_t pmap_addr);
258
259static kern_return_t vm_map_overwrite_submap_recurse(
260 vm_map_t dst_map,
261 vm_map_offset_t dst_addr,
262 vm_map_size_t dst_size);
263
264static kern_return_t vm_map_copy_overwrite_nested(
265 vm_map_t dst_map,
266 vm_map_offset_t dst_addr,
267 vm_map_copy_t copy,
268 boolean_t interruptible,
269 pmap_t pmap,
270 boolean_t discard_on_success);
271
272static kern_return_t vm_map_remap_extract(
273 vm_map_t map,
274 vm_map_offset_t addr,
275 vm_map_size_t size,
276 boolean_t copy,
277 struct vm_map_header *map_header,
278 vm_prot_t *cur_protection,
279 vm_prot_t *max_protection,
280 vm_inherit_t inheritance,
281 boolean_t pageable,
282 boolean_t same_map,
283 vm_map_kernel_flags_t vmk_flags);
284
285static kern_return_t vm_map_remap_range_allocate(
286 vm_map_t map,
287 vm_map_address_t *address,
288 vm_map_size_t size,
289 vm_map_offset_t mask,
290 int flags,
291 vm_map_kernel_flags_t vmk_flags,
292 vm_tag_t tag,
293 vm_map_entry_t *map_entry);
294
295static void vm_map_region_look_for_page(
296 vm_map_t map,
2d21ac55 297 vm_map_offset_t va,
0a7de745
A
298 vm_object_t object,
299 vm_object_offset_t offset,
2d21ac55
A
300 int max_refcnt,
301 int depth,
39236c6e
A
302 vm_region_extended_info_t extended,
303 mach_msg_type_number_t count);
91447636 304
0a7de745
A
305static int vm_map_region_count_obj_refs(
306 vm_map_entry_t entry,
307 vm_object_t object);
1c79356b 308
b0d623f7 309
0a7de745
A
310static kern_return_t vm_map_willneed(
311 vm_map_t map,
312 vm_map_offset_t start,
313 vm_map_offset_t end);
b0d623f7 314
0a7de745
A
315static kern_return_t vm_map_reuse_pages(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
b0d623f7 319
0a7de745
A
320static kern_return_t vm_map_reusable_pages(
321 vm_map_t map,
322 vm_map_offset_t start,
323 vm_map_offset_t end);
b0d623f7 324
0a7de745
A
325static kern_return_t vm_map_can_reuse(
326 vm_map_t map,
327 vm_map_offset_t start,
328 vm_map_offset_t end);
b0d623f7 329
3e170ce0 330#if MACH_ASSERT
0a7de745
A
331static kern_return_t vm_map_pageout(
332 vm_map_t map,
333 vm_map_offset_t start,
334 vm_map_offset_t end);
3e170ce0 335#endif /* MACH_ASSERT */
6d2010ae 336
0a7de745
A
337static void vm_map_corpse_footprint_destroy(
338 vm_map_t map);
d9a64523 339
5ba3f43e
A
340pid_t find_largest_process_vm_map_entries(void);
341
1c79356b
A
342/*
343 * Macros to copy a vm_map_entry. We must be careful to correctly
344 * manage the wired page count. vm_map_entry_copy() creates a new
345 * map entry to the same memory - the wired count in the new entry
346 * must be set to zero. vm_map_entry_copy_full() creates a new
347 * entry that is identical to the old entry. This preserves the
348 * wire count; it's used for map splitting and zone changing in
349 * vm_map_copyout.
350 */
316670eb 351
d9a64523
A
352#if CONFIG_EMBEDDED
353
354/*
355 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
356 * But for security reasons on embedded platforms, we don't want the
357 * new mapping to be "used for jit", so we always reset the flag here.
358 * Same for "pmap_cs_associated".
359 */
0a7de745
A
360#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
361MACRO_BEGIN \
362 (NEW)->used_for_jit = FALSE; \
363 (NEW)->pmap_cs_associated = FALSE; \
d9a64523
A
364MACRO_END
365
366#else /* CONFIG_EMBEDDED */
367
368/*
369 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
370 * On macOS, the new mapping can be "used for jit".
371 */
0a7de745
A
372#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
373MACRO_BEGIN \
374 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
375 assert((NEW)->pmap_cs_associated == FALSE); \
d9a64523
A
376MACRO_END
377
378#endif /* CONFIG_EMBEDDED */
379
0a7de745
A
380#define vm_map_entry_copy(NEW, OLD) \
381MACRO_BEGIN \
382boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55 383 *(NEW) = *(OLD); \
0a7de745 384 (NEW)->is_shared = FALSE; \
2d21ac55
A
385 (NEW)->needs_wakeup = FALSE; \
386 (NEW)->in_transition = FALSE; \
387 (NEW)->wired_count = 0; \
388 (NEW)->user_wired_count = 0; \
0a7de745
A
389 (NEW)->permanent = FALSE; \
390 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
391 (NEW)->from_reserved_zone = _vmec_reserved; \
392 if ((NEW)->iokit_acct) { \
5c9f4661 393 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
0a7de745
A
394 (NEW)->iokit_acct = FALSE; \
395 (NEW)->use_pmap = TRUE; \
396 } \
3e170ce0 397 (NEW)->vme_resilient_codesign = FALSE; \
0a7de745
A
398 (NEW)->vme_resilient_media = FALSE; \
399 (NEW)->vme_atomic = FALSE; \
1c79356b
A
400MACRO_END
401
0a7de745
A
402#define vm_map_entry_copy_full(NEW, OLD) \
403MACRO_BEGIN \
404boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
405(*(NEW) = *(OLD)); \
406(NEW)->from_reserved_zone = _vmecf_reserved; \
7ddcb079 407MACRO_END
1c79356b 408
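/*
 * Illustrative sketch (not part of the original source): a minimal,
 * hypothetical helper showing the intended difference between
 * vm_map_entry_copy() and vm_map_entry_copy_full() with respect to
 * wiring, as described in the comment above.  The helper and the local
 * names "dst"/"src" exist only to document the macros' semantics.
 */
static __unused void
vm_map_entry_copy_example(vm_map_entry_t dst, vm_map_entry_t src)
{
	vm_map_entry_copy(dst, src);            /* new mapping: wiring is reset */
	assert(dst->wired_count == 0);
	assert(dst->user_wired_count == 0);

	vm_map_entry_copy_full(dst, src);       /* identical copy: wiring preserved */
	assert(dst->wired_count == src->wired_count);
}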
2d21ac55
A
409/*
410 * Decide if we want to allow processes to execute from their data or stack areas.
5ba3f43e 411 * override_nx() returns true if we do. Data/stack execution can be enabled independently
2d21ac55
A
412 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
413 * or allow_stack_exec to enable data execution for that type of data area for that particular
414 * ABI (or both by or'ing the flags together). These are initialized in the architecture
5ba3f43e
A
415 * specific pmap files since the default behavior varies according to architecture. The
416 * main reason it varies is because of the need to provide binary compatibility with old
417 * applications that were written before these restrictions came into being. In the old
418 * days, an app could execute anything it could read, but this has slowly been tightened
2d21ac55
A
419 * up over time. The default behavior is:
420 *
421 * 32-bit PPC apps may execute from both stack and data areas
 422 * 32-bit Intel apps may execute from data areas but not stack
423 * 64-bit PPC/Intel apps may not execute from either data or stack
424 *
425 * An application on any architecture may override these defaults by explicitly
5ba3f43e 426 * adding PROT_EXEC permission to the page in question with the mprotect(2)
2d21ac55 427 * system call. This code here just determines what happens when an app tries to
0a7de745 428 * execute from a page that lacks execute permission.
2d21ac55
A
429 *
430 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
431 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
432 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
433 * execution from data areas for a particular binary even if the arch normally permits it. As
434 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
435 * to support some complicated use cases, notably browsers with out-of-process plugins that
436 * are not all NX-safe.
2d21ac55
A
437 */
438
439extern int allow_data_exec, allow_stack_exec;
440
441int
442override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
443{
444 int current_abi;
445
0a7de745
A
446 if (map->pmap == kernel_pmap) {
447 return FALSE;
448 }
3e170ce0 449
2d21ac55
A
450 /*
451 * Determine if the app is running in 32 or 64 bit mode.
452 */
453
0a7de745 454 if (vm_map_is_64bit(map)) {
2d21ac55 455 current_abi = VM_ABI_64;
0a7de745 456 } else {
2d21ac55 457 current_abi = VM_ABI_32;
0a7de745 458 }
2d21ac55
A
459
460 /*
5ba3f43e 461 * Determine if we should allow the execution based on whether it's a
2d21ac55
A
462 * stack or data area and the current architecture.
463 */
464
0a7de745 465 if (user_tag == VM_MEMORY_STACK) {
2d21ac55 466 return allow_stack_exec & current_abi;
0a7de745 467 }
2d21ac55 468
6d2010ae 469 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
470}
471
472
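/*
 * Illustrative sketch (not in the original source): a hypothetical helper
 * showing how a fault path could consult override_nx() when an instruction
 * fetch hits a page that lacks VM_PROT_EXECUTE.  The helper name and its
 * placement here are assumptions made purely for illustration.
 */
static __unused boolean_t
vm_map_example_allow_exec(vm_map_t map, vm_prot_t prot, uint32_t user_tag)
{
	if (prot & VM_PROT_EXECUTE) {
		return TRUE;            /* already executable: nothing to override */
	}
	/* not executable: permitted only if the NX policy is overridden */
	return override_nx(map, user_tag) ? TRUE : FALSE;
}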
1c79356b
A
473/*
474 * Virtual memory maps provide for the mapping, protection,
475 * and sharing of virtual memory objects. In addition,
476 * this module provides for an efficient virtual copy of
477 * memory from one map to another.
478 *
479 * Synchronization is required prior to most operations.
480 *
481 * Maps consist of an ordered doubly-linked list of simple
482 * entries; a single hint is used to speed up lookups.
483 *
484 * Sharing maps have been deleted from this version of Mach.
485 * All shared objects are now mapped directly into the respective
486 * maps. This requires a change in the copy on write strategy;
487 * the asymmetric (delayed) strategy is used for shared temporary
488 * objects instead of the symmetric (shadow) strategy. All maps
489 * are now "top level" maps (either task map, kernel map or submap
5ba3f43e 490 * of the kernel map).
1c79356b
A
491 *
 492 * Since portions of maps are specified by start/end addresses,
493 * which may not align with existing map entries, all
494 * routines merely "clip" entries to these start/end values.
495 * [That is, an entry is split into two, bordering at a
496 * start or end value.] Note that these clippings may not
497 * always be necessary (as the two resulting entries are then
498 * not changed); however, the clipping is done for convenience.
499 * No attempt is currently made to "glue back together" two
500 * abutting entries.
501 *
502 * The symmetric (shadow) copy strategy implements virtual copy
503 * by copying VM object references from one map to
504 * another, and then marking both regions as copy-on-write.
505 * It is important to note that only one writeable reference
506 * to a VM object region exists in any map when this strategy
507 * is used -- this means that shadow object creation can be
508 * delayed until a write operation occurs. The symmetric (delayed)
509 * strategy allows multiple maps to have writeable references to
510 * the same region of a vm object, and hence cannot delay creating
511 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
512 * Copying of permanent objects is completely different; see
513 * vm_object_copy_strategically() in vm_object.c.
514 */
515
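/*
 * Illustrative sketch (not from the original source): a simplified,
 * hypothetical version of the "clipping" idea described above.  It splits
 * one entry [vme_start, vme_end) into [vme_start, where) and
 * [where, vme_end) by allocating a second entry and adjusting bounds.
 * The real _vm_map_clip_start()/_vm_map_clip_end() also fix up offsets,
 * wiring, pmap accounting and the map store; this sketch omits all of that.
 */
static __unused void
vm_map_clip_sketch(struct vm_map_header *hdr, vm_map_entry_t entry,
    vm_map_offset_t where)
{
	vm_map_entry_t right;

	assert(entry->vme_start < where && where < entry->vme_end);

	right = _vm_map_entry_create(hdr, FALSE);   /* second half */
	vm_map_entry_copy_full(right, entry);       /* identical copy */

	entry->vme_end = where;                     /* left half ends at the split */
	right->vme_start = where;                   /* right half starts there */
	/* (a real clip would now link "right" into the map after "entry") */
}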
0a7de745
A
516static zone_t vm_map_zone; /* zone for vm_map structures */
517zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
518static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
519static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
520zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
521
522
523/*
524 * Placeholder object for submap operations. This object is dropped
525 * into the range by a call to vm_map_find, and removed when
526 * vm_map_submap creates the submap.
527 */
528
0a7de745 529vm_object_t vm_submap_object;
1c79356b 530
0a7de745
A
531static void *map_data;
532static vm_size_t map_data_size;
533static void *kentry_data;
534static vm_size_t kentry_data_size;
535static void *map_holes_data;
536static vm_size_t map_holes_data_size;
1c79356b 537
5ba3f43e 538#if CONFIG_EMBEDDED
0a7de745 539#define NO_COALESCE_LIMIT 0
5ba3f43e 540#else
b0d623f7 541#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
5ba3f43e 542#endif
1c79356b 543
55e303ae 544/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 545unsigned int not_in_kdp = 1;
55e303ae 546
6d2010ae
A
547unsigned int vm_map_set_cache_attr_count = 0;
548
549kern_return_t
550vm_map_set_cache_attr(
0a7de745
A
551 vm_map_t map,
552 vm_map_offset_t va)
6d2010ae 553{
0a7de745
A
554 vm_map_entry_t map_entry;
555 vm_object_t object;
556 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
557
558 vm_map_lock_read(map);
559
560 if (!vm_map_lookup_entry(map, va, &map_entry) ||
561 map_entry->is_sub_map) {
562 /*
563 * that memory is not properly mapped
564 */
565 kr = KERN_INVALID_ARGUMENT;
566 goto done;
567 }
3e170ce0 568 object = VME_OBJECT(map_entry);
6d2010ae
A
569
570 if (object == VM_OBJECT_NULL) {
571 /*
572 * there should be a VM object here at this point
573 */
574 kr = KERN_INVALID_ARGUMENT;
575 goto done;
576 }
577 vm_object_lock(object);
578 object->set_cache_attr = TRUE;
579 vm_object_unlock(object);
580
581 vm_map_set_cache_attr_count++;
582done:
583 vm_map_unlock_read(map);
584
585 return kr;
586}
587
588
593a1d5f
A
589#if CONFIG_CODE_DECRYPTION
590/*
591 * vm_map_apple_protected:
5ba3f43e 592 * This remaps the requested part of the object with an object backed by
593a1d5f
A
593 * the decrypting pager.
594 * crypt_info contains entry points and session data for the crypt module.
595 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
596 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
597 */
0c530ab8
A
598kern_return_t
599vm_map_apple_protected(
0a7de745
A
600 vm_map_t map,
601 vm_map_offset_t start,
602 vm_map_offset_t end,
603 vm_object_offset_t crypto_backing_offset,
593a1d5f 604 struct pager_crypt_info *crypt_info)
0c530ab8 605{
0a7de745
A
606 boolean_t map_locked;
607 kern_return_t kr;
608 vm_map_entry_t map_entry;
3e170ce0 609 struct vm_map_entry tmp_entry;
0a7de745
A
610 memory_object_t unprotected_mem_obj;
611 vm_object_t protected_object;
612 vm_map_offset_t map_addr;
613 vm_map_offset_t start_aligned, end_aligned;
614 vm_object_offset_t crypto_start, crypto_end;
615 int vm_flags;
5ba3f43e
A
616 vm_map_kernel_flags_t vmk_flags;
617
618 vm_flags = 0;
619 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
0c530ab8 620
3e170ce0
A
621 map_locked = FALSE;
622 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 623
3e170ce0
A
624 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
625 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
626 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
627 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 628
5ba3f43e
A
629#if __arm64__
630 /*
631 * "start" and "end" might be 4K-aligned but not 16K-aligned,
632 * so we might have to loop and establish up to 3 mappings:
633 *
634 * + the first 16K-page, which might overlap with the previous
635 * 4K-aligned mapping,
636 * + the center,
637 * + the last 16K-page, which might overlap with the next
638 * 4K-aligned mapping.
639 * Each of these mapping might be backed by a vnode pager (if
640 * properly page-aligned) or a "fourk_pager", itself backed by a
641 * vnode pager (if 4K-aligned but not page-aligned).
642 */
643#else /* __arm64__ */
3e170ce0
A
644 assert(start_aligned == start);
645 assert(end_aligned == end);
5ba3f43e 646#endif /* __arm64__ */
b0d623f7 647
3e170ce0
A
648 map_addr = start_aligned;
649 for (map_addr = start_aligned;
0a7de745
A
650 map_addr < end;
651 map_addr = tmp_entry.vme_end) {
3e170ce0
A
652 vm_map_lock(map);
653 map_locked = TRUE;
b0d623f7 654
3e170ce0
A
655 /* lookup the protected VM object */
656 if (!vm_map_lookup_entry(map,
0a7de745
A
657 map_addr,
658 &map_entry) ||
3e170ce0
A
659 map_entry->is_sub_map ||
660 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
661 !(map_entry->protection & VM_PROT_EXECUTE)) {
662 /* that memory is not properly mapped */
663 kr = KERN_INVALID_ARGUMENT;
664 goto done;
665 }
b0d623f7 666
3e170ce0
A
667 /* get the protected object to be decrypted */
668 protected_object = VME_OBJECT(map_entry);
669 if (protected_object == VM_OBJECT_NULL) {
670 /* there should be a VM object here at this point */
671 kr = KERN_INVALID_ARGUMENT;
672 goto done;
673 }
674 /* ensure protected object stays alive while map is unlocked */
675 vm_object_reference(protected_object);
676
677 /* limit the map entry to the area we want to cover */
678 vm_map_clip_start(map, map_entry, start_aligned);
679 vm_map_clip_end(map, map_entry, end_aligned);
680
681 tmp_entry = *map_entry;
682 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
683 vm_map_unlock(map);
684 map_locked = FALSE;
685
686 /*
687 * This map entry might be only partially encrypted
688 * (if not fully "page-aligned").
689 */
690 crypto_start = 0;
691 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
692 if (tmp_entry.vme_start < start) {
693 if (tmp_entry.vme_start != start_aligned) {
694 kr = KERN_INVALID_ADDRESS;
695 }
696 crypto_start += (start - tmp_entry.vme_start);
697 }
698 if (tmp_entry.vme_end > end) {
699 if (tmp_entry.vme_end != end_aligned) {
700 kr = KERN_INVALID_ADDRESS;
701 }
702 crypto_end -= (tmp_entry.vme_end - end);
703 }
704
705 /*
706 * This "extra backing offset" is needed to get the decryption
707 * routine to use the right key. It adjusts for the possibly
708 * relative offset of an interposed "4K" pager...
709 */
710 if (crypto_backing_offset == (vm_object_offset_t) -1) {
711 crypto_backing_offset = VME_OFFSET(&tmp_entry);
712 }
0c530ab8 713
3e170ce0
A
714 /*
715 * Lookup (and create if necessary) the protected memory object
716 * matching that VM object.
717 * If successful, this also grabs a reference on the memory object,
718 * to guarantee that it doesn't go away before we get a chance to map
719 * it.
720 */
721 unprotected_mem_obj = apple_protect_pager_setup(
722 protected_object,
723 VME_OFFSET(&tmp_entry),
724 crypto_backing_offset,
725 crypt_info,
726 crypto_start,
727 crypto_end);
728
729 /* release extra ref on protected object */
730 vm_object_deallocate(protected_object);
731
732 if (unprotected_mem_obj == NULL) {
733 kr = KERN_FAILURE;
734 goto done;
735 }
736
737 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
5ba3f43e
A
738 /* can overwrite an immutable mapping */
739 vmk_flags.vmkf_overwrite_immutable = TRUE;
740#if __arm64__
741 if (tmp_entry.used_for_jit &&
742 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
0a7de745 743 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
5ba3f43e
A
744 fourk_binary_compatibility_unsafe &&
745 fourk_binary_compatibility_allow_wx) {
746 printf("** FOURK_COMPAT [%d]: "
0a7de745
A
747 "allowing write+execute at 0x%llx\n",
748 proc_selfpid(), tmp_entry.vme_start);
5ba3f43e
A
749 vmk_flags.vmkf_map_jit = TRUE;
750 }
751#endif /* __arm64__ */
3e170ce0
A
752
753 /* map this memory object in place of the current one */
754 map_addr = tmp_entry.vme_start;
755 kr = vm_map_enter_mem_object(map,
0a7de745
A
756 &map_addr,
757 (tmp_entry.vme_end -
758 tmp_entry.vme_start),
759 (mach_vm_offset_t) 0,
760 vm_flags,
761 vmk_flags,
762 VM_KERN_MEMORY_NONE,
763 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
764 0,
765 TRUE,
766 tmp_entry.protection,
767 tmp_entry.max_protection,
768 tmp_entry.inheritance);
5ba3f43e 769 assertf(kr == KERN_SUCCESS,
0a7de745 770 "kr = 0x%x\n", kr);
5ba3f43e 771 assertf(map_addr == tmp_entry.vme_start,
0a7de745
A
772 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
773 (uint64_t)map_addr,
774 (uint64_t) tmp_entry.vme_start,
775 &tmp_entry);
3e170ce0
A
776
777#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
778 if (vm_map_debug_apple_protect) {
779 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
0a7de745
A
780 " backing:[object:%p,offset:0x%llx,"
781 "crypto_backing_offset:0x%llx,"
782 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
783 map,
784 (uint64_t) map_addr,
785 (uint64_t) (map_addr + (tmp_entry.vme_end -
786 tmp_entry.vme_start)),
787 unprotected_mem_obj,
788 protected_object,
789 VME_OFFSET(&tmp_entry),
790 crypto_backing_offset,
791 crypto_start,
792 crypto_end);
39037602 793 }
3e170ce0 794#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
5ba3f43e 795
3e170ce0
A
796 /*
797 * Release the reference obtained by
798 * apple_protect_pager_setup().
799 * The mapping (if it succeeded) is now holding a reference on
800 * the memory object.
801 */
802 memory_object_deallocate(unprotected_mem_obj);
803 unprotected_mem_obj = MEMORY_OBJECT_NULL;
804
805 /* continue with next map entry */
806 crypto_backing_offset += (tmp_entry.vme_end -
0a7de745 807 tmp_entry.vme_start);
3e170ce0
A
808 crypto_backing_offset -= crypto_start;
809 }
810 kr = KERN_SUCCESS;
0c530ab8
A
811
812done:
813 if (map_locked) {
3e170ce0 814 vm_map_unlock(map);
0c530ab8
A
815 }
816 return kr;
817}
0a7de745 818#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
819
820
0a7de745
A
821lck_grp_t vm_map_lck_grp;
822lck_grp_attr_t vm_map_lck_grp_attr;
823lck_attr_t vm_map_lck_attr;
824lck_attr_t vm_map_lck_rw_attr;
b0d623f7 825
d9a64523
A
826#if CONFIG_EMBEDDED
827int malloc_no_cow = 1;
828#define VM_PROTECT_WX_FAIL 0
829#else /* CONFIG_EMBEDDED */
830int malloc_no_cow = 0;
831#define VM_PROTECT_WX_FAIL 1
832#endif /* CONFIG_EMBEDDED */
833uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
b0d623f7 834
593a1d5f
A
835/*
836 * vm_map_init:
837 *
838 * Initialize the vm_map module. Must be called before
839 * any other vm_map routines.
840 *
841 * Map and entry structures are allocated from zones -- we must
842 * initialize those zones.
843 *
844 * There are three zones of interest:
845 *
846 * vm_map_zone: used to allocate maps.
847 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 848 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
849 *
850 * The kernel allocates map entries from a special zone that is initially
851 * "crammed" with memory. It would be difficult (perhaps impossible) for
 852 * the kernel to allocate more memory to an entry zone when it became
853 * empty since the very act of allocating memory implies the creation
854 * of a new entry.
855 */
1c79356b
A
856void
857vm_map_init(
858 void)
859{
7ddcb079 860 vm_size_t entry_zone_alloc_size;
316670eb
A
861 const char *mez_name = "VM map entries";
862
0a7de745
A
863 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
864 PAGE_SIZE, "maps");
0b4c1975 865 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
0a7de745 866#if defined(__LP64__)
7ddcb079
A
867 entry_zone_alloc_size = PAGE_SIZE * 5;
868#else
869 entry_zone_alloc_size = PAGE_SIZE * 6;
870#endif
91447636 871 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
0a7de745
A
872 1024 * 1024, entry_zone_alloc_size,
873 mez_name);
0b4c1975 874 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 875 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 876 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 877
7ddcb079 878 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
0a7de745
A
879 kentry_data_size * 64, kentry_data_size,
880 "Reserved VM map entries");
7ddcb079 881 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
5ba3f43e
A
882 /* Don't quarantine because we always need elements available */
883 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
1c79356b 884
91447636 885 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
0a7de745 886 16 * 1024, PAGE_SIZE, "VM map copies");
0b4c1975 887 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 888
3e170ce0 889 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
0a7de745 890 16 * 1024, PAGE_SIZE, "VM map holes");
3e170ce0
A
891 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
892
1c79356b
A
893 /*
894 * Cram the map and kentry zones with initial data.
7ddcb079 895 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
896 */
897 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 898 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
0a7de745 899 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
7ddcb079
A
900
901 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
902 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
903 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
904 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
905 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 906 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 907 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 908
3e170ce0
A
909 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
910 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
911 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
912 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
913 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
914 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
915
5ba3f43e 916 /*
3e170ce0 917 * Add the stolen memory to zones, adjust zone size and stolen counts.
5ba3f43e 918 * zcram only up to the maximum number of pages for each zone chunk.
3e170ce0 919 */
7ddcb079 920 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
5ba3f43e
A
921
922 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
923 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
924 zcram(vm_map_entry_reserved_zone,
0a7de745
A
925 (vm_offset_t)kentry_data + off,
926 MIN(kentry_data_size - off, stride));
5ba3f43e
A
927 }
928 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
929 zcram(vm_map_holes_zone,
0a7de745
A
930 (vm_offset_t)map_holes_data + off,
931 MIN(map_holes_data_size - off, stride));
5ba3f43e
A
932 }
933
0a7de745
A
934 /*
935 * Since these are covered by zones, remove them from stolen page accounting.
936 */
3e170ce0
A
937 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
938
b0d623f7
A
939 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
940 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
5ba3f43e 941 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 942
fe8ab488
A
943 lck_attr_setdefault(&vm_map_lck_rw_attr);
944 lck_attr_cleardebug(&vm_map_lck_rw_attr);
945
39037602
A
946#if VM_MAP_DEBUG_APPLE_PROTECT
947 PE_parse_boot_argn("vm_map_debug_apple_protect",
0a7de745
A
948 &vm_map_debug_apple_protect,
949 sizeof(vm_map_debug_apple_protect));
39037602
A
950#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 951#if VM_MAP_DEBUG_FOURK
952 PE_parse_boot_argn("vm_map_debug_fourk",
0a7de745
A
953 &vm_map_debug_fourk,
954 sizeof(vm_map_debug_fourk));
39037602 955#endif /* VM_MAP_DEBUG_FOURK */
5ba3f43e 956 PE_parse_boot_argn("vm_map_executable_immutable",
0a7de745
A
957 &vm_map_executable_immutable,
958 sizeof(vm_map_executable_immutable));
d9a64523 959 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
0a7de745
A
960 &vm_map_executable_immutable_verbose,
961 sizeof(vm_map_executable_immutable_verbose));
d9a64523
A
962
963 PE_parse_boot_argn("malloc_no_cow",
0a7de745
A
964 &malloc_no_cow,
965 sizeof(malloc_no_cow));
d9a64523
A
966 if (malloc_no_cow) {
967 vm_memory_malloc_no_cow_mask = 0ULL;
968 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
969 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
0a7de745 970 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
d9a64523
A
971 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
972// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
973// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
974 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
975 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
976 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
977 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
978// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
979 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
0a7de745
A
980 &vm_memory_malloc_no_cow_mask,
981 sizeof(vm_memory_malloc_no_cow_mask));
d9a64523 982 }
1c79356b
A
983}
984
985void
986vm_map_steal_memory(
987 void)
988{
7ddcb079
A
989 uint32_t kentry_initial_pages;
990
b0d623f7 991 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
992 map_data = pmap_steal_memory(map_data_size);
993
1c79356b 994 /*
7ddcb079
A
995 * kentry_initial_pages corresponds to the number of kernel map entries
996 * required during bootstrap until the asynchronous replenishment
997 * scheme is activated and/or entries are available from the general
998 * map entry pool.
1c79356b 999 */
0a7de745 1000#if defined(__LP64__)
7ddcb079
A
1001 kentry_initial_pages = 10;
1002#else
1003 kentry_initial_pages = 6;
1c79356b 1004#endif
316670eb
A
1005
1006#if CONFIG_GZALLOC
1007 /* If using the guard allocator, reserve more memory for the kernel
1008 * reserved map entry pool.
0a7de745
A
1009 */
1010 if (gzalloc_enabled()) {
316670eb 1011 kentry_initial_pages *= 1024;
0a7de745 1012 }
316670eb
A
1013#endif
1014
7ddcb079 1015 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 1016 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
1017
1018 map_holes_data_size = kentry_data_size;
1019 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
1020}
1021
5ba3f43e
A
1022boolean_t vm_map_supports_hole_optimization = FALSE;
1023
3e170ce0 1024void
0a7de745
A
1025vm_kernel_reserved_entry_init(void)
1026{
1027 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
5ba3f43e
A
1028
1029 /*
1030 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1031 */
0a7de745 1032 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
5ba3f43e 1033 vm_map_supports_hole_optimization = TRUE;
3e170ce0
A
1034}
1035
1036void
1037vm_map_disable_hole_optimization(vm_map_t map)
1038{
0a7de745 1039 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
3e170ce0
A
1040
1041 if (map->holelistenabled) {
d9a64523 1042 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
1043
1044 while (hole_entry != NULL) {
3e170ce0
A
1045 next_hole_entry = hole_entry->vme_next;
1046
1047 hole_entry->vme_next = NULL;
1048 hole_entry->vme_prev = NULL;
1049 zfree(vm_map_holes_zone, hole_entry);
1050
1051 if (next_hole_entry == head_entry) {
1052 hole_entry = NULL;
1053 } else {
1054 hole_entry = next_hole_entry;
1055 }
1056 }
1057
1058 map->holes_list = NULL;
1059 map->holelistenabled = FALSE;
1060
1061 map->first_free = vm_map_first_entry(map);
1062 SAVE_HINT_HOLE_WRITE(map, NULL);
1063 }
1064}
1065
1066boolean_t
0a7de745
A
1067vm_kernel_map_is_kernel(vm_map_t map)
1068{
1069 return map->pmap == kernel_pmap;
7ddcb079
A
1070}
1071
1c79356b
A
1072/*
1073 * vm_map_create:
1074 *
1075 * Creates and returns a new empty VM map with
1076 * the given physical map structure, and having
1077 * the given lower and upper address bounds.
1078 */
3e170ce0 1079
1c79356b
A
1080vm_map_t
1081vm_map_create(
0a7de745
A
1082 pmap_t pmap,
1083 vm_map_offset_t min,
1084 vm_map_offset_t max,
1085 boolean_t pageable)
d9a64523
A
1086{
1087 int options;
1088
1089 options = 0;
1090 if (pageable) {
1091 options |= VM_MAP_CREATE_PAGEABLE;
1092 }
1093 return vm_map_create_options(pmap, min, max, options);
1094}
1095
1096vm_map_t
1097vm_map_create_options(
0a7de745
A
1098 pmap_t pmap,
1099 vm_map_offset_t min,
d9a64523 1100 vm_map_offset_t max,
0a7de745 1101 int options)
1c79356b 1102{
0a7de745
A
1103 vm_map_t result;
1104 struct vm_map_links *hole_entry = NULL;
1c79356b 1105
d9a64523
A
1106 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1107 /* unknown option */
1108 return VM_MAP_NULL;
1109 }
1110
1c79356b 1111 result = (vm_map_t) zalloc(vm_map_zone);
0a7de745 1112 if (result == VM_MAP_NULL) {
1c79356b 1113 panic("vm_map_create");
0a7de745 1114 }
1c79356b
A
1115
1116 vm_map_first_entry(result) = vm_map_to_entry(result);
1117 vm_map_last_entry(result) = vm_map_to_entry(result);
1118 result->hdr.nentries = 0;
d9a64523
A
1119 if (options & VM_MAP_CREATE_PAGEABLE) {
1120 result->hdr.entries_pageable = TRUE;
1121 } else {
1122 result->hdr.entries_pageable = FALSE;
1123 }
1c79356b 1124
0a7de745 1125 vm_map_store_init( &(result->hdr));
5ba3f43e 1126
39236c6e
A
1127 result->hdr.page_shift = PAGE_SHIFT;
1128
1c79356b 1129 result->size = 0;
0a7de745 1130 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
2d21ac55 1131 result->user_wire_size = 0;
5ba3f43e
A
1132#if __x86_64__
1133 result->vmmap_high_start = 0;
1134#endif /* __x86_64__ */
d9a64523 1135 result->map_refcnt = 1;
0a7de745 1136#if TASK_SWAPPER
1c79356b
A
1137 result->res_count = 1;
1138 result->sw_state = MAP_SW_IN;
0a7de745 1139#endif /* TASK_SWAPPER */
1c79356b
A
1140 result->pmap = pmap;
1141 result->min_offset = min;
1142 result->max_offset = max;
1143 result->wiring_required = FALSE;
1144 result->no_zero_fill = FALSE;
316670eb 1145 result->mapped_in_other_pmaps = FALSE;
1c79356b 1146 result->wait_for_space = FALSE;
b0d623f7 1147 result->switch_protect = FALSE;
6d2010ae
A
1148 result->disable_vmentry_reuse = FALSE;
1149 result->map_disallow_data_exec = FALSE;
39037602 1150 result->is_nested_map = FALSE;
a39ff7e2 1151 result->map_disallow_new_exec = FALSE;
6d2010ae 1152 result->highest_entry_end = 0;
1c79356b
A
1153 result->first_free = vm_map_to_entry(result);
1154 result->hint = vm_map_to_entry(result);
0a7de745 1155 result->jit_entry_exists = FALSE;
3e170ce0 1156
d9a64523
A
1157 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1158 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1159 result->has_corpse_footprint = TRUE;
1160 result->holelistenabled = FALSE;
1161 result->vmmap_corpse_footprint = NULL;
1162 } else {
1163 result->has_corpse_footprint = FALSE;
1164 if (vm_map_supports_hole_optimization) {
1165 hole_entry = zalloc(vm_map_holes_zone);
3e170ce0 1166
d9a64523 1167 hole_entry->start = min;
5ba3f43e 1168#if defined(__arm__) || defined(__arm64__)
d9a64523 1169 hole_entry->end = result->max_offset;
5ba3f43e 1170#else
d9a64523 1171 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 1172#endif
d9a64523
A
1173 result->holes_list = result->hole_hint = hole_entry;
1174 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1175 result->holelistenabled = TRUE;
1176 } else {
1177 result->holelistenabled = FALSE;
1178 }
3e170ce0
A
1179 }
1180
1c79356b 1181 vm_map_lock_init(result);
b0d623f7 1182 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
5ba3f43e 1183
0a7de745 1184 return result;
1c79356b
A
1185}
1186
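/*
 * Illustrative sketch (not in the original source): how a caller might
 * create a pageable map covering a given address range.  "example_pmap",
 * "example_min" and "example_max" are hypothetical placeholders.
 */
static __unused vm_map_t
vm_map_create_example(pmap_t example_pmap,
    vm_map_offset_t example_min, vm_map_offset_t example_max)
{
	/* equivalent to vm_map_create_options(..., VM_MAP_CREATE_PAGEABLE) */
	return vm_map_create(example_pmap, example_min, example_max, TRUE);
}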
1187/*
1188 * vm_map_entry_create: [ internal use only ]
1189 *
1190 * Allocates a VM map entry for insertion in the
1191 * given map (or map copy). No fields are filled.
1192 */
0a7de745 1193#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 1194
0a7de745 1195#define vm_map_copy_entry_create(copy, map_locked) \
7ddcb079
A
1196 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1197unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1198
91447636 1199static vm_map_entry_t
1c79356b 1200_vm_map_entry_create(
0a7de745 1201 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1202{
0a7de745
A
1203 zone_t zone;
1204 vm_map_entry_t entry;
1c79356b 1205
7ddcb079
A
1206 zone = vm_map_entry_zone;
1207
1208 assert(map_header->entries_pageable ? !map_locked : TRUE);
1209
1210 if (map_header->entries_pageable) {
1211 entry = (vm_map_entry_t) zalloc(zone);
0a7de745 1212 } else {
7ddcb079
A
1213 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1214
1215 if (entry == VM_MAP_ENTRY_NULL) {
1216 zone = vm_map_entry_reserved_zone;
1217 entry = (vm_map_entry_t) zalloc(zone);
1218 OSAddAtomic(1, &reserved_zalloc_count);
0a7de745 1219 } else {
7ddcb079 1220 OSAddAtomic(1, &nonreserved_zalloc_count);
0a7de745 1221 }
7ddcb079 1222 }
1c79356b 1223
0a7de745 1224 if (entry == VM_MAP_ENTRY_NULL) {
1c79356b 1225 panic("vm_map_entry_create");
0a7de745 1226 }
7ddcb079
A
1227 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1228
0a7de745
A
1229 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1230#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1231 entry->vme_creation_maphdr = map_header;
39037602 1232 backtrace(&entry->vme_creation_bt[0],
0a7de745 1233 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)));
316670eb 1234#endif
0a7de745 1235 return entry;
1c79356b
A
1236}
1237
1238/*
1239 * vm_map_entry_dispose: [ internal use only ]
1240 *
1241 * Inverse of vm_map_entry_create.
2d21ac55 1242 *
0a7de745 1243 * write map lock held so no need to
2d21ac55 1244 * do anything special to ensure correctness
0a7de745 1245 * of the stores
1c79356b 1246 */
0a7de745 1247#define vm_map_entry_dispose(map, entry) \
6d2010ae 1248 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b 1249
0a7de745 1250#define vm_map_copy_entry_dispose(copy, entry) \
1c79356b
A
1251 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1252
91447636 1253static void
1c79356b 1254_vm_map_entry_dispose(
0a7de745
A
1255 struct vm_map_header *map_header,
1256 vm_map_entry_t entry)
1c79356b 1257{
0a7de745 1258 zone_t zone;
1c79356b 1259
0a7de745 1260 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
2d21ac55 1261 zone = vm_map_entry_zone;
0a7de745 1262 } else {
7ddcb079 1263 zone = vm_map_entry_reserved_zone;
0a7de745 1264 }
7ddcb079
A
1265
1266 if (!map_header->entries_pageable) {
0a7de745 1267 if (zone == vm_map_entry_zone) {
7ddcb079 1268 OSAddAtomic(-1, &nonreserved_zalloc_count);
0a7de745 1269 } else {
7ddcb079 1270 OSAddAtomic(-1, &reserved_zalloc_count);
0a7de745 1271 }
7ddcb079 1272 }
1c79356b 1273
91447636 1274 zfree(zone, entry);
1c79356b
A
1275}
1276
91447636 1277#if MACH_ASSERT
91447636 1278static boolean_t first_free_check = FALSE;
6d2010ae 1279boolean_t
1c79356b 1280first_free_is_valid(
0a7de745 1281 vm_map_t map)
1c79356b 1282{
0a7de745 1283 if (!first_free_check) {
1c79356b 1284 return TRUE;
0a7de745 1285 }
5ba3f43e 1286
0a7de745 1287 return first_free_is_valid_store( map );
1c79356b 1288}
91447636 1289#endif /* MACH_ASSERT */
1c79356b 1290
1c79356b 1291
0a7de745 1292#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1293 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b 1294
0a7de745 1295#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1296 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1297
0a7de745 1298#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1299/*
1300 * vm_map_res_reference:
1301 *
1302 * Adds another valid residence count to the given map.
1303 *
1304 * Map is locked so this function can be called from
1305 * vm_map_swapin.
1306 *
1307 */
0a7de745
A
1308void
1309vm_map_res_reference(vm_map_t map)
1c79356b
A
1310{
1311 /* assert map is locked */
1312 assert(map->res_count >= 0);
d9a64523 1313 assert(map->map_refcnt >= map->res_count);
1c79356b 1314 if (map->res_count == 0) {
b0d623f7 1315 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1316 vm_map_lock(map);
1317 vm_map_swapin(map);
b0d623f7 1318 lck_mtx_lock(&map->s_lock);
1c79356b
A
1319 ++map->res_count;
1320 vm_map_unlock(map);
0a7de745 1321 } else {
1c79356b 1322 ++map->res_count;
0a7de745 1323 }
1c79356b
A
1324}
1325
1326/*
1327 * vm_map_reference_swap:
1328 *
1329 * Adds valid reference and residence counts to the given map.
1330 *
1331 * The map may not be in memory (i.e. zero residence count).
1332 *
1333 */
0a7de745
A
1334void
1335vm_map_reference_swap(vm_map_t map)
1c79356b
A
1336{
1337 assert(map != VM_MAP_NULL);
b0d623f7 1338 lck_mtx_lock(&map->s_lock);
1c79356b 1339 assert(map->res_count >= 0);
d9a64523
A
1340 assert(map->map_refcnt >= map->res_count);
1341 map->map_refcnt++;
1c79356b 1342 vm_map_res_reference(map);
b0d623f7 1343 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1344}
1345
1346/*
1347 * vm_map_res_deallocate:
1348 *
1349 * Decrement residence count on a map; possibly causing swapout.
1350 *
1351 * The map must be in memory (i.e. non-zero residence count).
1352 *
1353 * The map is locked, so this function is callable from vm_map_deallocate.
1354 *
1355 */
0a7de745
A
1356void
1357vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1358{
1359 assert(map->res_count > 0);
1360 if (--map->res_count == 0) {
b0d623f7 1361 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1362 vm_map_lock(map);
1363 vm_map_swapout(map);
1364 vm_map_unlock(map);
b0d623f7 1365 lck_mtx_lock(&map->s_lock);
1c79356b 1366 }
d9a64523 1367 assert(map->map_refcnt >= map->res_count);
1c79356b 1368}
0a7de745 1369#endif /* MACH_ASSERT && TASK_SWAPPER */
1c79356b 1370
1c79356b
A
1371/*
1372 * vm_map_destroy:
1373 *
1374 * Actually destroy a map.
1375 */
1376void
1377vm_map_destroy(
0a7de745
A
1378 vm_map_t map,
1379 int flags)
5ba3f43e 1380{
1c79356b 1381 vm_map_lock(map);
2d21ac55 1382
3e170ce0
A
1383 /* final cleanup: no need to unnest shared region */
1384 flags |= VM_MAP_REMOVE_NO_UNNESTING;
5ba3f43e
A
1385 /* final cleanup: ok to remove immutable mappings */
1386 flags |= VM_MAP_REMOVE_IMMUTABLE;
d9a64523
A
1387 /* final cleanup: allow gaps in range */
1388 flags |= VM_MAP_REMOVE_GAPS_OK;
3e170ce0 1389
2d21ac55
A
1390 /* clean up regular map entries */
1391 (void) vm_map_delete(map, map->min_offset, map->max_offset,
0a7de745 1392 flags, VM_MAP_NULL);
2d21ac55 1393 /* clean up leftover special mappings (commpage, etc...) */
0a7de745 1394#if !defined(__arm__) && !defined(__arm64__)
2d21ac55 1395 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
0a7de745 1396 flags, VM_MAP_NULL);
5ba3f43e 1397#endif /* !__arm__ && !__arm64__ */
6d2010ae 1398
3e170ce0 1399 vm_map_disable_hole_optimization(map);
d9a64523
A
1400 vm_map_corpse_footprint_destroy(map);
1401
1c79356b
A
1402 vm_map_unlock(map);
1403
2d21ac55 1404 assert(map->hdr.nentries == 0);
5ba3f43e 1405
0a7de745 1406 if (map->pmap) {
55e303ae 1407 pmap_destroy(map->pmap);
0a7de745 1408 }
1c79356b 1409
39037602
A
1410 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1411 /*
1412 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1413 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1414 * structure or kalloc'ed via lck_mtx_init.
1415 * An example is s_lock_ext within struct _vm_map.
1416 *
1417 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1418 * can add another tag to detect embedded vs alloc'ed indirect external
1419 * mutexes but that'll be additional checks in the lock path and require
1420 * updating dependencies for the old vs new tag.
1421 *
1422 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1423 * just when lock debugging is ON, we choose to forego explicitly destroying
1424 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1425 * count on vm_map_lck_grp, which has no serious side-effect.
1426 */
1427 } else {
1428 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1429 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1430 }
1431
91447636 1432 zfree(vm_map_zone, map);
1c79356b
A
1433}
1434
5ba3f43e
A
1435/*
1436 * Returns pid of the task with the largest number of VM map entries.
1437 * Used in the zone-map-exhaustion jetsam path.
1438 */
1439pid_t
1440find_largest_process_vm_map_entries(void)
1441{
1442 pid_t victim_pid = -1;
1443 int max_vm_map_entries = 0;
1444 task_t task = TASK_NULL;
1445 queue_head_t *task_list = &tasks;
1446
1447 lck_mtx_lock(&tasks_threads_lock);
1448 queue_iterate(task_list, task, task_t, tasks) {
0a7de745 1449 if (task == kernel_task || !task->active) {
5ba3f43e 1450 continue;
0a7de745 1451 }
5ba3f43e
A
1452
1453 vm_map_t task_map = task->map;
1454 if (task_map != VM_MAP_NULL) {
1455 int task_vm_map_entries = task_map->hdr.nentries;
1456 if (task_vm_map_entries > max_vm_map_entries) {
1457 max_vm_map_entries = task_vm_map_entries;
1458 victim_pid = pid_from_task(task);
1459 }
1460 }
1461 }
1462 lck_mtx_unlock(&tasks_threads_lock);
1463
1464 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1465 return victim_pid;
1466}
1467
0a7de745 1468#if TASK_SWAPPER
1c79356b
A
1469/*
1470 * vm_map_swapin/vm_map_swapout
1471 *
5ba3f43e 1472 * Swap a map in and out, either referencing or releasing its resources.
1c79356b
A
1473 * These functions are internal use only; however, they must be exported
1474 * because they may be called from macros, which are exported.
1475 *
5ba3f43e
A
1476 * In the case of swapout, there could be races on the residence count,
1477 * so if the residence count is up, we return, assuming that a
1c79356b
A
1478 * vm_map_deallocate() call in the near future will bring us back.
1479 *
1480 * Locking:
1481 * -- We use the map write lock for synchronization among races.
1482 * -- The map write lock, and not the simple s_lock, protects the
1483 * swap state of the map.
1484 * -- If a map entry is a share map, then we hold both locks, in
1485 * hierarchical order.
1486 *
1487 * Synchronization Notes:
1488 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1489 * will block on the map lock and proceed when swapout is through.
1490 * 2) A vm_map_reference() call at this time is illegal, and will
1491 * cause a panic. vm_map_reference() is only allowed on resident
1492 * maps, since it refuses to block.
5ba3f43e 1493 * 3) A vm_map_swapin() call during a swapin will block, and
1c79356b
A
 1494 * proceed when the first swapin is done, turning into a nop.
1495 * This is the reason the res_count is not incremented until
1496 * after the swapin is complete.
1497 * 4) There is a timing hole after the checks of the res_count, before
1498 * the map lock is taken, during which a swapin may get the lock
1499 * before a swapout about to happen. If this happens, the swapin
1500 * will detect the state and increment the reference count, causing
5ba3f43e
A
1501 * the swapout to be a nop, thereby delaying it until a later
1502 * vm_map_deallocate. If the swapout gets the lock first, then
1503 * the swapin will simply block until the swapout is done, and
1c79356b
A
1504 * then proceed.
1505 *
1506 * Because vm_map_swapin() is potentially an expensive operation, it
1507 * should be used with caution.
1508 *
1509 * Invariants:
1510 * 1) A map with a residence count of zero is either swapped, or
1511 * being swapped.
1512 * 2) A map with a non-zero residence count is either resident,
1513 * or being swapped in.
1514 */
1515
1516int vm_map_swap_enable = 1;
1517
0a7de745
A
1518void
1519vm_map_swapin(vm_map_t map)
1c79356b 1520{
39037602 1521 vm_map_entry_t entry;
2d21ac55 1522
0a7de745 1523 if (!vm_map_swap_enable) { /* debug */
1c79356b 1524 return;
0a7de745 1525 }
1c79356b
A
1526
1527 /*
1528 * Map is locked
1529 * First deal with various races.
1530 */
0a7de745 1531 if (map->sw_state == MAP_SW_IN) {
5ba3f43e 1532 /*
1c79356b
A
1533 * we raced with swapout and won. Returning will incr.
1534 * the res_count, turning the swapout into a nop.
1535 */
1536 return;
0a7de745 1537 }
1c79356b
A
1538
1539 /*
1540 * The residence count must be zero. If we raced with another
1541 * swapin, the state would have been IN; if we raced with a
1542 * swapout (after another competing swapin), we must have lost
1543 * the race to get here (see above comment), in which case
1544 * res_count is still 0.
1545 */
1546 assert(map->res_count == 0);
1547
1548 /*
1549 * There are no intermediate states of a map going out or
1550 * coming in, since the map is locked during the transition.
1551 */
1552 assert(map->sw_state == MAP_SW_OUT);
1553
1554 /*
5ba3f43e 1555 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1556 * or share-map, we call vm_map_res_reference upon it.
1557 * If the entry is an object, we call vm_object_res_reference
1558 * (this may iterate through the shadow chain).
1559 * Note that we hold the map locked the entire time,
1560 * even if we get back here via a recursive call in
1561 * vm_map_res_reference.
1562 */
1563 entry = vm_map_first_entry(map);
1564
1565 while (entry != vm_map_to_entry(map)) {
3e170ce0 1566 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1567 if (entry->is_sub_map) {
3e170ce0 1568 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1569 lck_mtx_lock(&lmap->s_lock);
1c79356b 1570 vm_map_res_reference(lmap);
b0d623f7 1571 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1572 } else {
3e170ce0 1573 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1574 vm_object_lock(object);
1575 /*
1576 * This call may iterate through the
1577 * shadow chain.
1578 */
1579 vm_object_res_reference(object);
1580 vm_object_unlock(object);
1581 }
1582 }
1583 entry = entry->vme_next;
1584 }
1585 assert(map->sw_state == MAP_SW_OUT);
1586 map->sw_state = MAP_SW_IN;
1587}
1588
0a7de745
A
1589void
1590vm_map_swapout(vm_map_t map)
1c79356b 1591{
39037602 1592 vm_map_entry_t entry;
5ba3f43e 1593
1c79356b
A
1594 /*
1595 * Map is locked
1596 * First deal with various races.
1597 * If we raced with a swapin and lost, the residence count
1598 * will have been incremented to 1, and we simply return.
1599 */
b0d623f7 1600 lck_mtx_lock(&map->s_lock);
1c79356b 1601 if (map->res_count != 0) {
b0d623f7 1602 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1603 return;
1604 }
b0d623f7 1605 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1606
1607 /*
1608 * There are no intermediate states of a map going out or
1609 * coming in, since the map is locked during the transition.
1610 */
1611 assert(map->sw_state == MAP_SW_IN);
1612
0a7de745 1613 if (!vm_map_swap_enable) {
1c79356b 1614 return;
0a7de745 1615 }
1c79356b
A
1616
1617 /*
5ba3f43e 1618 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1619 * or share-map, we call vm_map_res_deallocate upon it.
1620 * If the entry is an object, we call vm_object_res_deallocate
1621 * (this may iterate through the shadow chain).
1622 * Note that we hold the map locked the entire time,
1623 * even if we get back here via a recursive call in
1624 * vm_map_res_deallocate.
1625 */
1626 entry = vm_map_first_entry(map);
1627
1628 while (entry != vm_map_to_entry(map)) {
3e170ce0 1629 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1630 if (entry->is_sub_map) {
3e170ce0 1631 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1632 lck_mtx_lock(&lmap->s_lock);
1c79356b 1633 vm_map_res_deallocate(lmap);
b0d623f7 1634 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1635 } else {
3e170ce0 1636 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1637 vm_object_lock(object);
1638 /*
5ba3f43e
A
1639 * This call may take a long time,
1640 * since it could actively push
1641 * out pages (if we implement it
1c79356b
A
1642 * that way).
1643 */
1644 vm_object_res_deallocate(object);
1645 vm_object_unlock(object);
1646 }
1647 }
1648 entry = entry->vme_next;
1649 }
1650 assert(map->sw_state == MAP_SW_IN);
1651 map->sw_state = MAP_SW_OUT;
1652}
1653
0a7de745 1654#endif /* TASK_SWAPPER */
1c79356b 1655
1c79356b
A
1656/*
1657 * vm_map_lookup_entry: [ internal use only ]
1658 *
5ba3f43e
A
1659 * Calls into the vm map store layer to find the map
1660 * entry containing (or immediately preceding) the
6d2010ae 1661 * specified address in the given map; the entry is returned
1c79356b
A
1662 * in the "entry" parameter. The boolean
1663 * result indicates whether the address is
1664 * actually contained in the map.
1665 */
1666boolean_t
1667vm_map_lookup_entry(
0a7de745
A
1668 vm_map_t map,
1669 vm_map_offset_t address,
1670 vm_map_entry_t *entry) /* OUT */
1c79356b 1671{
0a7de745 1672 return vm_map_store_lookup_entry( map, address, entry );
1c79356b
A
1673}
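/*
 * Illustrative sketch (not part of the original file): the typical lookup
 * pattern.  On a TRUE return *entry contains the address; on FALSE, *entry
 * is the entry immediately preceding it (or vm_map_to_entry(map) if nothing
 * precedes it).  The function name is hypothetical.
 */
#if 0	/* illustration only */
static void
vm_map_lookup_entry_example(vm_map_t map, vm_map_offset_t addr)
{
	vm_map_entry_t tmp_entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &tmp_entry)) {
		/* "addr" lies within [tmp_entry->vme_start, tmp_entry->vme_end) */
	} else {
		/* hole at "addr": tmp_entry is its predecessor in the map */
	}
	vm_map_unlock_read(map);
}
#endif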
1674
1675/*
1676 * Routine: vm_map_find_space
1677 * Purpose:
1678 * Allocate a range in the specified virtual address map,
1679 * returning the entry allocated for that range.
1680 * Used by kmem_alloc, etc.
1681 *
1682 * The map must be NOT be locked. It will be returned locked
1683 * on KERN_SUCCESS, unlocked on failure.
1684 *
1685 * If an entry is allocated, the object/offset fields
1686 * are initialized to zero.
1687 */
1688kern_return_t
1689vm_map_find_space(
0a7de745
A
1690 vm_map_t map,
1691 vm_map_offset_t *address, /* OUT */
1692 vm_map_size_t size,
1693 vm_map_offset_t mask,
1694 int flags __unused,
1695 vm_map_kernel_flags_t vmk_flags,
1696 vm_tag_t tag,
1697 vm_map_entry_t *o_entry) /* OUT */
1c79356b 1698{
0a7de745
A
1699 vm_map_entry_t entry, new_entry;
1700 vm_map_offset_t start;
1701 vm_map_offset_t end;
1702 vm_map_entry_t hole_entry;
91447636
A
1703
1704 if (size == 0) {
1705 *address = 0;
1706 return KERN_INVALID_ARGUMENT;
1707 }
1c79356b 1708
5ba3f43e 1709 if (vmk_flags.vmkf_guard_after) {
2d21ac55 1710 /* account for the back guard page in the size */
39236c6e 1711 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1712 }
1713
7ddcb079 1714 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1715
1716 /*
1717 * Look for the first possible address; if there's already
1718 * something at this address, we have to start after it.
1719 */
1720
1721 vm_map_lock(map);
1722
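	/*
	 * Three ways to pick the first candidate address: if vm-entry
	 * reuse is disabled, start above the map's highest entry; if the
	 * hole list is enabled, start at the first hole; otherwise fall
	 * back to the classic "first_free" hint.
	 */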
0a7de745 1723 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae
A
1724 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1725 } else {
3e170ce0 1726 if (map->holelistenabled) {
d9a64523 1727 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
1728
1729 if (hole_entry == NULL) {
1730 /*
1731 * No more space in the map?
1732 */
1733 vm_map_entry_dispose(map, new_entry);
1734 vm_map_unlock(map);
0a7de745 1735 return KERN_NO_SPACE;
3e170ce0
A
1736 }
1737
1738 entry = hole_entry;
1739 start = entry->vme_start;
1740 } else {
1741 assert(first_free_is_valid(map));
0a7de745 1742 if ((entry = map->first_free) == vm_map_to_entry(map)) {
3e170ce0 1743 start = map->min_offset;
0a7de745 1744 } else {
3e170ce0 1745 start = entry->vme_end;
0a7de745 1746 }
3e170ce0 1747 }
6d2010ae 1748 }
1c79356b
A
1749
1750 /*
1751 * In any case, the "entry" always precedes
1752 * the proposed new region throughout the loop:
1753 */
1754
1755 while (TRUE) {
0a7de745 1756 vm_map_entry_t next;
1c79356b
A
1757
1758 /*
1759 * Find the end of the proposed new region.
1760 * Be sure we didn't go beyond the end, or
1761 * wrap around the address.
1762 */
1763
5ba3f43e 1764 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1765 /* reserve space for the front guard page */
39236c6e 1766 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1767 }
1c79356b 1768 end = ((start + mask) & ~mask);
5ba3f43e 1769
1c79356b
A
1770 if (end < start) {
1771 vm_map_entry_dispose(map, new_entry);
1772 vm_map_unlock(map);
0a7de745 1773 return KERN_NO_SPACE;
1c79356b
A
1774 }
1775 start = end;
d9a64523 1776 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1c79356b 1777 end += size;
d9a64523 1778 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b
A
1779
1780 if ((end > map->max_offset) || (end < start)) {
1781 vm_map_entry_dispose(map, new_entry);
1782 vm_map_unlock(map);
0a7de745 1783 return KERN_NO_SPACE;
1c79356b
A
1784 }
1785
1c79356b 1786 next = entry->vme_next;
1c79356b 1787
3e170ce0 1788 if (map->holelistenabled) {
0a7de745 1789 if (entry->vme_end >= end) {
3e170ce0 1790 break;
0a7de745 1791 }
3e170ce0
A
1792 } else {
1793 /*
1794 * If there are no more entries, we must win.
1795 *
1796 * OR
1797 *
1798 * If there is another entry, it must be
1799 * after the end of the potential new region.
1800 */
1c79356b 1801
0a7de745 1802 if (next == vm_map_to_entry(map)) {
3e170ce0 1803 break;
0a7de745 1804 }
3e170ce0 1805
0a7de745 1806 if (next->vme_start >= end) {
3e170ce0 1807 break;
0a7de745 1808 }
3e170ce0 1809 }
1c79356b
A
1810
1811 /*
1812 * Didn't fit -- move to the next entry.
1813 */
1814
1815 entry = next;
3e170ce0
A
1816
1817 if (map->holelistenabled) {
d9a64523 1818 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
1819 /*
1820 * Wrapped around
1821 */
1822 vm_map_entry_dispose(map, new_entry);
1823 vm_map_unlock(map);
0a7de745 1824 return KERN_NO_SPACE;
3e170ce0
A
1825 }
1826 start = entry->vme_start;
1827 } else {
1828 start = entry->vme_end;
1829 }
1830 }
1831
1832 if (map->holelistenabled) {
1833 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1834 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1835 }
1c79356b
A
1836 }
1837
1838 /*
1839 * At this point,
1840 * "start" and "end" should define the endpoints of the
1841 * available new range, and
1842 * "entry" should refer to the region before the new
1843 * range, and
1844 *
1845 * the map should be locked.
1846 */
1847
5ba3f43e 1848 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1849 /* go back for the front guard page */
39236c6e 1850 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1851 }
1c79356b
A
1852 *address = start;
1853
e2d2fc5c 1854 assert(start < end);
1c79356b
A
1855 new_entry->vme_start = start;
1856 new_entry->vme_end = end;
1857 assert(page_aligned(new_entry->vme_start));
1858 assert(page_aligned(new_entry->vme_end));
39236c6e 1859 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 1860 VM_MAP_PAGE_MASK(map)));
39236c6e 1861 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 1862 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1863
1864 new_entry->is_shared = FALSE;
1865 new_entry->is_sub_map = FALSE;
fe8ab488 1866 new_entry->use_pmap = TRUE;
3e170ce0
A
1867 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1868 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1869
1870 new_entry->needs_copy = FALSE;
1871
1872 new_entry->inheritance = VM_INHERIT_DEFAULT;
1873 new_entry->protection = VM_PROT_DEFAULT;
1874 new_entry->max_protection = VM_PROT_ALL;
1875 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1876 new_entry->wired_count = 0;
1877 new_entry->user_wired_count = 0;
1878
1879 new_entry->in_transition = FALSE;
1880 new_entry->needs_wakeup = FALSE;
2d21ac55 1881 new_entry->no_cache = FALSE;
b0d623f7 1882 new_entry->permanent = FALSE;
39236c6e
A
1883 new_entry->superpage_size = FALSE;
1884 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1885 new_entry->map_aligned = TRUE;
1886 } else {
1887 new_entry->map_aligned = FALSE;
1888 }
2d21ac55 1889
3e170ce0 1890 new_entry->used_for_jit = FALSE;
d9a64523 1891 new_entry->pmap_cs_associated = FALSE;
b0d623f7 1892 new_entry->zero_wired_pages = FALSE;
fe8ab488 1893 new_entry->iokit_acct = FALSE;
3e170ce0
A
1894 new_entry->vme_resilient_codesign = FALSE;
1895 new_entry->vme_resilient_media = FALSE;
0a7de745 1896 if (vmk_flags.vmkf_atomic_entry) {
39037602 1897 new_entry->vme_atomic = TRUE;
0a7de745 1898 } else {
39037602 1899 new_entry->vme_atomic = FALSE;
0a7de745 1900 }
1c79356b 1901
5ba3f43e 1902 VME_ALIAS_SET(new_entry, tag);
0c530ab8 1903
1c79356b
A
1904 /*
1905 * Insert the new entry into the list
1906 */
1907
d9a64523 1908 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
1909
1910 map->size += size;
1911
1912 /*
1913 * Update the lookup hint
1914 */
0c530ab8 1915 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1916
1917 *o_entry = new_entry;
0a7de745 1918 return KERN_SUCCESS;
1c79356b
A
1919}
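/*
 * Illustrative sketch (not part of the original file): a kmem_alloc-style
 * use of vm_map_find_space().  "object" is a placeholder and the function
 * name is hypothetical.  On KERN_SUCCESS the map comes back locked and the
 * new entry's object/offset fields are zeroed, so the caller fills them in
 * and then unlocks.
 */
#if 0	/* illustration only */
static kern_return_t
vm_map_find_space_example(vm_map_size_t size, vm_object_t object)
{
	vm_map_offset_t addr;
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = vm_map_find_space(kernel_map, &addr, size, (vm_map_offset_t)0,
	    0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
	if (kr == KERN_SUCCESS) {
		VME_OBJECT_SET(entry, object);
		VME_OFFSET_SET(entry, (vm_object_offset_t)0);
		vm_map_unlock(kernel_map);
	}
	return kr;
}
#endif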
1920
1921int vm_map_pmap_enter_print = FALSE;
1922int vm_map_pmap_enter_enable = FALSE;
1923
1924/*
91447636 1925 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1926 *
1927 * Description:
1928 * Force pages from the specified object to be entered into
1929 * the pmap at the specified address if they are present.
1930 * As soon as a page is not found in the object, the scan ends.
1931 *
1932 * Returns:
5ba3f43e 1933 * Nothing.
1c79356b
A
1934 *
1935 * In/out conditions:
1936 * The source map should not be locked on entry.
1937 */
fe8ab488 1938__unused static void
1c79356b 1939vm_map_pmap_enter(
0a7de745
A
1940 vm_map_t map,
1941 vm_map_offset_t addr,
1942 vm_map_offset_t end_addr,
1943 vm_object_t object,
1944 vm_object_offset_t offset,
1945 vm_prot_t protection)
1c79356b 1946{
0a7de745
A
1947 int type_of_fault;
1948 kern_return_t kr;
d9a64523 1949 struct vm_object_fault_info fault_info = {};
0b4e3aa0 1950
0a7de745 1951 if (map->pmap == 0) {
55e303ae 1952 return;
0a7de745 1953 }
55e303ae 1954
1c79356b 1955 while (addr < end_addr) {
0a7de745 1956 vm_page_t m;
1c79356b 1957
fe8ab488
A
1958
1959 /*
0a7de745 1960 * TODO:
fe8ab488
A
1961 * From vm_map_enter(), we come into this function without the map
1962 * lock held or the object lock held.
1963 * We haven't taken a reference on the object either.
1964 * We should do a proper lookup on the map to make sure
1965 * that things are sane before we go locking objects that
1966 * could have been deallocated from under us.
1967 */
1968
1c79356b 1969 vm_object_lock(object);
1c79356b
A
1970
1971 m = vm_page_lookup(object, offset);
5ba3f43e 1972
d9a64523 1973 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
0a7de745 1974 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
1c79356b
A
1975 vm_object_unlock(object);
1976 return;
1977 }
1978
1c79356b
A
1979 if (vm_map_pmap_enter_print) {
1980 printf("vm_map_pmap_enter:");
2d21ac55 1981 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
0a7de745 1982 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1983 }
2d21ac55 1984 type_of_fault = DBG_CACHE_HIT_FAULT;
d9a64523 1985 kr = vm_fault_enter(m, map->pmap,
0a7de745
A
1986 addr, protection, protection,
1987 VM_PAGE_WIRED(m),
1988 FALSE, /* change_wiring */
1989 VM_KERN_MEMORY_NONE, /* tag - not wiring */
1990 &fault_info,
1991 NULL, /* need_retry */
1992 &type_of_fault);
1c79356b 1993
1c79356b
A
1994 vm_object_unlock(object);
1995
1996 offset += PAGE_SIZE_64;
1997 addr += PAGE_SIZE;
1998 }
1999}
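/*
 * Note: vm_map_pmap_enter() is compiled but unreferenced in this
 * configuration (hence __unused).  A hypothetical caller wanting to
 * pre-populate the pmap for an already-resident object range would look
 * like this (illustration only; all arguments are placeholders):
 *
 *	vm_map_pmap_enter(map, addr, addr + size, object, offset,
 *	    VM_PROT_READ);
 */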
2000
91447636 2001boolean_t vm_map_pmap_is_empty(
0a7de745
A
2002 vm_map_t map,
2003 vm_map_offset_t start,
91447636 2004 vm_map_offset_t end);
0a7de745
A
2005boolean_t
2006vm_map_pmap_is_empty(
2007 vm_map_t map,
2008 vm_map_offset_t start,
2009 vm_map_offset_t end)
91447636 2010{
2d21ac55
A
2011#ifdef MACHINE_PMAP_IS_EMPTY
2012 return pmap_is_empty(map->pmap, start, end);
0a7de745
A
2013#else /* MACHINE_PMAP_IS_EMPTY */
2014 vm_map_offset_t offset;
2015 ppnum_t phys_page;
91447636
A
2016
2017 if (map->pmap == NULL) {
2018 return TRUE;
2019 }
2d21ac55 2020
91447636 2021 for (offset = start;
0a7de745
A
2022 offset < end;
2023 offset += PAGE_SIZE) {
91447636
A
2024 phys_page = pmap_find_phys(map->pmap, offset);
2025 if (phys_page) {
2026 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
0a7de745
A
2027 "page %d at 0x%llx\n",
2028 map, (long long)start, (long long)end,
2029 phys_page, (long long)offset);
91447636
A
2030 return FALSE;
2031 }
2032 }
2033 return TRUE;
0a7de745 2034#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
2035}
2036
0a7de745 2037#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
316670eb
A
2038kern_return_t
2039vm_map_random_address_for_size(
0a7de745
A
2040 vm_map_t map,
2041 vm_map_offset_t *address,
2042 vm_map_size_t size)
316670eb 2043{
0a7de745
A
2044 kern_return_t kr = KERN_SUCCESS;
2045 int tries = 0;
2046 vm_map_offset_t random_addr = 0;
316670eb
A
2047 vm_map_offset_t hole_end;
2048
0a7de745
A
2049 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2050 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2051 vm_map_size_t vm_hole_size = 0;
2052 vm_map_size_t addr_space_size;
316670eb
A
2053
2054 addr_space_size = vm_map_max(map) - vm_map_min(map);
2055
2056 assert(page_aligned(size));
2057
2058 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2059 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e 2060 random_addr = vm_map_trunc_page(
0a7de745 2061 vm_map_min(map) + (random_addr % addr_space_size),
39236c6e 2062 VM_MAP_PAGE_MASK(map));
316670eb
A
2063
2064 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2065 if (prev_entry == vm_map_to_entry(map)) {
2066 next_entry = vm_map_first_entry(map);
2067 } else {
2068 next_entry = prev_entry->vme_next;
2069 }
2070 if (next_entry == vm_map_to_entry(map)) {
2071 hole_end = vm_map_max(map);
2072 } else {
2073 hole_end = next_entry->vme_start;
2074 }
2075 vm_hole_size = hole_end - random_addr;
2076 if (vm_hole_size >= size) {
2077 *address = random_addr;
2078 break;
2079 }
2080 }
2081 tries++;
2082 }
2083
2084 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2085 kr = KERN_NO_SPACE;
2086 }
2087 return kr;
2088}
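/*
 * Illustrative sketch (not part of the original file): consuming a
 * randomized start address.  On KERN_SUCCESS, "where" is map-page-aligned,
 * lies inside [vm_map_min(map), vm_map_max(map)), and begins a hole of at
 * least "size" bytes; KERN_NO_SPACE means no suitable hole was found within
 * MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.  The function name is
 * hypothetical.
 */
#if 0	/* illustration only */
static kern_return_t
vm_map_pick_random_start_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *address)
{
	vm_map_offset_t where;
	kern_return_t kr;

	kr = vm_map_random_address_for_size(map, &where, size);
	if (kr == KERN_SUCCESS) {
		*address = where;	/* proceed with a fixed-address insertion */
	}
	return kr;
}
#endif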
2089
d9a64523
A
2090static boolean_t
2091vm_memory_malloc_no_cow(
2092 int alias)
2093{
2094 uint64_t alias_mask;
2095
2096 alias_mask = 1ULL << alias;
2097 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2098 return TRUE;
2099 }
2100 return FALSE;
2101}
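/*
 * Note: vm_memory_malloc_no_cow_mask is interpreted as a bitmask indexed by
 * VM_MEMORY_* alias; a set bit means anonymous memory tagged with that alias
 * is given its own object up front (see the purgable/jit/no-COW branch in
 * vm_map_enter() below) instead of relying on copy-on-write optimizations.
 * Hypothetical example of the mask convention (illustration only, not the
 * actual initialization code):
 */
#if 0	/* illustration only */
	vm_memory_malloc_no_cow_mask = (1ULL << VM_MEMORY_MALLOC) |
	    (1ULL << VM_MEMORY_MALLOC_SMALL);
	assert(vm_memory_malloc_no_cow(VM_MEMORY_MALLOC));
#endif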
2102
1c79356b
A
2103/*
2104 * Routine: vm_map_enter
2105 *
2106 * Description:
2107 * Allocate a range in the specified virtual address map.
2108 * The resulting range will refer to memory defined by
2109 * the given memory object and offset into that object.
2110 *
2111 * Arguments are as defined in the vm_map call.
2112 */
91447636
A
2113int _map_enter_debug = 0;
2114static unsigned int vm_map_enter_restore_successes = 0;
2115static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
2116kern_return_t
2117vm_map_enter(
0a7de745
A
2118 vm_map_t map,
2119 vm_map_offset_t *address, /* IN/OUT */
2120 vm_map_size_t size,
2121 vm_map_offset_t mask,
2122 int flags,
2123 vm_map_kernel_flags_t vmk_flags,
2124 vm_tag_t alias,
2125 vm_object_t object,
2126 vm_object_offset_t offset,
2127 boolean_t needs_copy,
2128 vm_prot_t cur_protection,
2129 vm_prot_t max_protection,
2130 vm_inherit_t inheritance)
1c79356b 2131{
0a7de745
A
2132 vm_map_entry_t entry, new_entry;
2133 vm_map_offset_t start, tmp_start, tmp_offset;
2134 vm_map_offset_t end, tmp_end;
2135 vm_map_offset_t tmp2_start, tmp2_end;
2136 vm_map_offset_t desired_empty_end;
2137 vm_map_offset_t step;
2138 kern_return_t result = KERN_SUCCESS;
2139 vm_map_t zap_old_map = VM_MAP_NULL;
2140 vm_map_t zap_new_map = VM_MAP_NULL;
2141 boolean_t map_locked = FALSE;
2142 boolean_t pmap_empty = TRUE;
2143 boolean_t new_mapping_established = FALSE;
2144 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2145 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2146 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2147 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2148 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2149 boolean_t is_submap = vmk_flags.vmkf_submap;
2150 boolean_t permanent = vmk_flags.vmkf_permanent;
2151 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2152 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2153 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2154 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2155 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2156 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2157 vm_tag_t user_alias;
2158 vm_map_offset_t effective_min_offset, effective_max_offset;
2159 kern_return_t kr;
2160 boolean_t clear_map_aligned = FALSE;
2161 vm_map_entry_t hole_entry;
2162 vm_map_size_t chunk_size = 0;
593a1d5f 2163
5ba3f43e
A
2164 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2165
a39ff7e2
A
2166 if (flags & VM_FLAGS_4GB_CHUNK) {
2167#if defined(__LP64__)
2168 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2169#else /* __LP64__ */
2170 chunk_size = ANON_CHUNK_SIZE;
2171#endif /* __LP64__ */
2172 } else {
2173 chunk_size = ANON_CHUNK_SIZE;
2174 }
2175
b0d623f7
A
2176 if (superpage_size) {
2177 switch (superpage_size) {
2178 /*
2179 * Note that the current implementation only supports
2180 * a single size for superpages, SUPERPAGE_SIZE, per
2181 * architecture. As soon as more sizes are supposed
2182 * to be supported, SUPERPAGE_SIZE has to be replaced
2183 * with a lookup of the size depending on superpage_size.
2184 */
2185#ifdef __x86_64__
0a7de745
A
2186 case SUPERPAGE_SIZE_ANY:
2187 /* handle it like 2 MB and round up to page size */
2188 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2189 case SUPERPAGE_SIZE_2MB:
2190 break;
b0d623f7 2191#endif
0a7de745
A
2192 default:
2193 return KERN_INVALID_ARGUMENT;
b0d623f7 2194 }
0a7de745
A
2195 mask = SUPERPAGE_SIZE - 1;
2196 if (size & (SUPERPAGE_SIZE - 1)) {
b0d623f7 2197 return KERN_INVALID_ARGUMENT;
0a7de745
A
2198 }
2199 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
b0d623f7
A
2200 }
2201
6d2010ae 2202
d9a64523
A
2203 if ((cur_protection & VM_PROT_WRITE) &&
2204 (cur_protection & VM_PROT_EXECUTE) &&
2205#if !CONFIG_EMBEDDED
2206 map != kernel_map &&
2207 (cs_process_global_enforcement() ||
0a7de745
A
2208 (vmk_flags.vmkf_cs_enforcement_override
2209 ? vmk_flags.vmkf_cs_enforcement
2210 : cs_process_enforcement(NULL))) &&
d9a64523
A
2211#endif /* !CONFIG_EMBEDDED */
2212 !entry_for_jit) {
2213 DTRACE_VM3(cs_wx,
0a7de745
A
2214 uint64_t, 0,
2215 uint64_t, 0,
2216 vm_prot_t, cur_protection);
d9a64523
A
2217 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2218#if VM_PROTECT_WX_FAIL
0a7de745 2219 "failing\n",
d9a64523 2220#else /* VM_PROTECT_WX_FAIL */
0a7de745 2221 "turning off execute\n",
d9a64523 2222#endif /* VM_PROTECT_WX_FAIL */
0a7de745
A
2223 proc_selfpid(),
2224 (current_task()->bsd_info
2225 ? proc_name_address(current_task()->bsd_info)
2226 : "?"),
2227 __FUNCTION__);
d9a64523
A
2228 cur_protection &= ~VM_PROT_EXECUTE;
2229#if VM_PROTECT_WX_FAIL
2230 return KERN_PROTECTION_FAILURE;
2231#endif /* VM_PROTECT_WX_FAIL */
5ba3f43e 2232 }
1c79356b 2233
a39ff7e2
A
2234 /*
2235 * If the task has requested executable lockdown,
2236 * deny any new executable mapping.
2237 */
2238 if (map->map_disallow_new_exec == TRUE) {
2239 if (cur_protection & VM_PROT_EXECUTE) {
2240 return KERN_PROTECTION_FAILURE;
2241 }
2242 }
2243
3e170ce0
A
2244 if (resilient_codesign || resilient_media) {
2245 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2246 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2247 return KERN_PROTECTION_FAILURE;
2248 }
2249 }
2250
2d21ac55
A
2251 if (is_submap) {
2252 if (purgable) {
2253 /* submaps can not be purgeable */
2254 return KERN_INVALID_ARGUMENT;
2255 }
2256 if (object == VM_OBJECT_NULL) {
2257 /* submaps can not be created lazily */
2258 return KERN_INVALID_ARGUMENT;
2259 }
2260 }
5ba3f43e 2261 if (vmk_flags.vmkf_already) {
2d21ac55
A
2262 /*
2263 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2264 * is already present. For it to be meaningful, the requested
2265 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2266 * we shouldn't try and remove what was mapped there first
2267 * (!VM_FLAGS_OVERWRITE).
2268 */
2269 if ((flags & VM_FLAGS_ANYWHERE) ||
2270 (flags & VM_FLAGS_OVERWRITE)) {
2271 return KERN_INVALID_ARGUMENT;
2272 }
2273 }
2274
6d2010ae 2275 effective_min_offset = map->min_offset;
b0d623f7 2276
5ba3f43e 2277 if (vmk_flags.vmkf_beyond_max) {
2d21ac55 2278 /*
b0d623f7 2279 * Allow an insertion beyond the map's max offset.
2d21ac55 2280 */
0a7de745
A
2281#if !defined(__arm__) && !defined(__arm64__)
2282 if (vm_map_is_64bit(map)) {
2d21ac55 2283 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
0a7de745
A
2284 } else
2285#endif /* __arm__ */
2286 effective_max_offset = 0x00000000FFFFF000ULL;
2d21ac55
A
2287 } else {
2288 effective_max_offset = map->max_offset;
2289 }
2290
2291 if (size == 0 ||
2292 (offset & PAGE_MASK_64) != 0) {
91447636
A
2293 *address = 0;
2294 return KERN_INVALID_ARGUMENT;
2295 }
2296
3e170ce0
A
2297 if (map->pmap == kernel_pmap) {
2298 user_alias = VM_KERN_MEMORY_NONE;
2299 } else {
2300 user_alias = alias;
2301 }
2d21ac55 2302
0a7de745
A
2303 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2304 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2305 }
2306
2307#define RETURN(value) { result = value; goto BailOut; }
1c79356b
A
2308
2309 assert(page_aligned(*address));
2310 assert(page_aligned(size));
91447636 2311
39236c6e
A
2312 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2313 /*
2314 * In most cases, the caller rounds the size up to the
2315 * map's page size.
2316 * If we get a size that is explicitly not map-aligned here,
2317 * we'll have to respect the caller's wish and mark the
2318 * mapping as "not map-aligned" to avoid tripping the
2319 * map alignment checks later.
2320 */
2321 clear_map_aligned = TRUE;
2322 }
5ba3f43e 2323 if (!anywhere &&
fe8ab488
A
2324 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2325 /*
2326 * We've been asked to map at a fixed address and that
2327 * address is not aligned to the map's specific alignment.
2328 * The caller should know what it's doing (i.e. most likely
2329 * mapping some fragmented copy map, transferring memory from
2330 * a VM map with a different alignment), so clear map_aligned
2331 * for this new VM map entry and proceed.
2332 */
2333 clear_map_aligned = TRUE;
2334 }
39236c6e 2335
91447636
A
2336 /*
2337 * Only zero-fill objects are allowed to be purgable.
2338 * LP64todo - limit purgable objects to 32-bits for now
2339 */
2340 if (purgable &&
2341 (offset != 0 ||
0a7de745
A
2342 (object != VM_OBJECT_NULL &&
2343 (object->vo_size != size ||
2344 object->purgable == VM_PURGABLE_DENY))
2345 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
91447636 2346 return KERN_INVALID_ARGUMENT;
0a7de745 2347 }
91447636
A
2348
2349 if (!anywhere && overwrite) {
2350 /*
2351 * Create a temporary VM map to hold the old mappings in the
2352 * affected area while we create the new one.
2353 * This avoids releasing the VM map lock in
2354 * vm_map_entry_delete() and allows atomicity
2355 * when we want to replace some mappings with a new one.
2356 * It also allows us to restore the old VM mappings if the
2357 * new mapping fails.
2358 */
2359 zap_old_map = vm_map_create(PMAP_NULL,
0a7de745
A
2360 *address,
2361 *address + size,
2362 map->hdr.entries_pageable);
39236c6e 2363 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2364 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2365 }
2366
0a7de745 2367StartAgain:;
1c79356b
A
2368
2369 start = *address;
2370
2371 if (anywhere) {
2372 vm_map_lock(map);
91447636 2373 map_locked = TRUE;
5ba3f43e 2374
316670eb 2375 if (entry_for_jit) {
d9a64523 2376#if CONFIG_EMBEDDED
316670eb
A
2377 if (map->jit_entry_exists) {
2378 result = KERN_INVALID_ARGUMENT;
2379 goto BailOut;
2380 }
39037602 2381 random_address = TRUE;
d9a64523 2382#endif /* CONFIG_EMBEDDED */
39037602
A
2383 }
2384
2385 if (random_address) {
316670eb
A
2386 /*
2387 * Get a random start address.
2388 */
2389 result = vm_map_random_address_for_size(map, address, size);
2390 if (result != KERN_SUCCESS) {
2391 goto BailOut;
2392 }
2393 start = *address;
6d2010ae 2394 }
5ba3f43e
A
2395#if __x86_64__
2396 else if ((start == 0 || start == vm_map_min(map)) &&
0a7de745
A
2397 !map->disable_vmentry_reuse &&
2398 map->vmmap_high_start != 0) {
5ba3f43e
A
2399 start = map->vmmap_high_start;
2400 }
2401#endif /* __x86_64__ */
1c79356b 2402
316670eb 2403
1c79356b
A
2404 /*
2405 * Calculate the first possible address.
2406 */
2407
0a7de745 2408 if (start < effective_min_offset) {
2d21ac55 2409 start = effective_min_offset;
0a7de745
A
2410 }
2411 if (start > effective_max_offset) {
1c79356b 2412 RETURN(KERN_NO_SPACE);
0a7de745 2413 }
1c79356b
A
2414
2415 /*
2416 * Look for the first possible address;
2417 * if there's already something at this
2418 * address, we have to start after it.
2419 */
2420
0a7de745 2421 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 2422 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2423 } else {
3e170ce0 2424 if (map->holelistenabled) {
d9a64523 2425 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
2426
2427 if (hole_entry == NULL) {
2428 /*
2429 * No more space in the map?
2430 */
2431 result = KERN_NO_SPACE;
2432 goto BailOut;
2433 } else {
3e170ce0
A
2434 boolean_t found_hole = FALSE;
2435
2436 do {
2437 if (hole_entry->vme_start >= start) {
2438 start = hole_entry->vme_start;
2439 found_hole = TRUE;
2440 break;
2441 }
2442
2443 if (hole_entry->vme_end > start) {
2444 found_hole = TRUE;
2445 break;
2446 }
2447 hole_entry = hole_entry->vme_next;
d9a64523 2448 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
2449
2450 if (found_hole == FALSE) {
2451 result = KERN_NO_SPACE;
2452 goto BailOut;
2453 }
2454
2455 entry = hole_entry;
6d2010ae 2456
0a7de745 2457 if (start == 0) {
3e170ce0 2458 start += PAGE_SIZE_64;
0a7de745 2459 }
3e170ce0 2460 }
6d2010ae 2461 } else {
3e170ce0
A
2462 assert(first_free_is_valid(map));
2463
2464 entry = map->first_free;
2465
2466 if (entry == vm_map_to_entry(map)) {
6d2010ae 2467 entry = NULL;
3e170ce0 2468 } else {
0a7de745
A
2469 if (entry->vme_next == vm_map_to_entry(map)) {
2470 /*
2471 * Hole at the end of the map.
2472 */
3e170ce0 2473 entry = NULL;
0a7de745
A
2474 } else {
2475 if (start < (entry->vme_next)->vme_start) {
3e170ce0
A
2476 start = entry->vme_end;
2477 start = vm_map_round_page(start,
0a7de745 2478 VM_MAP_PAGE_MASK(map));
3e170ce0
A
2479 } else {
2480 /*
2481 * Need to do a lookup.
2482 */
2483 entry = NULL;
2484 }
0a7de745 2485 }
3e170ce0
A
2486 }
2487
2488 if (entry == NULL) {
0a7de745 2489 vm_map_entry_t tmp_entry;
3e170ce0
A
2490 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2491 assert(!entry_for_jit);
2492 start = tmp_entry->vme_end;
39236c6e 2493 start = vm_map_round_page(start,
0a7de745 2494 VM_MAP_PAGE_MASK(map));
6d2010ae 2495 }
3e170ce0 2496 entry = tmp_entry;
316670eb 2497 }
6d2010ae 2498 }
1c79356b
A
2499 }
2500
2501 /*
2502 * In any case, the "entry" always precedes
2503 * the proposed new region throughout the
2504 * loop:
2505 */
2506
2507 while (TRUE) {
0a7de745 2508 vm_map_entry_t next;
1c79356b 2509
2d21ac55 2510 /*
1c79356b
A
2511 * Find the end of the proposed new region.
2512 * Be sure we didn't go beyond the end, or
2513 * wrap around the address.
2514 */
2515
2516 end = ((start + mask) & ~mask);
39236c6e 2517 end = vm_map_round_page(end,
0a7de745
A
2518 VM_MAP_PAGE_MASK(map));
2519 if (end < start) {
1c79356b 2520 RETURN(KERN_NO_SPACE);
0a7de745 2521 }
1c79356b 2522 start = end;
39236c6e 2523 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 2524 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2525 end += size;
2526
d9a64523
A
2527 /* We want an entire page of empty space, but don't increase the allocation size. */
2528 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2529
2530 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
1c79356b 2531 if (map->wait_for_space) {
fe8ab488 2532 assert(!keep_map_locked);
2d21ac55 2533 if (size <= (effective_max_offset -
0a7de745 2534 effective_min_offset)) {
1c79356b 2535 assert_wait((event_t)map,
0a7de745 2536 THREAD_ABORTSAFE);
1c79356b 2537 vm_map_unlock(map);
91447636
A
2538 map_locked = FALSE;
2539 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2540 goto StartAgain;
2541 }
2542 }
2543 RETURN(KERN_NO_SPACE);
2544 }
2545
1c79356b 2546 next = entry->vme_next;
1c79356b 2547
3e170ce0 2548 if (map->holelistenabled) {
0a7de745 2549 if (entry->vme_end >= desired_empty_end) {
3e170ce0 2550 break;
0a7de745 2551 }
3e170ce0
A
2552 } else {
2553 /*
2554 * If there are no more entries, we must win.
2555 *
2556 * OR
2557 *
2558 * If there is another entry, it must be
2559 * after the end of the potential new region.
2560 */
1c79356b 2561
0a7de745 2562 if (next == vm_map_to_entry(map)) {
3e170ce0 2563 break;
0a7de745 2564 }
3e170ce0 2565
0a7de745 2566 if (next->vme_start >= desired_empty_end) {
3e170ce0 2567 break;
0a7de745 2568 }
3e170ce0 2569 }
1c79356b
A
2570
2571 /*
2572 * Didn't fit -- move to the next entry.
2573 */
2574
2575 entry = next;
3e170ce0
A
2576
2577 if (map->holelistenabled) {
d9a64523 2578 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
2579 /*
2580 * Wrapped around
2581 */
2582 result = KERN_NO_SPACE;
2583 goto BailOut;
2584 }
2585 start = entry->vme_start;
2586 } else {
2587 start = entry->vme_end;
2588 }
2589
39236c6e 2590 start = vm_map_round_page(start,
0a7de745 2591 VM_MAP_PAGE_MASK(map));
1c79356b 2592 }
3e170ce0
A
2593
2594 if (map->holelistenabled) {
2595 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2596 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2597 }
2598 }
2599
1c79356b 2600 *address = start;
39236c6e 2601 assert(VM_MAP_PAGE_ALIGNED(*address,
0a7de745 2602 VM_MAP_PAGE_MASK(map)));
1c79356b 2603 } else {
1c79356b
A
2604 /*
2605 * Verify that:
2606 * the address doesn't itself violate
2607 * the mask requirement.
2608 */
2609
2610 vm_map_lock(map);
91447636 2611 map_locked = TRUE;
0a7de745 2612 if ((start & mask) != 0) {
1c79356b 2613 RETURN(KERN_NO_SPACE);
0a7de745 2614 }
1c79356b
A
2615
2616 /*
2617 * ... the address is within bounds
2618 */
2619
2620 end = start + size;
2621
2d21ac55
A
2622 if ((start < effective_min_offset) ||
2623 (end > effective_max_offset) ||
1c79356b
A
2624 (start >= end)) {
2625 RETURN(KERN_INVALID_ADDRESS);
2626 }
2627
91447636 2628 if (overwrite && zap_old_map != VM_MAP_NULL) {
5ba3f43e 2629 int remove_flags;
91447636
A
2630 /*
2631 * Fixed mapping and "overwrite" flag: attempt to
2632 * remove all existing mappings in the specified
2633 * address range, saving them in our "zap_old_map".
2634 */
5ba3f43e
A
2635 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2636 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2637 if (vmk_flags.vmkf_overwrite_immutable) {
2638 /* we can overwrite immutable mappings */
2639 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2640 }
91447636 2641 (void) vm_map_delete(map, start, end,
0a7de745
A
2642 remove_flags,
2643 zap_old_map);
91447636
A
2644 }
2645
1c79356b
A
2646 /*
2647 * ... the starting address isn't allocated
2648 */
2649
2d21ac55 2650 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745 2651 if (!(vmk_flags.vmkf_already)) {
2d21ac55
A
2652 RETURN(KERN_NO_SPACE);
2653 }
2654 /*
2655 * Check if what's already there is what we want.
2656 */
2657 tmp_start = start;
2658 tmp_offset = offset;
2659 if (entry->vme_start < start) {
2660 tmp_start -= start - entry->vme_start;
2661 tmp_offset -= start - entry->vme_start;
2d21ac55
A
2662 }
2663 for (; entry->vme_start < end;
0a7de745 2664 entry = entry->vme_next) {
4a3eedf9
A
2665 /*
2666 * Check if the mapping's attributes
2667 * match the existing map entry.
2668 */
2d21ac55
A
2669 if (entry == vm_map_to_entry(map) ||
2670 entry->vme_start != tmp_start ||
2671 entry->is_sub_map != is_submap ||
3e170ce0 2672 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2673 entry->needs_copy != needs_copy ||
2674 entry->protection != cur_protection ||
2675 entry->max_protection != max_protection ||
2676 entry->inheritance != inheritance ||
fe8ab488 2677 entry->iokit_acct != iokit_acct ||
3e170ce0 2678 VME_ALIAS(entry) != alias) {
2d21ac55
A
2679 /* not the same mapping ! */
2680 RETURN(KERN_NO_SPACE);
2681 }
4a3eedf9
A
2682 /*
2683 * Check if the same object is being mapped.
2684 */
2685 if (is_submap) {
3e170ce0 2686 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2687 (vm_map_t) object) {
2688 /* not the same submap */
2689 RETURN(KERN_NO_SPACE);
2690 }
2691 } else {
3e170ce0 2692 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2693 /* not the same VM object... */
2694 vm_object_t obj2;
2695
3e170ce0 2696 obj2 = VME_OBJECT(entry);
4a3eedf9 2697 if ((obj2 == VM_OBJECT_NULL ||
0a7de745 2698 obj2->internal) &&
4a3eedf9 2699 (object == VM_OBJECT_NULL ||
0a7de745 2700 object->internal)) {
4a3eedf9
A
2701 /*
2702 * ... but both are
2703 * anonymous memory,
2704 * so equivalent.
2705 */
2706 } else {
2707 RETURN(KERN_NO_SPACE);
2708 }
2709 }
2710 }
2711
2d21ac55
A
2712 tmp_offset += entry->vme_end - entry->vme_start;
2713 tmp_start += entry->vme_end - entry->vme_start;
2714 if (entry->vme_end >= end) {
2715 /* reached the end of our mapping */
2716 break;
2717 }
2718 }
2719 /* it all matches: let's use what's already there ! */
2720 RETURN(KERN_MEMORY_PRESENT);
2721 }
1c79356b
A
2722
2723 /*
2724 * ... the next region doesn't overlap the
2725 * end point.
2726 */
2727
2728 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745 2729 (entry->vme_next->vme_start < end)) {
1c79356b 2730 RETURN(KERN_NO_SPACE);
0a7de745 2731 }
1c79356b
A
2732 }
2733
2734 /*
2735 * At this point,
2736 * "start" and "end" should define the endpoints of the
2737 * available new range, and
2738 * "entry" should refer to the region before the new
2739 * range, and
2740 *
2741 * the map should be locked.
2742 */
2743
2744 /*
2745 * See whether we can avoid creating a new entry (and object) by
2746 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2747 * extend from below.] Note that we can never extend/join
2748 * purgable objects because they need to remain distinct
2749 * entities in order to implement their "volatile object"
2750 * semantics.
1c79356b
A
2751 */
2752
d9a64523
A
2753 if (purgable ||
2754 entry_for_jit ||
2755 vm_memory_malloc_no_cow(user_alias)) {
91447636
A
2756 if (object == VM_OBJECT_NULL) {
2757 object = vm_object_allocate(size);
2758 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
d9a64523 2759 object->true_share = FALSE;
316670eb 2760 if (purgable) {
fe8ab488 2761 task_t owner;
316670eb 2762 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2763 if (map->pmap == kernel_pmap) {
2764 /*
2765 * Purgeable mappings made in a kernel
2766 * map are "owned" by the kernel itself
2767 * rather than the current user task
2768 * because they're likely to be used by
2769 * more than this user task (see
2770 * execargs_purgeable_allocate(), for
2771 * example).
2772 */
2773 owner = kernel_task;
2774 } else {
2775 owner = current_task();
2776 }
d9a64523 2777 assert(object->vo_owner == NULL);
fe8ab488
A
2778 assert(object->resident_page_count == 0);
2779 assert(object->wired_page_count == 0);
2780 vm_object_lock(object);
2781 vm_purgeable_nonvolatile_enqueue(object, owner);
2782 vm_object_unlock(object);
316670eb 2783 }
91447636
A
2784 offset = (vm_object_offset_t)0;
2785 }
2d21ac55 2786 } else if ((is_submap == FALSE) &&
0a7de745
A
2787 (object == VM_OBJECT_NULL) &&
2788 (entry != vm_map_to_entry(map)) &&
2789 (entry->vme_end == start) &&
2790 (!entry->is_shared) &&
2791 (!entry->is_sub_map) &&
2792 (!entry->in_transition) &&
2793 (!entry->needs_wakeup) &&
2794 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2795 (entry->protection == cur_protection) &&
2796 (entry->max_protection == max_protection) &&
2797 (entry->inheritance == inheritance) &&
2798 ((user_alias == VM_MEMORY_REALLOC) ||
2799 (VME_ALIAS(entry) == alias)) &&
2800 (entry->no_cache == no_cache) &&
2801 (entry->permanent == permanent) &&
2802 /* no coalescing for immutable executable mappings */
2803 !((entry->protection & VM_PROT_EXECUTE) &&
2804 entry->permanent) &&
2805 (!entry->superpage_size && !superpage_size) &&
2806 /*
2807 * No coalescing if not map-aligned, to avoid propagating
2808 * that condition any further than needed:
2809 */
2810 (!entry->map_aligned || !clear_map_aligned) &&
2811 (!entry->zero_wired_pages) &&
2812 (!entry->used_for_jit && !entry_for_jit) &&
2813 (!entry->pmap_cs_associated) &&
2814 (entry->iokit_acct == iokit_acct) &&
2815 (!entry->vme_resilient_codesign) &&
2816 (!entry->vme_resilient_media) &&
2817 (!entry->vme_atomic) &&
2818
2819 ((entry->vme_end - entry->vme_start) + size <=
2820 (user_alias == VM_MEMORY_REALLOC ?
2821 ANON_CHUNK_SIZE :
2822 NO_COALESCE_LIMIT)) &&
2823
2824 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2825 if (vm_object_coalesce(VME_OBJECT(entry),
0a7de745
A
2826 VM_OBJECT_NULL,
2827 VME_OFFSET(entry),
2828 (vm_object_offset_t) 0,
2829 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2830 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2831 /*
2832 * Coalesced the two objects - can extend
2833 * the previous map entry to include the
2834 * new range.
2835 */
2836 map->size += (end - entry->vme_end);
e2d2fc5c 2837 assert(entry->vme_start < end);
39236c6e 2838 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745
A
2839 VM_MAP_PAGE_MASK(map)));
2840 if (__improbable(vm_debug_events)) {
3e170ce0 2841 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
0a7de745 2842 }
1c79356b 2843 entry->vme_end = end;
3e170ce0
A
2844 if (map->holelistenabled) {
2845 vm_map_store_update_first_free(map, entry, TRUE);
2846 } else {
2847 vm_map_store_update_first_free(map, map->first_free, TRUE);
2848 }
fe8ab488 2849 new_mapping_established = TRUE;
1c79356b
A
2850 RETURN(KERN_SUCCESS);
2851 }
2852 }
2853
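	/*
	 * If we get here, the coalesce fast path above did not take the
	 * RETURN(KERN_SUCCESS), so one or more new entries are created.
	 * The outer loop below inserts the range in "step"-sized pieces
	 * (one per superpage when superpage_size is set, otherwise the
	 * whole range at once); the inner do/while further carves large
	 * object-less allocations into "chunk_size" pieces.
	 */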
b0d623f7
A
2854 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2855 new_entry = NULL;
2856
0a7de745 2857 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
b0d623f7
A
2858 tmp2_end = tmp2_start + step;
2859 /*
2860 * Create a new entry
b0d623f7
A
2861 *
2862 * XXX FBDP
2863 * The reserved "page zero" in each process's address space can
a39ff7e2 2864 * be arbitrarily large. Splitting it into separate objects and
b0d623f7
A
2865 * therefore different VM map entries serves no purpose and just
2866 * slows down operations on the VM map, so let's not split the
a39ff7e2 2867 * allocation into chunks if the max protection is NONE. That
b0d623f7
A
2868 * memory should never be accessible, so it will never get to the
2869 * default pager.
2870 */
2871 tmp_start = tmp2_start;
2872 if (object == VM_OBJECT_NULL &&
a39ff7e2 2873 size > chunk_size &&
b0d623f7 2874 max_protection != VM_PROT_NONE &&
0a7de745 2875 superpage_size == 0) {
a39ff7e2 2876 tmp_end = tmp_start + chunk_size;
0a7de745 2877 } else {
b0d623f7 2878 tmp_end = tmp2_end;
0a7de745 2879 }
b0d623f7 2880 do {
5ba3f43e
A
2881 new_entry = vm_map_entry_insert(
2882 map, entry, tmp_start, tmp_end,
0a7de745 2883 object, offset, needs_copy,
5ba3f43e
A
2884 FALSE, FALSE,
2885 cur_protection, max_protection,
2886 VM_BEHAVIOR_DEFAULT,
2887 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2888 0,
2889 no_cache,
2890 permanent,
2891 superpage_size,
2892 clear_map_aligned,
2893 is_submap,
2894 entry_for_jit,
2895 alias);
3e170ce0
A
2896
2897 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
6d2010ae 2898
3e170ce0 2899 if (resilient_codesign &&
0a7de745
A
2900 !((cur_protection | max_protection) &
2901 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
3e170ce0
A
2902 new_entry->vme_resilient_codesign = TRUE;
2903 }
2904
2905 if (resilient_media &&
0a7de745
A
2906 !((cur_protection | max_protection) &
2907 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
3e170ce0
A
2908 new_entry->vme_resilient_media = TRUE;
2909 }
2910
fe8ab488
A
2911 assert(!new_entry->iokit_acct);
2912 if (!is_submap &&
2913 object != VM_OBJECT_NULL &&
d9a64523 2914 (object->purgable != VM_PURGABLE_DENY ||
0a7de745 2915 object->vo_ledger_tag)) {
fe8ab488
A
2916 assert(new_entry->use_pmap);
2917 assert(!new_entry->iokit_acct);
2918 /*
2919 * Turn off pmap accounting since
d9a64523 2920 * purgeable (or tagged) objects have their
fe8ab488
A
2921 * own ledgers.
2922 */
2923 new_entry->use_pmap = FALSE;
2924 } else if (!is_submap &&
0a7de745
A
2925 iokit_acct &&
2926 object != VM_OBJECT_NULL &&
2927 object->internal) {
fe8ab488
A
2928 /* alternate accounting */
2929 assert(!new_entry->iokit_acct);
2930 assert(new_entry->use_pmap);
2931 new_entry->iokit_acct = TRUE;
2932 new_entry->use_pmap = FALSE;
ecc0ceb4
A
2933 DTRACE_VM4(
2934 vm_map_iokit_mapped_region,
2935 vm_map_t, map,
2936 vm_map_offset_t, new_entry->vme_start,
2937 vm_map_offset_t, new_entry->vme_end,
2938 int, VME_ALIAS(new_entry));
fe8ab488
A
2939 vm_map_iokit_mapped_region(
2940 map,
2941 (new_entry->vme_end -
0a7de745 2942 new_entry->vme_start));
fe8ab488
A
2943 } else if (!is_submap) {
2944 assert(!new_entry->iokit_acct);
2945 assert(new_entry->use_pmap);
2946 }
2947
b0d623f7 2948 if (is_submap) {
0a7de745
A
2949 vm_map_t submap;
2950 boolean_t submap_is_64bit;
2951 boolean_t use_pmap;
b0d623f7 2952
fe8ab488
A
2953 assert(new_entry->is_sub_map);
2954 assert(!new_entry->use_pmap);
2955 assert(!new_entry->iokit_acct);
b0d623f7
A
2956 submap = (vm_map_t) object;
2957 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2958 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2959#ifndef NO_NESTED_PMAP
b0d623f7 2960 if (use_pmap && submap->pmap == NULL) {
316670eb 2961 ledger_t ledger = map->pmap->ledger;
b0d623f7 2962 /* we need a sub pmap to nest... */
316670eb
A
2963 submap->pmap = pmap_create(ledger, 0,
2964 submap_is_64bit);
b0d623f7
A
2965 if (submap->pmap == NULL) {
2966 /* let's proceed without nesting... */
2967 }
0a7de745 2968#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
2969 else {
2970 pmap_set_nested(submap->pmap);
2971 }
2972#endif
2d21ac55 2973 }
b0d623f7
A
2974 if (use_pmap && submap->pmap != NULL) {
2975 kr = pmap_nest(map->pmap,
0a7de745
A
2976 submap->pmap,
2977 tmp_start,
2978 tmp_start,
2979 tmp_end - tmp_start);
b0d623f7
A
2980 if (kr != KERN_SUCCESS) {
2981 printf("vm_map_enter: "
0a7de745
A
2982 "pmap_nest(0x%llx,0x%llx) "
2983 "error 0x%x\n",
2984 (long long)tmp_start,
2985 (long long)tmp_end,
2986 kr);
b0d623f7
A
2987 } else {
2988 /* we're now nested ! */
2989 new_entry->use_pmap = TRUE;
2990 pmap_empty = FALSE;
2991 }
2992 }
fe8ab488 2993#endif /* NO_NESTED_PMAP */
2d21ac55 2994 }
b0d623f7
A
2995 entry = new_entry;
2996
2997 if (superpage_size) {
2998 vm_page_t pages, m;
2999 vm_object_t sp_object;
5ba3f43e 3000 vm_object_offset_t sp_offset;
b0d623f7 3001
3e170ce0 3002 VME_OFFSET_SET(entry, 0);
b0d623f7
A
3003
3004 /* allocate one superpage */
0a7de745 3005 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
2d21ac55 3006 if (kr != KERN_SUCCESS) {
3e170ce0
A
3007 /* deallocate whole range... */
3008 new_mapping_established = TRUE;
3009 /* ... but only up to "tmp_end" */
3010 size -= end - tmp_end;
b0d623f7
A
3011 RETURN(kr);
3012 }
3013
3014 /* create one vm_object per superpage */
3015 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3016 sp_object->phys_contiguous = TRUE;
0a7de745 3017 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3e170ce0 3018 VME_OBJECT_SET(entry, sp_object);
fe8ab488 3019 assert(entry->use_pmap);
b0d623f7
A
3020
3021 /* enter the base pages into the object */
3022 vm_object_lock(sp_object);
5ba3f43e 3023 for (sp_offset = 0;
0a7de745
A
3024 sp_offset < SUPERPAGE_SIZE;
3025 sp_offset += PAGE_SIZE) {
b0d623f7 3026 m = pages;
39037602 3027 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
3028 pages = NEXT_PAGE(m);
3029 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5ba3f43e 3030 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 3031 }
b0d623f7 3032 vm_object_unlock(sp_object);
2d21ac55 3033 }
5ba3f43e 3034 } while (tmp_end != tmp2_end &&
0a7de745
A
3035 (tmp_start = tmp_end) &&
3036 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3037 tmp_end + chunk_size : tmp2_end));
b0d623f7 3038 }
91447636 3039
91447636 3040 new_mapping_established = TRUE;
1c79356b 3041
fe8ab488
A
3042BailOut:
3043 assert(map_locked == TRUE);
2d21ac55 3044
593a1d5f
A
3045 if (result == KERN_SUCCESS) {
3046 vm_prot_t pager_prot;
3047 memory_object_t pager;
91447636 3048
fe8ab488 3049#if DEBUG
593a1d5f 3050 if (pmap_empty &&
5ba3f43e 3051 !(vmk_flags.vmkf_no_pmap_check)) {
593a1d5f 3052 assert(vm_map_pmap_is_empty(map,
0a7de745
A
3053 *address,
3054 *address + size));
593a1d5f 3055 }
fe8ab488 3056#endif /* DEBUG */
593a1d5f
A
3057
3058 /*
3059 * For "named" VM objects, let the pager know that the
3060 * memory object is being mapped. Some pagers need to keep
3061 * track of this, to know when they can reclaim the memory
3062 * object, for example.
3063 * VM calls memory_object_map() for each mapping (specifying
3064 * the protection of each mapping) and calls
3065 * memory_object_last_unmap() when all the mappings are gone.
3066 */
3067 pager_prot = max_protection;
3068 if (needs_copy) {
3069 /*
3070 * Copy-On-Write mapping: won't modify
3071 * the memory object.
3072 */
3073 pager_prot &= ~VM_PROT_WRITE;
3074 }
3075 if (!is_submap &&
3076 object != VM_OBJECT_NULL &&
3077 object->named &&
3078 object->pager != MEMORY_OBJECT_NULL) {
3079 vm_object_lock(object);
3080 pager = object->pager;
3081 if (object->named &&
3082 pager != MEMORY_OBJECT_NULL) {
3083 assert(object->pager_ready);
3084 vm_object_mapping_wait(object, THREAD_UNINT);
3085 vm_object_mapping_begin(object);
3086 vm_object_unlock(object);
3087
3088 kr = memory_object_map(pager, pager_prot);
3089 assert(kr == KERN_SUCCESS);
3090
3091 vm_object_lock(object);
3092 vm_object_mapping_end(object);
3093 }
3094 vm_object_unlock(object);
3095 }
fe8ab488
A
3096 }
3097
3098 assert(map_locked == TRUE);
3099
3100 if (!keep_map_locked) {
3101 vm_map_unlock(map);
3102 map_locked = FALSE;
3103 }
3104
3105 /*
3106 * We can't hold the map lock if we enter this block.
3107 */
3108
3109 if (result == KERN_SUCCESS) {
fe8ab488
A
3110 /* Wire down the new entry if the user
3111 * requested all new map entries be wired.
3112 */
0a7de745 3113 if ((map->wiring_required) || (superpage_size)) {
fe8ab488
A
3114 assert(!keep_map_locked);
3115 pmap_empty = FALSE; /* pmap won't be empty */
5ba3f43e 3116 kr = vm_map_wire_kernel(map, start, end,
0a7de745
A
3117 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3118 TRUE);
fe8ab488
A
3119 result = kr;
3120 }
3121
3122 }
3123
3124 if (result != KERN_SUCCESS) {
91447636
A
3125 if (new_mapping_established) {
3126 /*
3127 * We have to get rid of the new mappings since we
3128 * won't make them available to the user.
3129 * Try and do that atomically, to minimize the risk
3130 * that someone else creates new mappings in that range.
3131 */
3132 zap_new_map = vm_map_create(PMAP_NULL,
0a7de745
A
3133 *address,
3134 *address + size,
3135 map->hdr.entries_pageable);
39236c6e 3136 vm_map_set_page_shift(zap_new_map,
0a7de745 3137 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
3138 vm_map_disable_hole_optimization(zap_new_map);
3139
91447636
A
3140 if (!map_locked) {
3141 vm_map_lock(map);
3142 map_locked = TRUE;
3143 }
0a7de745
A
3144 (void) vm_map_delete(map, *address, *address + size,
3145 (VM_MAP_REMOVE_SAVE_ENTRIES |
3146 VM_MAP_REMOVE_NO_MAP_ALIGN),
3147 zap_new_map);
91447636
A
3148 }
3149 if (zap_old_map != VM_MAP_NULL &&
3150 zap_old_map->hdr.nentries != 0) {
0a7de745 3151 vm_map_entry_t entry1, entry2;
91447636
A
3152
3153 /*
3154 * The new mapping failed. Attempt to restore
3155 * the old mappings, saved in the "zap_old_map".
3156 */
3157 if (!map_locked) {
3158 vm_map_lock(map);
3159 map_locked = TRUE;
3160 }
3161
3162 /* first check if the coast is still clear */
3163 start = vm_map_first_entry(zap_old_map)->vme_start;
3164 end = vm_map_last_entry(zap_old_map)->vme_end;
3165 if (vm_map_lookup_entry(map, start, &entry1) ||
3166 vm_map_lookup_entry(map, end, &entry2) ||
3167 entry1 != entry2) {
3168 /*
3169 * Part of that range has already been
3170 * re-mapped: we can't restore the old
3171 * mappings...
3172 */
3173 vm_map_enter_restore_failures++;
3174 } else {
3175 /*
3176 * Transfer the saved map entries from
3177 * "zap_old_map" to the original "map",
3178 * inserting them all after "entry1".
3179 */
3180 for (entry2 = vm_map_first_entry(zap_old_map);
0a7de745
A
3181 entry2 != vm_map_to_entry(zap_old_map);
3182 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
3183 vm_map_size_t entry_size;
3184
3185 entry_size = (entry2->vme_end -
0a7de745 3186 entry2->vme_start);
6d2010ae 3187 vm_map_store_entry_unlink(zap_old_map,
0a7de745 3188 entry2);
2d21ac55 3189 zap_old_map->size -= entry_size;
d9a64523 3190 vm_map_store_entry_link(map, entry1, entry2,
0a7de745 3191 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55 3192 map->size += entry_size;
91447636
A
3193 entry1 = entry2;
3194 }
3195 if (map->wiring_required) {
3196 /*
3197 * XXX TODO: we should rewire the
3198 * old pages here...
3199 */
3200 }
3201 vm_map_enter_restore_successes++;
3202 }
3203 }
3204 }
3205
fe8ab488
A
3206 /*
3207 * The caller is responsible for releasing the lock if it requested to
3208 * keep the map locked.
3209 */
3210 if (map_locked && !keep_map_locked) {
91447636
A
3211 vm_map_unlock(map);
3212 }
3213
3214 /*
3215 * Get rid of the "zap_maps" and all the map entries that
3216 * they may still contain.
3217 */
3218 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 3219 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3220 zap_old_map = VM_MAP_NULL;
3221 }
3222 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 3223 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3224 zap_new_map = VM_MAP_NULL;
3225 }
3226
3227 return result;
1c79356b 3228
0a7de745 3229#undef RETURN
1c79356b
A
3230}
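/*
 * Illustrative sketch (not part of the original file): a minimal anonymous
 * "anywhere" allocation through vm_map_enter().  The function name is
 * hypothetical; a null object with a zero offset yields lazily allocated
 * zero-fill memory.
 */
#if 0	/* illustration only */
static kern_return_t
vm_map_enter_anon_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *address)
{
	*address = 0;
	return vm_map_enter(map, address, size, (vm_map_offset_t)0,
	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_MEMORY_MALLOC,
	    VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}
#endif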
3231
5ba3f43e
A
3232#if __arm64__
3233extern const struct memory_object_pager_ops fourk_pager_ops;
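/*
 * vm_map_enter_fourk() handles fixed-address mappings whose start and size
 * are aligned only to 4K in a map whose native page size is 16K (arm64).
 * It backs the 16K map entry with a "fourk" pager object, which assembles
 * the 4K-granular content, and maps a copy-on-write copy of that object
 * into the 16K page.
 */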
3234kern_return_t
3235vm_map_enter_fourk(
0a7de745
A
3236 vm_map_t map,
3237 vm_map_offset_t *address, /* IN/OUT */
3238 vm_map_size_t size,
3239 vm_map_offset_t mask,
3240 int flags,
3241 vm_map_kernel_flags_t vmk_flags,
3242 vm_tag_t alias,
3243 vm_object_t object,
3244 vm_object_offset_t offset,
3245 boolean_t needs_copy,
3246 vm_prot_t cur_protection,
3247 vm_prot_t max_protection,
3248 vm_inherit_t inheritance)
91447636 3249{
0a7de745
A
3250 vm_map_entry_t entry, new_entry;
3251 vm_map_offset_t start, fourk_start;
3252 vm_map_offset_t end, fourk_end;
3253 vm_map_size_t fourk_size;
3254 kern_return_t result = KERN_SUCCESS;
3255 vm_map_t zap_old_map = VM_MAP_NULL;
3256 vm_map_t zap_new_map = VM_MAP_NULL;
3257 boolean_t map_locked = FALSE;
3258 boolean_t pmap_empty = TRUE;
3259 boolean_t new_mapping_established = FALSE;
3260 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3261 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3262 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3263 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3264 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3265 boolean_t is_submap = vmk_flags.vmkf_submap;
3266 boolean_t permanent = vmk_flags.vmkf_permanent;
3267 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
5ba3f43e 3268// boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
0a7de745
A
3269 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3270 vm_map_offset_t effective_min_offset, effective_max_offset;
3271 kern_return_t kr;
3272 boolean_t clear_map_aligned = FALSE;
3273 memory_object_t fourk_mem_obj;
3274 vm_object_t fourk_object;
3275 vm_map_offset_t fourk_pager_offset;
3276 int fourk_pager_index_start, fourk_pager_index_num;
3277 int cur_idx;
3278 boolean_t fourk_copy;
3279 vm_object_t copy_object;
3280 vm_object_offset_t copy_offset;
5ba3f43e
A
3281
3282 fourk_mem_obj = MEMORY_OBJECT_NULL;
3283 fourk_object = VM_OBJECT_NULL;
6d2010ae 3284
5ba3f43e
A
3285 if (superpage_size) {
3286 return KERN_NOT_SUPPORTED;
3287 }
91447636 3288
d9a64523
A
3289 if ((cur_protection & VM_PROT_WRITE) &&
3290 (cur_protection & VM_PROT_EXECUTE) &&
3291#if !CONFIG_EMBEDDED
3292 map != kernel_map &&
3293 cs_process_enforcement(NULL) &&
3294#endif /* !CONFIG_EMBEDDED */
3295 !entry_for_jit) {
3296 DTRACE_VM3(cs_wx,
0a7de745
A
3297 uint64_t, 0,
3298 uint64_t, 0,
3299 vm_prot_t, cur_protection);
d9a64523 3300 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
0a7de745
A
3301 "turning off execute\n",
3302 proc_selfpid(),
3303 (current_task()->bsd_info
3304 ? proc_name_address(current_task()->bsd_info)
3305 : "?"),
3306 __FUNCTION__);
d9a64523 3307 cur_protection &= ~VM_PROT_EXECUTE;
5ba3f43e 3308 }
5ba3f43e 3309
a39ff7e2
A
3310 /*
3311 * If the task has requested executable lockdown,
3312 * deny any new executable mapping.
3313 */
3314 if (map->map_disallow_new_exec == TRUE) {
3315 if (cur_protection & VM_PROT_EXECUTE) {
3316 return KERN_PROTECTION_FAILURE;
3317 }
3318 }
3319
5ba3f43e
A
3320 if (is_submap) {
3321 return KERN_NOT_SUPPORTED;
3322 }
3323 if (vmk_flags.vmkf_already) {
3324 return KERN_NOT_SUPPORTED;
3325 }
3326 if (purgable || entry_for_jit) {
3327 return KERN_NOT_SUPPORTED;
3328 }
3329
3330 effective_min_offset = map->min_offset;
3331
3332 if (vmk_flags.vmkf_beyond_max) {
3333 return KERN_NOT_SUPPORTED;
3334 } else {
3335 effective_max_offset = map->max_offset;
3336 }
3337
3338 if (size == 0 ||
3339 (offset & FOURK_PAGE_MASK) != 0) {
3340 *address = 0;
2d21ac55 3341 return KERN_INVALID_ARGUMENT;
3e170ce0 3342 }
5ba3f43e 3343
0a7de745 3344#define RETURN(value) { result = value; goto BailOut; }
5ba3f43e
A
3345
3346 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3347 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3348
3349 if (!anywhere && overwrite) {
3350 return KERN_NOT_SUPPORTED;
3351 }
3352 if (!anywhere && overwrite) {
3353 /*
3354 * Create a temporary VM map to hold the old mappings in the
3355 * affected area while we create the new one.
3356 * This avoids releasing the VM map lock in
3357 * vm_map_entry_delete() and allows atomicity
3358 * when we want to replace some mappings with a new one.
3359 * It also allows us to restore the old VM mappings if the
3360 * new mapping fails.
3361 */
3362 zap_old_map = vm_map_create(PMAP_NULL,
0a7de745
A
3363 *address,
3364 *address + size,
3365 map->hdr.entries_pageable);
5ba3f43e
A
3366 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3367 vm_map_disable_hole_optimization(zap_old_map);
3e170ce0 3368 }
593a1d5f 3369
5ba3f43e
A
3370 fourk_start = *address;
3371 fourk_size = size;
3372 fourk_end = fourk_start + fourk_size;
2d21ac55 3373
5ba3f43e
A
3374 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3375 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3376 size = end - start;
39236c6e 3377
5ba3f43e
A
3378 if (anywhere) {
3379 return KERN_NOT_SUPPORTED;
3380 } else {
3381 /*
3382 * Verify that:
3383 * the address doesn't itself violate
3384 * the mask requirement.
3385 */
3386
3387 vm_map_lock(map);
3388 map_locked = TRUE;
3389 if ((start & mask) != 0) {
3390 RETURN(KERN_NO_SPACE);
6d2010ae 3391 }
5ba3f43e
A
3392
3393 /*
3394 * ... the address is within bounds
3395 */
3396
3397 end = start + size;
3398
3399 if ((start < effective_min_offset) ||
3400 (end > effective_max_offset) ||
3401 (start >= end)) {
3402 RETURN(KERN_INVALID_ADDRESS);
22ba694c 3403 }
5ba3f43e
A
3404
3405 if (overwrite && zap_old_map != VM_MAP_NULL) {
3406 /*
3407 * Fixed mapping and "overwrite" flag: attempt to
3408 * remove all existing mappings in the specified
3409 * address range, saving them in our "zap_old_map".
3410 */
3411 (void) vm_map_delete(map, start, end,
0a7de745
A
3412 (VM_MAP_REMOVE_SAVE_ENTRIES |
3413 VM_MAP_REMOVE_NO_MAP_ALIGN),
3414 zap_old_map);
3e170ce0 3415 }
2d21ac55 3416
5ba3f43e
A
3417 /*
3418 * ... the starting address isn't allocated
3419 */
3420 if (vm_map_lookup_entry(map, start, &entry)) {
3421 vm_object_t cur_object, shadow_object;
3422
3423 /*
3424 * We might already have some 4K mappings
3425 * in a 16K page here.
3426 */
3427
3428 if (entry->vme_end - entry->vme_start
3429 != SIXTEENK_PAGE_SIZE) {
3430 RETURN(KERN_NO_SPACE);
3431 }
3432 if (entry->is_sub_map) {
3433 RETURN(KERN_NO_SPACE);
3434 }
3435 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3436 RETURN(KERN_NO_SPACE);
3437 }
3438
3439 /* go all the way down the shadow chain */
3440 cur_object = VME_OBJECT(entry);
3441 vm_object_lock(cur_object);
3442 while (cur_object->shadow != VM_OBJECT_NULL) {
3443 shadow_object = cur_object->shadow;
3444 vm_object_lock(shadow_object);
3445 vm_object_unlock(cur_object);
3446 cur_object = shadow_object;
3447 shadow_object = VM_OBJECT_NULL;
3448 }
3449 if (cur_object->internal ||
3450 cur_object->pager == NULL) {
3451 vm_object_unlock(cur_object);
3452 RETURN(KERN_NO_SPACE);
3453 }
3454 if (cur_object->pager->mo_pager_ops
3455 != &fourk_pager_ops) {
3456 vm_object_unlock(cur_object);
3457 RETURN(KERN_NO_SPACE);
3458 }
3459 fourk_object = cur_object;
3460 fourk_mem_obj = fourk_object->pager;
3461
3462 /* keep the "4K" object alive */
3463 vm_object_reference_locked(fourk_object);
3464 vm_object_unlock(fourk_object);
3465
3466 /* merge permissions */
3467 entry->protection |= cur_protection;
3468 entry->max_protection |= max_protection;
3469 if ((entry->protection & (VM_PROT_WRITE |
0a7de745 3470 VM_PROT_EXECUTE)) ==
5ba3f43e
A
3471 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3472 fourk_binary_compatibility_unsafe &&
3473 fourk_binary_compatibility_allow_wx) {
3474 /* write+execute: need to be "jit" */
3475 entry->used_for_jit = TRUE;
3476 }
3477
3478 goto map_in_fourk_pager;
3479 }
3480
3481 /*
3482 * ... the next region doesn't overlap the
3483 * end point.
3484 */
3485
3486 if ((entry->vme_next != vm_map_to_entry(map)) &&
3487 (entry->vme_next->vme_start < end)) {
3488 RETURN(KERN_NO_SPACE);
3489 }
3490 }
3491
3492 /*
3493 * At this point,
3494 * "start" and "end" should define the endpoints of the
3495 * available new range, and
3496 * "entry" should refer to the region before the new
3497 * range, and
3498 *
3499 * the map should be locked.
3500 */
3501
3502 /* create a new "4K" pager */
3503 fourk_mem_obj = fourk_pager_create();
3504 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3505 assert(fourk_object);
3506
3507 /* keep the "4K" object alive */
3508 vm_object_reference(fourk_object);
3509
3510 /* create a "copy" object, to map the "4K" object copy-on-write */
3511 fourk_copy = TRUE;
3512 result = vm_object_copy_strategically(fourk_object,
0a7de745
A
3513 0,
3514 end - start,
3515 &copy_object,
3516 &copy_offset,
3517 &fourk_copy);
5ba3f43e
A
3518 assert(result == KERN_SUCCESS);
3519 assert(copy_object != VM_OBJECT_NULL);
3520 assert(copy_offset == 0);
3521
3522 /* take a reference on the copy object, for this mapping */
3523 vm_object_reference(copy_object);
3524
3525 /* map the "4K" pager's copy object */
3526 new_entry =
0a7de745
A
3527 vm_map_entry_insert(map, entry,
3528 vm_map_trunc_page(start,
3529 VM_MAP_PAGE_MASK(map)),
3530 vm_map_round_page(end,
3531 VM_MAP_PAGE_MASK(map)),
3532 copy_object,
3533 0, /* offset */
3534 FALSE, /* needs_copy */
3535 FALSE, FALSE,
3536 cur_protection, max_protection,
3537 VM_BEHAVIOR_DEFAULT,
3538 ((entry_for_jit)
3539 ? VM_INHERIT_NONE
3540 : inheritance),
3541 0,
3542 no_cache,
3543 permanent,
3544 superpage_size,
3545 clear_map_aligned,
3546 is_submap,
3547 FALSE, /* jit */
3548 alias);
5ba3f43e
A
3549 entry = new_entry;
3550
3551#if VM_MAP_DEBUG_FOURK
3552 if (vm_map_debug_fourk) {
3553 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
0a7de745
A
3554 map,
3555 (uint64_t) entry->vme_start,
3556 (uint64_t) entry->vme_end,
3557 fourk_mem_obj);
5ba3f43e
A
3558 }
3559#endif /* VM_MAP_DEBUG_FOURK */
3560
3561 new_mapping_established = TRUE;
3562
3563map_in_fourk_pager:
3564 /* "map" the original "object" where it belongs in the "4K" pager */
3565 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3566 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3567 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3568 fourk_pager_index_num = 4;
3569 } else {
3570 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3571 }
3572 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3573 fourk_pager_index_num = 4 - fourk_pager_index_start;
3574 }
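	/*
	 * Worked example (illustrative, not in the original source), assuming
	 * FOURK_PAGE_SIZE == 0x1000 and SIXTEENK_PAGE_SIZE == 0x4000:
	 * fourk_start == ...0x2000 with fourk_size == 0x3000 gives
	 * fourk_pager_offset == 0x2000, so fourk_pager_index_start == 2 and
	 * fourk_pager_index_num == 3, which the clamp above reduces to
	 * 4 - 2 == 2, i.e. only sub-pages 2 and 3 of the 16K page get
	 * populated by the loop below.
	 */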
3575 for (cur_idx = 0;
0a7de745
A
3576 cur_idx < fourk_pager_index_num;
3577 cur_idx++) {
3578 vm_object_t old_object;
3579 vm_object_offset_t old_offset;
5ba3f43e
A
3580
3581 kr = fourk_pager_populate(fourk_mem_obj,
0a7de745
A
3582 TRUE, /* overwrite */
3583 fourk_pager_index_start + cur_idx,
3584 object,
3585 (object
3586 ? (offset +
3587 (cur_idx * FOURK_PAGE_SIZE))
3588 : 0),
3589 &old_object,
3590 &old_offset);
5ba3f43e
A
3591#if VM_MAP_DEBUG_FOURK
3592 if (vm_map_debug_fourk) {
3593 if (old_object == (vm_object_t) -1 &&
3594 old_offset == (vm_object_offset_t) -1) {
3595 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
0a7de745
A
3596 "pager [%p:0x%llx] "
3597 "populate[%d] "
3598 "[object:%p,offset:0x%llx]\n",
3599 map,
3600 (uint64_t) entry->vme_start,
3601 (uint64_t) entry->vme_end,
3602 fourk_mem_obj,
3603 VME_OFFSET(entry),
3604 fourk_pager_index_start + cur_idx,
3605 object,
3606 (object
3607 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3608 : 0));
5ba3f43e
A
3609 } else {
3610 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
0a7de745
A
3611 "pager [%p:0x%llx] "
3612 "populate[%d] [object:%p,offset:0x%llx] "
3613 "old [%p:0x%llx]\n",
3614 map,
3615 (uint64_t) entry->vme_start,
3616 (uint64_t) entry->vme_end,
3617 fourk_mem_obj,
3618 VME_OFFSET(entry),
3619 fourk_pager_index_start + cur_idx,
3620 object,
3621 (object
3622 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3623 : 0),
3624 old_object,
3625 old_offset);
5ba3f43e
A
3626 }
3627 }
3628#endif /* VM_MAP_DEBUG_FOURK */
3629
3630 assert(kr == KERN_SUCCESS);
3631 if (object != old_object &&
3632 object != VM_OBJECT_NULL &&
3633 object != (vm_object_t) -1) {
3634 vm_object_reference(object);
3635 }
3636 if (object != old_object &&
3637 old_object != VM_OBJECT_NULL &&
3638 old_object != (vm_object_t) -1) {
3639 vm_object_deallocate(old_object);
3640 }
3641 }
3642
3643BailOut:
3644 assert(map_locked == TRUE);
3645
3646 if (fourk_object != VM_OBJECT_NULL) {
3647 vm_object_deallocate(fourk_object);
3648 fourk_object = VM_OBJECT_NULL;
3649 fourk_mem_obj = MEMORY_OBJECT_NULL;
3650 }
3651
3652 if (result == KERN_SUCCESS) {
3653 vm_prot_t pager_prot;
3654 memory_object_t pager;
3655
3656#if DEBUG
3657 if (pmap_empty &&
3658 !(vmk_flags.vmkf_no_pmap_check)) {
3659 assert(vm_map_pmap_is_empty(map,
0a7de745
A
3660 *address,
3661 *address + size));
5ba3f43e
A
3662 }
3663#endif /* DEBUG */
3664
3665 /*
3666 * For "named" VM objects, let the pager know that the
3667 * memory object is being mapped. Some pagers need to keep
3668 * track of this, to know when they can reclaim the memory
3669 * object, for example.
3670 * VM calls memory_object_map() for each mapping (specifying
3671 * the protection of each mapping) and calls
3672 * memory_object_last_unmap() when all the mappings are gone.
3673 */
3674 pager_prot = max_protection;
3675 if (needs_copy) {
3676 /*
3677 * Copy-On-Write mapping: won't modify
3678 * the memory object.
3679 */
3680 pager_prot &= ~VM_PROT_WRITE;
3681 }
3682 if (!is_submap &&
3683 object != VM_OBJECT_NULL &&
3684 object->named &&
3685 object->pager != MEMORY_OBJECT_NULL) {
3686 vm_object_lock(object);
3687 pager = object->pager;
3688 if (object->named &&
3689 pager != MEMORY_OBJECT_NULL) {
3690 assert(object->pager_ready);
3691 vm_object_mapping_wait(object, THREAD_UNINT);
3692 vm_object_mapping_begin(object);
3693 vm_object_unlock(object);
3694
3695 kr = memory_object_map(pager, pager_prot);
3696 assert(kr == KERN_SUCCESS);
3697
3698 vm_object_lock(object);
3699 vm_object_mapping_end(object);
3700 }
3701 vm_object_unlock(object);
3702 }
3703 if (!is_submap &&
3704 fourk_object != VM_OBJECT_NULL &&
3705 fourk_object->named &&
3706 fourk_object->pager != MEMORY_OBJECT_NULL) {
3707 vm_object_lock(fourk_object);
3708 pager = fourk_object->pager;
3709 if (fourk_object->named &&
3710 pager != MEMORY_OBJECT_NULL) {
3711 assert(fourk_object->pager_ready);
3712 vm_object_mapping_wait(fourk_object,
0a7de745 3713 THREAD_UNINT);
5ba3f43e
A
3714 vm_object_mapping_begin(fourk_object);
3715 vm_object_unlock(fourk_object);
3716
3717 kr = memory_object_map(pager, VM_PROT_READ);
3718 assert(kr == KERN_SUCCESS);
3719
3720 vm_object_lock(fourk_object);
3721 vm_object_mapping_end(fourk_object);
3722 }
3723 vm_object_unlock(fourk_object);
3724 }
3725 }
3726
3727 assert(map_locked == TRUE);
3728
3729 if (!keep_map_locked) {
3730 vm_map_unlock(map);
3731 map_locked = FALSE;
3732 }
3733
3734 /*
3735 * We can't hold the map lock if we enter this block.
3736 */
3737
3738 if (result == KERN_SUCCESS) {
5ba3f43e
A
3739 /* Wire down the new entry if the user
3740 * requested all new map entries be wired.
3741 */
0a7de745 3742 if ((map->wiring_required) || (superpage_size)) {
5ba3f43e
A
3743 assert(!keep_map_locked);
3744 pmap_empty = FALSE; /* pmap won't be empty */
3745 kr = vm_map_wire_kernel(map, start, end,
0a7de745
A
3746 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3747 TRUE);
5ba3f43e
A
3748 result = kr;
3749 }
3750
3751 }
3752
3753 if (result != KERN_SUCCESS) {
3754 if (new_mapping_established) {
3755 /*
3756 * We have to get rid of the new mappings since we
3757 * won't make them available to the user.
3758 * Try to do that atomically, to minimize the risk
3759 * that someone else creates new mappings in that range.
3760 */
3761 zap_new_map = vm_map_create(PMAP_NULL,
0a7de745
A
3762 *address,
3763 *address + size,
3764 map->hdr.entries_pageable);
5ba3f43e 3765 vm_map_set_page_shift(zap_new_map,
0a7de745 3766 VM_MAP_PAGE_SHIFT(map));
5ba3f43e
A
3767 vm_map_disable_hole_optimization(zap_new_map);
3768
3769 if (!map_locked) {
3770 vm_map_lock(map);
3771 map_locked = TRUE;
3772 }
0a7de745
A
3773 (void) vm_map_delete(map, *address, *address + size,
3774 (VM_MAP_REMOVE_SAVE_ENTRIES |
3775 VM_MAP_REMOVE_NO_MAP_ALIGN),
3776 zap_new_map);
5ba3f43e
A
3777 }
3778 if (zap_old_map != VM_MAP_NULL &&
3779 zap_old_map->hdr.nentries != 0) {
0a7de745 3780 vm_map_entry_t entry1, entry2;
5ba3f43e
A
3781
3782 /*
3783 * The new mapping failed. Attempt to restore
3784 * the old mappings, saved in the "zap_old_map".
3785 */
3786 if (!map_locked) {
3787 vm_map_lock(map);
3788 map_locked = TRUE;
3789 }
3790
3791 /* first check if the coast is still clear */
3792 start = vm_map_first_entry(zap_old_map)->vme_start;
3793 end = vm_map_last_entry(zap_old_map)->vme_end;
3794 if (vm_map_lookup_entry(map, start, &entry1) ||
3795 vm_map_lookup_entry(map, end, &entry2) ||
3796 entry1 != entry2) {
3797 /*
3798 * Part of that range has already been
3799 * re-mapped: we can't restore the old
3800 * mappings...
3801 */
3802 vm_map_enter_restore_failures++;
3803 } else {
3804 /*
3805 * Transfer the saved map entries from
3806 * "zap_old_map" to the original "map",
3807 * inserting them all after "entry1".
3808 */
3809 for (entry2 = vm_map_first_entry(zap_old_map);
0a7de745
A
3810 entry2 != vm_map_to_entry(zap_old_map);
3811 entry2 = vm_map_first_entry(zap_old_map)) {
5ba3f43e
A
3812 vm_map_size_t entry_size;
3813
3814 entry_size = (entry2->vme_end -
0a7de745 3815 entry2->vme_start);
5ba3f43e 3816 vm_map_store_entry_unlink(zap_old_map,
0a7de745 3817 entry2);
5ba3f43e 3818 zap_old_map->size -= entry_size;
d9a64523 3819 vm_map_store_entry_link(map, entry1, entry2,
0a7de745 3820 VM_MAP_KERNEL_FLAGS_NONE);
5ba3f43e
A
3821 map->size += entry_size;
3822 entry1 = entry2;
3823 }
3824 if (map->wiring_required) {
3825 /*
3826 * XXX TODO: we should rewire the
3827 * old pages here...
3828 */
3829 }
3830 vm_map_enter_restore_successes++;
3831 }
3832 }
3833 }
3834
3835 /*
3836 * The caller is responsible for releasing the lock if it requested to
3837 * keep the map locked.
3838 */
3839 if (map_locked && !keep_map_locked) {
3840 vm_map_unlock(map);
3841 }
3842
3843 /*
3844 * Get rid of the "zap_maps" and all the map entries that
3845 * they may still contain.
3846 */
3847 if (zap_old_map != VM_MAP_NULL) {
3848 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3849 zap_old_map = VM_MAP_NULL;
3850 }
3851 if (zap_new_map != VM_MAP_NULL) {
3852 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3853 zap_new_map = VM_MAP_NULL;
3854 }
3855
3856 return result;
3857
0a7de745 3858#undef RETURN
5ba3f43e
A
3859}
3860#endif /* __arm64__ */
3861
3862/*
3863 * Counters for the prefault optimization.
3864 */
3865int64_t vm_prefault_nb_pages = 0;
3866int64_t vm_prefault_nb_bailout = 0;
3867
3868static kern_return_t
3869vm_map_enter_mem_object_helper(
0a7de745
A
3870 vm_map_t target_map,
3871 vm_map_offset_t *address,
3872 vm_map_size_t initial_size,
3873 vm_map_offset_t mask,
3874 int flags,
3875 vm_map_kernel_flags_t vmk_flags,
3876 vm_tag_t tag,
3877 ipc_port_t port,
3878 vm_object_offset_t offset,
3879 boolean_t copy,
3880 vm_prot_t cur_protection,
3881 vm_prot_t max_protection,
3882 vm_inherit_t inheritance,
3883 upl_page_list_ptr_t page_list,
3884 unsigned int page_list_count)
5ba3f43e 3885{
0a7de745
A
3886 vm_map_address_t map_addr;
3887 vm_map_size_t map_size;
3888 vm_object_t object;
3889 vm_object_size_t size;
3890 kern_return_t result;
3891 boolean_t mask_cur_protection, mask_max_protection;
3892 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3893 vm_map_offset_t offset_in_mapping = 0;
5ba3f43e 3894#if __arm64__
0a7de745 3895 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e
A
3896#endif /* __arm64__ */
3897
3898 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3899
3900 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3901 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3902 cur_protection &= ~VM_PROT_IS_MASK;
3903 max_protection &= ~VM_PROT_IS_MASK;
3904
3905 /*
3906 * Check arguments for validity
3907 */
3908 if ((target_map == VM_MAP_NULL) ||
3909 (cur_protection & ~VM_PROT_ALL) ||
3910 (max_protection & ~VM_PROT_ALL) ||
3911 (inheritance > VM_INHERIT_LAST_VALID) ||
3912 (try_prefault && (copy || !page_list)) ||
3913 initial_size == 0) {
3914 return KERN_INVALID_ARGUMENT;
3915 }
3916
3917#if __arm64__
3918 if (fourk) {
3919 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3920 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3921 } else
3922#endif /* __arm64__ */
3923 {
3924 map_addr = vm_map_trunc_page(*address,
0a7de745 3925 VM_MAP_PAGE_MASK(target_map));
5ba3f43e 3926 map_size = vm_map_round_page(initial_size,
0a7de745 3927 VM_MAP_PAGE_MASK(target_map));
5ba3f43e
A
3928 }
3929 size = vm_object_round_page(initial_size);
3930
3931 /*
3932 * Find the vm object (if any) corresponding to this port.
3933 */
3934 if (!IP_VALID(port)) {
3935 object = VM_OBJECT_NULL;
3936 offset = 0;
3937 copy = FALSE;
3938 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
0a7de745 3939 vm_named_entry_t named_entry;
5ba3f43e
A
3940
3941 named_entry = (vm_named_entry_t) port->ip_kobject;
3942
3943 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 3944 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
5ba3f43e
A
3945 offset += named_entry->data_offset;
3946 }
3947
3948 /* a few checks to make sure user is obeying rules */
3949 if (size == 0) {
0a7de745 3950 if (offset >= named_entry->size) {
5ba3f43e 3951 return KERN_INVALID_RIGHT;
0a7de745 3952 }
5ba3f43e
A
3953 size = named_entry->size - offset;
3954 }
3955 if (mask_max_protection) {
3956 max_protection &= named_entry->protection;
3957 }
3958 if (mask_cur_protection) {
3959 cur_protection &= named_entry->protection;
3960 }
3961 if ((named_entry->protection & max_protection) !=
0a7de745 3962 max_protection) {
5ba3f43e 3963 return KERN_INVALID_RIGHT;
0a7de745 3964 }
5ba3f43e 3965 if ((named_entry->protection & cur_protection) !=
0a7de745 3966 cur_protection) {
5ba3f43e 3967 return KERN_INVALID_RIGHT;
0a7de745 3968 }
5ba3f43e
A
3969 if (offset + size < offset) {
3970 /* overflow */
3971 return KERN_INVALID_ARGUMENT;
3972 }
3973 if (named_entry->size < (offset + initial_size)) {
3974 return KERN_INVALID_ARGUMENT;
3975 }
3976
3977 if (named_entry->is_copy) {
3978 /* for a vm_map_copy, we can only map it whole */
3979 if ((size != named_entry->size) &&
3980 (vm_map_round_page(size,
0a7de745
A
3981 VM_MAP_PAGE_MASK(target_map)) ==
3982 named_entry->size)) {
5ba3f43e 3983 /* XXX FBDP use the rounded size... */
39236c6e
A
3984 size = vm_map_round_page(
3985 size,
3986 VM_MAP_PAGE_MASK(target_map));
3987 }
5ba3f43e 3988
fe8ab488
A
3989 if (!(flags & VM_FLAGS_ANYWHERE) &&
3990 (offset != 0 ||
0a7de745 3991 size != named_entry->size)) {
fe8ab488
A
3992 /*
3993 * XXX for a mapping at a "fixed" address,
3994 * we can't trim after mapping the whole
3995 * memory entry, so reject a request for a
3996 * partial mapping.
3997 */
39236c6e
A
3998 return KERN_INVALID_ARGUMENT;
3999 }
4000 }
4001
2d21ac55
A
4002 /* The caller's "offset" parameter is relative to the start of the named */
4003 /* entry; add the named entry's own offset within the backing object. */
4004 offset = offset + named_entry->offset;
5ba3f43e 4005
0a7de745
A
4006 if (!VM_MAP_PAGE_ALIGNED(size,
4007 VM_MAP_PAGE_MASK(target_map))) {
39236c6e
A
4008 /*
4009 * Let's not map more than requested;
4010 * vm_map_enter() will handle this "not map-aligned"
4011 * case.
4012 */
4013 map_size = size;
4014 }
4015
2d21ac55
A
4016 named_entry_lock(named_entry);
4017 if (named_entry->is_sub_map) {
0a7de745 4018 vm_map_t submap;
2d21ac55 4019
3e170ce0 4020 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4021 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4022 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4023 }
4024
2d21ac55
A
4025 submap = named_entry->backing.map;
4026 vm_map_lock(submap);
4027 vm_map_reference(submap);
4028 vm_map_unlock(submap);
4029 named_entry_unlock(named_entry);
4030
5ba3f43e
A
4031 vmk_flags.vmkf_submap = TRUE;
4032
2d21ac55 4033 result = vm_map_enter(target_map,
0a7de745
A
4034 &map_addr,
4035 map_size,
4036 mask,
4037 flags,
4038 vmk_flags,
4039 tag,
4040 (vm_object_t)(uintptr_t) submap,
4041 offset,
4042 copy,
4043 cur_protection,
4044 max_protection,
4045 inheritance);
2d21ac55
A
4046 if (result != KERN_SUCCESS) {
4047 vm_map_deallocate(submap);
4048 } else {
4049 /*
4050 * No need to lock "submap" just to check its
4051 * "mapped" flag: that flag is never reset
4052 * once it's been set and if we race, we'll
4053 * just end up setting it twice, which is OK.
4054 */
316670eb
A
4055 if (submap->mapped_in_other_pmaps == FALSE &&
4056 vm_map_pmap(submap) != PMAP_NULL &&
4057 vm_map_pmap(submap) !=
4058 vm_map_pmap(target_map)) {
2d21ac55 4059 /*
316670eb
A
4060 * This submap is being mapped in a map
4061 * that uses a different pmap.
4062 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 4063 * to indicate that we now need to
316670eb
A
4064 * remove mappings from all pmaps rather
4065 * than just the submap's pmap.
2d21ac55
A
4066 */
4067 vm_map_lock(submap);
316670eb 4068 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
4069 vm_map_unlock(submap);
4070 }
4071 *address = map_addr;
4072 }
4073 return result;
39236c6e 4074 } else if (named_entry->is_copy) {
0a7de745
A
4075 kern_return_t kr;
4076 vm_map_copy_t copy_map;
4077 vm_map_entry_t copy_entry;
4078 vm_map_offset_t copy_addr;
39236c6e
A
4079
4080 if (flags & ~(VM_FLAGS_FIXED |
0a7de745
A
4081 VM_FLAGS_ANYWHERE |
4082 VM_FLAGS_OVERWRITE |
4083 VM_FLAGS_RETURN_4K_DATA_ADDR |
4084 VM_FLAGS_RETURN_DATA_ADDR |
4085 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
4086 named_entry_unlock(named_entry);
4087 return KERN_INVALID_ARGUMENT;
4088 }
4089
3e170ce0 4090 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4091 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 4092 offset_in_mapping = offset - vm_object_trunc_page(offset);
0a7de745 4093 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
3e170ce0 4094 offset_in_mapping &= ~((signed)(0xFFF));
0a7de745 4095 }
39236c6e
A
4096 offset = vm_object_trunc_page(offset);
4097 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4098 }
4099
4100 copy_map = named_entry->backing.copy;
4101 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4102 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4103 /* unsupported type; should not happen */
4104 printf("vm_map_enter_mem_object: "
0a7de745
A
4105 "memory_entry->backing.copy "
4106 "unsupported type 0x%x\n",
4107 copy_map->type);
39236c6e
A
4108 named_entry_unlock(named_entry);
4109 return KERN_INVALID_ARGUMENT;
4110 }
4111
4112 /* reserve a contiguous range */
4113 kr = vm_map_enter(target_map,
0a7de745
A
4114 &map_addr,
4115 /* map whole mem entry, trim later: */
4116 named_entry->size,
4117 mask,
4118 flags & (VM_FLAGS_ANYWHERE |
4119 VM_FLAGS_OVERWRITE |
4120 VM_FLAGS_RETURN_4K_DATA_ADDR |
4121 VM_FLAGS_RETURN_DATA_ADDR),
4122 vmk_flags,
4123 tag,
4124 VM_OBJECT_NULL,
4125 0,
4126 FALSE, /* copy */
4127 cur_protection,
4128 max_protection,
4129 inheritance);
39236c6e
A
4130 if (kr != KERN_SUCCESS) {
4131 named_entry_unlock(named_entry);
4132 return kr;
4133 }
4134
4135 copy_addr = map_addr;
4136
4137 for (copy_entry = vm_map_copy_first_entry(copy_map);
0a7de745
A
4138 copy_entry != vm_map_copy_to_entry(copy_map);
4139 copy_entry = copy_entry->vme_next) {
4140 int remap_flags;
4141 vm_map_kernel_flags_t vmk_remap_flags;
4142 vm_map_t copy_submap;
4143 vm_object_t copy_object;
4144 vm_map_size_t copy_size;
4145 vm_object_offset_t copy_offset;
4146 int copy_vm_alias;
39236c6e 4147
5ba3f43e
A
4148 remap_flags = 0;
4149 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4150
813fb2f6 4151 copy_object = VME_OBJECT(copy_entry);
3e170ce0 4152 copy_offset = VME_OFFSET(copy_entry);
39236c6e 4153 copy_size = (copy_entry->vme_end -
0a7de745 4154 copy_entry->vme_start);
39037602
A
4155 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4156 if (copy_vm_alias == 0) {
4157 /*
4158 * Caller does not want a specific
4159 * alias for this new mapping: use
4160 * the alias of the original mapping.
4161 */
4162 copy_vm_alias = VME_ALIAS(copy_entry);
4163 }
39236c6e
A
4164
4165 /* sanity check */
fe8ab488
A
4166 if ((copy_addr + copy_size) >
4167 (map_addr +
0a7de745 4168 named_entry->size /* XXX full size */)) {
39236c6e
A
4169 /* over-mapping too much !? */
4170 kr = KERN_INVALID_ARGUMENT;
4171 /* abort */
4172 break;
4173 }
4174
4175 /* take a reference on the object */
4176 if (copy_entry->is_sub_map) {
5ba3f43e 4177 vmk_remap_flags.vmkf_submap = TRUE;
3e170ce0 4178 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
4179 vm_map_lock(copy_submap);
4180 vm_map_reference(copy_submap);
4181 vm_map_unlock(copy_submap);
d9a64523 4182 copy_object = (vm_object_t)(uintptr_t) copy_submap;
813fb2f6 4183 } else if (!copy &&
0a7de745
A
4184 copy_object != VM_OBJECT_NULL &&
4185 (copy_entry->needs_copy ||
4186 copy_object->shadowed ||
4187 (!copy_object->true_share &&
4188 !copy_entry->is_shared &&
4189 copy_object->vo_size > copy_size))) {
813fb2f6
A
4190 /*
4191 * We need to resolve our side of this
4192 * "symmetric" copy-on-write now; we
4193 * need a new object to map and share,
4194 * instead of the current one which
4195 * might still be shared with the
4196 * original mapping.
4197 *
4198 * Note: A "vm_map_copy_t" does not
4199 * have a lock but we're protected by
4200 * the named entry's lock here.
4201 */
4202 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4203 VME_OBJECT_SHADOW(copy_entry, copy_size);
4204 if (!copy_entry->needs_copy &&
4205 copy_entry->protection & VM_PROT_WRITE) {
4206 vm_prot_t prot;
4207
4208 prot = copy_entry->protection & ~VM_PROT_WRITE;
4209 vm_object_pmap_protect(copy_object,
0a7de745
A
4210 copy_offset,
4211 copy_size,
4212 PMAP_NULL,
4213 0,
4214 prot);
813fb2f6
A
4215 }
4216
4217 copy_entry->needs_copy = FALSE;
4218 copy_entry->is_shared = TRUE;
4219 copy_object = VME_OBJECT(copy_entry);
4220 copy_offset = VME_OFFSET(copy_entry);
4221 vm_object_lock(copy_object);
4222 vm_object_reference_locked(copy_object);
4223 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4224 /* we're about to make a shared mapping of this object */
4225 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4226 copy_object->true_share = TRUE;
4227 }
4228 vm_object_unlock(copy_object);
39236c6e 4229 } else {
813fb2f6
A
4230 /*
4231 * We already have the right object
4232 * to map.
4233 */
3e170ce0 4234 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
4235 vm_object_reference(copy_object);
4236 }
4237
4238 /* over-map the object into destination */
4239 remap_flags |= flags;
4240 remap_flags |= VM_FLAGS_FIXED;
4241 remap_flags |= VM_FLAGS_OVERWRITE;
4242 remap_flags &= ~VM_FLAGS_ANYWHERE;
813fb2f6
A
4243 if (!copy && !copy_entry->is_sub_map) {
4244 /*
4245 * copy-on-write should have been
4246 * resolved at this point, or we would
4247 * end up sharing instead of copying.
4248 */
4249 assert(!copy_entry->needs_copy);
4250 }
d9a64523
A
4251#if !CONFIG_EMBEDDED
4252 if (copy_entry->used_for_jit) {
4253 vmk_remap_flags.vmkf_map_jit = TRUE;
4254 }
4255#endif /* !CONFIG_EMBEDDED */
39236c6e 4256 kr = vm_map_enter(target_map,
0a7de745
A
4257 &copy_addr,
4258 copy_size,
4259 (vm_map_offset_t) 0,
4260 remap_flags,
4261 vmk_remap_flags,
4262 copy_vm_alias,
4263 copy_object,
4264 copy_offset,
4265 ((copy_object == NULL) ? FALSE : copy),
4266 cur_protection,
4267 max_protection,
4268 inheritance);
39236c6e
A
4269 if (kr != KERN_SUCCESS) {
4270 if (copy_entry->is_sub_map) {
4271 vm_map_deallocate(copy_submap);
4272 } else {
4273 vm_object_deallocate(copy_object);
4274 }
4275 /* abort */
4276 break;
4277 }
4278
4279 /* next mapping */
4280 copy_addr += copy_size;
4281 }
5ba3f43e 4282
39236c6e 4283 if (kr == KERN_SUCCESS) {
3e170ce0 4284 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4285 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4286 *address = map_addr + offset_in_mapping;
4287 } else {
4288 *address = map_addr;
4289 }
fe8ab488
A
4290
4291 if (offset) {
4292 /*
4293 * Trim in front, from 0 to "offset".
4294 */
4295 vm_map_remove(target_map,
0a7de745
A
4296 map_addr,
4297 map_addr + offset,
4298 VM_MAP_REMOVE_NO_FLAGS);
fe8ab488
A
4299 *address += offset;
4300 }
4301 if (offset + map_size < named_entry->size) {
4302 /*
4303 * Trim in back, from
4304 * "offset + map_size" to
4305 * "named_entry->size".
4306 */
4307 vm_map_remove(target_map,
0a7de745
A
4308 (map_addr +
4309 offset + map_size),
4310 (map_addr +
4311 named_entry->size),
4312 VM_MAP_REMOVE_NO_FLAGS);
fe8ab488 4313 }
39236c6e
A
4314 }
4315 named_entry_unlock(named_entry);
4316
4317 if (kr != KERN_SUCCESS) {
0a7de745 4318 if (!(flags & VM_FLAGS_OVERWRITE)) {
39236c6e
A
4319 /* deallocate the contiguous range */
4320 (void) vm_deallocate(target_map,
0a7de745
A
4321 map_addr,
4322 map_size);
39236c6e
A
4323 }
4324 }
4325
4326 return kr;
2d21ac55 4327 } else {
0a7de745
A
4328 unsigned int access;
4329 vm_prot_t protections;
4330 unsigned int wimg_mode;
5ba3f43e
A
4331
4332 /* we are mapping a VM object */
4333
4334 protections = named_entry->protection & VM_PROT_ALL;
4335 access = GET_MAP_MEM(named_entry->protection);
4336
3e170ce0 4337 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4338 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 4339 offset_in_mapping = offset - vm_object_trunc_page(offset);
0a7de745 4340 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
3e170ce0 4341 offset_in_mapping &= ~((signed)(0xFFF));
0a7de745 4342 }
39236c6e
A
4343 offset = vm_object_trunc_page(offset);
4344 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
5ba3f43e 4345 }
39236c6e 4346
2d21ac55
A
4347 object = named_entry->backing.object;
4348 assert(object != VM_OBJECT_NULL);
5ba3f43e 4349 vm_object_lock(object);
2d21ac55 4350 named_entry_unlock(named_entry);
5ba3f43e
A
4351
4352 vm_object_reference_locked(object);
4353
4354 wimg_mode = object->wimg_bits;
0a7de745
A
4355 vm_prot_to_wimg(access, &wimg_mode);
4356 if (object->wimg_bits != wimg_mode) {
5ba3f43e 4357 vm_object_change_wimg_mode(object, wimg_mode);
0a7de745 4358 }
5ba3f43e
A
4359
4360 vm_object_unlock(object);
2d21ac55
A
4361 }
4362 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4363 /*
4364 * JMM - This is temporary until we unify named entries
4365 * and raw memory objects.
4366 *
4367 * Detected fake ip_kotype for a memory object. In
4368 * this case, the port isn't really a port at all, but
4369 * instead is just a raw memory object.
4370 */
3e170ce0 4371 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4372 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4373 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4374 }
4375
5ba3f43e 4376 object = memory_object_to_vm_object((memory_object_t)port);
0a7de745 4377 if (object == VM_OBJECT_NULL) {
2d21ac55 4378 return KERN_INVALID_OBJECT;
0a7de745 4379 }
5ba3f43e 4380 vm_object_reference(object);
2d21ac55
A
4381
4382 /* wait for object (if any) to be ready */
4383 if (object != VM_OBJECT_NULL) {
4384 if (object == kernel_object) {
4385 printf("Warning: Attempt to map kernel object"
0a7de745 4386 " by a non-private kernel entity\n");
2d21ac55
A
4387 return KERN_INVALID_OBJECT;
4388 }
b0d623f7 4389 if (!object->pager_ready) {
2d21ac55 4390 vm_object_lock(object);
b0d623f7
A
4391
4392 while (!object->pager_ready) {
4393 vm_object_wait(object,
0a7de745
A
4394 VM_OBJECT_EVENT_PAGER_READY,
4395 THREAD_UNINT);
b0d623f7
A
4396 vm_object_lock(object);
4397 }
4398 vm_object_unlock(object);
2d21ac55 4399 }
2d21ac55
A
4400 }
4401 } else {
4402 return KERN_INVALID_OBJECT;
4403 }
4404
593a1d5f
A
4405 if (object != VM_OBJECT_NULL &&
4406 object->named &&
4407 object->pager != MEMORY_OBJECT_NULL &&
4408 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4409 memory_object_t pager;
0a7de745
A
4410 vm_prot_t pager_prot;
4411 kern_return_t kr;
593a1d5f
A
4412
4413 /*
4414 * For "named" VM objects, let the pager know that the
4415 * memory object is being mapped. Some pagers need to keep
4416 * track of this, to know when they can reclaim the memory
4417 * object, for example.
4418 * VM calls memory_object_map() for each mapping (specifying
4419 * the protection of each mapping) and calls
4420 * memory_object_last_unmap() when all the mappings are gone.
4421 */
4422 pager_prot = max_protection;
4423 if (copy) {
4424 /*
4425 * Copy-On-Write mapping: won't modify the
4426 * memory object.
4427 */
4428 pager_prot &= ~VM_PROT_WRITE;
4429 }
4430 vm_object_lock(object);
4431 pager = object->pager;
4432 if (object->named &&
4433 pager != MEMORY_OBJECT_NULL &&
4434 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4435 assert(object->pager_ready);
4436 vm_object_mapping_wait(object, THREAD_UNINT);
4437 vm_object_mapping_begin(object);
4438 vm_object_unlock(object);
4439
4440 kr = memory_object_map(pager, pager_prot);
4441 assert(kr == KERN_SUCCESS);
4442
4443 vm_object_lock(object);
4444 vm_object_mapping_end(object);
4445 }
4446 vm_object_unlock(object);
4447 }
4448
2d21ac55
A
4449 /*
4450 * Perform the copy if requested
4451 */
4452
4453 if (copy) {
0a7de745
A
4454 vm_object_t new_object;
4455 vm_object_offset_t new_offset;
2d21ac55 4456
3e170ce0 4457 result = vm_object_copy_strategically(object, offset,
0a7de745
A
4458 map_size,
4459 &new_object, &new_offset,
4460 &copy);
2d21ac55
A
4461
4462
4463 if (result == KERN_MEMORY_RESTART_COPY) {
4464 boolean_t success;
4465 boolean_t src_needs_copy;
4466
4467 /*
4468 * XXX
4469 * We currently ignore src_needs_copy.
4470 * This really is the issue of how to make
4471 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4472 * non-kernel users to use. Solution forthcoming.
4473 * In the meantime, since we don't allow non-kernel
4474 * memory managers to specify symmetric copy,
4475 * we won't run into problems here.
4476 */
4477 new_object = object;
4478 new_offset = offset;
4479 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4480 new_offset,
4481 map_size,
4482 &src_needs_copy,
4483 &copy);
2d21ac55
A
4484 assert(success);
4485 result = KERN_SUCCESS;
4486 }
4487 /*
4488 * Throw away the reference to the
4489 * original object, as it won't be mapped.
4490 */
4491
4492 vm_object_deallocate(object);
4493
3e170ce0 4494 if (result != KERN_SUCCESS) {
2d21ac55 4495 return result;
3e170ce0 4496 }
2d21ac55
A
4497
4498 object = new_object;
4499 offset = new_offset;
4500 }
4501
fe8ab488 4502 /*
5ba3f43e 4503 * If non-kernel users want to try to prefault pages, the mapping and prefault
fe8ab488
A
4504 * need to be atomic.
4505 */
5ba3f43e
A
4506 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4507 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4508
4509#if __arm64__
4510 if (fourk) {
4511 /* map this object in a "4K" pager */
4512 result = vm_map_enter_fourk(target_map,
0a7de745
A
4513 &map_addr,
4514 map_size,
4515 (vm_map_offset_t) mask,
4516 flags,
4517 vmk_flags,
4518 tag,
4519 object,
4520 offset,
4521 copy,
4522 cur_protection,
4523 max_protection,
4524 inheritance);
5ba3f43e
A
4525 } else
4526#endif /* __arm64__ */
3e170ce0
A
4527 {
4528 result = vm_map_enter(target_map,
0a7de745
A
4529 &map_addr, map_size,
4530 (vm_map_offset_t)mask,
4531 flags,
4532 vmk_flags,
4533 tag,
4534 object, offset,
4535 copy,
4536 cur_protection, max_protection,
4537 inheritance);
4538 }
4539 if (result != KERN_SUCCESS) {
2d21ac55 4540 vm_object_deallocate(object);
0a7de745 4541 }
39236c6e 4542
fe8ab488
A
4543 /*
4544 * Try to prefault, and do not forget to release the vm map lock.
4545 */
4546 if (result == KERN_SUCCESS && try_prefault) {
4547 mach_vm_address_t va = map_addr;
4548 kern_return_t kr = KERN_SUCCESS;
4549 unsigned int i = 0;
39037602
A
4550 int pmap_options;
4551
5ba3f43e 4552 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
39037602
A
4553 if (object->internal) {
4554 pmap_options |= PMAP_OPTIONS_INTERNAL;
4555 }
fe8ab488
A
4556
4557 for (i = 0; i < page_list_count; ++i) {
5ba3f43e
A
4558 if (!UPL_VALID_PAGE(page_list, i)) {
4559 if (kernel_prefault) {
4560 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4561 result = KERN_MEMORY_ERROR;
4562 break;
4563 }
4564 } else {
fe8ab488
A
4565 /*
4566 * If this call fails, we should stop trying
4567 * to optimize; subsequent calls are likely
4568 * to fail too.
4569 *
4570 * We are not going to report an error for such
4571 * a failure, though: that's an optimization, not
4572 * something critical.
4573 */
4574 kr = pmap_enter_options(target_map->pmap,
0a7de745
A
4575 va, UPL_PHYS_PAGE(page_list, i),
4576 cur_protection, VM_PROT_NONE,
4577 0, TRUE, pmap_options, NULL);
fe8ab488
A
4578 if (kr != KERN_SUCCESS) {
4579 OSIncrementAtomic64(&vm_prefault_nb_bailout);
5ba3f43e
A
4580 if (kernel_prefault) {
4581 result = kr;
4582 }
3e170ce0 4583 break;
fe8ab488
A
4584 }
4585 OSIncrementAtomic64(&vm_prefault_nb_pages);
4586 }
4587
4588 /* Next virtual address */
4589 va += PAGE_SIZE;
4590 }
5ba3f43e
A
4591 if (vmk_flags.vmkf_keep_map_locked) {
4592 vm_map_unlock(target_map);
4593 }
fe8ab488
A
4594 }
4595
3e170ce0 4596 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4597 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4598 *address = map_addr + offset_in_mapping;
4599 } else {
4600 *address = map_addr;
4601 }
2d21ac55
A
4602 return result;
4603}
4604
fe8ab488
A
4605kern_return_t
4606vm_map_enter_mem_object(
0a7de745
A
4607 vm_map_t target_map,
4608 vm_map_offset_t *address,
4609 vm_map_size_t initial_size,
4610 vm_map_offset_t mask,
4611 int flags,
4612 vm_map_kernel_flags_t vmk_flags,
4613 vm_tag_t tag,
4614 ipc_port_t port,
4615 vm_object_offset_t offset,
4616 boolean_t copy,
4617 vm_prot_t cur_protection,
4618 vm_prot_t max_protection,
4619 vm_inherit_t inheritance)
fe8ab488 4620{
5ba3f43e
A
4621 kern_return_t ret;
4622
4623 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4624 address,
4625 initial_size,
4626 mask,
4627 flags,
4628 vmk_flags,
4629 tag,
4630 port,
4631 offset,
4632 copy,
4633 cur_protection,
4634 max_protection,
4635 inheritance,
4636 NULL,
4637 0);
5ba3f43e
A
4638
4639#if KASAN
4640 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4641 kasan_notify_address(*address, initial_size);
4642 }
4643#endif
4644
4645 return ret;
fe8ab488 4646}
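/*
 * Illustrative sketch (not part of the original file): how a hypothetical
 * in-kernel caller might use vm_map_enter_mem_object() to map a memory-entry
 * port it already holds.  The "anywhere" placement, read-only protections
 * and VM_KERN_MEMORY_NONE tag are assumptions made for this example only.
 */
static kern_return_t
vm_map_enter_mem_object_example(
	vm_map_t        target_map,
	ipc_port_t      mem_entry_port,
	vm_map_size_t   size,
	vm_map_offset_t *out_addr)
{
	vm_map_offset_t map_addr = 0;   /* let the VM choose the address */
	kern_return_t   kr;

	kr = vm_map_enter_mem_object(target_map,
	    &map_addr,
	    size,
	    0,                          /* mask: no alignment constraint */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag: assumption */
	    mem_entry_port,
	    0,                          /* offset into the memory entry */
	    FALSE,                      /* copy */
	    VM_PROT_READ,               /* cur_protection */
	    VM_PROT_READ,               /* max_protection */
	    VM_INHERIT_DEFAULT);
	if (kr == KERN_SUCCESS) {
		*out_addr = map_addr;
	}
	return kr;
}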
b0d623f7 4647
fe8ab488
A
4648kern_return_t
4649vm_map_enter_mem_object_prefault(
0a7de745
A
4650 vm_map_t target_map,
4651 vm_map_offset_t *address,
4652 vm_map_size_t initial_size,
4653 vm_map_offset_t mask,
4654 int flags,
4655 vm_map_kernel_flags_t vmk_flags,
4656 vm_tag_t tag,
4657 ipc_port_t port,
4658 vm_object_offset_t offset,
4659 vm_prot_t cur_protection,
4660 vm_prot_t max_protection,
4661 upl_page_list_ptr_t page_list,
4662 unsigned int page_list_count)
fe8ab488 4663{
5ba3f43e
A
4664 kern_return_t ret;
4665
4666 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4667 address,
4668 initial_size,
4669 mask,
4670 flags,
4671 vmk_flags,
4672 tag,
4673 port,
4674 offset,
4675 FALSE,
4676 cur_protection,
4677 max_protection,
4678 VM_INHERIT_DEFAULT,
4679 page_list,
4680 page_list_count);
5ba3f43e
A
4681
4682#if KASAN
4683 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4684 kasan_notify_address(*address, initial_size);
4685 }
4686#endif
4687
4688 return ret;
fe8ab488 4689}
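/*
 * Illustrative sketch (not part of the original file): a hypothetical caller
 * of vm_map_enter_mem_object_prefault().  It assumes the caller already holds
 * a populated UPL page list describing the pages to prefault; the tag and
 * read/write protections are assumptions made for this example only.
 */
static kern_return_t
vm_map_enter_mem_object_prefault_example(
	vm_map_t            target_map,
	ipc_port_t          mem_entry_port,
	vm_map_size_t       size,
	upl_page_list_ptr_t page_list,
	unsigned int        page_list_count,
	vm_map_offset_t     *out_addr)
{
	vm_map_offset_t map_addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter_mem_object_prefault(target_map,
	    &map_addr,
	    size,
	    0,                          /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag: assumption */
	    mem_entry_port,
	    0,                          /* offset */
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_READ | VM_PROT_WRITE,
	    page_list,
	    page_list_count);
	if (kr == KERN_SUCCESS) {
		*out_addr = map_addr;
	}
	return kr;
}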
b0d623f7
A
4690
4691
4692kern_return_t
4693vm_map_enter_mem_object_control(
0a7de745
A
4694 vm_map_t target_map,
4695 vm_map_offset_t *address,
4696 vm_map_size_t initial_size,
4697 vm_map_offset_t mask,
4698 int flags,
4699 vm_map_kernel_flags_t vmk_flags,
4700 vm_tag_t tag,
4701 memory_object_control_t control,
4702 vm_object_offset_t offset,
4703 boolean_t copy,
4704 vm_prot_t cur_protection,
4705 vm_prot_t max_protection,
4706 vm_inherit_t inheritance)
b0d623f7 4707{
0a7de745
A
4708 vm_map_address_t map_addr;
4709 vm_map_size_t map_size;
4710 vm_object_t object;
4711 vm_object_size_t size;
4712 kern_return_t result;
4713 memory_object_t pager;
4714 vm_prot_t pager_prot;
4715 kern_return_t kr;
5ba3f43e 4716#if __arm64__
0a7de745 4717 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e 4718#endif /* __arm64__ */
b0d623f7
A
4719
4720 /*
4721 * Check arguments for validity
4722 */
4723 if ((target_map == VM_MAP_NULL) ||
4724 (cur_protection & ~VM_PROT_ALL) ||
4725 (max_protection & ~VM_PROT_ALL) ||
4726 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 4727 initial_size == 0) {
b0d623f7 4728 return KERN_INVALID_ARGUMENT;
3e170ce0 4729 }
b0d623f7 4730
5ba3f43e
A
4731#if __arm64__
4732 if (fourk) {
4733 map_addr = vm_map_trunc_page(*address,
0a7de745 4734 FOURK_PAGE_MASK);
5ba3f43e 4735 map_size = vm_map_round_page(initial_size,
0a7de745 4736 FOURK_PAGE_MASK);
5ba3f43e
A
4737 } else
4738#endif /* __arm64__ */
3e170ce0
A
4739 {
4740 map_addr = vm_map_trunc_page(*address,
0a7de745 4741 VM_MAP_PAGE_MASK(target_map));
3e170ce0 4742 map_size = vm_map_round_page(initial_size,
0a7de745 4743 VM_MAP_PAGE_MASK(target_map));
3e170ce0
A
4744 }
4745 size = vm_object_round_page(initial_size);
b0d623f7
A
4746
4747 object = memory_object_control_to_vm_object(control);
4748
0a7de745 4749 if (object == VM_OBJECT_NULL) {
b0d623f7 4750 return KERN_INVALID_OBJECT;
0a7de745 4751 }
b0d623f7
A
4752
4753 if (object == kernel_object) {
4754 printf("Warning: Attempt to map kernel object"
0a7de745 4755 " by a non-private kernel entity\n");
b0d623f7
A
4756 return KERN_INVALID_OBJECT;
4757 }
4758
4759 vm_object_lock(object);
4760 object->ref_count++;
4761 vm_object_res_reference(object);
4762
4763 /*
4764 * For "named" VM objects, let the pager know that the
4765 * memory object is being mapped. Some pagers need to keep
4766 * track of this, to know when they can reclaim the memory
4767 * object, for example.
4768 * VM calls memory_object_map() for each mapping (specifying
4769 * the protection of each mapping) and calls
4770 * memory_object_last_unmap() when all the mappings are gone.
4771 */
4772 pager_prot = max_protection;
4773 if (copy) {
4774 pager_prot &= ~VM_PROT_WRITE;
4775 }
4776 pager = object->pager;
4777 if (object->named &&
4778 pager != MEMORY_OBJECT_NULL &&
4779 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4780 assert(object->pager_ready);
4781 vm_object_mapping_wait(object, THREAD_UNINT);
4782 vm_object_mapping_begin(object);
4783 vm_object_unlock(object);
4784
4785 kr = memory_object_map(pager, pager_prot);
4786 assert(kr == KERN_SUCCESS);
4787
4788 vm_object_lock(object);
4789 vm_object_mapping_end(object);
4790 }
4791 vm_object_unlock(object);
4792
4793 /*
4794 * Perform the copy if requested
4795 */
4796
4797 if (copy) {
0a7de745
A
4798 vm_object_t new_object;
4799 vm_object_offset_t new_offset;
b0d623f7
A
4800
4801 result = vm_object_copy_strategically(object, offset, size,
0a7de745
A
4802 &new_object, &new_offset,
4803 &copy);
b0d623f7
A
4804
4805
4806 if (result == KERN_MEMORY_RESTART_COPY) {
4807 boolean_t success;
4808 boolean_t src_needs_copy;
4809
4810 /*
4811 * XXX
4812 * We currently ignore src_needs_copy.
4813 * This really is the issue of how to make
4814 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4815 * non-kernel users to use. Solution forthcoming.
4816 * In the meantime, since we don't allow non-kernel
4817 * memory managers to specify symmetric copy,
4818 * we won't run into problems here.
4819 */
4820 new_object = object;
4821 new_offset = offset;
4822 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4823 new_offset, size,
4824 &src_needs_copy,
4825 &copy);
b0d623f7
A
4826 assert(success);
4827 result = KERN_SUCCESS;
4828 }
4829 /*
4830 * Throw away the reference to the
4831 * original object, as it won't be mapped.
4832 */
4833
4834 vm_object_deallocate(object);
4835
3e170ce0 4836 if (result != KERN_SUCCESS) {
b0d623f7 4837 return result;
3e170ce0 4838 }
b0d623f7
A
4839
4840 object = new_object;
4841 offset = new_offset;
4842 }
4843
5ba3f43e
A
4844#if __arm64__
4845 if (fourk) {
4846 result = vm_map_enter_fourk(target_map,
0a7de745
A
4847 &map_addr,
4848 map_size,
4849 (vm_map_offset_t)mask,
4850 flags,
4851 vmk_flags,
4852 tag,
4853 object, offset,
4854 copy,
4855 cur_protection, max_protection,
4856 inheritance);
5ba3f43e
A
4857 } else
4858#endif /* __arm64__ */
3e170ce0
A
4859 {
4860 result = vm_map_enter(target_map,
0a7de745
A
4861 &map_addr, map_size,
4862 (vm_map_offset_t)mask,
4863 flags,
4864 vmk_flags,
4865 tag,
4866 object, offset,
4867 copy,
4868 cur_protection, max_protection,
4869 inheritance);
4870 }
4871 if (result != KERN_SUCCESS) {
b0d623f7 4872 vm_object_deallocate(object);
0a7de745 4873 }
b0d623f7
A
4874 *address = map_addr;
4875
4876 return result;
4877}
4878
4879
0a7de745 4880#if VM_CPM
2d21ac55
A
4881
4882#ifdef MACH_ASSERT
0a7de745 4883extern pmap_paddr_t avail_start, avail_end;
2d21ac55
A
4884#endif
4885
4886/*
4887 * Allocate memory in the specified map, with the caveat that
4888 * the memory is physically contiguous. This call may fail
4889 * if the system can't find sufficient contiguous memory.
4890 * This call may cause or lead to heart-stopping amounts of
4891 * paging activity.
4892 *
4893 * Memory obtained from this call should be freed in the
4894 * normal way, viz., via vm_deallocate.
4895 */
4896kern_return_t
4897vm_map_enter_cpm(
0a7de745
A
4898 vm_map_t map,
4899 vm_map_offset_t *addr,
4900 vm_map_size_t size,
4901 int flags)
2d21ac55 4902{
0a7de745
A
4903 vm_object_t cpm_obj;
4904 pmap_t pmap;
4905 vm_page_t m, pages;
4906 kern_return_t kr;
4907 vm_map_offset_t va, start, end, offset;
4908#if MACH_ASSERT
4909 vm_map_offset_t prev_addr = 0;
4910#endif /* MACH_ASSERT */
4911
4912 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
4913 vm_tag_t tag;
4914
4915 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 4916
2d21ac55
A
4917 if (size == 0) {
4918 *addr = 0;
4919 return KERN_SUCCESS;
4920 }
0a7de745 4921 if (anywhere) {
2d21ac55 4922 *addr = vm_map_min(map);
0a7de745 4923 } else {
39236c6e 4924 *addr = vm_map_trunc_page(*addr,
0a7de745
A
4925 VM_MAP_PAGE_MASK(map));
4926 }
39236c6e 4927 size = vm_map_round_page(size,
0a7de745 4928 VM_MAP_PAGE_MASK(map));
2d21ac55
A
4929
4930 /*
4931 * LP64todo - cpm_allocate should probably allow
4932 * allocations of >4GB, but not with the current
4933 * algorithm, so just cast down the size for now.
4934 */
0a7de745 4935 if (size > VM_MAX_ADDRESS) {
2d21ac55 4936 return KERN_RESOURCE_SHORTAGE;
0a7de745 4937 }
2d21ac55 4938 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
0a7de745 4939 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
2d21ac55 4940 return kr;
0a7de745 4941 }
2d21ac55
A
4942
4943 cpm_obj = vm_object_allocate((vm_object_size_t)size);
4944 assert(cpm_obj != VM_OBJECT_NULL);
4945 assert(cpm_obj->internal);
316670eb 4946 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
4947 assert(cpm_obj->can_persist == FALSE);
4948 assert(cpm_obj->pager_created == FALSE);
4949 assert(cpm_obj->pageout == FALSE);
4950 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
4951
4952 /*
4953 * Insert pages into object.
4954 */
4955
4956 vm_object_lock(cpm_obj);
4957 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4958 m = pages;
4959 pages = NEXT_PAGE(m);
0c530ab8 4960 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636 4961
d9a64523
A
4962 assert(!m->vmp_gobbled);
4963 assert(!m->vmp_wanted);
4964 assert(!m->vmp_pageout);
4965 assert(!m->vmp_tabled);
b0d623f7 4966 assert(VM_PAGE_WIRED(m));
d9a64523 4967 assert(m->vmp_busy);
0a7de745 4968 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
91447636 4969
d9a64523 4970 m->vmp_busy = FALSE;
91447636
A
4971 vm_page_insert(m, cpm_obj, offset);
4972 }
4973 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4974 vm_object_unlock(cpm_obj);
4975
4976 /*
4977 * Hang onto a reference on the object in case a
4978 * multi-threaded application for some reason decides
4979 * to deallocate the portion of the address space into
4980 * which we will insert this object.
4981 *
4982 * Unfortunately, we must insert the object now before
4983 * we can talk to the pmap module about which addresses
4984 * must be wired down. Hence, the race with a multi-
4985 * threaded app.
4986 */
4987 vm_object_reference(cpm_obj);
4988
4989 /*
4990 * Insert object into map.
4991 */
4992
4993 kr = vm_map_enter(
2d21ac55
A
4994 map,
4995 addr,
4996 size,
4997 (vm_map_offset_t)0,
4998 flags,
5ba3f43e 4999 VM_MAP_KERNEL_FLAGS_NONE,
2d21ac55
A
5000 cpm_obj,
5001 (vm_object_offset_t)0,
5002 FALSE,
5003 VM_PROT_ALL,
5004 VM_PROT_ALL,
5005 VM_INHERIT_DEFAULT);
91447636
A
5006
5007 if (kr != KERN_SUCCESS) {
5008 /*
5009 * A CPM object doesn't have can_persist set,
5010 * so all we have to do is deallocate it to
5011 * free up these pages.
5012 */
5013 assert(cpm_obj->pager_created == FALSE);
5014 assert(cpm_obj->can_persist == FALSE);
5015 assert(cpm_obj->pageout == FALSE);
5016 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5017 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5018 vm_object_deallocate(cpm_obj); /* kill creation ref */
5019 }
5020
5021 /*
5022 * Inform the physical mapping system that the
5023 * range of addresses may not fault, so that
5024 * page tables and such can be locked down as well.
5025 */
5026 start = *addr;
5027 end = start + size;
5028 pmap = vm_map_pmap(map);
5029 pmap_pageable(pmap, start, end, FALSE);
5030
5031 /*
5032 * Enter each page into the pmap, to avoid faults.
5033 * Note that this loop could be coded more efficiently,
5034 * if the need arose, rather than looking up each page
5035 * again.
5036 */
5037 for (offset = 0, va = start; offset < size;
0a7de745
A
5038 va += PAGE_SIZE, offset += PAGE_SIZE) {
5039 int type_of_fault;
2d21ac55 5040
91447636
A
5041 vm_object_lock(cpm_obj);
5042 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 5043 assert(m != VM_PAGE_NULL);
2d21ac55
A
5044
5045 vm_page_zero_fill(m);
5046
5047 type_of_fault = DBG_ZERO_FILL_FAULT;
5048
6d2010ae 5049 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
0a7de745
A
5050 VM_PAGE_WIRED(m),
5051 FALSE, /* change_wiring */
5052 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5053 FALSE, /* no_cache */
5054 FALSE, /* cs_bypass */
5055 0, /* user_tag */
5056 0, /* pmap_options */
5057 NULL, /* need_retry */
5058 &type_of_fault);
2d21ac55
A
5059
5060 vm_object_unlock(cpm_obj);
91447636
A
5061 }
5062
0a7de745 5063#if MACH_ASSERT
91447636
A
5064 /*
5065 * Verify ordering in address space.
5066 */
5067 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5068 vm_object_lock(cpm_obj);
5069 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5070 vm_object_unlock(cpm_obj);
0a7de745 5071 if (m == VM_PAGE_NULL) {
316670eb 5072 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
0a7de745
A
5073 cpm_obj, (uint64_t)offset);
5074 }
d9a64523
A
5075 assert(m->vmp_tabled);
5076 assert(!m->vmp_busy);
5077 assert(!m->vmp_wanted);
5078 assert(!m->vmp_fictitious);
5079 assert(!m->vmp_private);
5080 assert(!m->vmp_absent);
5081 assert(!m->vmp_error);
5082 assert(!m->vmp_cleaning);
5083 assert(!m->vmp_laundry);
5084 assert(!m->vmp_precious);
5085 assert(!m->vmp_clustered);
91447636 5086 if (offset != 0) {
39037602 5087 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb 5088 printf("start 0x%llx end 0x%llx va 0x%llx\n",
0a7de745 5089 (uint64_t)start, (uint64_t)end, (uint64_t)va);
316670eb
A
5090 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5091 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
5092 panic("vm_allocate_cpm: pages not contig!");
5093 }
5094 }
39037602 5095 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636 5096 }
0a7de745 5097#endif /* MACH_ASSERT */
91447636
A
5098
5099 vm_object_deallocate(cpm_obj); /* kill extra ref */
5100
5101 return kr;
5102}
5103
5104
0a7de745 5105#else /* VM_CPM */
91447636
A
5106
5107/*
5108 * Interface is defined in all cases, but unless the kernel
5109 * is built explicitly for this option, the interface does
5110 * nothing.
5111 */
5112
5113kern_return_t
5114vm_map_enter_cpm(
0a7de745
A
5115 __unused vm_map_t map,
5116 __unused vm_map_offset_t *addr,
5117 __unused vm_map_size_t size,
5118 __unused int flags)
91447636
A
5119{
5120 return KERN_FAILURE;
5121}
5122#endif /* VM_CPM */
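/*
 * Illustrative sketch (not part of the original file): a hypothetical caller
 * of vm_map_enter_cpm().  As the block comment above the implementation
 * notes, memory obtained this way is released through the normal path,
 * i.e. vm_deallocate().  The VM_FLAGS_ANYWHERE placement is an assumption.
 */
static kern_return_t
vm_map_enter_cpm_example(
	vm_map_t        map,
	vm_map_size_t   size)
{
	vm_map_offset_t addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS) {
		return kr;      /* KERN_FAILURE on kernels built without VM_CPM */
	}

	/* ... use the physically contiguous range [addr, addr + size) ... */

	return vm_deallocate(map, addr, size);
}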
5123
b0d623f7
A
5124/* Not used without nested pmaps */
5125#ifndef NO_NESTED_PMAP
2d21ac55
A
5126/*
5127 * Clip and unnest a portion of a nested submap mapping.
5128 */
b0d623f7
A
5129
5130
2d21ac55
A
5131static void
5132vm_map_clip_unnest(
0a7de745
A
5133 vm_map_t map,
5134 vm_map_entry_t entry,
5135 vm_map_offset_t start_unnest,
5136 vm_map_offset_t end_unnest)
2d21ac55 5137{
b0d623f7
A
5138 vm_map_offset_t old_start_unnest = start_unnest;
5139 vm_map_offset_t old_end_unnest = end_unnest;
5140
2d21ac55 5141 assert(entry->is_sub_map);
3e170ce0 5142 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 5143 assert(entry->use_pmap);
2d21ac55 5144
b0d623f7
A
5145 /*
5146 * Query the platform for the optimal unnest range.
5147 * DRK: There's some duplication of effort here, since
5148 * callers may have adjusted the range to some extent. This
5149 * routine was introduced to support 1GiB subtree nesting
5150 * for x86 platforms, which can also nest on 2MiB boundaries
5151 * depending on size/alignment.
5152 */
5153 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
5154 assert(VME_SUBMAP(entry)->is_nested_map);
5155 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5156 log_unnest_badness(map,
0a7de745
A
5157 old_start_unnest,
5158 old_end_unnest,
5159 VME_SUBMAP(entry)->is_nested_map,
5160 (entry->vme_start +
5161 VME_SUBMAP(entry)->lowest_unnestable_start -
5162 VME_OFFSET(entry)));
b0d623f7
A
5163 }
5164
2d21ac55
A
5165 if (entry->vme_start > start_unnest ||
5166 entry->vme_end < end_unnest) {
5167 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
0a7de745
A
5168 "bad nested entry: start=0x%llx end=0x%llx\n",
5169 (long long)start_unnest, (long long)end_unnest,
5170 (long long)entry->vme_start, (long long)entry->vme_end);
2d21ac55 5171 }
b0d623f7 5172
2d21ac55
A
5173 if (start_unnest > entry->vme_start) {
5174 _vm_map_clip_start(&map->hdr,
0a7de745
A
5175 entry,
5176 start_unnest);
3e170ce0
A
5177 if (map->holelistenabled) {
5178 vm_map_store_update_first_free(map, NULL, FALSE);
5179 } else {
5180 vm_map_store_update_first_free(map, map->first_free, FALSE);
5181 }
2d21ac55
A
5182 }
5183 if (entry->vme_end > end_unnest) {
5184 _vm_map_clip_end(&map->hdr,
0a7de745
A
5185 entry,
5186 end_unnest);
3e170ce0
A
5187 if (map->holelistenabled) {
5188 vm_map_store_update_first_free(map, NULL, FALSE);
5189 } else {
5190 vm_map_store_update_first_free(map, map->first_free, FALSE);
5191 }
2d21ac55
A
5192 }
5193
5194 pmap_unnest(map->pmap,
0a7de745
A
5195 entry->vme_start,
5196 entry->vme_end - entry->vme_start);
d9a64523 5197 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
2d21ac55
A
5198 /* clean up parent map/maps */
5199 vm_map_submap_pmap_clean(
5200 map, entry->vme_start,
5201 entry->vme_end,
3e170ce0
A
5202 VME_SUBMAP(entry),
5203 VME_OFFSET(entry));
2d21ac55
A
5204 }
5205 entry->use_pmap = FALSE;
3e170ce0
A
5206 if ((map->pmap != kernel_pmap) &&
5207 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5208 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 5209 }
2d21ac55 5210}
0a7de745 5211#endif /* NO_NESTED_PMAP */
2d21ac55 5212
1c79356b
A
5213/*
5214 * vm_map_clip_start: [ internal use only ]
5215 *
5216 * Asserts that the given entry begins at or after
5217 * the specified address; if necessary,
5218 * it splits the entry into two.
5219 */
e2d2fc5c 5220void
2d21ac55 5221vm_map_clip_start(
0a7de745
A
5222 vm_map_t map,
5223 vm_map_entry_t entry,
5224 vm_map_offset_t startaddr)
2d21ac55 5225{
0c530ab8 5226#ifndef NO_NESTED_PMAP
fe8ab488
A
5227 if (entry->is_sub_map &&
5228 entry->use_pmap &&
2d21ac55 5229 startaddr >= entry->vme_start) {
0a7de745 5230 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5231
5232 /*
5233 * Make sure "startaddr" is no longer in a nested range
5234 * before we clip. Unnest only the minimum range the platform
5235 * can handle.
b0d623f7
A
5236 * vm_map_clip_unnest may perform additional adjustments to
5237 * the unnest range.
2d21ac55
A
5238 */
5239 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5240 end_unnest = start_unnest + pmap_nesting_size_min;
5241 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5242 }
5243#endif /* NO_NESTED_PMAP */
5244 if (startaddr > entry->vme_start) {
3e170ce0 5245 if (VME_OBJECT(entry) &&
2d21ac55 5246 !entry->is_sub_map &&
3e170ce0 5247 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5248 pmap_remove(map->pmap,
0a7de745
A
5249 (addr64_t)(entry->vme_start),
5250 (addr64_t)(entry->vme_end));
2d21ac55 5251 }
39037602
A
5252 if (entry->vme_atomic) {
5253 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5ba3f43e 5254 }
d9a64523
A
5255
5256 DTRACE_VM5(
5257 vm_map_clip_start,
5258 vm_map_t, map,
5259 vm_map_offset_t, entry->vme_start,
5260 vm_map_offset_t, entry->vme_end,
5261 vm_map_offset_t, startaddr,
5262 int, VME_ALIAS(entry));
5263
2d21ac55 5264 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
5265 if (map->holelistenabled) {
5266 vm_map_store_update_first_free(map, NULL, FALSE);
5267 } else {
5268 vm_map_store_update_first_free(map, map->first_free, FALSE);
5269 }
2d21ac55
A
5270 }
5271}
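/*
 * Worked example (illustrative, not in the original source): clipping an
 * entry that covers [0x100000, 0x104000) at startaddr 0x102000 leaves this
 * entry as [0x102000, 0x104000) with its offset advanced by 0x2000, and
 * links a new entry covering [0x100000, 0x102000) in front of it; see
 * _vm_map_clip_start() below.
 */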
5272
1c79356b
A
5273
5274#define vm_map_copy_clip_start(copy, entry, startaddr) \
5275 MACRO_BEGIN \
5276 if ((startaddr) > (entry)->vme_start) \
0a7de745 5277 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
1c79356b
A
5278 MACRO_END
5279
5280/*
5281 * This routine is called only when it is known that
5282 * the entry must be split.
5283 */
91447636 5284static void
1c79356b 5285_vm_map_clip_start(
0a7de745
A
5286 struct vm_map_header *map_header,
5287 vm_map_entry_t entry,
5288 vm_map_offset_t start)
1c79356b 5289{
0a7de745 5290 vm_map_entry_t new_entry;
1c79356b
A
5291
5292 /*
5293 * Split off the front portion --
5294 * note that we must insert the new
5295 * entry BEFORE this one, so that
5296 * this entry has the specified starting
5297 * address.
5298 */
5299
fe8ab488
A
5300 if (entry->map_aligned) {
5301 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 5302 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5303 }
5304
7ddcb079 5305 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5306 vm_map_entry_copy_full(new_entry, entry);
5307
5308 new_entry->vme_end = start;
e2d2fc5c 5309 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 5310 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 5311 assert(start < entry->vme_end);
1c79356b
A
5312 entry->vme_start = start;
5313
6d2010ae 5314 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b 5315
0a7de745 5316 if (entry->is_sub_map) {
3e170ce0 5317 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5318 } else {
3e170ce0 5319 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5320 }
1c79356b
A
5321}
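/*
 * Worked example (hypothetical numbers, added for exposition): clipping
 * an entry that spans [0x1000, 0x5000) with object offset 0 at
 * start = 0x3000 leaves a new front entry [0x1000, 0x3000) keeping
 * offset 0, while this entry becomes [0x3000, 0x5000) with its offset
 * advanced by (0x3000 - 0x1000) = 0x2000, exactly as the
 * VME_OFFSET_SET() call above computes.
 */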
5322
5323
5324/*
5325 * vm_map_clip_end: [ internal use only ]
5326 *
5327 * Asserts that the given entry ends at or before
5328 * the specified address; if necessary,
5329 * it splits the entry into two.
5330 */
e2d2fc5c 5331void
2d21ac55 5332vm_map_clip_end(
0a7de745
A
5333 vm_map_t map,
5334 vm_map_entry_t entry,
5335 vm_map_offset_t endaddr)
2d21ac55
A
5336{
5337 if (endaddr > entry->vme_end) {
5338 /*
5339 * Within the scope of this clipping, limit "endaddr" to
5340 * the end of this map entry...
5341 */
5342 endaddr = entry->vme_end;
5343 }
5344#ifndef NO_NESTED_PMAP
fe8ab488 5345 if (entry->is_sub_map && entry->use_pmap) {
0a7de745 5346 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5347
5348 /*
5349 * Make sure the range between the start of this entry and
5350 * the new "endaddr" is no longer nested before we clip.
5351 * Unnest only the minimum range the platform can handle.
b0d623f7
A
5352 * vm_map_clip_unnest may perform additional adjustments to
5353 * the unnest range.
2d21ac55
A
5354 */
5355 start_unnest = entry->vme_start;
5356 end_unnest =
0a7de745
A
5357 (endaddr + pmap_nesting_size_min - 1) &
5358 ~(pmap_nesting_size_min - 1);
2d21ac55
A
5359 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5360 }
5361#endif /* NO_NESTED_PMAP */
5362 if (endaddr < entry->vme_end) {
3e170ce0 5363 if (VME_OBJECT(entry) &&
2d21ac55 5364 !entry->is_sub_map &&
3e170ce0 5365 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5366 pmap_remove(map->pmap,
0a7de745
A
5367 (addr64_t)(entry->vme_start),
5368 (addr64_t)(entry->vme_end));
2d21ac55 5369 }
39037602
A
5370 if (entry->vme_atomic) {
5371 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5372 }
d9a64523
A
5373 DTRACE_VM5(
5374 vm_map_clip_end,
5375 vm_map_t, map,
5376 vm_map_offset_t, entry->vme_start,
5377 vm_map_offset_t, entry->vme_end,
5378 vm_map_offset_t, endaddr,
5379 int, VME_ALIAS(entry));
5380
2d21ac55 5381 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
5382 if (map->holelistenabled) {
5383 vm_map_store_update_first_free(map, NULL, FALSE);
5384 } else {
5385 vm_map_store_update_first_free(map, map->first_free, FALSE);
5386 }
2d21ac55
A
5387 }
5388}
0c530ab8 5389
1c79356b
A
5390
5391#define vm_map_copy_clip_end(copy, entry, endaddr) \
5392 MACRO_BEGIN \
5393 if ((endaddr) < (entry)->vme_end) \
0a7de745 5394 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
1c79356b
A
5395 MACRO_END
5396
5397/*
5398 * This routine is called only when it is known that
5399 * the entry must be split.
5400 */
91447636 5401static void
1c79356b 5402_vm_map_clip_end(
0a7de745
A
5403 struct vm_map_header *map_header,
5404 vm_map_entry_t entry,
5405 vm_map_offset_t end)
1c79356b 5406{
0a7de745 5407 vm_map_entry_t new_entry;
1c79356b
A
5408
5409 /*
5410 * Create a new entry and insert it
5411 * AFTER the specified entry
5412 */
5413
fe8ab488
A
5414 if (entry->map_aligned) {
5415 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745 5416 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5417 }
5418
7ddcb079 5419 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5420 vm_map_entry_copy_full(new_entry, entry);
5421
e2d2fc5c 5422 assert(entry->vme_start < end);
1c79356b 5423 new_entry->vme_start = entry->vme_end = end;
3e170ce0 5424 VME_OFFSET_SET(new_entry,
0a7de745 5425 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 5426 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 5427
6d2010ae 5428 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b 5429
0a7de745 5430 if (entry->is_sub_map) {
3e170ce0 5431 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5432 } else {
3e170ce0 5433 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5434 }
1c79356b
A
5435}
5436
5437
5438/*
5439 * VM_MAP_RANGE_CHECK: [ internal use only ]
5440 *
5441 * Asserts that the starting and ending region
5442 * addresses fall within the valid range of the map.
5443 */
0a7de745
A
5444#define VM_MAP_RANGE_CHECK(map, start, end) \
5445 MACRO_BEGIN \
5446 if (start < vm_map_min(map)) \
5447 start = vm_map_min(map); \
5448 if (end > vm_map_max(map)) \
5449 end = vm_map_max(map); \
5450 if (start > end) \
5451 start = end; \
2d21ac55 5452 MACRO_END
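/*
 * Worked example (hypothetical numbers, added for exposition): with
 * vm_map_min(map) = 0x1000 and vm_map_max(map) = 0x9000,
 * VM_MAP_RANGE_CHECK clamps a request of [0x0, 0xA000) to
 * [0x1000, 0x9000), and a fully out-of-range request such as
 * [0xB000, 0xC000) collapses to the empty range [0x9000, 0x9000).
 */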
1c79356b
A
5453
5454/*
5455 * vm_map_range_check: [ internal use only ]
5ba3f43e 5456 *
1c79356b
A
5457 * Check that the region defined by the specified start and
5458 * end addresses is wholly contained within a single map
5459 * entry or set of adjacent map entries of the specified map,
5460 * i.e. the specified region contains no unmapped space.
5461 * If any or all of the region is unmapped, FALSE is returned.
5462 * Otherwise, TRUE is returned and if the output argument 'entry'
5463 * is not NULL it points to the map entry containing the start
5464 * of the region.
5465 *
5466 * The map is locked for reading on entry and is left locked.
5467 */
91447636 5468static boolean_t
1c79356b 5469vm_map_range_check(
0a7de745
A
5470 vm_map_t map,
5471 vm_map_offset_t start,
5472 vm_map_offset_t end,
5473 vm_map_entry_t *entry)
1c79356b 5474{
0a7de745
A
5475 vm_map_entry_t cur;
5476 vm_map_offset_t prev;
1c79356b
A
5477
5478 /*
0a7de745 5479 * Basic sanity checks first
1c79356b 5480 */
0a7de745
A
5481 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5482 return FALSE;
5483 }
1c79356b
A
5484
5485 /*
0a7de745 5486 * Check first if the region starts within a valid
1c79356b
A
5487 * mapping for the map.
5488 */
0a7de745
A
5489 if (!vm_map_lookup_entry(map, start, &cur)) {
5490 return FALSE;
5491 }
1c79356b
A
5492
5493 /*
5ba3f43e 5494 * Optimize for the case that the region is contained
1c79356b
A
5495 * in a single map entry.
5496 */
0a7de745 5497 if (entry != (vm_map_entry_t *) NULL) {
1c79356b 5498 *entry = cur;
0a7de745
A
5499 }
5500 if (end <= cur->vme_end) {
5501 return TRUE;
5502 }
1c79356b
A
5503
5504 /*
0a7de745
A
5505 * If the region is not wholly contained within a
5506 * single entry, walk the entries looking for holes.
1c79356b
A
5507 */
5508 prev = cur->vme_end;
5509 cur = cur->vme_next;
5510 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
0a7de745
A
5511 if (end <= cur->vme_end) {
5512 return TRUE;
5513 }
1c79356b
A
5514 prev = cur->vme_end;
5515 cur = cur->vme_next;
5516 }
0a7de745 5517 return FALSE;
1c79356b
A
5518}
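/*
 * Illustrative sketch (not part of the original source): how an
 * in-file caller might use vm_map_range_check() to verify that a
 * request covers no unmapped space before walking the entries.  Names
 * are hypothetical; the map is assumed already locked for reading.
 */
#if 0 /* example only */
static kern_return_t
example_check_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	vm_map_entry_t entry;

	if (!vm_map_range_check(map, start, end, &entry)) {
		/* a hole exists somewhere in [start, end) */
		return KERN_INVALID_ADDRESS;
	}
	/* "entry" contains "start"; walk forward from here */
	return KERN_SUCCESS;
}
#endif /* example only */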
5519
5520/*
5521 * vm_map_submap: [ kernel use only ]
5522 *
5523 * Mark the given range as handled by a subordinate map.
5524 *
5525 * This range must have been created with vm_map_find using
5526 * the vm_submap_object, and no other operations may have been
5527 * performed on this range prior to calling vm_map_submap.
5528 *
5529 * Only a limited number of operations can be performed
5530 * within this range after calling vm_map_submap:
5531 * vm_fault
5532 * [Don't try vm_map_copyin!]
5533 *
5534 * To remove a submapping, one must first remove the
5535 * range from the superior map, and then destroy the
5536 * submap (if desired). [Better yet, don't try it.]
5537 */
5538kern_return_t
5539vm_map_submap(
0a7de745
A
5540 vm_map_t map,
5541 vm_map_offset_t start,
5542 vm_map_offset_t end,
5543 vm_map_t submap,
5544 vm_map_offset_t offset,
0c530ab8 5545#ifdef NO_NESTED_PMAP
91447636 5546 __unused
0a7de745
A
5547#endif /* NO_NESTED_PMAP */
5548 boolean_t use_pmap)
1c79356b 5549{
0a7de745
A
5550 vm_map_entry_t entry;
5551 kern_return_t result = KERN_INVALID_ARGUMENT;
5552 vm_object_t object;
1c79356b
A
5553
5554 vm_map_lock(map);
5555
0a7de745 5556 if (!vm_map_lookup_entry(map, start, &entry)) {
1c79356b 5557 entry = entry->vme_next;
2d21ac55 5558 }
1c79356b 5559
2d21ac55
A
5560 if (entry == vm_map_to_entry(map) ||
5561 entry->is_sub_map) {
1c79356b
A
5562 vm_map_unlock(map);
5563 return KERN_INVALID_ARGUMENT;
5564 }
5565
2d21ac55 5566 vm_map_clip_start(map, entry, start);
1c79356b
A
5567 vm_map_clip_end(map, entry, end);
5568
5569 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5570 (!entry->is_sub_map) &&
3e170ce0 5571 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
5572 (object->resident_page_count == 0) &&
5573 (object->copy == VM_OBJECT_NULL) &&
5574 (object->shadow == VM_OBJECT_NULL) &&
5575 (!object->pager_created)) {
3e170ce0
A
5576 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5577 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
5578 vm_object_deallocate(object);
5579 entry->is_sub_map = TRUE;
fe8ab488 5580 entry->use_pmap = FALSE;
3e170ce0 5581 VME_SUBMAP_SET(entry, submap);
2d21ac55 5582 vm_map_reference(submap);
316670eb
A
5583 if (submap->mapped_in_other_pmaps == FALSE &&
5584 vm_map_pmap(submap) != PMAP_NULL &&
5585 vm_map_pmap(submap) != vm_map_pmap(map)) {
5586 /*
5587 * This submap is being mapped in a map
5588 * that uses a different pmap.
5589 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 5590 * to indicate that we now need to
316670eb
A
5591 * remove mappings from all pmaps rather
5592 * than just the submap's pmap.
5593 */
5594 submap->mapped_in_other_pmaps = TRUE;
5595 }
2d21ac55 5596
0c530ab8 5597#ifndef NO_NESTED_PMAP
2d21ac55
A
5598 if (use_pmap) {
5599 /* nest if platform code will allow */
0a7de745 5600 if (submap->pmap == NULL) {
316670eb
A
5601 ledger_t ledger = map->pmap->ledger;
5602 submap->pmap = pmap_create(ledger,
0a7de745
A
5603 (vm_map_size_t) 0, FALSE);
5604 if (submap->pmap == PMAP_NULL) {
2d21ac55 5605 vm_map_unlock(map);
0a7de745 5606 return KERN_NO_SPACE;
55e303ae 5607 }
0a7de745 5608#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
5609 pmap_set_nested(submap->pmap);
5610#endif
55e303ae 5611 }
2d21ac55 5612 result = pmap_nest(map->pmap,
0a7de745
A
5613 (VME_SUBMAP(entry))->pmap,
5614 (addr64_t)start,
5615 (addr64_t)start,
5616 (uint64_t)(end - start));
5617 if (result) {
2d21ac55 5618 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
0a7de745 5619 }
2d21ac55
A
5620 entry->use_pmap = TRUE;
5621 }
0a7de745 5622#else /* NO_NESTED_PMAP */
2d21ac55 5623 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0a7de745 5624#endif /* NO_NESTED_PMAP */
2d21ac55 5625 result = KERN_SUCCESS;
1c79356b
A
5626 }
5627 vm_map_unlock(map);
5628
0a7de745 5629 return result;
1c79356b
A
5630}
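/*
 * Illustrative sketch (not part of the original source): a kernel-only
 * caller installing "submap" over a range it previously reserved with
 * the vm_submap_object.  The function and parameter names are
 * hypothetical; only the vm_map_submap() signature comes from this
 * file.
 */
#if 0 /* example only */
static kern_return_t
example_install_submap(vm_map_t parent_map, vm_map_offset_t region_start,
    vm_map_size_t region_size, vm_map_t submap)
{
	/*
	 * The [region_start, region_start + region_size) range must have
	 * been reserved with the vm_submap_object beforehand, or
	 * vm_map_submap() returns KERN_INVALID_ARGUMENT.
	 */
	return vm_map_submap(parent_map,
	    region_start,
	    region_start + region_size,
	    submap,
	    0,          /* offset into the submap */
	    TRUE);      /* use_pmap: try to nest the submap's pmap */
}
#endif /* example only */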
5631
5632/*
5633 * vm_map_protect:
5634 *
5635 * Sets the protection of the specified address
5636 * region in the target map. If "set_max" is
5637 * specified, the maximum protection is to be set;
5638 * otherwise, only the current protection is affected.
5639 */
5640kern_return_t
5641vm_map_protect(
0a7de745
A
5642 vm_map_t map,
5643 vm_map_offset_t start,
5644 vm_map_offset_t end,
5645 vm_prot_t new_prot,
5646 boolean_t set_max)
39037602 5647{
0a7de745
A
5648 vm_map_entry_t current;
5649 vm_map_offset_t prev;
5650 vm_map_entry_t entry;
5651 vm_prot_t new_max;
5652 int pmap_options = 0;
5653 kern_return_t kr;
1c79356b
A
5654
5655 XPR(XPR_VM_MAP,
2d21ac55 5656 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 5657 map, start, end, new_prot, set_max);
1c79356b 5658
5c9f4661 5659 if (new_prot & VM_PROT_COPY) {
0a7de745
A
5660 vm_map_offset_t new_start;
5661 vm_prot_t cur_prot, max_prot;
5662 vm_map_kernel_flags_t kflags;
5c9f4661
A
5663
5664 /* LP64todo - see below */
5665 if (start >= map->max_offset) {
5666 return KERN_INVALID_ADDRESS;
5667 }
5668
d9a64523
A
5669#if VM_PROTECT_WX_FAIL
5670 if ((new_prot & VM_PROT_EXECUTE) &&
5671 map != kernel_map &&
5672 cs_process_enforcement(NULL)) {
5673 DTRACE_VM3(cs_wx,
0a7de745
A
5674 uint64_t, (uint64_t) start,
5675 uint64_t, (uint64_t) end,
5676 vm_prot_t, new_prot);
d9a64523 5677 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5678 proc_selfpid(),
5679 (current_task()->bsd_info
5680 ? proc_name_address(current_task()->bsd_info)
5681 : "?"),
5682 __FUNCTION__);
d9a64523
A
5683 return KERN_PROTECTION_FAILURE;
5684 }
5685#endif /* VM_PROTECT_WX_FAIL */
5686
5687 /*
5688 * Let vm_map_remap_extract() know that it will need to:
5689 * + make a copy of the mapping
5690 * + add VM_PROT_WRITE to the max protections
5691 * + remove any protections that are no longer allowed from the
5692 * max protections (to avoid any WRITE/EXECUTE conflict, for
5693 * example).
5694 * Note that "max_prot" is an IN/OUT parameter only for this
5695 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5696 * only.
5697 */
5698 max_prot = new_prot & VM_PROT_ALL;
5c9f4661
A
5699 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5700 kflags.vmkf_remap_prot_copy = TRUE;
d9a64523 5701 kflags.vmkf_overwrite_immutable = TRUE;
5c9f4661
A
5702 new_start = start;
5703 kr = vm_map_remap(map,
0a7de745
A
5704 &new_start,
5705 end - start,
5706 0, /* mask */
5707 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5708 kflags,
5709 0,
5710 map,
5711 start,
5712 TRUE, /* copy-on-write remapping! */
5713 &cur_prot,
5714 &max_prot,
5715 VM_INHERIT_DEFAULT);
5c9f4661
A
5716 if (kr != KERN_SUCCESS) {
5717 return kr;
5718 }
5719 new_prot &= ~VM_PROT_COPY;
5720 }
5721
1c79356b
A
5722 vm_map_lock(map);
5723
91447636
A
5724 /* LP64todo - remove this check when vm_map_commpage64()
5725 * no longer has to stuff in a map_entry for the commpage
5726 * above the map's max_offset.
5727 */
5728 if (start >= map->max_offset) {
5729 vm_map_unlock(map);
0a7de745 5730 return KERN_INVALID_ADDRESS;
91447636
A
5731 }
5732
0a7de745 5733 while (1) {
b0d623f7 5734 /*
0a7de745 5735 * Lookup the entry. If it doesn't start in a valid
b0d623f7
A
5736 * entry, return an error.
5737 */
0a7de745 5738 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 5739 vm_map_unlock(map);
0a7de745 5740 return KERN_INVALID_ADDRESS;
b0d623f7
A
5741 }
5742
0a7de745 5743 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
b0d623f7
A
5744 start = SUPERPAGE_ROUND_DOWN(start);
5745 continue;
5746 }
5747 break;
0a7de745
A
5748 }
5749 if (entry->superpage_size) {
5750 end = SUPERPAGE_ROUND_UP(end);
5751 }
1c79356b
A
5752
5753 /*
5754 * Make a first pass to check for protection and address
5755 * violations.
5756 */
5757
5758 current = entry;
5759 prev = current->vme_start;
5760 while ((current != vm_map_to_entry(map)) &&
0a7de745 5761 (current->vme_start < end)) {
1c79356b
A
5762 /*
5763 * If there is a hole, return an error.
5764 */
5765 if (current->vme_start != prev) {
5766 vm_map_unlock(map);
0a7de745 5767 return KERN_INVALID_ADDRESS;
1c79356b
A
5768 }
5769
5770 new_max = current->max_protection;
5c9f4661
A
5771 if ((new_prot & new_max) != new_prot) {
5772 vm_map_unlock(map);
0a7de745 5773 return KERN_PROTECTION_FAILURE;
1c79356b 5774 }
5ba3f43e 5775
d9a64523
A
5776 if ((new_prot & VM_PROT_WRITE) &&
5777 (new_prot & VM_PROT_EXECUTE) &&
5778#if !CONFIG_EMBEDDED
5779 map != kernel_map &&
5780 cs_process_enforcement(NULL) &&
5781#endif /* !CONFIG_EMBEDDED */
5782 !(current->used_for_jit)) {
5783 DTRACE_VM3(cs_wx,
0a7de745
A
5784 uint64_t, (uint64_t) current->vme_start,
5785 uint64_t, (uint64_t) current->vme_end,
5786 vm_prot_t, new_prot);
d9a64523 5787 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5788 proc_selfpid(),
5789 (current_task()->bsd_info
5790 ? proc_name_address(current_task()->bsd_info)
5791 : "?"),
5792 __FUNCTION__);
d9a64523
A
5793 new_prot &= ~VM_PROT_EXECUTE;
5794#if VM_PROTECT_WX_FAIL
5795 vm_map_unlock(map);
5796 return KERN_PROTECTION_FAILURE;
5797#endif /* VM_PROTECT_WX_FAIL */
5ba3f43e 5798 }
593a1d5f 5799
a39ff7e2
A
5800 /*
5801 * If the task has requested executable lockdown,
5802 * deny both:
5803 * - adding executable protections OR
5804 * - adding write protections to an existing executable mapping.
5805 */
5806 if (map->map_disallow_new_exec == TRUE) {
5807 if ((new_prot & VM_PROT_EXECUTE) ||
5808 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5809 vm_map_unlock(map);
0a7de745 5810 return KERN_PROTECTION_FAILURE;
a39ff7e2
A
5811 }
5812 }
5813
1c79356b
A
5814 prev = current->vme_end;
5815 current = current->vme_next;
5816 }
39037602 5817
5ba3f43e
A
5818#if __arm64__
5819 if (end > prev &&
5820 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5821 vm_map_entry_t prev_entry;
5822
5823 prev_entry = current->vme_prev;
5824 if (prev_entry != vm_map_to_entry(map) &&
5825 !prev_entry->map_aligned &&
5826 (vm_map_round_page(prev_entry->vme_end,
0a7de745
A
5827 VM_MAP_PAGE_MASK(map))
5828 == end)) {
5ba3f43e
A
5829 /*
5830 * The last entry in our range is not "map-aligned"
5831 * but it would have reached all the way to "end"
5832 * if it had been map-aligned, so this is not really
5833 * a hole in the range and we can proceed.
5834 */
5835 prev = end;
5836 }
5837 }
5838#endif /* __arm64__ */
39037602 5839
1c79356b
A
5840 if (end > prev) {
5841 vm_map_unlock(map);
0a7de745 5842 return KERN_INVALID_ADDRESS;
1c79356b
A
5843 }
5844
5845 /*
5846 * Go back and fix up protections.
5847 * Clip to start here if the range starts within
5848 * the entry.
5849 */
5850
5851 current = entry;
2d21ac55
A
5852 if (current != vm_map_to_entry(map)) {
5853 /* clip and unnest if necessary */
5854 vm_map_clip_start(map, current, start);
1c79356b 5855 }
2d21ac55 5856
1c79356b 5857 while ((current != vm_map_to_entry(map)) &&
0a7de745
A
5858 (current->vme_start < end)) {
5859 vm_prot_t old_prot;
1c79356b
A
5860
5861 vm_map_clip_end(map, current, end);
5862
fe8ab488
A
5863 if (current->is_sub_map) {
5864 /* clipping did unnest if needed */
5865 assert(!current->use_pmap);
5866 }
2d21ac55 5867
1c79356b
A
5868 old_prot = current->protection;
5869
5c9f4661
A
5870 if (set_max) {
5871 current->max_protection = new_prot;
5872 current->protection = new_prot & old_prot;
5873 } else {
5874 current->protection = new_prot;
5875 }
1c79356b
A
5876
5877 /*
5878 * Update physical map if necessary.
5ba3f43e
A
5879 * If the request is to turn off write protection,
5880 * we won't do it for real (in pmap). This is because
5881 * it would cause copy-on-write to fail. We've already
5882 * set the new protection in the map, so if a
5883 * write-protect fault occurred, it will be fixed up
1c79356b
A
5884 * properly, COW or not.
5885 */
1c79356b 5886 if (current->protection != old_prot) {
1c79356b
A
5887 /* Look one level in: we support nested pmaps */
5888 /* from mapped submaps which are direct entries */
5889 /* in our map */
0c530ab8 5890
2d21ac55 5891 vm_prot_t prot;
0c530ab8 5892
39037602
A
5893 prot = current->protection;
5894 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
0a7de745
A
5895 prot &= ~VM_PROT_WRITE;
5896 } else {
5897 assert(!VME_OBJECT(current)->code_signed);
5898 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
39037602 5899 }
2d21ac55 5900
0a7de745
A
5901 if (override_nx(map, VME_ALIAS(current)) && prot) {
5902 prot |= VM_PROT_EXECUTE;
5903 }
2d21ac55 5904
5ba3f43e
A
5905#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5906 if (!(old_prot & VM_PROT_EXECUTE) &&
5907 (prot & VM_PROT_EXECUTE) &&
d9a64523
A
5908 panic_on_unsigned_execute &&
5909 (proc_selfcsflags() & CS_KILL)) {
5ba3f43e
A
5910 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5911 }
5912#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5913
5914 if (pmap_has_prot_policy(prot)) {
5915 if (current->wired_count) {
5916 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
0a7de745 5917 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5ba3f43e
A
5918 }
5919
5920 /* If the pmap layer cares about this
5921 * protection type, force a fault for
5922 * each page so that vm_fault will
5923 * repopulate the page with the full
5924 * set of protections.
5925 */
5926 /*
5927 * TODO: We don't seem to need this,
5928 * but this is due to an internal
5929 * implementation detail of
5930 * pmap_protect. Do we want to rely
5931 * on this?
5932 */
5933 prot = VM_PROT_NONE;
5934 }
490019cf 5935
0c530ab8 5936 if (current->is_sub_map && current->use_pmap) {
5ba3f43e 5937 pmap_protect(VME_SUBMAP(current)->pmap,
0a7de745
A
5938 current->vme_start,
5939 current->vme_end,
5940 prot);
1c79356b 5941 } else {
5ba3f43e
A
5942 if (prot & VM_PROT_WRITE) {
5943 if (VME_OBJECT(current) == compressor_object) {
5944 /*
5945 * For write requests on the
5946 * compressor, we will ask the
5947 * pmap layer to prevent us from
5948 * taking a write fault when we
5949 * attempt to access the mapping
5950 * next.
5951 */
5952 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5953 }
5954 }
5955
5956 pmap_protect_options(map->pmap,
0a7de745
A
5957 current->vme_start,
5958 current->vme_end,
5959 prot,
5960 pmap_options,
5961 NULL);
1c79356b 5962 }
1c79356b
A
5963 }
5964 current = current->vme_next;
5965 }
5966
5353443c 5967 current = entry;
91447636 5968 while ((current != vm_map_to_entry(map)) &&
0a7de745 5969 (current->vme_start <= end)) {
5353443c
A
5970 vm_map_simplify_entry(map, current);
5971 current = current->vme_next;
5972 }
5973
1c79356b 5974 vm_map_unlock(map);
0a7de745 5975 return KERN_SUCCESS;
1c79356b
A
5976}
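/*
 * Illustrative sketch (not part of the original source): dropping a
 * range to read-only via vm_map_protect().  Names are hypothetical;
 * only the signature above is assumed.
 */
#if 0 /* example only */
static kern_return_t
example_make_read_only(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	/*
	 * set_max == FALSE changes only the current protection.  Passing
	 * TRUE would instead set the maximum protection (and intersect
	 * the current protection with it), and the maximum can never be
	 * raised again afterwards.
	 */
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}
#endif /* example only */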
5977
5978/*
5979 * vm_map_inherit:
5980 *
5981 * Sets the inheritance of the specified address
5982 * range in the target map. Inheritance
5983 * affects how the map will be shared with
5984 * child maps at the time of vm_map_fork.
5985 */
5986kern_return_t
5987vm_map_inherit(
0a7de745
A
5988 vm_map_t map,
5989 vm_map_offset_t start,
5990 vm_map_offset_t end,
5991 vm_inherit_t new_inheritance)
1c79356b 5992{
0a7de745
A
5993 vm_map_entry_t entry;
5994 vm_map_entry_t temp_entry;
1c79356b
A
5995
5996 vm_map_lock(map);
5997
5998 VM_MAP_RANGE_CHECK(map, start, end);
5999
6000 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6001 entry = temp_entry;
0a7de745 6002 } else {
1c79356b
A
6003 temp_entry = temp_entry->vme_next;
6004 entry = temp_entry;
6005 }
6006
6007 /* first check entire range for submaps which can't support the */
6008 /* given inheritance. */
6009 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
0a7de745
A
6010 if (entry->is_sub_map) {
6011 if (new_inheritance == VM_INHERIT_COPY) {
91447636 6012 vm_map_unlock(map);
0a7de745 6013 return KERN_INVALID_ARGUMENT;
91447636 6014 }
1c79356b
A
6015 }
6016
6017 entry = entry->vme_next;
6018 }
6019
6020 entry = temp_entry;
2d21ac55
A
6021 if (entry != vm_map_to_entry(map)) {
6022 /* clip and unnest if necessary */
6023 vm_map_clip_start(map, entry, start);
6024 }
1c79356b
A
6025
6026 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6027 vm_map_clip_end(map, entry, end);
fe8ab488
A
6028 if (entry->is_sub_map) {
6029 /* clip did unnest if needed */
6030 assert(!entry->use_pmap);
6031 }
1c79356b
A
6032
6033 entry->inheritance = new_inheritance;
6034
6035 entry = entry->vme_next;
6036 }
6037
6038 vm_map_unlock(map);
0a7de745 6039 return KERN_SUCCESS;
1c79356b
A
6040}
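/*
 * Illustrative sketch (not part of the original source): marking a
 * range to be shared with children created by vm_map_fork().  Names
 * are hypothetical; only the vm_map_inherit() signature is assumed.
 */
#if 0 /* example only */
static kern_return_t
example_share_with_children(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	/*
	 * VM_INHERIT_SHARE: the child sees the same memory;
	 * VM_INHERIT_COPY would give it a copy-on-write copy (and is
	 * rejected for submap entries, as checked above); and
	 * VM_INHERIT_NONE would leave the range unmapped in the child.
	 */
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif /* example only */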
6041
2d21ac55
A
6042/*
6043 * Update the accounting for the amount of wired memory in this map. If the user has
6044 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6045 */
6046
6047static kern_return_t
6048add_wire_counts(
0a7de745
A
6049 vm_map_t map,
6050 vm_map_entry_t entry,
6051 boolean_t user_wire)
5ba3f43e 6052{
0a7de745 6053 vm_map_size_t size;
2d21ac55
A
6054
6055 if (user_wire) {
6d2010ae 6056 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
6057
6058 /*
6059 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6060 * this map entry.
6061 */
6062
6063 if (entry->user_wired_count == 0) {
6064 size = entry->vme_end - entry->vme_start;
5ba3f43e 6065
2d21ac55
A
6066 /*
6067 * Since this is the first time the user is wiring this map entry, check to see if we're
6068 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6069 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
6070 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6071 * limit, then we fail.
6072 */
6073
0a7de745
A
6074 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6075 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
6076 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) {
2d21ac55 6077 return KERN_RESOURCE_SHORTAGE;
0a7de745 6078 }
2d21ac55
A
6079
6080 /*
6081 * The first time the user wires an entry, we also increment the wired_count and add this to
6082 * the total that has been wired in the map.
6083 */
6084
0a7de745 6085 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6086 return KERN_FAILURE;
0a7de745 6087 }
2d21ac55
A
6088
6089 entry->wired_count++;
6090 map->user_wire_size += size;
6091 }
6092
0a7de745 6093 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6094 return KERN_FAILURE;
0a7de745 6095 }
2d21ac55
A
6096
6097 entry->user_wired_count++;
2d21ac55 6098 } else {
2d21ac55
A
6099 /*
6100 * The kernel's wiring the memory. Just bump the count and continue.
6101 */
6102
0a7de745 6103 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6104 panic("vm_map_wire: too many wirings");
0a7de745 6105 }
2d21ac55
A
6106
6107 entry->wired_count++;
6108 }
6109
6110 return KERN_SUCCESS;
6111}
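/*
 * Worked example (hypothetical numbers, added for exposition): with a
 * per-map cap of MIN(map->user_wire_limit, vm_user_wire_limit) = 64 MB
 * and map->user_wire_size already at 60 MB, a first-time user wire of
 * an 8 MB entry trips the "size + map->user_wire_size > ..." test
 * above and returns KERN_RESOURCE_SHORTAGE, while a 4 MB entry would
 * pass that test (subject to the two global wired-page checks as
 * well).
 */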
6112
6113/*
6114 * Update the memory wiring accounting now that the given map entry is being unwired.
6115 */
6116
6117static void
6118subtract_wire_counts(
0a7de745
A
6119 vm_map_t map,
6120 vm_map_entry_t entry,
6121 boolean_t user_wire)
5ba3f43e 6122{
2d21ac55 6123 if (user_wire) {
2d21ac55
A
6124 /*
6125 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6126 */
6127
6128 if (entry->user_wired_count == 1) {
2d21ac55
A
6129 /*
6130 * We're removing the last user wire reference. Decrement the wired_count and the total
6131 * user wired memory for this map.
6132 */
6133
6134 assert(entry->wired_count >= 1);
6135 entry->wired_count--;
6136 map->user_wire_size -= entry->vme_end - entry->vme_start;
6137 }
6138
6139 assert(entry->user_wired_count >= 1);
6140 entry->user_wired_count--;
2d21ac55 6141 } else {
2d21ac55
A
6142 /*
6143 * The kernel is unwiring the memory. Just update the count.
6144 */
6145
6146 assert(entry->wired_count >= 1);
6147 entry->wired_count--;
6148 }
6149}
6150
5ba3f43e 6151int cs_executable_wire = 0;
39037602 6152
1c79356b
A
6153/*
6154 * vm_map_wire:
6155 *
6156 * Sets the pageability of the specified address range in the
6157 * target map as wired. Regions specified as not pageable require
6158 * locked-down physical memory and physical page maps. The
6159 * access_type variable indicates types of accesses that must not
6160 * generate page faults. This is checked against protection of
6161 * memory being locked-down.
6162 *
6163 * The map must not be locked, but a reference must remain to the
6164 * map throughout the call.
6165 */
91447636 6166static kern_return_t
1c79356b 6167vm_map_wire_nested(
0a7de745
A
6168 vm_map_t map,
6169 vm_map_offset_t start,
6170 vm_map_offset_t end,
6171 vm_prot_t caller_prot,
6172 vm_tag_t tag,
6173 boolean_t user_wire,
6174 pmap_t map_pmap,
6175 vm_map_offset_t pmap_addr,
6176 ppnum_t *physpage_p)
1c79356b 6177{
0a7de745
A
6178 vm_map_entry_t entry;
6179 vm_prot_t access_type;
6180 struct vm_map_entry *first_entry, tmp_entry;
6181 vm_map_t real_map;
6182 vm_map_offset_t s, e;
6183 kern_return_t rc;
6184 boolean_t need_wakeup;
6185 boolean_t main_map = FALSE;
6186 wait_interrupt_t interruptible_state;
6187 thread_t cur_thread;
6188 unsigned int last_timestamp;
6189 vm_map_size_t size;
6190 boolean_t wire_and_extract;
fe8ab488 6191
3e170ce0
A
6192 access_type = (caller_prot & VM_PROT_ALL);
6193
fe8ab488
A
6194 wire_and_extract = FALSE;
6195 if (physpage_p != NULL) {
6196 /*
6197 * The caller wants the physical page number of the
6198 * wired page. We return only one physical page number
6199 * so this works for only one page at a time.
6200 */
6201 if ((end - start) != PAGE_SIZE) {
6202 return KERN_INVALID_ARGUMENT;
6203 }
6204 wire_and_extract = TRUE;
6205 *physpage_p = 0;
6206 }
1c79356b
A
6207
6208 vm_map_lock(map);
0a7de745 6209 if (map_pmap == NULL) {
1c79356b 6210 main_map = TRUE;
0a7de745 6211 }
1c79356b
A
6212 last_timestamp = map->timestamp;
6213
6214 VM_MAP_RANGE_CHECK(map, start, end);
6215 assert(page_aligned(start));
6216 assert(page_aligned(end));
39236c6e
A
6217 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6218 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
6219 if (start == end) {
6220 /* We wired what the caller asked for, zero pages */
6221 vm_map_unlock(map);
6222 return KERN_SUCCESS;
6223 }
1c79356b 6224
2d21ac55
A
6225 need_wakeup = FALSE;
6226 cur_thread = current_thread();
6227
6228 s = start;
6229 rc = KERN_SUCCESS;
6230
6231 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 6232 entry = first_entry;
2d21ac55
A
6233 /*
6234 * vm_map_clip_start will be done later.
6235 * We don't want to unnest any nested submaps here !
6236 */
1c79356b
A
6237 } else {
6238 /* Start address is not in map */
2d21ac55
A
6239 rc = KERN_INVALID_ADDRESS;
6240 goto done;
1c79356b
A
6241 }
6242
2d21ac55
A
6243 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6244 /*
6245 * At this point, we have wired from "start" to "s".
6246 * We still need to wire from "s" to "end".
6247 *
6248 * "entry" hasn't been clipped, so it could start before "s"
6249 * and/or end after "end".
6250 */
6251
6252 /* "e" is how far we want to wire in this entry */
6253 e = entry->vme_end;
0a7de745 6254 if (e > end) {
2d21ac55 6255 e = end;
0a7de745 6256 }
2d21ac55 6257
1c79356b
A
6258 /*
6259 * If another thread is wiring/unwiring this entry then
6260 * block after informing other thread to wake us up.
6261 */
6262 if (entry->in_transition) {
9bccf70c
A
6263 wait_result_t wait_result;
6264
1c79356b
A
6265 /*
6266 * We have not clipped the entry. Make sure that
6267 * the start address is in range so that the lookup
6268 * below will succeed.
2d21ac55
A
6269 * "s" is the current starting point: we've already
6270 * wired from "start" to "s" and we still have
6271 * to wire from "s" to "end".
1c79356b 6272 */
1c79356b
A
6273
6274 entry->needs_wakeup = TRUE;
6275
6276 /*
6277 * wake up anybody waiting on entries that we have
6278 * already wired.
6279 */
6280 if (need_wakeup) {
6281 vm_map_entry_wakeup(map);
6282 need_wakeup = FALSE;
6283 }
6284 /*
6285 * User wiring is interruptible
6286 */
5ba3f43e 6287 wait_result = vm_map_entry_wait(map,
0a7de745
A
6288 (user_wire) ? THREAD_ABORTSAFE :
6289 THREAD_UNINT);
6290 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6291 /*
6292 * undo the wirings we have done so far
6293 * We do not clear the needs_wakeup flag,
6294 * because we cannot tell if we were the
6295 * only one waiting.
6296 */
2d21ac55
A
6297 rc = KERN_FAILURE;
6298 goto done;
1c79356b
A
6299 }
6300
1c79356b
A
6301 /*
6302 * Cannot avoid a lookup here. reset timestamp.
6303 */
6304 last_timestamp = map->timestamp;
6305
6306 /*
6307 * The entry could have been clipped, look it up again.
6308 * Worst that can happen is that it may not exist anymore.
6309 */
6310 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6311 /*
6312 * User: undo everything up to the previous
6313 * entry. Let vm_map_unwire worry about
6314 * checking the validity of the range.
6315 */
2d21ac55
A
6316 rc = KERN_FAILURE;
6317 goto done;
1c79356b
A
6318 }
6319 entry = first_entry;
6320 continue;
6321 }
5ba3f43e 6322
2d21ac55 6323 if (entry->is_sub_map) {
0a7de745
A
6324 vm_map_offset_t sub_start;
6325 vm_map_offset_t sub_end;
6326 vm_map_offset_t local_start;
6327 vm_map_offset_t local_end;
6328 pmap_t pmap;
2d21ac55 6329
fe8ab488
A
6330 if (wire_and_extract) {
6331 /*
6332 * Wiring would result in copy-on-write
6333 * which would not be compatible with
6334 * the sharing we have with the original
6335 * provider of this memory.
6336 */
6337 rc = KERN_INVALID_ARGUMENT;
6338 goto done;
6339 }
6340
2d21ac55 6341 vm_map_clip_start(map, entry, s);
1c79356b
A
6342 vm_map_clip_end(map, entry, end);
6343
3e170ce0 6344 sub_start = VME_OFFSET(entry);
2d21ac55 6345 sub_end = entry->vme_end;
3e170ce0 6346 sub_end += VME_OFFSET(entry) - entry->vme_start;
5ba3f43e 6347
1c79356b 6348 local_end = entry->vme_end;
0a7de745
A
6349 if (map_pmap == NULL) {
6350 vm_object_t object;
6351 vm_object_offset_t offset;
6352 vm_prot_t prot;
6353 boolean_t wired;
6354 vm_map_entry_t local_entry;
6355 vm_map_version_t version;
6356 vm_map_t lookup_map;
6357
6358 if (entry->use_pmap) {
3e170ce0 6359 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
6360 /* ppc implementation requires that */
6361 /* submaps pmap address ranges line */
6362 /* up with parent map */
6363#ifdef notdef
6364 pmap_addr = sub_start;
6365#endif
2d21ac55 6366 pmap_addr = s;
1c79356b
A
6367 } else {
6368 pmap = map->pmap;
2d21ac55 6369 pmap_addr = s;
1c79356b 6370 }
2d21ac55 6371
1c79356b 6372 if (entry->wired_count) {
0a7de745 6373 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6374 goto done;
0a7de745 6375 }
2d21ac55
A
6376
6377 /*
6378 * The map was not unlocked:
6379 * no need to goto re-lookup.
6380 * Just go directly to next entry.
6381 */
1c79356b 6382 entry = entry->vme_next;
2d21ac55 6383 s = entry->vme_start;
1c79356b 6384 continue;
2d21ac55 6385 }
9bccf70c 6386
2d21ac55
A
6387 /* call vm_map_lookup_locked to */
6388 /* cause any needs copy to be */
6389 /* evaluated */
6390 local_start = entry->vme_start;
6391 lookup_map = map;
6392 vm_map_lock_write_to_read(map);
0a7de745
A
6393 if (vm_map_lookup_locked(
6394 &lookup_map, local_start,
6395 access_type | VM_PROT_COPY,
6396 OBJECT_LOCK_EXCLUSIVE,
6397 &version, &object,
6398 &offset, &prot, &wired,
6399 NULL,
6400 &real_map)) {
2d21ac55 6401 vm_map_unlock_read(lookup_map);
4bd07ac2 6402 assert(map_pmap == NULL);
2d21ac55 6403 vm_map_unwire(map, start,
0a7de745
A
6404 s, user_wire);
6405 return KERN_FAILURE;
2d21ac55 6406 }
316670eb 6407 vm_object_unlock(object);
0a7de745 6408 if (real_map != lookup_map) {
2d21ac55 6409 vm_map_unlock(real_map);
0a7de745 6410 }
2d21ac55
A
6411 vm_map_unlock_read(lookup_map);
6412 vm_map_lock(map);
1c79356b 6413
2d21ac55 6414 /* we unlocked, so must re-lookup */
5ba3f43e 6415 if (!vm_map_lookup_entry(map,
0a7de745
A
6416 local_start,
6417 &local_entry)) {
2d21ac55
A
6418 rc = KERN_FAILURE;
6419 goto done;
6420 }
6421
6422 /*
6423 * entry could have been "simplified",
6424 * so re-clip
6425 */
6426 entry = local_entry;
6427 assert(s == local_start);
6428 vm_map_clip_start(map, entry, s);
6429 vm_map_clip_end(map, entry, end);
6430 /* re-compute "e" */
6431 e = entry->vme_end;
0a7de745 6432 if (e > end) {
2d21ac55 6433 e = end;
0a7de745 6434 }
2d21ac55
A
6435
6436 /* did we have a change of type? */
6437 if (!entry->is_sub_map) {
6438 last_timestamp = map->timestamp;
6439 continue;
1c79356b
A
6440 }
6441 } else {
9bccf70c 6442 local_start = entry->vme_start;
2d21ac55
A
6443 pmap = map_pmap;
6444 }
6445
0a7de745 6446 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6447 goto done;
0a7de745 6448 }
2d21ac55
A
6449
6450 entry->in_transition = TRUE;
6451
6452 vm_map_unlock(map);
5ba3f43e 6453 rc = vm_map_wire_nested(VME_SUBMAP(entry),
0a7de745
A
6454 sub_start, sub_end,
6455 caller_prot, tag,
6456 user_wire, pmap, pmap_addr,
6457 NULL);
2d21ac55 6458 vm_map_lock(map);
9bccf70c 6459
1c79356b
A
6460 /*
6461 * Find the entry again. It could have been clipped
6462 * after we unlocked the map.
6463 */
9bccf70c 6464 if (!vm_map_lookup_entry(map, local_start,
0a7de745 6465 &first_entry)) {
9bccf70c 6466 panic("vm_map_wire: re-lookup failed");
0a7de745 6467 }
9bccf70c 6468 entry = first_entry;
1c79356b 6469
2d21ac55
A
6470 assert(local_start == s);
6471 /* re-compute "e" */
6472 e = entry->vme_end;
0a7de745 6473 if (e > end) {
2d21ac55 6474 e = end;
0a7de745 6475 }
2d21ac55 6476
1c79356b
A
6477 last_timestamp = map->timestamp;
6478 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6479 (entry->vme_start < e)) {
1c79356b
A
6480 assert(entry->in_transition);
6481 entry->in_transition = FALSE;
6482 if (entry->needs_wakeup) {
6483 entry->needs_wakeup = FALSE;
6484 need_wakeup = TRUE;
6485 }
6486 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 6487 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6488 }
6489 entry = entry->vme_next;
6490 }
0a7de745 6491 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6492 goto done;
1c79356b 6493 }
2d21ac55
A
6494
6495 /* no need to relookup again */
6496 s = entry->vme_start;
1c79356b
A
6497 continue;
6498 }
6499
6500 /*
6501 * If this entry is already wired then increment
6502 * the appropriate wire reference count.
6503 */
9bccf70c 6504 if (entry->wired_count) {
fe8ab488
A
6505 if ((entry->protection & access_type) != access_type) {
6506 /* found a protection problem */
6507
6508 /*
6509 * XXX FBDP
6510 * We should always return an error
6511 * in this case but since we didn't
6512 * enforce it before, let's do
6513 * it only for the new "wire_and_extract"
6514 * code path for now...
6515 */
6516 if (wire_and_extract) {
6517 rc = KERN_PROTECTION_FAILURE;
6518 goto done;
6519 }
6520 }
6521
1c79356b
A
6522 /*
6523 * entry is already wired down, get our reference
6524 * after clipping to our range.
6525 */
2d21ac55 6526 vm_map_clip_start(map, entry, s);
1c79356b 6527 vm_map_clip_end(map, entry, end);
1c79356b 6528
0a7de745 6529 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6530 goto done;
0a7de745 6531 }
2d21ac55 6532
fe8ab488 6533 if (wire_and_extract) {
0a7de745
A
6534 vm_object_t object;
6535 vm_object_offset_t offset;
6536 vm_page_t m;
fe8ab488
A
6537
6538 /*
6539 * We don't have to "wire" the page again
6540 * but we still have to "extract" its
6541 * physical page number, after some sanity
6542 * checks.
6543 */
6544 assert((entry->vme_end - entry->vme_start)
0a7de745 6545 == PAGE_SIZE);
fe8ab488
A
6546 assert(!entry->needs_copy);
6547 assert(!entry->is_sub_map);
3e170ce0 6548 assert(VME_OBJECT(entry));
fe8ab488 6549 if (((entry->vme_end - entry->vme_start)
0a7de745 6550 != PAGE_SIZE) ||
fe8ab488
A
6551 entry->needs_copy ||
6552 entry->is_sub_map ||
3e170ce0 6553 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6554 rc = KERN_INVALID_ARGUMENT;
6555 goto done;
6556 }
6557
3e170ce0
A
6558 object = VME_OBJECT(entry);
6559 offset = VME_OFFSET(entry);
fe8ab488
A
6560 /* need exclusive lock to update m->dirty */
6561 if (entry->protection & VM_PROT_WRITE) {
6562 vm_object_lock(object);
6563 } else {
6564 vm_object_lock_shared(object);
6565 }
6566 m = vm_page_lookup(object, offset);
6567 assert(m != VM_PAGE_NULL);
39037602
A
6568 assert(VM_PAGE_WIRED(m));
6569 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6570 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
6571 if (entry->protection & VM_PROT_WRITE) {
6572 vm_object_lock_assert_exclusive(
39037602 6573 object);
d9a64523 6574 m->vmp_dirty = TRUE;
fe8ab488
A
6575 }
6576 } else {
6577 /* not already wired !? */
6578 *physpage_p = 0;
6579 }
6580 vm_object_unlock(object);
6581 }
6582
2d21ac55 6583 /* map was not unlocked: no need to relookup */
1c79356b 6584 entry = entry->vme_next;
2d21ac55 6585 s = entry->vme_start;
1c79356b
A
6586 continue;
6587 }
6588
6589 /*
6590 * Unwired entry or wire request transmitted via submap
6591 */
6592
5ba3f43e
A
6593 /*
6594 * Wiring would copy the pages to the shadow object.
6595 * The shadow object would not be code-signed so
6596 * attempting to execute code from these copied pages
6597 * would trigger a code-signing violation.
6598 */
d9a64523
A
6599
6600 if ((entry->protection & VM_PROT_EXECUTE)
6601#if !CONFIG_EMBEDDED
6602 &&
6603 map != kernel_map &&
6604 cs_process_enforcement(NULL)
6605#endif /* !CONFIG_EMBEDDED */
0a7de745 6606 ) {
5ba3f43e
A
6607#if MACH_ASSERT
6608 printf("pid %d[%s] wiring executable range from "
0a7de745
A
6609 "0x%llx to 0x%llx: rejected to preserve "
6610 "code-signing\n",
6611 proc_selfpid(),
6612 (current_task()->bsd_info
6613 ? proc_name_address(current_task()->bsd_info)
6614 : "?"),
6615 (uint64_t) entry->vme_start,
6616 (uint64_t) entry->vme_end);
5ba3f43e
A
6617#endif /* MACH_ASSERT */
6618 DTRACE_VM2(cs_executable_wire,
0a7de745
A
6619 uint64_t, (uint64_t)entry->vme_start,
6620 uint64_t, (uint64_t)entry->vme_end);
5ba3f43e
A
6621 cs_executable_wire++;
6622 rc = KERN_PROTECTION_FAILURE;
6623 goto done;
6624 }
39037602 6625
1c79356b
A
6626 /*
6627 * Perform actions of vm_map_lookup that need the write
6628 * lock on the map: create a shadow object for a
6629 * copy-on-write region, or an object for a zero-fill
6630 * region.
6631 */
6632 size = entry->vme_end - entry->vme_start;
6633 /*
6634 * If wiring a copy-on-write page, we need to copy it now
6635 * even if we're only (currently) requesting read access.
6636 * This is aggressive, but once it's wired we can't move it.
6637 */
6638 if (entry->needs_copy) {
fe8ab488
A
6639 if (wire_and_extract) {
6640 /*
6641 * We're supposed to share with the original
6642 * provider so should not be "needs_copy"
6643 */
6644 rc = KERN_INVALID_ARGUMENT;
6645 goto done;
6646 }
3e170ce0
A
6647
6648 VME_OBJECT_SHADOW(entry, size);
1c79356b 6649 entry->needs_copy = FALSE;
3e170ce0 6650 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6651 if (wire_and_extract) {
6652 /*
6653 * We're supposed to share with the original
6654 * provider so should already have an object.
6655 */
6656 rc = KERN_INVALID_ARGUMENT;
6657 goto done;
6658 }
3e170ce0
A
6659 VME_OBJECT_SET(entry, vm_object_allocate(size));
6660 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 6661 assert(entry->use_pmap);
1c79356b
A
6662 }
6663
2d21ac55 6664 vm_map_clip_start(map, entry, s);
1c79356b
A
6665 vm_map_clip_end(map, entry, end);
6666
2d21ac55 6667 /* re-compute "e" */
1c79356b 6668 e = entry->vme_end;
0a7de745 6669 if (e > end) {
2d21ac55 6670 e = end;
0a7de745 6671 }
1c79356b
A
6672
6673 /*
6674 * Check for holes and protection mismatch.
6675 * Holes: Next entry should be contiguous unless this
6676 * is the end of the region.
6677 * Protection: Access requested must be allowed, unless
6678 * wiring is by protection class
6679 */
2d21ac55
A
6680 if ((entry->vme_end < end) &&
6681 ((entry->vme_next == vm_map_to_entry(map)) ||
0a7de745 6682 (entry->vme_next->vme_start > entry->vme_end))) {
2d21ac55
A
6683 /* found a hole */
6684 rc = KERN_INVALID_ADDRESS;
6685 goto done;
6686 }
6687 if ((entry->protection & access_type) != access_type) {
6688 /* found a protection problem */
6689 rc = KERN_PROTECTION_FAILURE;
6690 goto done;
1c79356b
A
6691 }
6692
6693 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6694
0a7de745 6695 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6696 goto done;
0a7de745 6697 }
1c79356b
A
6698
6699 entry->in_transition = TRUE;
6700
6701 /*
6702 * This entry might get split once we unlock the map.
6703 * In vm_fault_wire(), we need the current range as
6704 * defined by this entry. In order for this to work
6705 * along with a simultaneous clip operation, we make a
6706 * temporary copy of this entry and use that for the
6707 * wiring. Note that the underlying objects do not
6708 * change during a clip.
6709 */
6710 tmp_entry = *entry;
6711
6712 /*
6713 * The in_transition state guarantees that the entry
6714 * (or entries for this range, if a split occurred) will be
6715 * there when the map lock is acquired for the second time.
6716 */
6717 vm_map_unlock(map);
0b4e3aa0 6718
0a7de745 6719 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6720 interruptible_state = thread_interrupt_level(THREAD_UNINT);
0a7de745 6721 } else {
91447636 6722 interruptible_state = THREAD_UNINT;
0a7de745 6723 }
9bccf70c 6724
0a7de745 6725 if (map_pmap) {
5ba3f43e 6726 rc = vm_fault_wire(map,
0a7de745
A
6727 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6728 physpage_p);
6729 } else {
5ba3f43e 6730 rc = vm_fault_wire(map,
0a7de745
A
6731 &tmp_entry, caller_prot, tag, map->pmap,
6732 tmp_entry.vme_start,
6733 physpage_p);
6734 }
0b4e3aa0 6735
0a7de745 6736 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6737 thread_interrupt_level(interruptible_state);
0a7de745 6738 }
0b4e3aa0 6739
1c79356b
A
6740 vm_map_lock(map);
6741
0a7de745 6742 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
6743 /*
6744 * Find the entry again. It could have been clipped
6745 * after we unlocked the map.
6746 */
6747 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745 6748 &first_entry)) {
1c79356b 6749 panic("vm_map_wire: re-lookup failed");
0a7de745 6750 }
1c79356b
A
6751
6752 entry = first_entry;
6753 }
6754
6755 last_timestamp = map->timestamp;
6756
6757 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6758 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
6759 assert(entry->in_transition);
6760 entry->in_transition = FALSE;
6761 if (entry->needs_wakeup) {
6762 entry->needs_wakeup = FALSE;
6763 need_wakeup = TRUE;
6764 }
0a7de745 6765 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6766 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6767 }
6768 entry = entry->vme_next;
6769 }
6770
0a7de745 6771 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6772 goto done;
1c79356b 6773 }
2d21ac55 6774
d190cdc3
A
6775 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6776 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6777 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6778 /* found a "new" hole */
6779 s = tmp_entry.vme_end;
6780 rc = KERN_INVALID_ADDRESS;
6781 goto done;
6782 }
6783
2d21ac55 6784 s = entry->vme_start;
1c79356b 6785 } /* end while loop through map entries */
2d21ac55
A
6786
6787done:
6788 if (rc == KERN_SUCCESS) {
6789 /* repair any damage we may have made to the VM map */
6790 vm_map_simplify_range(map, start, end);
6791 }
6792
1c79356b
A
6793 vm_map_unlock(map);
6794
6795 /*
6796 * wake up anybody waiting on entries we wired.
6797 */
0a7de745 6798 if (need_wakeup) {
1c79356b 6799 vm_map_entry_wakeup(map);
0a7de745 6800 }
1c79356b 6801
2d21ac55
A
6802 if (rc != KERN_SUCCESS) {
6803 /* undo what has been wired so far */
4bd07ac2 6804 vm_map_unwire_nested(map, start, s, user_wire,
0a7de745 6805 map_pmap, pmap_addr);
fe8ab488
A
6806 if (physpage_p) {
6807 *physpage_p = 0;
6808 }
2d21ac55
A
6809 }
6810
6811 return rc;
1c79356b
A
6812}
6813
6814kern_return_t
3e170ce0 6815vm_map_wire_external(
0a7de745
A
6816 vm_map_t map,
6817 vm_map_offset_t start,
6818 vm_map_offset_t end,
6819 vm_prot_t caller_prot,
6820 boolean_t user_wire)
1c79356b 6821{
0a7de745 6822 kern_return_t kret;
3e170ce0 6823
5ba3f43e 6824 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
0a7de745 6825 user_wire, (pmap_t)NULL, 0, NULL);
3e170ce0
A
6826 return kret;
6827}
1c79356b 6828
3e170ce0 6829kern_return_t
5ba3f43e 6830vm_map_wire_kernel(
0a7de745
A
6831 vm_map_t map,
6832 vm_map_offset_t start,
6833 vm_map_offset_t end,
6834 vm_prot_t caller_prot,
6835 vm_tag_t tag,
6836 boolean_t user_wire)
3e170ce0 6837{
0a7de745 6838 kern_return_t kret;
1c79356b 6839
5ba3f43e 6840 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
0a7de745 6841 user_wire, (pmap_t)NULL, 0, NULL);
fe8ab488
A
6842 return kret;
6843}
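/*
 * Illustrative sketch (not part of the original source): a kernel
 * subsystem wiring a page-aligned range for read/write access and
 * unwiring it when done.  The function name, "start"/"size" values and
 * the VM_KERN_MEMORY_OSFMK tag are placeholders chosen for the
 * example; only the vm_map_wire_kernel()/vm_map_unwire() signatures
 * used in this file are assumed.
 */
#if 0 /* example only */
static kern_return_t
example_wire_buffer(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
	kern_return_t kr;

	/* "start" and "size" are assumed page-aligned, per the asserts above */
	kr = vm_map_wire_kernel(map, start, start + size,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,     /* placeholder accounting tag */
	    FALSE);                   /* user_wire == FALSE: kernel wiring */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* ... touch the wired pages without risking a fault ... */

	vm_map_unwire(map, start, start + size, FALSE);
	return KERN_SUCCESS;
}
#endif /* example only */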
6844
6845kern_return_t
3e170ce0 6846vm_map_wire_and_extract_external(
0a7de745
A
6847 vm_map_t map,
6848 vm_map_offset_t start,
6849 vm_prot_t caller_prot,
6850 boolean_t user_wire,
6851 ppnum_t *physpage_p)
fe8ab488 6852{
0a7de745 6853 kern_return_t kret;
3e170ce0 6854
3e170ce0 6855 kret = vm_map_wire_nested(map,
0a7de745
A
6856 start,
6857 start + VM_MAP_PAGE_SIZE(map),
6858 caller_prot,
6859 vm_tag_bt(),
6860 user_wire,
6861 (pmap_t)NULL,
6862 0,
6863 physpage_p);
3e170ce0
A
6864 if (kret != KERN_SUCCESS &&
6865 physpage_p != NULL) {
6866 *physpage_p = 0;
6867 }
6868 return kret;
6869}
fe8ab488 6870
3e170ce0 6871kern_return_t
5ba3f43e 6872vm_map_wire_and_extract_kernel(
0a7de745
A
6873 vm_map_t map,
6874 vm_map_offset_t start,
6875 vm_prot_t caller_prot,
6876 vm_tag_t tag,
6877 boolean_t user_wire,
6878 ppnum_t *physpage_p)
3e170ce0 6879{
0a7de745 6880 kern_return_t kret;
fe8ab488
A
6881
6882 kret = vm_map_wire_nested(map,
0a7de745
A
6883 start,
6884 start + VM_MAP_PAGE_SIZE(map),
6885 caller_prot,
6886 tag,
6887 user_wire,
6888 (pmap_t)NULL,
6889 0,
6890 physpage_p);
fe8ab488
A
6891 if (kret != KERN_SUCCESS &&
6892 physpage_p != NULL) {
6893 *physpage_p = 0;
6894 }
1c79356b
A
6895 return kret;
6896}
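/*
 * Illustrative sketch (not part of the original source): wiring exactly
 * one page and retrieving its physical page number.  The *_and_extract
 * variants cover a single VM page because they return only one ppnum;
 * the tag below is a placeholder and the function name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_wire_one_page(vm_map_t map, vm_map_offset_t page_addr, ppnum_t *ppnum_out)
{
	return vm_map_wire_and_extract_kernel(map,
	    page_addr,                    /* start of a single VM page */
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,         /* placeholder accounting tag */
	    FALSE,                        /* kernel wiring */
	    ppnum_out);                   /* physical page number on success */
}
#endif /* example only */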
6897
6898/*
6899 * vm_map_unwire:
6900 *
6901 * Sets the pageability of the specified address range in the target map
6902 * as pageable. Regions specified must have been wired previously.
6903 *
6904 * The map must not be locked, but a reference must remain to the map
6905 * throughout the call.
6906 *
6907 * Kernel will panic on failures. User unwire ignores holes and
6908 * unwired and in-transition entries to avoid losing memory by leaving
6909 * it unwired.
6910 */
91447636 6911static kern_return_t
1c79356b 6912vm_map_unwire_nested(
0a7de745
A
6913 vm_map_t map,
6914 vm_map_offset_t start,
6915 vm_map_offset_t end,
6916 boolean_t user_wire,
6917 pmap_t map_pmap,
6918 vm_map_offset_t pmap_addr)
1c79356b 6919{
0a7de745
A
6920 vm_map_entry_t entry;
6921 struct vm_map_entry *first_entry, tmp_entry;
6922 boolean_t need_wakeup;
6923 boolean_t main_map = FALSE;
6924 unsigned int last_timestamp;
1c79356b
A
6925
6926 vm_map_lock(map);
0a7de745 6927 if (map_pmap == NULL) {
1c79356b 6928 main_map = TRUE;
0a7de745 6929 }
1c79356b
A
6930 last_timestamp = map->timestamp;
6931
6932 VM_MAP_RANGE_CHECK(map, start, end);
6933 assert(page_aligned(start));
6934 assert(page_aligned(end));
39236c6e
A
6935 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6936 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 6937
2d21ac55
A
6938 if (start == end) {
6939 /* We unwired what the caller asked for: zero pages */
6940 vm_map_unlock(map);
6941 return KERN_SUCCESS;
6942 }
6943
1c79356b
A
6944 if (vm_map_lookup_entry(map, start, &first_entry)) {
6945 entry = first_entry;
2d21ac55
A
6946 /*
6947 * vm_map_clip_start will be done later.
6948 * We don't want to unnest any nested sub maps here !
6949 */
0a7de745 6950 } else {
2d21ac55
A
6951 if (!user_wire) {
6952 panic("vm_map_unwire: start not found");
6953 }
1c79356b
A
6954 /* Start address is not in map. */
6955 vm_map_unlock(map);
0a7de745 6956 return KERN_INVALID_ADDRESS;
1c79356b
A
6957 }
6958
b0d623f7
A
6959 if (entry->superpage_size) {
6960 /* superpages are always wired */
6961 vm_map_unlock(map);
6962 return KERN_INVALID_ADDRESS;
6963 }
6964
1c79356b
A
6965 need_wakeup = FALSE;
6966 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6967 if (entry->in_transition) {
6968 /*
6969 * 1)
6970 * Another thread is wiring down this entry. Note
6971 * that if it were not for the other thread, we would
6972 * be unwiring an unwired entry. This is not
6973 * permitted. If we wait, we will be unwiring memory
6974 * we did not wire.
6975 *
6976 * 2)
6977 * Another thread is unwiring this entry. We did not
6978 * have a reference to it, because if we did, this
6979 * entry will not be getting unwired now.
6980 */
2d21ac55
A
6981 if (!user_wire) {
6982 /*
6983 * XXX FBDP
6984 * This could happen: there could be some
6985 * overlapping vslock/vsunlock operations
6986 * going on.
6987 * We should probably just wait and retry,
6988 * but then we have to be careful that this
5ba3f43e 6989 * entry could get "simplified" after
2d21ac55
A
6990 * "in_transition" gets unset and before
6991 * we re-lookup the entry, so we would
6992 * have to re-clip the entry to avoid
6993 * re-unwiring what we have already unwired...
6994 * See vm_map_wire_nested().
6995 *
6996 * Or we could just ignore "in_transition"
6997 * here and proceed to decrement the wired
6998 * count(s) on this entry. That should be fine
6999 * as long as "wired_count" doesn't drop all
7000 * the way to 0 (and we should panic if THAT
7001 * happens).
7002 */
1c79356b 7003 panic("vm_map_unwire: in_transition entry");
2d21ac55 7004 }
1c79356b
A
7005
7006 entry = entry->vme_next;
7007 continue;
7008 }
7009
2d21ac55 7010 if (entry->is_sub_map) {
0a7de745
A
7011 vm_map_offset_t sub_start;
7012 vm_map_offset_t sub_end;
7013 vm_map_offset_t local_end;
7014 pmap_t pmap;
5ba3f43e 7015
1c79356b
A
7016 vm_map_clip_start(map, entry, start);
7017 vm_map_clip_end(map, entry, end);
7018
3e170ce0 7019 sub_start = VME_OFFSET(entry);
1c79356b 7020 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 7021 sub_end += VME_OFFSET(entry);
1c79356b 7022 local_end = entry->vme_end;
0a7de745
A
7023 if (map_pmap == NULL) {
7024 if (entry->use_pmap) {
3e170ce0 7025 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 7026 pmap_addr = sub_start;
2d21ac55 7027 } else {
1c79356b 7028 pmap = map->pmap;
9bccf70c 7029 pmap_addr = start;
2d21ac55
A
7030 }
7031 if (entry->wired_count == 0 ||
7032 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7033 if (!user_wire) {
2d21ac55 7034 panic("vm_map_unwire: entry is unwired");
0a7de745 7035 }
2d21ac55
A
7036 entry = entry->vme_next;
7037 continue;
7038 }
7039
7040 /*
7041 * Check for holes
7042 * Holes: Next entry should be contiguous unless
7043 * this is the end of the region.
7044 */
5ba3f43e 7045 if (((entry->vme_end < end) &&
0a7de745
A
7046 ((entry->vme_next == vm_map_to_entry(map)) ||
7047 (entry->vme_next->vme_start
7048 > entry->vme_end)))) {
7049 if (!user_wire) {
2d21ac55 7050 panic("vm_map_unwire: non-contiguous region");
0a7de745 7051 }
1c79356b 7052/*
0a7de745
A
7053 * entry = entry->vme_next;
7054 * continue;
7055 */
2d21ac55 7056 }
1c79356b 7057
2d21ac55 7058 subtract_wire_counts(map, entry, user_wire);
1c79356b 7059
2d21ac55
A
7060 if (entry->wired_count != 0) {
7061 entry = entry->vme_next;
7062 continue;
7063 }
1c79356b 7064
2d21ac55
A
7065 entry->in_transition = TRUE;
7066 tmp_entry = *entry;/* see comment in vm_map_wire() */
7067
7068 /*
7069 * We can unlock the map now. The in_transition state
7070 * guarantees existence of the entry.
7071 */
7072 vm_map_unlock(map);
5ba3f43e 7073 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745 7074 sub_start, sub_end, user_wire, pmap, pmap_addr);
2d21ac55 7075 vm_map_lock(map);
1c79356b 7076
0a7de745 7077 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7078 /*
5ba3f43e 7079 * Find the entry again. It could have been
2d21ac55
A
7080 * clipped or deleted after we unlocked the map.
7081 */
5ba3f43e 7082 if (!vm_map_lookup_entry(map,
0a7de745
A
7083 tmp_entry.vme_start,
7084 &first_entry)) {
7085 if (!user_wire) {
2d21ac55 7086 panic("vm_map_unwire: re-lookup failed");
0a7de745 7087 }
2d21ac55 7088 entry = first_entry->vme_next;
0a7de745 7089 } else {
2d21ac55 7090 entry = first_entry;
0a7de745 7091 }
2d21ac55
A
7092 }
7093 last_timestamp = map->timestamp;
1c79356b 7094
1c79356b 7095 /*
2d21ac55 7096 * clear transition bit for all constituent entries
5ba3f43e 7097 * that were in the original entry (saved in
2d21ac55
A
7098 * tmp_entry). Also check for waiters.
7099 */
7100 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7101 (entry->vme_start < tmp_entry.vme_end)) {
2d21ac55
A
7102 assert(entry->in_transition);
7103 entry->in_transition = FALSE;
7104 if (entry->needs_wakeup) {
7105 entry->needs_wakeup = FALSE;
7106 need_wakeup = TRUE;
7107 }
7108 entry = entry->vme_next;
1c79356b 7109 }
2d21ac55 7110 continue;
1c79356b 7111 } else {
2d21ac55 7112 vm_map_unlock(map);
3e170ce0 7113 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745
A
7114 sub_start, sub_end, user_wire, map_pmap,
7115 pmap_addr);
2d21ac55 7116 vm_map_lock(map);
1c79356b 7117
0a7de745 7118 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7119 /*
5ba3f43e 7120 * Find the entry again. It could have been
2d21ac55
A
7121 * clipped or deleted after we unlocked the map.
7122 */
5ba3f43e 7123 if (!vm_map_lookup_entry(map,
0a7de745
A
7124 tmp_entry.vme_start,
7125 &first_entry)) {
7126 if (!user_wire) {
2d21ac55 7127 panic("vm_map_unwire: re-lookup failed");
0a7de745 7128 }
2d21ac55 7129 entry = first_entry->vme_next;
0a7de745 7130 } else {
2d21ac55 7131 entry = first_entry;
0a7de745 7132 }
2d21ac55
A
7133 }
7134 last_timestamp = map->timestamp;
1c79356b
A
7135 }
7136 }
7137
7138
9bccf70c 7139 if ((entry->wired_count == 0) ||
2d21ac55 7140 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7141 if (!user_wire) {
1c79356b 7142 panic("vm_map_unwire: entry is unwired");
0a7de745 7143 }
1c79356b
A
7144
7145 entry = entry->vme_next;
7146 continue;
7147 }
5ba3f43e 7148
1c79356b 7149 assert(entry->wired_count > 0 &&
0a7de745 7150 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
7151
7152 vm_map_clip_start(map, entry, start);
7153 vm_map_clip_end(map, entry, end);
7154
7155 /*
7156 * Check for holes
7157 * Holes: Next entry should be contiguous unless
7158 * this is the end of the region.
7159 */
5ba3f43e 7160 if (((entry->vme_end < end) &&
0a7de745
A
7161 ((entry->vme_next == vm_map_to_entry(map)) ||
7162 (entry->vme_next->vme_start > entry->vme_end)))) {
7163 if (!user_wire) {
1c79356b 7164 panic("vm_map_unwire: non-contiguous region");
0a7de745 7165 }
1c79356b
A
7166 entry = entry->vme_next;
7167 continue;
7168 }
7169
2d21ac55 7170 subtract_wire_counts(map, entry, user_wire);
1c79356b 7171
9bccf70c 7172 if (entry->wired_count != 0) {
1c79356b
A
7173 entry = entry->vme_next;
7174 continue;
1c79356b
A
7175 }
7176
0a7de745 7177 if (entry->zero_wired_pages) {
b0d623f7
A
7178 entry->zero_wired_pages = FALSE;
7179 }
7180
1c79356b 7181 entry->in_transition = TRUE;
0a7de745 7182 tmp_entry = *entry; /* see comment in vm_map_wire() */
1c79356b
A
7183
7184 /*
7185 * We can unlock the map now. The in_transition state
7186 * guarantees existence of the entry.
7187 */
7188 vm_map_unlock(map);
0a7de745 7189 if (map_pmap) {
5ba3f43e 7190 vm_fault_unwire(map,
0a7de745 7191 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 7192 } else {
5ba3f43e 7193 vm_fault_unwire(map,
0a7de745
A
7194 &tmp_entry, FALSE, map->pmap,
7195 tmp_entry.vme_start);
1c79356b
A
7196 }
7197 vm_map_lock(map);
7198
0a7de745 7199 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
7200 /*
7201 * Find the entry again. It could have been clipped
7202 * or deleted after we unlocked the map.
7203 */
7204 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745
A
7205 &first_entry)) {
7206 if (!user_wire) {
2d21ac55 7207 panic("vm_map_unwire: re-lookup failed");
0a7de745 7208 }
1c79356b 7209 entry = first_entry->vme_next;
0a7de745 7210 } else {
1c79356b 7211 entry = first_entry;
0a7de745 7212 }
1c79356b
A
7213 }
7214 last_timestamp = map->timestamp;
7215
7216 /*
7217 * clear transition bit for all constituent entries that
7218 * were in the original entry (saved in tmp_entry). Also
7219 * check for waiters.
7220 */
7221 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7222 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
7223 assert(entry->in_transition);
7224 entry->in_transition = FALSE;
7225 if (entry->needs_wakeup) {
7226 entry->needs_wakeup = FALSE;
7227 need_wakeup = TRUE;
7228 }
7229 entry = entry->vme_next;
7230 }
7231 }
91447636
A
7232
7233 /*
7234 * We might have fragmented the address space when we wired this
7235 * range of addresses. Attempt to re-coalesce these VM map entries
7236 * with their neighbors now that they're no longer wired.
7237 * Under some circumstances, address space fragmentation can
7238 * prevent VM object shadow chain collapsing, which can cause
7239 * swap space leaks.
7240 */
7241 vm_map_simplify_range(map, start, end);
7242
1c79356b
A
7243 vm_map_unlock(map);
7244 /*
7245 * wake up anybody waiting on entries that we have unwired.
7246 */
0a7de745 7247 if (need_wakeup) {
1c79356b 7248 vm_map_entry_wakeup(map);
0a7de745
A
7249 }
7250 return KERN_SUCCESS;
1c79356b
A
7251}
7252
7253kern_return_t
7254vm_map_unwire(
0a7de745
A
7255 vm_map_t map,
7256 vm_map_offset_t start,
7257 vm_map_offset_t end,
7258 boolean_t user_wire)
1c79356b 7259{
5ba3f43e 7260 return vm_map_unwire_nested(map, start, end,
0a7de745 7261 user_wire, (pmap_t)NULL, 0);
1c79356b
A
7262}
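/*
 * Illustrative sketch (not part of vm_map.c): a minimal caller of the
 * vm_map_unwire() wrapper above. The map handle and range are assumptions
 * for the example; with user_wire == TRUE, holes and already-unwired
 * entries in the range are skipped rather than treated as fatal.
 */
static kern_return_t
example_release_user_wiring(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	/* Drop one user wiring on [start, end) of a user map. */
	return vm_map_unwire(map, start, end, TRUE /* user_wire */);
}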
7263
7264
7265/*
7266 * vm_map_entry_delete: [ internal use only ]
7267 *
7268 * Deallocate the given entry from the target map.
5ba3f43e 7269 */
91447636 7270static void
1c79356b 7271vm_map_entry_delete(
0a7de745
A
7272 vm_map_t map,
7273 vm_map_entry_t entry)
1c79356b 7274{
0a7de745
A
7275 vm_map_offset_t s, e;
7276 vm_object_t object;
7277 vm_map_t submap;
1c79356b
A
7278
7279 s = entry->vme_start;
7280 e = entry->vme_end;
7281 assert(page_aligned(s));
7282 assert(page_aligned(e));
39236c6e
A
7283 if (entry->map_aligned == TRUE) {
7284 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7285 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7286 }
1c79356b
A
7287 assert(entry->wired_count == 0);
7288 assert(entry->user_wired_count == 0);
b0d623f7 7289 assert(!entry->permanent);
1c79356b
A
7290
7291 if (entry->is_sub_map) {
7292 object = NULL;
3e170ce0 7293 submap = VME_SUBMAP(entry);
1c79356b
A
7294 } else {
7295 submap = NULL;
3e170ce0 7296 object = VME_OBJECT(entry);
1c79356b
A
7297 }
7298
6d2010ae 7299 vm_map_store_entry_unlink(map, entry);
1c79356b
A
7300 map->size -= e - s;
7301
7302 vm_map_entry_dispose(map, entry);
7303
7304 vm_map_unlock(map);
7305 /*
7306 * Deallocate the object only after removing all
7307 * pmap entries pointing to its pages.
7308 */
0a7de745 7309 if (submap) {
1c79356b 7310 vm_map_deallocate(submap);
0a7de745 7311 } else {
2d21ac55 7312 vm_object_deallocate(object);
0a7de745 7313 }
1c79356b
A
7314}
7315
7316void
7317vm_map_submap_pmap_clean(
0a7de745
A
7318 vm_map_t map,
7319 vm_map_offset_t start,
7320 vm_map_offset_t end,
7321 vm_map_t sub_map,
7322 vm_map_offset_t offset)
1c79356b 7323{
0a7de745
A
7324 vm_map_offset_t submap_start;
7325 vm_map_offset_t submap_end;
7326 vm_map_size_t remove_size;
7327 vm_map_entry_t entry;
1c79356b
A
7328
7329 submap_end = offset + (end - start);
7330 submap_start = offset;
b7266188
A
7331
7332 vm_map_lock_read(sub_map);
0a7de745 7333 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
1c79356b 7334 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7335 if (offset > entry->vme_start) {
1c79356b 7336 remove_size -= offset - entry->vme_start;
0a7de745 7337 }
5ba3f43e 7338
1c79356b 7339
0a7de745 7340 if (submap_end < entry->vme_end) {
1c79356b 7341 remove_size -=
0a7de745 7342 entry->vme_end - submap_end;
1c79356b 7343 }
0a7de745 7344 if (entry->is_sub_map) {
1c79356b
A
7345 vm_map_submap_pmap_clean(
7346 sub_map,
7347 start,
7348 start + remove_size,
3e170ce0
A
7349 VME_SUBMAP(entry),
7350 VME_OFFSET(entry));
1c79356b 7351 } else {
0a7de745
A
7352 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)
7353 && (VME_OBJECT(entry) != NULL)) {
3e170ce0
A
7354 vm_object_pmap_protect_options(
7355 VME_OBJECT(entry),
7356 (VME_OFFSET(entry) +
0a7de745
A
7357 offset -
7358 entry->vme_start),
9bccf70c
A
7359 remove_size,
7360 PMAP_NULL,
7361 entry->vme_start,
3e170ce0
A
7362 VM_PROT_NONE,
7363 PMAP_OPTIONS_REMOVE);
9bccf70c 7364 } else {
5ba3f43e 7365 pmap_remove(map->pmap,
0a7de745
A
7366 (addr64_t)start,
7367 (addr64_t)(start + remove_size));
9bccf70c 7368 }
1c79356b
A
7369 }
7370 }
7371
7372 entry = entry->vme_next;
2d21ac55 7373
0a7de745
A
7374 while ((entry != vm_map_to_entry(sub_map))
7375 && (entry->vme_start < submap_end)) {
5ba3f43e 7376 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7377 if (submap_end < entry->vme_end) {
1c79356b
A
7378 remove_size -= entry->vme_end - submap_end;
7379 }
0a7de745 7380 if (entry->is_sub_map) {
1c79356b
A
7381 vm_map_submap_pmap_clean(
7382 sub_map,
7383 (start + entry->vme_start) - offset,
7384 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
7385 VME_SUBMAP(entry),
7386 VME_OFFSET(entry));
1c79356b 7387 } else {
0a7de745
A
7388 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)
7389 && (VME_OBJECT(entry) != NULL)) {
3e170ce0
A
7390 vm_object_pmap_protect_options(
7391 VME_OBJECT(entry),
7392 VME_OFFSET(entry),
9bccf70c
A
7393 remove_size,
7394 PMAP_NULL,
7395 entry->vme_start,
3e170ce0
A
7396 VM_PROT_NONE,
7397 PMAP_OPTIONS_REMOVE);
9bccf70c 7398 } else {
5ba3f43e 7399 pmap_remove(map->pmap,
0a7de745
A
7400 (addr64_t)((start + entry->vme_start)
7401 - offset),
7402 (addr64_t)(((start + entry->vme_start)
7403 - offset) + remove_size));
9bccf70c 7404 }
1c79356b
A
7405 }
7406 entry = entry->vme_next;
b7266188
A
7407 }
7408 vm_map_unlock_read(sub_map);
1c79356b
A
7409 return;
7410}
7411
d9a64523
A
7412/*
7413 * virt_memory_guard_ast:
7414 *
7415 * Handle the AST callout for a virtual memory guard.
7416 * raise an EXC_GUARD exception and terminate the task
7417 * if configured to do so.
7418 */
7419void
7420virt_memory_guard_ast(
7421 thread_t thread,
7422 mach_exception_data_type_t code,
7423 mach_exception_data_type_t subcode)
7424{
7425 task_t task = thread->task;
7426 assert(task != kernel_task);
7427 assert(task == current_task());
7428 uint32_t behavior;
7429
7430 behavior = task->task_exc_guard;
7431
7432 /* Is delivery enabled */
7433 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7434 return;
7435 }
7436
7437 /* If only once, make sure we're that once */
7438 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7439 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7440
7441 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7442 break;
7443 }
7444 behavior = task->task_exc_guard;
7445 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7446 return;
7447 }
7448 }
7449
7450 /* Raise exception via corpse fork or synchronously */
7451 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7452 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7453 task_violated_guard(code, subcode, NULL);
7454 } else {
7455 task_exception_notify(EXC_GUARD, code, subcode);
7456 }
7457
7458 /* Terminate the task if desired */
7459 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7460 task_bsdtask_kill(current_task());
7461 }
7462}
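/*
 * Illustrative sketch (not part of vm_map.c): the lock-free "deliver once"
 * downgrade used by virt_memory_guard_ast() above, isolated. The flag bits
 * and helper name are assumptions for the example; the pattern is to retry
 * OSCompareAndSwap() until this thread either clears the DELIVER bit itself
 * or observes that another thread already has.
 */
#define EXAMPLE_GUARD_DELIVER 0x1u
#define EXAMPLE_GUARD_ONCE    0x2u

static boolean_t
example_claim_one_shot_delivery(volatile uint32_t *flags)
{
	uint32_t old = *flags;

	while ((old & EXAMPLE_GUARD_ONCE) && (old & EXAMPLE_GUARD_DELIVER)) {
		if (OSCompareAndSwap(old, old & ~EXAMPLE_GUARD_DELIVER, flags)) {
			return TRUE;    /* we cleared the bit: deliver exactly once */
		}
		old = *flags;           /* lost the race: re-read and re-check */
	}
	return (old & EXAMPLE_GUARD_DELIVER) != 0; /* not one-shot, or already claimed */
}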
7463
7464/*
7465 * vm_map_guard_exception:
7466 *
7467 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7468 *
7469 * Right now, we do this when we find nothing mapped, or a
7470 * gap in the mapping when a user address space deallocate
7471 * was requested. We report the address of the first gap found.
7472 */
7473static void
7474vm_map_guard_exception(
7475 vm_map_offset_t gap_start,
7476 unsigned reason)
7477{
7478 mach_exception_code_t code = 0;
7479 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7480 unsigned int target = 0; /* should we pass in pid associated with map? */
7481 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7482
7483 /* Can't deliver exceptions to kernel task */
0a7de745 7484 if (current_task() == kernel_task) {
d9a64523 7485 return;
0a7de745 7486 }
d9a64523
A
7487
7488 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7489 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7490 EXC_GUARD_ENCODE_TARGET(code, target);
7491 thread_guard_violation(current_thread(), code, subcode);
7492}
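/*
 * Illustrative sketch (not part of vm_map.c): the caller-side pattern used
 * by vm_map_delete() further below -- report the first gap found while
 * deallocating a user address range. The helper name is an assumption for
 * the example; kGUARD_EXC_DEALLOC_GAP is the reason actually passed below.
 */
static void
example_report_dealloc_gap(vm_map_t map, vm_map_offset_t gap_start)
{
	if (map == kernel_map) {
		return; /* gaps in the kernel map are never reported this way */
	}
	vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
}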
7493
1c79356b
A
7494/*
7495 * vm_map_delete: [ internal use only ]
7496 *
7497 * Deallocates the given address range from the target map.
7498 * Removes all user wirings. Unwires one kernel wiring if
7499 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7500 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7501 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7502 *
7503 * This routine is called with map locked and leaves map locked.
7504 */
91447636 7505static kern_return_t
1c79356b 7506vm_map_delete(
0a7de745
A
7507 vm_map_t map,
7508 vm_map_offset_t start,
7509 vm_map_offset_t end,
7510 int flags,
7511 vm_map_t zap_map)
1c79356b 7512{
0a7de745
A
7513 vm_map_entry_t entry, next;
7514 struct vm_map_entry *first_entry, tmp_entry;
7515 vm_map_offset_t s;
7516 vm_object_t object;
7517 boolean_t need_wakeup;
7518 unsigned int last_timestamp = ~0; /* unlikely value */
7519 int interruptible;
7520 vm_map_offset_t gap_start;
7521 vm_map_offset_t save_start = start;
7522 vm_map_offset_t save_end = end;
7523 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7524 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7525
7526 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
d9a64523 7527 gap_start = FIND_GAP;
0a7de745 7528 } else {
d9a64523 7529 gap_start = GAPS_OK;
0a7de745 7530 }
1c79356b 7531
5ba3f43e 7532 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
0a7de745 7533 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
7534
7535 /*
7536 * All our DMA I/O operations in IOKit are currently done by
7537 * wiring through the map entries of the task requesting the I/O.
7538 * Because of this, we must always wait for kernel wirings
7539 * to go away on the entries before deleting them.
7540 *
7541 * Any caller who wants to actually remove a kernel wiring
7542 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7543 * properly remove one wiring instead of blasting through
7544 * them all.
7545 */
7546 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7547
0a7de745 7548 while (1) {
b0d623f7
A
7549 /*
7550 * Find the start of the region, and clip it
7551 */
7552 if (vm_map_lookup_entry(map, start, &first_entry)) {
7553 entry = first_entry;
fe8ab488
A
7554 if (map == kalloc_map &&
7555 (entry->vme_start != start ||
0a7de745 7556 entry->vme_end != end)) {
fe8ab488 7557 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7558 "mismatched entry %p [0x%llx:0x%llx]\n",
7559 map,
7560 (uint64_t)start,
7561 (uint64_t)end,
7562 entry,
7563 (uint64_t)entry->vme_start,
7564 (uint64_t)entry->vme_end);
fe8ab488 7565 }
d9a64523
A
7566
7567 /*
7568 * If in a superpage, extend the range to include the start of the mapping.
7569 */
7570 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
b0d623f7
A
7571 start = SUPERPAGE_ROUND_DOWN(start);
7572 continue;
7573 }
d9a64523 7574
b0d623f7
A
7575 if (start == entry->vme_start) {
7576 /*
7577 * No need to clip. We don't want to cause
7578 * any unnecessary unnesting in this case...
7579 */
7580 } else {
fe8ab488
A
7581 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7582 entry->map_aligned &&
7583 !VM_MAP_PAGE_ALIGNED(
7584 start,
7585 VM_MAP_PAGE_MASK(map))) {
7586 /*
7587 * The entry will no longer be
7588 * map-aligned after clipping
7589 * and the caller said it's OK.
7590 */
7591 entry->map_aligned = FALSE;
7592 }
7593 if (map == kalloc_map) {
7594 panic("vm_map_delete(%p,0x%llx,0x%llx):"
0a7de745
A
7595 " clipping %p at 0x%llx\n",
7596 map,
7597 (uint64_t)start,
7598 (uint64_t)end,
7599 entry,
7600 (uint64_t)start);
fe8ab488 7601 }
b0d623f7
A
7602 vm_map_clip_start(map, entry, start);
7603 }
7604
2d21ac55 7605 /*
b0d623f7
A
7606 * Fix the lookup hint now, rather than each
7607 * time through the loop.
2d21ac55 7608 */
b0d623f7 7609 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 7610 } else {
fe8ab488 7611 if (map->pmap == kernel_pmap &&
d9a64523 7612 map->map_refcnt != 0) {
fe8ab488 7613 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7614 "no map entry at 0x%llx\n",
7615 map,
7616 (uint64_t)start,
7617 (uint64_t)end,
7618 (uint64_t)start);
fe8ab488 7619 }
b0d623f7 7620 entry = first_entry->vme_next;
0a7de745 7621 if (gap_start == FIND_GAP) {
d9a64523 7622 gap_start = start;
0a7de745 7623 }
2d21ac55 7624 }
b0d623f7 7625 break;
1c79356b 7626 }
0a7de745 7627 if (entry->superpage_size) {
b0d623f7 7628 end = SUPERPAGE_ROUND_UP(end);
0a7de745 7629 }
1c79356b
A
7630
7631 need_wakeup = FALSE;
7632 /*
7633 * Step through all entries in this region
7634 */
2d21ac55
A
7635 s = entry->vme_start;
7636 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7637 /*
7638 * At this point, we have deleted all the memory entries
7639 * between "start" and "s". We still need to delete
7640 * all memory entries between "s" and "end".
7641 * While we were blocked and the map was unlocked, some
7642 * new memory entries could have been re-allocated between
7643 * "start" and "s" and we don't want to mess with those.
7644 * Some of those entries could even have been re-assembled
7645 * with an entry after "s" (in vm_map_simplify_entry()), so
7646 * we may have to vm_map_clip_start() again.
7647 */
1c79356b 7648
2d21ac55
A
7649 if (entry->vme_start >= s) {
7650 /*
7651 * This entry starts on or after "s"
7652 * so no need to clip its start.
7653 */
7654 } else {
7655 /*
7656 * This entry has been re-assembled by a
7657 * vm_map_simplify_entry(). We need to
7658 * re-clip its start.
7659 */
fe8ab488
A
7660 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7661 entry->map_aligned &&
7662 !VM_MAP_PAGE_ALIGNED(s,
0a7de745 7663 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7664 /*
7665 * The entry will no longer be map-aligned
7666 * after clipping and the caller said it's OK.
7667 */
7668 entry->map_aligned = FALSE;
7669 }
7670 if (map == kalloc_map) {
7671 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7672 "clipping %p at 0x%llx\n",
7673 map,
7674 (uint64_t)start,
7675 (uint64_t)end,
7676 entry,
7677 (uint64_t)s);
fe8ab488 7678 }
2d21ac55
A
7679 vm_map_clip_start(map, entry, s);
7680 }
7681 if (entry->vme_end <= end) {
7682 /*
7683 * This entry is going away completely, so no need
7684 * to clip and possibly cause an unnecessary unnesting.
7685 */
7686 } else {
fe8ab488
A
7687 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7688 entry->map_aligned &&
7689 !VM_MAP_PAGE_ALIGNED(end,
0a7de745 7690 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7691 /*
7692 * The entry will no longer be map-aligned
7693 * after clipping and the caller said it's OK.
7694 */
7695 entry->map_aligned = FALSE;
7696 }
7697 if (map == kalloc_map) {
7698 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7699 "clipping %p at 0x%llx\n",
7700 map,
7701 (uint64_t)start,
7702 (uint64_t)end,
7703 entry,
7704 (uint64_t)end);
fe8ab488 7705 }
2d21ac55
A
7706 vm_map_clip_end(map, entry, end);
7707 }
b0d623f7
A
7708
7709 if (entry->permanent) {
5ba3f43e
A
7710 if (map->pmap == kernel_pmap) {
7711 panic("%s(%p,0x%llx,0x%llx): "
0a7de745
A
7712 "attempt to remove permanent "
7713 "VM map entry "
7714 "%p [0x%llx:0x%llx]\n",
7715 __FUNCTION__,
7716 map,
7717 (uint64_t) start,
7718 (uint64_t) end,
7719 entry,
7720 (uint64_t) entry->vme_start,
7721 (uint64_t) entry->vme_end);
5ba3f43e
A
7722 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7723// printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7724 entry->permanent = FALSE;
d9a64523
A
7725#if PMAP_CS
7726 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7727 entry->permanent = FALSE;
7728
7729 printf("%d[%s] %s(0x%llx,0x%llx): "
0a7de745
A
7730 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7731 "prot 0x%x/0x%x\n",
7732 proc_selfpid(),
7733 (current_task()->bsd_info
7734 ? proc_name_address(current_task()->bsd_info)
7735 : "?"),
7736 __FUNCTION__,
7737 (uint64_t) start,
7738 (uint64_t) end,
7739 (uint64_t)entry->vme_start,
7740 (uint64_t)entry->vme_end,
7741 entry->protection,
7742 entry->max_protection);
d9a64523 7743#endif
5ba3f43e 7744 } else {
d9a64523 7745 if (vm_map_executable_immutable_verbose) {
5ba3f43e 7746 printf("%d[%s] %s(0x%llx,0x%llx): "
0a7de745
A
7747 "permanent entry [0x%llx:0x%llx] "
7748 "prot 0x%x/0x%x\n",
7749 proc_selfpid(),
7750 (current_task()->bsd_info
7751 ? proc_name_address(current_task()->bsd_info)
7752 : "?"),
7753 __FUNCTION__,
7754 (uint64_t) start,
7755 (uint64_t) end,
7756 (uint64_t)entry->vme_start,
7757 (uint64_t)entry->vme_end,
7758 entry->protection,
7759 entry->max_protection);
5ba3f43e
A
7760 }
7761 /*
7762 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7763 */
7764 DTRACE_VM5(vm_map_delete_permanent,
0a7de745
A
7765 vm_map_offset_t, entry->vme_start,
7766 vm_map_offset_t, entry->vme_end,
7767 vm_prot_t, entry->protection,
7768 vm_prot_t, entry->max_protection,
7769 int, VME_ALIAS(entry));
5ba3f43e 7770 }
b0d623f7
A
7771 }
7772
7773
1c79356b 7774 if (entry->in_transition) {
9bccf70c
A
7775 wait_result_t wait_result;
7776
1c79356b
A
7777 /*
7778 * Another thread is wiring/unwiring this entry.
7779 * Let the other thread know we are waiting.
7780 */
2d21ac55 7781 assert(s == entry->vme_start);
1c79356b
A
7782 entry->needs_wakeup = TRUE;
7783
7784 /*
7785 * wake up anybody waiting on entries that we have
7786 * already unwired/deleted.
7787 */
7788 if (need_wakeup) {
7789 vm_map_entry_wakeup(map);
7790 need_wakeup = FALSE;
7791 }
7792
9bccf70c 7793 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
7794
7795 if (interruptible &&
9bccf70c 7796 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
7797 /*
7798 * We do not clear the needs_wakeup flag,
7799 * since we cannot tell if we were the only one.
7800 */
7801 return KERN_ABORTED;
9bccf70c 7802 }
1c79356b
A
7803
7804 /*
7805 * The entry could have been clipped or it
7806 * may not exist anymore. Look it up again.
7807 */
7808 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
7809 /*
7810 * User: use the next entry
7811 */
0a7de745 7812 if (gap_start == FIND_GAP) {
d9a64523 7813 gap_start = s;
0a7de745 7814 }
1c79356b 7815 entry = first_entry->vme_next;
2d21ac55 7816 s = entry->vme_start;
1c79356b
A
7817 } else {
7818 entry = first_entry;
0c530ab8 7819 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7820 }
9bccf70c 7821 last_timestamp = map->timestamp;
1c79356b
A
7822 continue;
7823 } /* end in_transition */
7824
7825 if (entry->wired_count) {
0a7de745 7826 boolean_t user_wire;
2d21ac55
A
7827
7828 user_wire = entry->user_wired_count > 0;
7829
1c79356b 7830 /*
0a7de745 7831 * Remove a kernel wiring if requested
1c79356b 7832 */
b0d623f7 7833 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 7834 entry->wired_count--;
b0d623f7 7835 }
5ba3f43e 7836
b0d623f7
A
7837 /*
7838 * Remove all user wirings for proper accounting
7839 */
7840 if (entry->user_wired_count > 0) {
0a7de745 7841 while (entry->user_wired_count) {
b0d623f7 7842 subtract_wire_counts(map, entry, user_wire);
0a7de745 7843 }
b0d623f7 7844 }
1c79356b
A
7845
7846 if (entry->wired_count != 0) {
2d21ac55 7847 assert(map != kernel_map);
1c79356b
A
7848 /*
7849 * Cannot continue. Typical case is when
7850 * a user thread has physical I/O pending
7851 * on this page. Either wait for the
7852 * kernel wiring to go away or return an
7853 * error.
7854 */
7855 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 7856 wait_result_t wait_result;
1c79356b 7857
2d21ac55 7858 assert(s == entry->vme_start);
1c79356b 7859 entry->needs_wakeup = TRUE;
9bccf70c 7860 wait_result = vm_map_entry_wait(map,
0a7de745 7861 interruptible);
1c79356b
A
7862
7863 if (interruptible &&
2d21ac55 7864 wait_result == THREAD_INTERRUPTED) {
1c79356b 7865 /*
5ba3f43e
A
7866 * We do not clear the
7867 * needs_wakeup flag, since we
7868 * cannot tell if we were the
1c79356b 7869 * only one.
2d21ac55 7870 */
1c79356b 7871 return KERN_ABORTED;
9bccf70c 7872 }
1c79356b
A
7873
7874 /*
2d21ac55 7875 * The entry could have been clipped or
1c79356b
A
7876 * it may not exist anymore. Look it
7877 * up again.
2d21ac55 7878 */
5ba3f43e 7879 if (!vm_map_lookup_entry(map, s,
0a7de745 7880 &first_entry)) {
2d21ac55 7881 assert(map != kernel_map);
1c79356b 7882 /*
2d21ac55
A
7883 * User: use the next entry
7884 */
0a7de745 7885 if (gap_start == FIND_GAP) {
d9a64523 7886 gap_start = s;
0a7de745 7887 }
1c79356b 7888 entry = first_entry->vme_next;
2d21ac55 7889 s = entry->vme_start;
1c79356b
A
7890 } else {
7891 entry = first_entry;
0c530ab8 7892 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7893 }
9bccf70c 7894 last_timestamp = map->timestamp;
1c79356b 7895 continue;
0a7de745 7896 } else {
1c79356b
A
7897 return KERN_FAILURE;
7898 }
7899 }
7900
7901 entry->in_transition = TRUE;
7902 /*
7903 * copy current entry. see comment in vm_map_wire()
7904 */
7905 tmp_entry = *entry;
2d21ac55 7906 assert(s == entry->vme_start);
1c79356b
A
7907
7908 /*
7909 * We can unlock the map now. The in_transition
7910 * state guarantees existence of the entry.
7911 */
7912 vm_map_unlock(map);
2d21ac55
A
7913
7914 if (tmp_entry.is_sub_map) {
7915 vm_map_t sub_map;
7916 vm_map_offset_t sub_start, sub_end;
7917 pmap_t pmap;
7918 vm_map_offset_t pmap_addr;
5ba3f43e 7919
2d21ac55 7920
3e170ce0
A
7921 sub_map = VME_SUBMAP(&tmp_entry);
7922 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55 7923 sub_end = sub_start + (tmp_entry.vme_end -
0a7de745 7924 tmp_entry.vme_start);
2d21ac55
A
7925 if (tmp_entry.use_pmap) {
7926 pmap = sub_map->pmap;
7927 pmap_addr = tmp_entry.vme_start;
7928 } else {
7929 pmap = map->pmap;
7930 pmap_addr = tmp_entry.vme_start;
7931 }
7932 (void) vm_map_unwire_nested(sub_map,
0a7de745
A
7933 sub_start, sub_end,
7934 user_wire,
7935 pmap, pmap_addr);
2d21ac55 7936 } else {
3e170ce0 7937 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
7938 pmap_protect_options(
7939 map->pmap,
7940 tmp_entry.vme_start,
7941 tmp_entry.vme_end,
7942 VM_PROT_NONE,
7943 PMAP_OPTIONS_REMOVE,
7944 NULL);
7945 }
2d21ac55 7946 vm_fault_unwire(map, &tmp_entry,
0a7de745
A
7947 VME_OBJECT(&tmp_entry) == kernel_object,
7948 map->pmap, tmp_entry.vme_start);
2d21ac55
A
7949 }
7950
1c79356b
A
7951 vm_map_lock(map);
7952
0a7de745 7953 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
7954 /*
7955 * Find the entry again. It could have
7956 * been clipped after we unlocked the map.
7957 */
0a7de745 7958 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5ba3f43e 7959 assert((map != kernel_map) &&
0a7de745
A
7960 (!entry->is_sub_map));
7961 if (gap_start == FIND_GAP) {
d9a64523 7962 gap_start = s;
0a7de745 7963 }
1c79356b 7964 first_entry = first_entry->vme_next;
2d21ac55 7965 s = first_entry->vme_start;
1c79356b 7966 } else {
0c530ab8 7967 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
7968 }
7969 } else {
0c530ab8 7970 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
7971 first_entry = entry;
7972 }
7973
7974 last_timestamp = map->timestamp;
7975
7976 entry = first_entry;
7977 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7978 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
7979 assert(entry->in_transition);
7980 entry->in_transition = FALSE;
7981 if (entry->needs_wakeup) {
7982 entry->needs_wakeup = FALSE;
7983 need_wakeup = TRUE;
7984 }
7985 entry = entry->vme_next;
7986 }
7987 /*
7988 * We have unwired the entry(s). Go back and
7989 * delete them.
7990 */
7991 entry = first_entry;
7992 continue;
7993 }
7994
7995 /* entry is unwired */
7996 assert(entry->wired_count == 0);
7997 assert(entry->user_wired_count == 0);
7998
2d21ac55
A
7999 assert(s == entry->vme_start);
8000
8001 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8002 /*
8003 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8004 * vm_map_delete(), some map entries might have been
8005 * transferred to a "zap_map", which doesn't have a
8006 * pmap. The original pmap has already been flushed
8007 * in the vm_map_delete() call targeting the original
8008 * map, but when we get to destroying the "zap_map",
8009 * we don't have any pmap to flush, so let's just skip
8010 * all this.
8011 */
8012 } else if (entry->is_sub_map) {
8013 if (entry->use_pmap) {
0c530ab8 8014#ifndef NO_NESTED_PMAP
3e170ce0
A
8015 int pmap_flags;
8016
8017 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8018 /*
8019 * This is the final cleanup of the
8020 * address space being terminated.
8021 * No new mappings are expected and
8022 * we don't really need to unnest the
8023 * shared region (and lose the "global"
8024 * pmap mappings, if applicable).
8025 *
8026 * Tell the pmap layer that we're
8027 * "clean" wrt nesting.
8028 */
8029 pmap_flags = PMAP_UNNEST_CLEAN;
8030 } else {
8031 /*
8032 * We're unmapping part of the nested
8033 * shared region, so we can't keep the
8034 * nested pmap.
8035 */
8036 pmap_flags = 0;
8037 }
8038 pmap_unnest_options(
8039 map->pmap,
8040 (addr64_t)entry->vme_start,
8041 entry->vme_end - entry->vme_start,
8042 pmap_flags);
0a7de745 8043#endif /* NO_NESTED_PMAP */
d9a64523 8044 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
9bccf70c
A
8045 /* clean up parent map/maps */
8046 vm_map_submap_pmap_clean(
8047 map, entry->vme_start,
8048 entry->vme_end,
3e170ce0
A
8049 VME_SUBMAP(entry),
8050 VME_OFFSET(entry));
9bccf70c 8051 }
2d21ac55 8052 } else {
1c79356b
A
8053 vm_map_submap_pmap_clean(
8054 map, entry->vme_start, entry->vme_end,
3e170ce0
A
8055 VME_SUBMAP(entry),
8056 VME_OFFSET(entry));
2d21ac55 8057 }
3e170ce0 8058 } else if (VME_OBJECT(entry) != kernel_object &&
0a7de745 8059 VME_OBJECT(entry) != compressor_object) {
3e170ce0 8060 object = VME_OBJECT(entry);
d9a64523 8061 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
39236c6e 8062 vm_object_pmap_protect_options(
3e170ce0 8063 object, VME_OFFSET(entry),
55e303ae
A
8064 entry->vme_end - entry->vme_start,
8065 PMAP_NULL,
8066 entry->vme_start,
39236c6e
A
8067 VM_PROT_NONE,
8068 PMAP_OPTIONS_REMOVE);
3e170ce0 8069 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
0a7de745 8070 (map->pmap == kernel_pmap)) {
39236c6e
A
8071 /* Remove translations associated
8072 * with this range unless the entry
8073 * does not have an object, or
8074 * it's the kernel map or a descendant
8075 * since the platform could potentially
8076 * create "backdoor" mappings invisible
8077 * to the VM. It is expected that
8078 * objectless, non-kernel ranges
8079 * do not have such VM invisible
8080 * translations.
8081 */
8082 pmap_remove_options(map->pmap,
0a7de745
A
8083 (addr64_t)entry->vme_start,
8084 (addr64_t)entry->vme_end,
8085 PMAP_OPTIONS_REMOVE);
1c79356b
A
8086 }
8087 }
8088
fe8ab488
A
8089 if (entry->iokit_acct) {
8090 /* alternate accounting */
ecc0ceb4 8091 DTRACE_VM4(vm_map_iokit_unmapped_region,
0a7de745
A
8092 vm_map_t, map,
8093 vm_map_offset_t, entry->vme_start,
8094 vm_map_offset_t, entry->vme_end,
8095 int, VME_ALIAS(entry));
fe8ab488 8096 vm_map_iokit_unmapped_region(map,
0a7de745
A
8097 (entry->vme_end -
8098 entry->vme_start));
fe8ab488 8099 entry->iokit_acct = FALSE;
a39ff7e2 8100 entry->use_pmap = FALSE;
fe8ab488
A
8101 }
8102
91447636
A
8103 /*
8104 * All pmap mappings for this map entry must have been
8105 * cleared by now.
8106 */
fe8ab488 8107#if DEBUG
91447636 8108 assert(vm_map_pmap_is_empty(map,
0a7de745
A
8109 entry->vme_start,
8110 entry->vme_end));
fe8ab488 8111#endif /* DEBUG */
91447636 8112
1c79356b 8113 next = entry->vme_next;
fe8ab488
A
8114
8115 if (map->pmap == kernel_pmap &&
d9a64523 8116 map->map_refcnt != 0 &&
fe8ab488
A
8117 entry->vme_end < end &&
8118 (next == vm_map_to_entry(map) ||
0a7de745 8119 next->vme_start != entry->vme_end)) {
fe8ab488 8120 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
8121 "hole after %p at 0x%llx\n",
8122 map,
8123 (uint64_t)start,
8124 (uint64_t)end,
8125 entry,
8126 (uint64_t)entry->vme_end);
fe8ab488
A
8127 }
8128
d9a64523
A
8129 /*
8130 * If the desired range didn't end with "entry", then there is a gap if
8131 * we wrapped around to the start of the map or if "entry" and "next"
8132 * aren't contiguous.
8133 *
8134 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8135 * For example, on devices with 4K h/w pages, map entries are now all 16K-sized.
8136 */
8137 if (gap_start == FIND_GAP &&
8138 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8139 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8140 gap_start = entry->vme_end;
8141 }
1c79356b
A
8142 s = next->vme_start;
8143 last_timestamp = map->timestamp;
91447636 8144
5ba3f43e
A
8145 if (entry->permanent) {
8146 /*
8147 * A permanent entry can not be removed, so leave it
8148 * in place but remove all access permissions.
8149 */
8150 entry->protection = VM_PROT_NONE;
8151 entry->max_protection = VM_PROT_NONE;
8152 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
0a7de745 8153 zap_map != VM_MAP_NULL) {
2d21ac55 8154 vm_map_size_t entry_size;
91447636
A
8155 /*
8156 * The caller wants to save the affected VM map entries
8157 * into the "zap_map". The caller will take care of
8158 * these entries.
8159 */
8160 /* unlink the entry from "map" ... */
6d2010ae 8161 vm_map_store_entry_unlink(map, entry);
91447636 8162 /* ... and add it to the end of the "zap_map" */
6d2010ae 8163 vm_map_store_entry_link(zap_map,
0a7de745
A
8164 vm_map_last_entry(zap_map),
8165 entry,
8166 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55
A
8167 entry_size = entry->vme_end - entry->vme_start;
8168 map->size -= entry_size;
8169 zap_map->size += entry_size;
8170 /* we didn't unlock the map, so no timestamp increase */
8171 last_timestamp--;
91447636
A
8172 } else {
8173 vm_map_entry_delete(map, entry);
8174 /* vm_map_entry_delete unlocks the map */
8175 vm_map_lock(map);
8176 }
8177
1c79356b
A
8178 entry = next;
8179
0a7de745 8180 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8181 break;
8182 }
d9a64523 8183 if (last_timestamp + 1 != map->timestamp) {
1c79356b 8184 /*
d9a64523
A
8185 * We are responsible for deleting everything
8186 * from the given space. If someone has interfered,
8187 * we pick up where we left off. Back fills should
8188 * be all right for anyone, except map_delete, and
1c79356b
A
8189 * we have to assume that the task has been fully
8190 * disabled before we get here
8191 */
0a7de745
A
8192 if (!vm_map_lookup_entry(map, s, &entry)) {
8193 entry = entry->vme_next;
d9a64523
A
8194
8195 /*
8196 * Nothing found for s. If we weren't already done, then there is a gap.
8197 */
0a7de745 8198 if (gap_start == FIND_GAP && s < end) {
d9a64523 8199 gap_start = s;
0a7de745 8200 }
2d21ac55 8201 s = entry->vme_start;
0a7de745 8202 } else {
2d21ac55 8203 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
0a7de745 8204 }
5ba3f43e
A
8205 /*
8206 * Others can not only allocate behind us; entries can
8207 * also coalesce while we don't hold the map lock.
1c79356b 8208 */
d9a64523 8209 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8210 break;
8211 }
1c79356b
A
8212 }
8213 last_timestamp = map->timestamp;
8214 }
8215
0a7de745 8216 if (map->wait_for_space) {
1c79356b 8217 thread_wakeup((event_t) map);
0a7de745 8218 }
1c79356b
A
8219 /*
8220 * wake up anybody waiting on entries that we have already deleted.
8221 */
0a7de745 8222 if (need_wakeup) {
1c79356b 8223 vm_map_entry_wakeup(map);
0a7de745 8224 }
1c79356b 8225
d9a64523
A
8226 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8227 DTRACE_VM3(kern_vm_deallocate_gap,
8228 vm_map_offset_t, gap_start,
8229 vm_map_offset_t, save_start,
8230 vm_map_offset_t, save_end);
8231 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8232#if defined(DEVELOPMENT) || defined(DEBUG)
8233 /* log just once if not checking, otherwise log each one */
8234 if (!map->warned_delete_gap ||
8235 (task_exc_guard_default & TASK_EXC_GUARD_VM_ALL) != 0) {
8236 printf("vm_map_delete: map %p [%p...%p] nothing at %p\n",
8237 (void *)map, (void *)save_start, (void *)save_end,
8238 (void *)gap_start);
8239 if (!map->warned_delete_gap) {
8240 map->warned_delete_gap = 1;
8241 }
8242 }
8243#endif
8244 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8245 }
8246 }
8247
1c79356b
A
8248 return KERN_SUCCESS;
8249}
8250
8251/*
8252 * vm_map_remove:
8253 *
8254 * Remove the given address range from the target map.
8255 * This is the exported form of vm_map_delete.
8256 */
8257kern_return_t
8258vm_map_remove(
0a7de745
A
8259 vm_map_t map,
8260 vm_map_offset_t start,
8261 vm_map_offset_t end,
8262 boolean_t flags)
1c79356b 8263{
0a7de745 8264 kern_return_t result;
9bccf70c 8265
1c79356b
A
8266 vm_map_lock(map);
8267 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
8268 /*
8269 * For the zone_map, the kernel controls the allocation/freeing of memory.
8270 * Any free to the zone_map should be within the bounds of the map and
8271 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8272 * free to the zone_map into a no-op, there is a problem and we should
8273 * panic.
8274 */
0a7de745 8275 if ((map == zone_map) && (start == end)) {
39236c6e 8276 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
0a7de745 8277 }
91447636 8278 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 8279 vm_map_unlock(map);
91447636 8280
0a7de745 8281 return result;
1c79356b
A
8282}
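/*
 * Illustrative sketch (not part of vm_map.c): tearing down a kernel-wired
 * kernel-map range with vm_map_remove(). The start/size pair is an
 * assumption for the example; VM_MAP_REMOVE_KUNWIRE drops the single kernel
 * wiring before the entries are deleted, as described above vm_map_delete().
 */
static kern_return_t
example_remove_wired_kernel_range(
	vm_map_offset_t start,
	vm_map_size_t   size)
{
	return vm_map_remove(kernel_map,
	    vm_map_trunc_page(start, VM_MAP_PAGE_MASK(kernel_map)),
	    vm_map_round_page(start + size, VM_MAP_PAGE_MASK(kernel_map)),
	    VM_MAP_REMOVE_KUNWIRE);
}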
8283
39037602
A
8284/*
8285 * vm_map_remove_locked:
8286 *
8287 * Remove the given address range from the target locked map.
8288 * This is the exported form of vm_map_delete.
8289 */
8290kern_return_t
8291vm_map_remove_locked(
0a7de745
A
8292 vm_map_t map,
8293 vm_map_offset_t start,
8294 vm_map_offset_t end,
8295 boolean_t flags)
39037602 8296{
0a7de745 8297 kern_return_t result;
39037602
A
8298
8299 VM_MAP_RANGE_CHECK(map, start, end);
8300 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
0a7de745 8301 return result;
39037602
A
8302}
8303
1c79356b 8304
d9a64523
A
8305/*
8306 * Routine: vm_map_copy_allocate
8307 *
8308 * Description:
8309 * Allocates and initializes a map copy object.
8310 */
8311static vm_map_copy_t
8312vm_map_copy_allocate(void)
8313{
8314 vm_map_copy_t new_copy;
8315
8316 new_copy = zalloc(vm_map_copy_zone);
0a7de745 8317 bzero(new_copy, sizeof(*new_copy));
d9a64523
A
8318 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8319 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8320 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8321 return new_copy;
8322}
8323
1c79356b
A
8324/*
8325 * Routine: vm_map_copy_discard
8326 *
8327 * Description:
8328 * Dispose of a map copy object (returned by
8329 * vm_map_copyin).
8330 */
8331void
8332vm_map_copy_discard(
0a7de745 8333 vm_map_copy_t copy)
1c79356b 8334{
0a7de745 8335 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8336 return;
0a7de745 8337 }
1c79356b
A
8338
8339 switch (copy->type) {
8340 case VM_MAP_COPY_ENTRY_LIST:
8341 while (vm_map_copy_first_entry(copy) !=
0a7de745
A
8342 vm_map_copy_to_entry(copy)) {
8343 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
1c79356b
A
8344
8345 vm_map_copy_entry_unlink(copy, entry);
39236c6e 8346 if (entry->is_sub_map) {
3e170ce0 8347 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 8348 } else {
3e170ce0 8349 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 8350 }
1c79356b
A
8351 vm_map_copy_entry_dispose(copy, entry);
8352 }
8353 break;
0a7de745 8354 case VM_MAP_COPY_OBJECT:
1c79356b
A
8355 vm_object_deallocate(copy->cpy_object);
8356 break;
1c79356b
A
8357 case VM_MAP_COPY_KERNEL_BUFFER:
8358
8359 /*
8360 * The vm_map_copy_t and possibly the data buffer were
8361 * allocated by a single call to kalloc(), i.e. the
8362 * vm_map_copy_t was not allocated out of the zone.
8363 */
0a7de745 8364 if (copy->size > msg_ool_size_small || copy->offset) {
3e170ce0 8365 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
8366 (long long)copy->size, (long long)copy->offset);
8367 }
3e170ce0 8368 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
8369 return;
8370 }
91447636 8371 zfree(vm_map_copy_zone, copy);
1c79356b
A
8372}
8373
8374/*
8375 * Routine: vm_map_copy_copy
8376 *
8377 * Description:
8378 * Move the information in a map copy object to
8379 * a new map copy object, leaving the old one
8380 * empty.
8381 *
8382 * This is used by kernel routines that need
8383 * to look at out-of-line data (in copyin form)
8384 * before deciding whether to return SUCCESS.
8385 * If the routine returns FAILURE, the original
8386 * copy object will be deallocated; therefore,
8387 * these routines must make a copy of the copy
8388 * object and leave the original empty so that
8389 * deallocation will not fail.
8390 */
8391vm_map_copy_t
8392vm_map_copy_copy(
0a7de745 8393 vm_map_copy_t copy)
1c79356b 8394{
0a7de745 8395 vm_map_copy_t new_copy;
1c79356b 8396
0a7de745 8397 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8398 return VM_MAP_COPY_NULL;
0a7de745 8399 }
1c79356b
A
8400
8401 /*
8402 * Allocate a new copy object, and copy the information
8403 * from the old one into it.
8404 */
8405
8406 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8407 *new_copy = *copy;
8408
8409 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8410 /*
8411 * The links in the entry chain must be
8412 * changed to point to the new copy object.
8413 */
8414 vm_map_copy_first_entry(copy)->vme_prev
0a7de745 8415 = vm_map_copy_to_entry(new_copy);
1c79356b 8416 vm_map_copy_last_entry(copy)->vme_next
0a7de745 8417 = vm_map_copy_to_entry(new_copy);
1c79356b
A
8418 }
8419
8420 /*
8421 * Change the old copy object into one that contains
8422 * nothing to be deallocated.
8423 */
8424 copy->type = VM_MAP_COPY_OBJECT;
8425 copy->cpy_object = VM_OBJECT_NULL;
8426
8427 /*
8428 * Return the new object.
8429 */
8430 return new_copy;
8431}
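/*
 * Illustrative sketch (not part of vm_map.c): the consumer pattern the
 * comment above describes. A routine that may still fail after looking at
 * out-of-line data first moves the contents aside with vm_map_copy_copy();
 * on failure it discards only its private copy, and the caller's original
 * (now empty) copy object can still be deallocated safely. The validation
 * callback is an assumption for the example.
 */
static kern_return_t
example_inspect_then_consume(
	vm_map_copy_t copy,
	boolean_t   (*validate)(vm_map_copy_t))
{
	vm_map_copy_t private_copy;

	private_copy = vm_map_copy_copy(copy); /* "copy" is left empty */
	if (!validate(private_copy)) {
		vm_map_copy_discard(private_copy);
		return KERN_FAILURE;    /* caller may discard "copy" harmlessly */
	}
	/* ... hand "private_copy" off for further processing ... */
	vm_map_copy_discard(private_copy);
	return KERN_SUCCESS;
}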
8432
91447636 8433static kern_return_t
1c79356b 8434vm_map_overwrite_submap_recurse(
0a7de745
A
8435 vm_map_t dst_map,
8436 vm_map_offset_t dst_addr,
8437 vm_map_size_t dst_size)
1c79356b 8438{
0a7de745
A
8439 vm_map_offset_t dst_end;
8440 vm_map_entry_t tmp_entry;
8441 vm_map_entry_t entry;
8442 kern_return_t result;
8443 boolean_t encountered_sub_map = FALSE;
1c79356b
A
8444
8445
8446
8447 /*
8448 * Verify that the destination is all writeable
8449 * initially. We have to trunc the destination
8450 * address and round the copy size or we'll end up
8451 * splitting entries in strange ways.
8452 */
8453
39236c6e 8454 dst_end = vm_map_round_page(dst_addr + dst_size,
0a7de745 8455 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 8456 vm_map_lock(dst_map);
1c79356b
A
8457
8458start_pass_1:
1c79356b
A
8459 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8460 vm_map_unlock(dst_map);
0a7de745 8461 return KERN_INVALID_ADDRESS;
1c79356b
A
8462 }
8463
39236c6e 8464 vm_map_clip_start(dst_map,
0a7de745
A
8465 tmp_entry,
8466 vm_map_trunc_page(dst_addr,
8467 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
8468 if (tmp_entry->is_sub_map) {
8469 /* clipping did unnest if needed */
8470 assert(!tmp_entry->use_pmap);
8471 }
1c79356b
A
8472
8473 for (entry = tmp_entry;;) {
0a7de745 8474 vm_map_entry_t next;
1c79356b
A
8475
8476 next = entry->vme_next;
0a7de745
A
8477 while (entry->is_sub_map) {
8478 vm_map_offset_t sub_start;
8479 vm_map_offset_t sub_end;
8480 vm_map_offset_t local_end;
1c79356b
A
8481
8482 if (entry->in_transition) {
2d21ac55
A
8483 /*
8484 * Say that we are waiting, and wait for entry.
8485 */
0a7de745
A
8486 entry->needs_wakeup = TRUE;
8487 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8488
8489 goto start_pass_1;
8490 }
8491
8492 encountered_sub_map = TRUE;
3e170ce0 8493 sub_start = VME_OFFSET(entry);
1c79356b 8494
0a7de745 8495 if (entry->vme_end < dst_end) {
1c79356b 8496 sub_end = entry->vme_end;
0a7de745 8497 } else {
1c79356b 8498 sub_end = dst_end;
0a7de745 8499 }
1c79356b 8500 sub_end -= entry->vme_start;
3e170ce0 8501 sub_end += VME_OFFSET(entry);
1c79356b
A
8502 local_end = entry->vme_end;
8503 vm_map_unlock(dst_map);
5ba3f43e 8504
1c79356b 8505 result = vm_map_overwrite_submap_recurse(
3e170ce0 8506 VME_SUBMAP(entry),
2d21ac55
A
8507 sub_start,
8508 sub_end - sub_start);
1c79356b 8509
0a7de745 8510 if (result != KERN_SUCCESS) {
1c79356b 8511 return result;
0a7de745
A
8512 }
8513 if (dst_end <= entry->vme_end) {
1c79356b 8514 return KERN_SUCCESS;
0a7de745 8515 }
1c79356b 8516 vm_map_lock(dst_map);
0a7de745
A
8517 if (!vm_map_lookup_entry(dst_map, local_end,
8518 &tmp_entry)) {
1c79356b 8519 vm_map_unlock(dst_map);
0a7de745 8520 return KERN_INVALID_ADDRESS;
1c79356b
A
8521 }
8522 entry = tmp_entry;
8523 next = entry->vme_next;
8524 }
8525
0a7de745 8526 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8527 vm_map_unlock(dst_map);
0a7de745 8528 return KERN_PROTECTION_FAILURE;
1c79356b
A
8529 }
8530
8531 /*
8532 * If the entry is in transition, we must wait
8533 * for it to exit that state. Anything could happen
8534 * when we unlock the map, so start over.
8535 */
0a7de745
A
8536 if (entry->in_transition) {
8537 /*
8538 * Say that we are waiting, and wait for entry.
8539 */
8540 entry->needs_wakeup = TRUE;
8541 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8542
8543 goto start_pass_1;
8544 }
8545
8546/*
8547 * our range is contained completely within this map entry
8548 */
8549 if (dst_end <= entry->vme_end) {
8550 vm_map_unlock(dst_map);
8551 return KERN_SUCCESS;
8552 }
8553/*
8554 * check that range specified is contiguous region
8555 */
8556 if ((next == vm_map_to_entry(dst_map)) ||
8557 (next->vme_start != entry->vme_end)) {
8558 vm_map_unlock(dst_map);
0a7de745 8559 return KERN_INVALID_ADDRESS;
1c79356b
A
8560 }
8561
8562 /*
8563 * Check for permanent objects in the destination.
8564 */
3e170ce0
A
8565 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8566 ((!VME_OBJECT(entry)->internal) ||
0a7de745
A
8567 (VME_OBJECT(entry)->true_share))) {
8568 if (encountered_sub_map) {
1c79356b 8569 vm_map_unlock(dst_map);
0a7de745 8570 return KERN_FAILURE;
1c79356b
A
8571 }
8572 }
8573
8574
8575 entry = next;
8576 }/* for */
8577 vm_map_unlock(dst_map);
0a7de745 8578 return KERN_SUCCESS;
1c79356b
A
8579}
8580
8581/*
8582 * Routine: vm_map_copy_overwrite
8583 *
8584 * Description:
8585 * Copy the memory described by the map copy
8586 * object (copy; returned by vm_map_copyin) onto
8587 * the specified destination region (dst_map, dst_addr).
8588 * The destination must be writeable.
8589 *
8590 * Unlike vm_map_copyout, this routine actually
8591 * writes over previously-mapped memory. If the
8592 * previous mapping was to a permanent (user-supplied)
8593 * memory object, it is preserved.
8594 *
8595 * The attributes (protection and inheritance) of the
8596 * destination region are preserved.
8597 *
8598 * If successful, consumes the copy object.
8599 * Otherwise, the caller is responsible for it.
8600 *
8601 * Implementation notes:
8602 * To overwrite aligned temporary virtual memory, it is
8603 * sufficient to remove the previous mapping and insert
8604 * the new copy. This replacement is done either on
8605 * the whole region (if no permanent virtual memory
8606 * objects are embedded in the destination region) or
8607 * in individual map entries.
8608 *
8609 * To overwrite permanent virtual memory, it is necessary
8610 * to copy each page, as the external memory management
8611 * interface currently does not provide any optimizations.
8612 *
8613 * Unaligned memory also has to be copied. It is possible
8614 * to use 'vm_trickery' to copy the aligned data. This is
8615 * not done but not hard to implement.
8616 *
8617 * Once a page of permanent memory has been overwritten,
8618 * it is impossible to interrupt this function; otherwise,
8619 * the call would be neither atomic nor location-independent.
8620 * The kernel-state portion of a user thread must be
8621 * interruptible.
8622 *
8623 * It may be expensive to forward all requests that might
8624 * overwrite permanent memory (vm_write, vm_copy) to
8625 * uninterruptible kernel threads. This routine may be
8626 * called by interruptible threads; however, success is
8627 * not guaranteed -- if the request cannot be performed
8628 * atomically and interruptibly, an error indication is
8629 * returned.
8630 */
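/*
 * Illustrative sketch (not part of vm_map.c): a plausible wrapper shape for
 * the worker below, not the actual vm_map_copy_overwrite(). Passing a NULL
 * pmap lets vm_map_copy_overwrite_nested() pick the appropriate pmap per
 * entry (submap pmap vs. the destination map's own pmap), and
 * discard_on_success == TRUE makes it consume the copy object on success.
 */
static kern_return_t vm_map_copy_overwrite_nested(
	vm_map_t, vm_map_address_t, vm_map_copy_t, boolean_t, pmap_t, boolean_t);

static kern_return_t
example_copy_overwrite(
	vm_map_t         dst_map,
	vm_map_address_t dst_addr,
	vm_map_copy_t    copy,
	boolean_t        interruptible)
{
	return vm_map_copy_overwrite_nested(dst_map, dst_addr, copy,
	    interruptible, (pmap_t) NULL, TRUE /* discard_on_success */);
}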
8631
91447636 8632static kern_return_t
1c79356b 8633vm_map_copy_overwrite_nested(
0a7de745
A
8634 vm_map_t dst_map,
8635 vm_map_address_t dst_addr,
8636 vm_map_copy_t copy,
8637 boolean_t interruptible,
8638 pmap_t pmap,
8639 boolean_t discard_on_success)
1c79356b 8640{
0a7de745
A
8641 vm_map_offset_t dst_end;
8642 vm_map_entry_t tmp_entry;
8643 vm_map_entry_t entry;
8644 kern_return_t kr;
8645 boolean_t aligned = TRUE;
8646 boolean_t contains_permanent_objects = FALSE;
8647 boolean_t encountered_sub_map = FALSE;
8648 vm_map_offset_t base_addr;
8649 vm_map_size_t copy_size;
8650 vm_map_size_t total_size;
1c79356b
A
8651
8652
8653 /*
8654 * Check for null copy object.
8655 */
8656
0a7de745
A
8657 if (copy == VM_MAP_COPY_NULL) {
8658 return KERN_SUCCESS;
8659 }
1c79356b
A
8660
8661 /*
8662 * Check for special kernel buffer allocated
8663 * by new_ipc_kmsg_copyin.
8664 */
8665
8666 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0a7de745
A
8667 return vm_map_copyout_kernel_buffer(
8668 dst_map, &dst_addr,
8669 copy, copy->size, TRUE, discard_on_success);
1c79356b
A
8670 }
8671
8672 /*
8673 * Only works for entry lists at the moment. Will
8674 * support page lists later.
8675 */
8676
8677 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8678
8679 if (copy->size == 0) {
0a7de745 8680 if (discard_on_success) {
6d2010ae 8681 vm_map_copy_discard(copy);
0a7de745
A
8682 }
8683 return KERN_SUCCESS;
1c79356b
A
8684 }
8685
8686 /*
8687 * Verify that the destination is all writeable
8688 * initially. We have to trunc the destination
8689 * address and round the copy size or we'll end up
8690 * splitting entries in strange ways.
8691 */
8692
39236c6e 8693 if (!VM_MAP_PAGE_ALIGNED(copy->size,
0a7de745 8694 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8695 !VM_MAP_PAGE_ALIGNED(copy->offset,
0a7de745 8696 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8697 !VM_MAP_PAGE_ALIGNED(dst_addr,
0a7de745 8698 VM_MAP_PAGE_MASK(dst_map))) {
1c79356b 8699 aligned = FALSE;
39236c6e 8700 dst_end = vm_map_round_page(dst_addr + copy->size,
0a7de745 8701 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8702 } else {
8703 dst_end = dst_addr + copy->size;
8704 }
8705
1c79356b 8706 vm_map_lock(dst_map);
9bccf70c 8707
91447636
A
8708 /* LP64todo - remove this check when vm_map_commpage64()
8709 * no longer has to stuff in a map_entry for the commpage
8710 * above the map's max_offset.
8711 */
8712 if (dst_addr >= dst_map->max_offset) {
8713 vm_map_unlock(dst_map);
0a7de745 8714 return KERN_INVALID_ADDRESS;
91447636 8715 }
5ba3f43e 8716
9bccf70c 8717start_pass_1:
1c79356b
A
8718 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8719 vm_map_unlock(dst_map);
0a7de745 8720 return KERN_INVALID_ADDRESS;
1c79356b 8721 }
39236c6e 8722 vm_map_clip_start(dst_map,
0a7de745
A
8723 tmp_entry,
8724 vm_map_trunc_page(dst_addr,
8725 VM_MAP_PAGE_MASK(dst_map)));
1c79356b 8726 for (entry = tmp_entry;;) {
0a7de745 8727 vm_map_entry_t next = entry->vme_next;
1c79356b 8728
0a7de745
A
8729 while (entry->is_sub_map) {
8730 vm_map_offset_t sub_start;
8731 vm_map_offset_t sub_end;
8732 vm_map_offset_t local_end;
1c79356b 8733
0a7de745 8734 if (entry->in_transition) {
2d21ac55
A
8735 /*
8736 * Say that we are waiting, and wait for entry.
8737 */
0a7de745
A
8738 entry->needs_wakeup = TRUE;
8739 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8740
8741 goto start_pass_1;
8742 }
8743
8744 local_end = entry->vme_end;
0a7de745 8745 if (!(entry->needs_copy)) {
1c79356b
A
8746 /* if needs_copy we are a COW submap */
8747 /* in such a case we just replace so */
8748 /* there is no need for the follow- */
8749 /* ing check. */
8750 encountered_sub_map = TRUE;
3e170ce0 8751 sub_start = VME_OFFSET(entry);
1c79356b 8752
0a7de745 8753 if (entry->vme_end < dst_end) {
1c79356b 8754 sub_end = entry->vme_end;
0a7de745 8755 } else {
1c79356b 8756 sub_end = dst_end;
0a7de745 8757 }
1c79356b 8758 sub_end -= entry->vme_start;
3e170ce0 8759 sub_end += VME_OFFSET(entry);
1c79356b 8760 vm_map_unlock(dst_map);
5ba3f43e 8761
1c79356b 8762 kr = vm_map_overwrite_submap_recurse(
3e170ce0 8763 VME_SUBMAP(entry),
1c79356b
A
8764 sub_start,
8765 sub_end - sub_start);
0a7de745 8766 if (kr != KERN_SUCCESS) {
1c79356b 8767 return kr;
0a7de745 8768 }
1c79356b
A
8769 vm_map_lock(dst_map);
8770 }
8771
0a7de745 8772 if (dst_end <= entry->vme_end) {
1c79356b 8773 goto start_overwrite;
0a7de745
A
8774 }
8775 if (!vm_map_lookup_entry(dst_map, local_end,
8776 &entry)) {
1c79356b 8777 vm_map_unlock(dst_map);
0a7de745 8778 return KERN_INVALID_ADDRESS;
1c79356b
A
8779 }
8780 next = entry->vme_next;
8781 }
8782
0a7de745 8783 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8784 vm_map_unlock(dst_map);
0a7de745 8785 return KERN_PROTECTION_FAILURE;
1c79356b
A
8786 }
8787
8788 /*
8789 * If the entry is in transition, we must wait
8790 * for it to exit that state. Anything could happen
8791 * when we unlock the map, so start over.
8792 */
0a7de745
A
8793 if (entry->in_transition) {
8794 /*
8795 * Say that we are waiting, and wait for entry.
8796 */
8797 entry->needs_wakeup = TRUE;
8798 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8799
8800 goto start_pass_1;
8801 }
8802
8803/*
8804 * our range is contained completely within this map entry
8805 */
0a7de745 8806 if (dst_end <= entry->vme_end) {
1c79356b 8807 break;
0a7de745 8808 }
1c79356b
A
8809/*
8810 * check that range specified is contiguous region
8811 */
8812 if ((next == vm_map_to_entry(dst_map)) ||
8813 (next->vme_start != entry->vme_end)) {
8814 vm_map_unlock(dst_map);
0a7de745 8815 return KERN_INVALID_ADDRESS;
1c79356b
A
8816 }
8817
8818
8819 /*
8820 * Check for permanent objects in the destination.
8821 */
3e170ce0
A
8822 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8823 ((!VME_OBJECT(entry)->internal) ||
0a7de745 8824 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
8825 contains_permanent_objects = TRUE;
8826 }
8827
8828 entry = next;
8829 }/* for */
8830
8831start_overwrite:
8832 /*
8833 * If there are permanent objects in the destination, then
8834 * the copy cannot be interrupted.
8835 */
8836
8837 if (interruptible && contains_permanent_objects) {
8838 vm_map_unlock(dst_map);
0a7de745 8839 return KERN_FAILURE; /* XXX */
1c79356b
A
8840 }
8841
8842 /*
0a7de745 8843 *
1c79356b
A
8844 * Make a second pass, overwriting the data
8845 * At the beginning of each loop iteration,
8846 * the next entry to be overwritten is "tmp_entry"
8847 * (initially, the value returned from the lookup above),
8848 * and the starting address expected in that entry
8849 * is "start".
8850 */
8851
8852 total_size = copy->size;
0a7de745 8853 if (encountered_sub_map) {
1c79356b
A
8854 copy_size = 0;
8855 /* re-calculate tmp_entry since we've had the map */
8856 /* unlocked */
8857 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8858 vm_map_unlock(dst_map);
0a7de745 8859 return KERN_INVALID_ADDRESS;
1c79356b
A
8860 }
8861 } else {
8862 copy_size = copy->size;
8863 }
5ba3f43e 8864
1c79356b 8865 base_addr = dst_addr;
0a7de745 8866 while (TRUE) {
1c79356b
A
8867 /* deconstruct the copy object and do in parts */
8868 /* only in sub_map, interruptible case */
0a7de745
A
8869 vm_map_entry_t copy_entry;
8870 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8871 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8872 int nentries;
8873 int remaining_entries = 0;
8874 vm_map_offset_t new_offset = 0;
5ba3f43e 8875
1c79356b 8876 for (entry = tmp_entry; copy_size == 0;) {
0a7de745 8877 vm_map_entry_t next;
1c79356b
A
8878
8879 next = entry->vme_next;
8880
8881 /* tmp_entry and base address are moved along */
8882 /* each time we encounter a sub-map. Otherwise */
8883 /* entry can outpace tmp_entry, and the copy_size */
8884 /* may reflect the distance between them */
8885 /* if the current entry is found to be in transition */
8886 /* we will start over at the beginning or the last */
8887 /* encounter of a submap as dictated by base_addr */
8888 /* we will zero copy_size accordingly. */
8889 if (entry->in_transition) {
0a7de745
A
8890 /*
8891 * Say that we are waiting, and wait for entry.
8892 */
8893 entry->needs_wakeup = TRUE;
8894 vm_map_entry_wait(dst_map, THREAD_UNINT);
8895
8896 if (!vm_map_lookup_entry(dst_map, base_addr,
8897 &tmp_entry)) {
1c79356b 8898 vm_map_unlock(dst_map);
0a7de745 8899 return KERN_INVALID_ADDRESS;
1c79356b
A
8900 }
8901 copy_size = 0;
8902 entry = tmp_entry;
8903 continue;
8904 }
5ba3f43e 8905 if (entry->is_sub_map) {
0a7de745
A
8906 vm_map_offset_t sub_start;
8907 vm_map_offset_t sub_end;
8908 vm_map_offset_t local_end;
1c79356b 8909
0a7de745 8910 if (entry->needs_copy) {
1c79356b
A
8911 /* if this is a COW submap */
8912 /* just back the range with an */
8913 /* anonymous entry */
0a7de745 8914 if (entry->vme_end < dst_end) {
1c79356b 8915 sub_end = entry->vme_end;
0a7de745 8916 } else {
1c79356b 8917 sub_end = dst_end;
0a7de745
A
8918 }
8919 if (entry->vme_start < base_addr) {
1c79356b 8920 sub_start = base_addr;
0a7de745 8921 } else {
1c79356b 8922 sub_start = entry->vme_start;
0a7de745 8923 }
1c79356b
A
8924 vm_map_clip_end(
8925 dst_map, entry, sub_end);
8926 vm_map_clip_start(
8927 dst_map, entry, sub_start);
2d21ac55 8928 assert(!entry->use_pmap);
a39ff7e2
A
8929 assert(!entry->iokit_acct);
8930 entry->use_pmap = TRUE;
1c79356b
A
8931 entry->is_sub_map = FALSE;
8932 vm_map_deallocate(
3e170ce0 8933 VME_SUBMAP(entry));
5ba3f43e
A
8934 VME_OBJECT_SET(entry, NULL);
8935 VME_OFFSET_SET(entry, 0);
1c79356b
A
8936 entry->is_shared = FALSE;
8937 entry->needs_copy = FALSE;
5ba3f43e 8938 entry->protection = VM_PROT_DEFAULT;
1c79356b
A
8939 entry->max_protection = VM_PROT_ALL;
8940 entry->wired_count = 0;
8941 entry->user_wired_count = 0;
0a7de745
A
8942 if (entry->inheritance
8943 == VM_INHERIT_SHARE) {
2d21ac55 8944 entry->inheritance = VM_INHERIT_COPY;
0a7de745 8945 }
1c79356b
A
8946 continue;
8947 }
8948 /* first take care of any non-sub_map */
8949 /* entries to send */
0a7de745 8950 if (base_addr < entry->vme_start) {
1c79356b 8951 /* stuff to send */
5ba3f43e 8952 copy_size =
0a7de745 8953 entry->vme_start - base_addr;
1c79356b
A
8954 break;
8955 }
3e170ce0 8956 sub_start = VME_OFFSET(entry);
1c79356b 8957
0a7de745 8958 if (entry->vme_end < dst_end) {
1c79356b 8959 sub_end = entry->vme_end;
0a7de745 8960 } else {
1c79356b 8961 sub_end = dst_end;
0a7de745 8962 }
1c79356b 8963 sub_end -= entry->vme_start;
3e170ce0 8964 sub_end += VME_OFFSET(entry);
1c79356b
A
8965 local_end = entry->vme_end;
8966 vm_map_unlock(dst_map);
8967 copy_size = sub_end - sub_start;
8968
8969 /* adjust the copy object */
8970 if (total_size > copy_size) {
0a7de745
A
8971 vm_map_size_t local_size = 0;
8972 vm_map_size_t entry_size;
1c79356b 8973
2d21ac55
A
8974 nentries = 1;
8975 new_offset = copy->offset;
8976 copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
8977 while (copy_entry !=
8978 vm_map_copy_to_entry(copy)) {
5ba3f43e 8979 entry_size = copy_entry->vme_end -
0a7de745
A
8980 copy_entry->vme_start;
8981 if ((local_size < copy_size) &&
8982 ((local_size + entry_size)
2d21ac55 8983 >= copy_size)) {
5ba3f43e 8984 vm_map_copy_clip_end(copy,
0a7de745
A
8985 copy_entry,
8986 copy_entry->vme_start +
8987 (copy_size - local_size));
5ba3f43e 8988 entry_size = copy_entry->vme_end -
0a7de745 8989 copy_entry->vme_start;
2d21ac55
A
8990 local_size += entry_size;
8991 new_offset += entry_size;
8992 }
0a7de745 8993 if (local_size >= copy_size) {
2d21ac55 8994 next_copy = copy_entry->vme_next;
5ba3f43e 8995 copy_entry->vme_next =
0a7de745 8996 vm_map_copy_to_entry(copy);
5ba3f43e 8997 previous_prev =
0a7de745 8998 copy->cpy_hdr.links.prev;
2d21ac55
A
8999 copy->cpy_hdr.links.prev = copy_entry;
9000 copy->size = copy_size;
5ba3f43e 9001 remaining_entries =
0a7de745 9002 copy->cpy_hdr.nentries;
2d21ac55
A
9003 remaining_entries -= nentries;
9004 copy->cpy_hdr.nentries = nentries;
9005 break;
9006 } else {
9007 local_size += entry_size;
9008 new_offset += entry_size;
9009 nentries++;
9010 }
9011 copy_entry = copy_entry->vme_next;
9012 }
1c79356b 9013 }
5ba3f43e 9014
0a7de745 9015 if ((entry->use_pmap) && (pmap == NULL)) {
1c79356b 9016 kr = vm_map_copy_overwrite_nested(
3e170ce0 9017 VME_SUBMAP(entry),
1c79356b
A
9018 sub_start,
9019 copy,
5ba3f43e 9020 interruptible,
3e170ce0 9021 VME_SUBMAP(entry)->pmap,
6d2010ae 9022 TRUE);
1c79356b
A
9023 } else if (pmap != NULL) {
9024 kr = vm_map_copy_overwrite_nested(
3e170ce0 9025 VME_SUBMAP(entry),
1c79356b
A
9026 sub_start,
9027 copy,
6d2010ae
A
9028 interruptible, pmap,
9029 TRUE);
1c79356b
A
9030 } else {
9031 kr = vm_map_copy_overwrite_nested(
3e170ce0 9032 VME_SUBMAP(entry),
1c79356b
A
9033 sub_start,
9034 copy,
9035 interruptible,
6d2010ae
A
9036 dst_map->pmap,
9037 TRUE);
1c79356b 9038 }
0a7de745
A
9039 if (kr != KERN_SUCCESS) {
9040 if (next_copy != NULL) {
5ba3f43e 9041 copy->cpy_hdr.nentries +=
0a7de745 9042 remaining_entries;
5ba3f43e 9043 copy->cpy_hdr.links.prev->vme_next =
0a7de745 9044 next_copy;
5ba3f43e 9045 copy->cpy_hdr.links.prev
0a7de745 9046 = previous_prev;
2d21ac55 9047 copy->size = total_size;
1c79356b
A
9048 }
9049 return kr;
9050 }
9051 if (dst_end <= local_end) {
0a7de745 9052 return KERN_SUCCESS;
1c79356b
A
9053 }
9054 /* otherwise copy no longer exists, it was */
9055 /* destroyed after successful copy_overwrite */
d9a64523 9056 copy = vm_map_copy_allocate();
1c79356b
A
9057 copy->type = VM_MAP_COPY_ENTRY_LIST;
9058 copy->offset = new_offset;
9059
e2d2fc5c
A
9060 /*
9061 * XXX FBDP
9062 * this does not seem to deal with
9063 * the VM map store (R&B tree)
9064 */
9065
1c79356b
A
9066 total_size -= copy_size;
9067 copy_size = 0;
9068 /* put back remainder of copy in container */
0a7de745 9069 if (next_copy != NULL) {
2d21ac55
A
9070 copy->cpy_hdr.nentries = remaining_entries;
9071 copy->cpy_hdr.links.next = next_copy;
9072 copy->cpy_hdr.links.prev = previous_prev;
9073 copy->size = total_size;
5ba3f43e 9074 next_copy->vme_prev =
0a7de745 9075 vm_map_copy_to_entry(copy);
2d21ac55 9076 next_copy = NULL;
1c79356b
A
9077 }
9078 base_addr = local_end;
9079 vm_map_lock(dst_map);
0a7de745
A
9080 if (!vm_map_lookup_entry(dst_map,
9081 local_end, &tmp_entry)) {
1c79356b 9082 vm_map_unlock(dst_map);
0a7de745 9083 return KERN_INVALID_ADDRESS;
1c79356b
A
9084 }
9085 entry = tmp_entry;
9086 continue;
5ba3f43e 9087 }
1c79356b
A
9088 if (dst_end <= entry->vme_end) {
9089 copy_size = dst_end - base_addr;
9090 break;
9091 }
9092
9093 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 9094 (next->vme_start != entry->vme_end)) {
1c79356b 9095 vm_map_unlock(dst_map);
0a7de745 9096 return KERN_INVALID_ADDRESS;
1c79356b
A
9097 }
9098
9099 entry = next;
9100 }/* for */
9101
9102 next_copy = NULL;
9103 nentries = 1;
9104
9105 /* adjust the copy object */
9106 if (total_size > copy_size) {
0a7de745
A
9107 vm_map_size_t local_size = 0;
9108 vm_map_size_t entry_size;
1c79356b
A
9109
9110 new_offset = copy->offset;
9111 copy_entry = vm_map_copy_first_entry(copy);
0a7de745 9112 while (copy_entry != vm_map_copy_to_entry(copy)) {
5ba3f43e 9113 entry_size = copy_entry->vme_end -
0a7de745
A
9114 copy_entry->vme_start;
9115 if ((local_size < copy_size) &&
9116 ((local_size + entry_size)
2d21ac55 9117 >= copy_size)) {
5ba3f43e 9118 vm_map_copy_clip_end(copy, copy_entry,
0a7de745
A
9119 copy_entry->vme_start +
9120 (copy_size - local_size));
5ba3f43e 9121 entry_size = copy_entry->vme_end -
0a7de745 9122 copy_entry->vme_start;
1c79356b
A
9123 local_size += entry_size;
9124 new_offset += entry_size;
9125 }
0a7de745 9126 if (local_size >= copy_size) {
1c79356b 9127 next_copy = copy_entry->vme_next;
5ba3f43e 9128 copy_entry->vme_next =
0a7de745 9129 vm_map_copy_to_entry(copy);
5ba3f43e 9130 previous_prev =
0a7de745 9131 copy->cpy_hdr.links.prev;
1c79356b
A
9132 copy->cpy_hdr.links.prev = copy_entry;
9133 copy->size = copy_size;
5ba3f43e 9134 remaining_entries =
0a7de745 9135 copy->cpy_hdr.nentries;
1c79356b
A
9136 remaining_entries -= nentries;
9137 copy->cpy_hdr.nentries = nentries;
9138 break;
9139 } else {
9140 local_size += entry_size;
9141 new_offset += entry_size;
9142 nentries++;
9143 }
9144 copy_entry = copy_entry->vme_next;
9145 }
9146 }
9147
9148 if (aligned) {
0a7de745 9149 pmap_t local_pmap;
1c79356b 9150
0a7de745 9151 if (pmap) {
1c79356b 9152 local_pmap = pmap;
0a7de745 9153 } else {
1c79356b 9154 local_pmap = dst_map->pmap;
0a7de745 9155 }
1c79356b 9156
5ba3f43e 9157 if ((kr = vm_map_copy_overwrite_aligned(
0a7de745
A
9158 dst_map, tmp_entry, copy,
9159 base_addr, local_pmap)) != KERN_SUCCESS) {
9160 if (next_copy != NULL) {
5ba3f43e 9161 copy->cpy_hdr.nentries +=
0a7de745
A
9162 remaining_entries;
9163 copy->cpy_hdr.links.prev->vme_next =
9164 next_copy;
9165 copy->cpy_hdr.links.prev =
9166 previous_prev;
1c79356b
A
9167 copy->size += copy_size;
9168 }
9169 return kr;
9170 }
9171 vm_map_unlock(dst_map);
9172 } else {
2d21ac55
A
9173 /*
9174 * Performance gain:
9175 *
9176 * if the copy and dst address are misaligned but the same
9177 * offset within the page we can copy_not_aligned the
9178 * misaligned parts and copy aligned the rest. If they are
9179 * aligned but len is unaligned we simply need to copy
9180 * the end bit unaligned. We'll need to split the misaligned
9181 * bits of the region in this case !
9182 */
9183 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
9184 kr = vm_map_copy_overwrite_unaligned(
9185 dst_map,
9186 tmp_entry,
9187 copy,
9188 base_addr,
9189 discard_on_success);
9190 if (kr != KERN_SUCCESS) {
0a7de745 9191 if (next_copy != NULL) {
1c79356b 9192 copy->cpy_hdr.nentries +=
0a7de745
A
9193 remaining_entries;
9194 copy->cpy_hdr.links.prev->vme_next =
9195 next_copy;
9196 copy->cpy_hdr.links.prev =
9197 previous_prev;
1c79356b
A
9198 copy->size += copy_size;
9199 }
9200 return kr;
9201 }
9202 }
9203 total_size -= copy_size;
0a7de745 9204 if (total_size == 0) {
1c79356b 9205 break;
0a7de745 9206 }
1c79356b
A
9207 base_addr += copy_size;
9208 copy_size = 0;
9209 copy->offset = new_offset;
0a7de745 9210 if (next_copy != NULL) {
1c79356b
A
9211 copy->cpy_hdr.nentries = remaining_entries;
9212 copy->cpy_hdr.links.next = next_copy;
9213 copy->cpy_hdr.links.prev = previous_prev;
9214 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9215 copy->size = total_size;
9216 }
9217 vm_map_lock(dst_map);
0a7de745 9218 while (TRUE) {
5ba3f43e 9219 if (!vm_map_lookup_entry(dst_map,
0a7de745 9220 base_addr, &tmp_entry)) {
1c79356b 9221 vm_map_unlock(dst_map);
0a7de745 9222 return KERN_INVALID_ADDRESS;
1c79356b 9223 }
0a7de745
A
9224 if (tmp_entry->in_transition) {
9225 entry->needs_wakeup = TRUE;
9226 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
9227 } else {
9228 break;
9229 }
9230 }
39236c6e 9231 vm_map_clip_start(dst_map,
0a7de745
A
9232 tmp_entry,
9233 vm_map_trunc_page(base_addr,
9234 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
9235
9236 entry = tmp_entry;
9237 } /* while */
9238
9239 /*
9240 * Throw away the vm_map_copy object
9241 */
0a7de745 9242 if (discard_on_success) {
6d2010ae 9243 vm_map_copy_discard(copy);
0a7de745 9244 }
1c79356b 9245
0a7de745 9246 return KERN_SUCCESS;
1c79356b
A
9247}/* vm_map_copy_overwrite */
9248
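/*
 * A minimal sketch of the "put back remainder of copy in container"
 * bookkeeping used above when only the first part of the copy entry list is
 * handed to a recursive overwrite: the tail (next_copy / previous_prev /
 * remaining_entries) is detached before the partial copy and spliced back
 * afterwards or on error.  The types and helper below are simplified
 * stand-ins, not the real vm_map_copy structures.
 */
struct sketch_entry {
	struct sketch_entry *vme_next;
	struct sketch_entry *vme_prev;
};

struct sketch_copy {
	struct sketch_entry  links;     /* sentinel; links.vme_next is the first entry */
	int                  nentries;
	uint64_t             size;
};

static void
sketch_copy_restore_tail(
	struct sketch_copy  *copy,
	struct sketch_entry *next_copy,         /* first entry of the detached tail, or NULL */
	struct sketch_entry *previous_prev,     /* last entry of the original, full list */
	int                  remaining_entries, /* entries in the detached tail */
	uint64_t             total_size)        /* size of the original, full copy */
{
	if (next_copy == NULL) {
		return;                         /* nothing was detached */
	}
	copy->nentries += remaining_entries;
	/* chain the kept portion's last entry back onto the detached tail ... */
	copy->links.vme_prev->vme_next = next_copy;
	/* ... and point the sentinel at the true last entry again */
	copy->links.vme_prev = previous_prev;
	copy->size = total_size;
	/*
	 * The tail entries' back pointers were never touched by the split,
	 * so no further relinking is needed in this sketch.
	 */
}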
9249kern_return_t
9250vm_map_copy_overwrite(
0a7de745
A
9251 vm_map_t dst_map,
9252 vm_map_offset_t dst_addr,
9253 vm_map_copy_t copy,
9254 boolean_t interruptible)
1c79356b 9255{
0a7de745
A
9256 vm_map_size_t head_size, tail_size;
9257 vm_map_copy_t head_copy, tail_copy;
9258 vm_map_offset_t head_addr, tail_addr;
9259 vm_map_entry_t entry;
9260 kern_return_t kr;
9261 vm_map_offset_t effective_page_mask, effective_page_size;
6d2010ae
A
9262
9263 head_size = 0;
9264 tail_size = 0;
9265 head_copy = NULL;
9266 tail_copy = NULL;
9267 head_addr = 0;
9268 tail_addr = 0;
9269
9270 if (interruptible ||
9271 copy == VM_MAP_COPY_NULL ||
9272 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9273 /*
9274 * We can't split the "copy" map if we're interruptible
9275 * or if we don't have a "copy" map...
9276 */
0a7de745 9277blunt_copy:
6d2010ae 9278 return vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9279 dst_addr,
9280 copy,
9281 interruptible,
9282 (pmap_t) NULL,
9283 TRUE);
6d2010ae
A
9284 }
9285
5ba3f43e
A
9286 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9287 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
0a7de745 9288 effective_page_mask);
5ba3f43e
A
9289 effective_page_size = effective_page_mask + 1;
9290
9291 if (copy->size < 3 * effective_page_size) {
6d2010ae
A
9292 /*
9293 * Too small to bother with optimizing...
9294 */
9295 goto blunt_copy;
9296 }
9297
5ba3f43e
A
9298 if ((dst_addr & effective_page_mask) !=
9299 (copy->offset & effective_page_mask)) {
6d2010ae
A
9300 /*
9301 * Incompatible mis-alignment of source and destination...
9302 */
9303 goto blunt_copy;
9304 }
9305
9306 /*
9307 * Proper alignment or identical mis-alignment at the beginning.
9308 * Let's try and do a small unaligned copy first (if needed)
9309 * and then an aligned copy for the rest.
9310 */
5ba3f43e 9311 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
6d2010ae 9312 head_addr = dst_addr;
5ba3f43e 9313 head_size = (effective_page_size -
0a7de745 9314 (copy->offset & effective_page_mask));
5ba3f43e 9315 head_size = MIN(head_size, copy->size);
6d2010ae 9316 }
5ba3f43e 9317 if (!vm_map_page_aligned(copy->offset + copy->size,
0a7de745 9318 effective_page_mask)) {
6d2010ae
A
9319 /*
9320 * Mis-alignment at the end.
9321 * Do an aligned copy up to the last page and
9322 * then an unaligned copy for the remaining bytes.
9323 */
39236c6e 9324 tail_size = ((copy->offset + copy->size) &
0a7de745 9325 effective_page_mask);
5ba3f43e 9326 tail_size = MIN(tail_size, copy->size);
6d2010ae 9327 tail_addr = dst_addr + copy->size - tail_size;
5ba3f43e 9328 assert(tail_addr >= head_addr + head_size);
6d2010ae 9329 }
5ba3f43e 9330 assert(head_size + tail_size <= copy->size);
6d2010ae
A
9331
9332 if (head_size + tail_size == copy->size) {
9333 /*
9334 * It's all unaligned, no optimization possible...
9335 */
9336 goto blunt_copy;
9337 }
9338
9339 /*
9340 * Can't optimize if there are any submaps in the
9341 * destination due to the way we free the "copy" map
9342 * progressively in vm_map_copy_overwrite_nested()
9343 * in that case.
9344 */
9345 vm_map_lock_read(dst_map);
0a7de745 9346 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6d2010ae
A
9347 vm_map_unlock_read(dst_map);
9348 goto blunt_copy;
9349 }
9350 for (;
0a7de745
A
9351 (entry != vm_map_copy_to_entry(copy) &&
9352 entry->vme_start < dst_addr + copy->size);
9353 entry = entry->vme_next) {
6d2010ae
A
9354 if (entry->is_sub_map) {
9355 vm_map_unlock_read(dst_map);
9356 goto blunt_copy;
9357 }
9358 }
9359 vm_map_unlock_read(dst_map);
9360
9361 if (head_size) {
9362 /*
9363 * Unaligned copy of the first "head_size" bytes, to reach
9364 * a page boundary.
9365 */
5ba3f43e 9366
6d2010ae
A
9367 /*
9368 * Extract "head_copy" out of "copy".
9369 */
d9a64523 9370 head_copy = vm_map_copy_allocate();
6d2010ae 9371 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9372 head_copy->cpy_hdr.entries_pageable =
0a7de745 9373 copy->cpy_hdr.entries_pageable;
6d2010ae
A
9374 vm_map_store_init(&head_copy->cpy_hdr);
9375
5ba3f43e
A
9376 entry = vm_map_copy_first_entry(copy);
9377 if (entry->vme_end < copy->offset + head_size) {
9378 head_size = entry->vme_end - copy->offset;
9379 }
9380
6d2010ae
A
9381 head_copy->offset = copy->offset;
9382 head_copy->size = head_size;
6d2010ae
A
9383 copy->offset += head_size;
9384 copy->size -= head_size;
9385
6d2010ae
A
9386 vm_map_copy_clip_end(copy, entry, copy->offset);
9387 vm_map_copy_entry_unlink(copy, entry);
9388 vm_map_copy_entry_link(head_copy,
0a7de745
A
9389 vm_map_copy_to_entry(head_copy),
9390 entry);
6d2010ae
A
9391
9392 /*
9393 * Do the unaligned copy.
9394 */
9395 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9396 head_addr,
9397 head_copy,
9398 interruptible,
9399 (pmap_t) NULL,
9400 FALSE);
9401 if (kr != KERN_SUCCESS) {
6d2010ae 9402 goto done;
0a7de745 9403 }
6d2010ae
A
9404 }
9405
9406 if (tail_size) {
9407 /*
9408 * Extract "tail_copy" out of "copy".
9409 */
d9a64523 9410 tail_copy = vm_map_copy_allocate();
6d2010ae 9411 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9412 tail_copy->cpy_hdr.entries_pageable =
0a7de745 9413 copy->cpy_hdr.entries_pageable;
6d2010ae
A
9414 vm_map_store_init(&tail_copy->cpy_hdr);
9415
9416 tail_copy->offset = copy->offset + copy->size - tail_size;
9417 tail_copy->size = tail_size;
9418
9419 copy->size -= tail_size;
9420
9421 entry = vm_map_copy_last_entry(copy);
9422 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9423 entry = vm_map_copy_last_entry(copy);
9424 vm_map_copy_entry_unlink(copy, entry);
9425 vm_map_copy_entry_link(tail_copy,
0a7de745
A
9426 vm_map_copy_last_entry(tail_copy),
9427 entry);
6d2010ae
A
9428 }
9429
9430 /*
9431 * Copy most (or possibly all) of the data.
9432 */
9433 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9434 dst_addr + head_size,
9435 copy,
9436 interruptible,
9437 (pmap_t) NULL,
9438 FALSE);
6d2010ae
A
9439 if (kr != KERN_SUCCESS) {
9440 goto done;
9441 }
9442
9443 if (tail_size) {
9444 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9445 tail_addr,
9446 tail_copy,
9447 interruptible,
9448 (pmap_t) NULL,
9449 FALSE);
6d2010ae
A
9450 }
9451
9452done:
9453 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9454 if (kr == KERN_SUCCESS) {
9455 /*
9456 * Discard all the copy maps.
9457 */
9458 if (head_copy) {
9459 vm_map_copy_discard(head_copy);
9460 head_copy = NULL;
9461 }
9462 vm_map_copy_discard(copy);
9463 if (tail_copy) {
9464 vm_map_copy_discard(tail_copy);
9465 tail_copy = NULL;
9466 }
9467 } else {
9468 /*
9469 * Re-assemble the original copy map.
9470 */
9471 if (head_copy) {
9472 entry = vm_map_copy_first_entry(head_copy);
9473 vm_map_copy_entry_unlink(head_copy, entry);
9474 vm_map_copy_entry_link(copy,
0a7de745
A
9475 vm_map_copy_to_entry(copy),
9476 entry);
6d2010ae
A
9477 copy->offset -= head_size;
9478 copy->size += head_size;
9479 vm_map_copy_discard(head_copy);
9480 head_copy = NULL;
9481 }
9482 if (tail_copy) {
9483 entry = vm_map_copy_last_entry(tail_copy);
9484 vm_map_copy_entry_unlink(tail_copy, entry);
9485 vm_map_copy_entry_link(copy,
0a7de745
A
9486 vm_map_copy_last_entry(copy),
9487 entry);
6d2010ae
A
9488 copy->size += tail_size;
9489 vm_map_copy_discard(tail_copy);
9490 tail_copy = NULL;
9491 }
9492 }
9493 return kr;
1c79356b
A
9494}
9495
9496
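/*
 * A minimal sketch of the head/tail split computed in vm_map_copy_overwrite()
 * above, assuming the destination address and the copy offset share the same
 * mis-alignment within a page.  "mask" stands for the effective page mask
 * (page size - 1); the helper name and out-parameters are illustrative only.
 */
static void
sketch_split_head_tail(
	uint64_t dst_addr,      /* destination start address */
	uint64_t copy_offset,   /* copy->offset */
	uint64_t copy_size,     /* copy->size */
	uint64_t mask,          /* effective page mask, e.g. 0xFFF for 4K pages */
	uint64_t *head_size,    /* OUT: unaligned bytes before the first page boundary */
	uint64_t *tail_size)    /* OUT: unaligned bytes after the last page boundary */
{
	*head_size = 0;
	*tail_size = 0;

	if (dst_addr & mask) {
		/* bytes needed to reach the next page boundary */
		*head_size = (mask + 1) - (copy_offset & mask);
		if (*head_size > copy_size) {
			*head_size = copy_size;
		}
	}
	if ((copy_offset + copy_size) & mask) {
		/* bytes past the last page boundary */
		*tail_size = (copy_offset + copy_size) & mask;
		if (*tail_size > copy_size) {
			*tail_size = copy_size;
		}
	}
	/*
	 * Example: dst_addr = 0x1100, copy_offset = 0x100, copy_size = 0x3000,
	 * mask = 0xFFF  =>  head_size = 0xF00 (up to the 0x2000 boundary) and
	 * tail_size = 0x100, leaving 0x2000 bytes for the aligned middle copy.
	 */
}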
9497/*
91447636 9498 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
9499 *
 9500 * Description:
9501 * Physically copy unaligned data
9502 *
9503 * Implementation:
9504 * Unaligned parts of pages have to be physically copied. We use
9505 * a modified form of vm_fault_copy (which understands none-aligned
9506 * page offsets and sizes) to do the copy. We attempt to copy as
9507 * much memory in one go as possibly, however vm_fault_copy copies
9508 * within 1 memory object so we have to find the smaller of "amount left"
9509 * "source object data size" and "target object data size". With
9510 * unaligned data we don't need to split regions, therefore the source
9511 * (copy) object should be one map entry, the target range may be split
9512 * over multiple map entries however. In any event we are pessimistic
9513 * about these assumptions.
9514 *
9515 * Assumptions:
 9516 * dst_map is locked on entry and is returned locked on success,
9517 * unlocked on error.
9518 */
9519
91447636 9520static kern_return_t
1c79356b 9521vm_map_copy_overwrite_unaligned(
0a7de745
A
9522 vm_map_t dst_map,
9523 vm_map_entry_t entry,
9524 vm_map_copy_t copy,
9525 vm_map_offset_t start,
9526 boolean_t discard_on_success)
1c79356b 9527{
0a7de745
A
9528 vm_map_entry_t copy_entry;
9529 vm_map_entry_t copy_entry_next;
9530 vm_map_version_t version;
9531 vm_object_t dst_object;
9532 vm_object_offset_t dst_offset;
9533 vm_object_offset_t src_offset;
9534 vm_object_offset_t entry_offset;
9535 vm_map_offset_t entry_end;
9536 vm_map_size_t src_size,
9537 dst_size,
9538 copy_size,
9539 amount_left;
9540 kern_return_t kr = KERN_SUCCESS;
1c79356b 9541
5ba3f43e 9542
39236c6e
A
9543 copy_entry = vm_map_copy_first_entry(copy);
9544
1c79356b
A
9545 vm_map_lock_write_to_read(dst_map);
9546
91447636 9547 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
9548 amount_left = copy->size;
9549/*
 9550 * unaligned, so we never clipped this entry; we need the offset into
 9551 * the vm_object, not just the data.
5ba3f43e 9552 */
1c79356b 9553 while (amount_left > 0) {
1c79356b
A
9554 if (entry == vm_map_to_entry(dst_map)) {
9555 vm_map_unlock_read(dst_map);
9556 return KERN_INVALID_ADDRESS;
9557 }
9558
9559 /* "start" must be within the current map entry */
0a7de745 9560 assert((start >= entry->vme_start) && (start < entry->vme_end));
1c79356b
A
9561
9562 dst_offset = start - entry->vme_start;
9563
9564 dst_size = entry->vme_end - start;
9565
9566 src_size = copy_entry->vme_end -
0a7de745 9567 (copy_entry->vme_start + src_offset);
1c79356b
A
9568
9569 if (dst_size < src_size) {
9570/*
9571 * we can only copy dst_size bytes before
9572 * we have to get the next destination entry
9573 */
9574 copy_size = dst_size;
9575 } else {
9576/*
9577 * we can only copy src_size bytes before
9578 * we have to get the next source copy entry
9579 */
9580 copy_size = src_size;
9581 }
9582
9583 if (copy_size > amount_left) {
9584 copy_size = amount_left;
9585 }
9586/*
 9587 * Entry needs copy: create a shadow object for
 9588 * the copy-on-write region.
9589 */
9590 if (entry->needs_copy &&
0a7de745 9591 ((entry->protection & VM_PROT_WRITE) != 0)) {
1c79356b
A
9592 if (vm_map_lock_read_to_write(dst_map)) {
9593 vm_map_lock_read(dst_map);
9594 goto RetryLookup;
9595 }
3e170ce0 9596 VME_OBJECT_SHADOW(entry,
0a7de745
A
9597 (vm_map_size_t)(entry->vme_end
9598 - entry->vme_start));
1c79356b
A
9599 entry->needs_copy = FALSE;
9600 vm_map_lock_write_to_read(dst_map);
9601 }
3e170ce0 9602 dst_object = VME_OBJECT(entry);
1c79356b
A
9603/*
 9604 * unlike with the virtual (aligned) copy, we're going
 9605 * to fault on it; therefore we need a target object.
9606 */
0a7de745 9607 if (dst_object == VM_OBJECT_NULL) {
1c79356b
A
9608 if (vm_map_lock_read_to_write(dst_map)) {
9609 vm_map_lock_read(dst_map);
9610 goto RetryLookup;
9611 }
91447636 9612 dst_object = vm_object_allocate((vm_map_size_t)
0a7de745 9613 entry->vme_end - entry->vme_start);
3e170ce0
A
9614 VME_OBJECT(entry) = dst_object;
9615 VME_OFFSET_SET(entry, 0);
fe8ab488 9616 assert(entry->use_pmap);
1c79356b
A
9617 vm_map_lock_write_to_read(dst_map);
9618 }
9619/*
9620 * Take an object reference and unlock map. The "entry" may
9621 * disappear or change when the map is unlocked.
9622 */
9623 vm_object_reference(dst_object);
9624 version.main_timestamp = dst_map->timestamp;
3e170ce0 9625 entry_offset = VME_OFFSET(entry);
1c79356b
A
9626 entry_end = entry->vme_end;
9627 vm_map_unlock_read(dst_map);
9628/*
9629 * Copy as much as possible in one pass
9630 */
9631 kr = vm_fault_copy(
3e170ce0
A
9632 VME_OBJECT(copy_entry),
9633 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
9634 &copy_size,
9635 dst_object,
9636 entry_offset + dst_offset,
9637 dst_map,
9638 &version,
9639 THREAD_UNINT );
9640
9641 start += copy_size;
9642 src_offset += copy_size;
9643 amount_left -= copy_size;
9644/*
9645 * Release the object reference
9646 */
9647 vm_object_deallocate(dst_object);
9648/*
9649 * If a hard error occurred, return it now
9650 */
0a7de745 9651 if (kr != KERN_SUCCESS) {
1c79356b 9652 return kr;
0a7de745 9653 }
1c79356b
A
9654
9655 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
0a7de745 9656 || amount_left == 0) {
1c79356b
A
9657/*
9658 * all done with this copy entry, dispose.
9659 */
39236c6e
A
9660 copy_entry_next = copy_entry->vme_next;
9661
9662 if (discard_on_success) {
9663 vm_map_copy_entry_unlink(copy, copy_entry);
9664 assert(!copy_entry->is_sub_map);
3e170ce0 9665 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
9666 vm_map_copy_entry_dispose(copy, copy_entry);
9667 }
1c79356b 9668
39236c6e
A
9669 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9670 amount_left) {
1c79356b
A
9671/*
 9672 * not finished copying but ran out of source
9673 */
9674 return KERN_INVALID_ADDRESS;
9675 }
39236c6e
A
9676
9677 copy_entry = copy_entry_next;
9678
1c79356b
A
9679 src_offset = 0;
9680 }
9681
0a7de745 9682 if (amount_left == 0) {
1c79356b 9683 return KERN_SUCCESS;
0a7de745 9684 }
1c79356b
A
9685
9686 vm_map_lock_read(dst_map);
9687 if (version.main_timestamp == dst_map->timestamp) {
9688 if (start == entry_end) {
9689/*
9690 * destination region is split. Use the version
9691 * information to avoid a lookup in the normal
9692 * case.
9693 */
9694 entry = entry->vme_next;
9695/*
9696 * should be contiguous. Fail if we encounter
9697 * a hole in the destination.
9698 */
9699 if (start != entry->vme_start) {
9700 vm_map_unlock_read(dst_map);
0a7de745 9701 return KERN_INVALID_ADDRESS;
1c79356b
A
9702 }
9703 }
9704 } else {
9705/*
9706 * Map version check failed.
9707 * we must lookup the entry because somebody
9708 * might have changed the map behind our backs.
9709 */
0a7de745
A
9710RetryLookup:
9711 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
1c79356b 9712 vm_map_unlock_read(dst_map);
0a7de745 9713 return KERN_INVALID_ADDRESS;
1c79356b
A
9714 }
9715 }
9716 }/* while */
9717
1c79356b
A
9718 return KERN_SUCCESS;
9719}/* vm_map_copy_overwrite_unaligned */
9720
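/*
 * A minimal sketch of the per-pass size selection described above for the
 * unaligned overwrite: each call to vm_fault_copy() can cover at most the
 * rest of the current destination entry, the rest of the current source
 * (copy) entry, and the amount still left to copy overall.  The plain
 * integer types and helper name are illustrative assumptions.
 */
static inline uint64_t
sketch_unaligned_pass_size(
	uint64_t dst_size,      /* bytes left in the current destination entry */
	uint64_t src_size,      /* bytes left in the current copy entry */
	uint64_t amount_left)   /* bytes left in the whole overwrite */
{
	uint64_t pass = (dst_size < src_size) ? dst_size : src_size;

	if (pass > amount_left) {
		pass = amount_left;
	}
	return pass;
}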
9721/*
91447636 9722 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
9723 *
9724 * Description:
9725 * Does all the vm_trickery possible for whole pages.
9726 *
9727 * Implementation:
9728 *
9729 * If there are no permanent objects in the destination,
9730 * and the source and destination map entry zones match,
9731 * and the destination map entry is not shared,
9732 * then the map entries can be deleted and replaced
9733 * with those from the copy. The following code is the
9734 * basic idea of what to do, but there are lots of annoying
9735 * little details about getting protection and inheritance
9736 * right. Should add protection, inheritance, and sharing checks
9737 * to the above pass and make sure that no wiring is involved.
9738 */
9739
e2d2fc5c
A
9740int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9741int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9742int vm_map_copy_overwrite_aligned_src_large = 0;
9743
91447636 9744static kern_return_t
1c79356b 9745vm_map_copy_overwrite_aligned(
0a7de745
A
9746 vm_map_t dst_map,
9747 vm_map_entry_t tmp_entry,
9748 vm_map_copy_t copy,
9749 vm_map_offset_t start,
9750 __unused pmap_t pmap)
1c79356b 9751{
0a7de745
A
9752 vm_object_t object;
9753 vm_map_entry_t copy_entry;
9754 vm_map_size_t copy_size;
9755 vm_map_size_t size;
9756 vm_map_entry_t entry;
5ba3f43e 9757
1c79356b 9758 while ((copy_entry = vm_map_copy_first_entry(copy))
0a7de745 9759 != vm_map_copy_to_entry(copy)) {
1c79356b 9760 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5ba3f43e 9761
1c79356b 9762 entry = tmp_entry;
fe8ab488
A
9763 if (entry->is_sub_map) {
9764 /* unnested when clipped earlier */
9765 assert(!entry->use_pmap);
9766 }
1c79356b
A
9767 if (entry == vm_map_to_entry(dst_map)) {
9768 vm_map_unlock(dst_map);
9769 return KERN_INVALID_ADDRESS;
9770 }
9771 size = (entry->vme_end - entry->vme_start);
9772 /*
9773 * Make sure that no holes popped up in the
9774 * address map, and that the protection is
9775 * still valid, in case the map was unlocked
9776 * earlier.
9777 */
9778
9779 if ((entry->vme_start != start) || ((entry->is_sub_map)
0a7de745 9780 && !entry->needs_copy)) {
1c79356b 9781 vm_map_unlock(dst_map);
0a7de745 9782 return KERN_INVALID_ADDRESS;
1c79356b
A
9783 }
9784 assert(entry != vm_map_to_entry(dst_map));
9785
9786 /*
9787 * Check protection again
9788 */
9789
0a7de745 9790 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 9791 vm_map_unlock(dst_map);
0a7de745 9792 return KERN_PROTECTION_FAILURE;
1c79356b
A
9793 }
9794
9795 /*
9796 * Adjust to source size first
9797 */
9798
9799 if (copy_size < size) {
fe8ab488
A
9800 if (entry->map_aligned &&
9801 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
0a7de745 9802 VM_MAP_PAGE_MASK(dst_map))) {
fe8ab488
A
9803 /* no longer map-aligned */
9804 entry->map_aligned = FALSE;
9805 }
1c79356b
A
9806 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9807 size = copy_size;
9808 }
9809
9810 /*
9811 * Adjust to destination size
9812 */
9813
9814 if (size < copy_size) {
9815 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 9816 copy_entry->vme_start + size);
1c79356b
A
9817 copy_size = size;
9818 }
9819
9820 assert((entry->vme_end - entry->vme_start) == size);
9821 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9822 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9823
9824 /*
9825 * If the destination contains temporary unshared memory,
9826 * we can perform the copy by throwing it away and
9827 * installing the source data.
9828 */
9829
3e170ce0 9830 object = VME_OBJECT(entry);
5ba3f43e 9831 if ((!entry->is_shared &&
0a7de745
A
9832 ((object == VM_OBJECT_NULL) ||
9833 (object->internal && !object->true_share))) ||
1c79356b 9834 entry->needs_copy) {
0a7de745
A
9835 vm_object_t old_object = VME_OBJECT(entry);
9836 vm_object_offset_t old_offset = VME_OFFSET(entry);
9837 vm_object_offset_t offset;
1c79356b
A
9838
9839 /*
9840 * Ensure that the source and destination aren't
9841 * identical
9842 */
3e170ce0
A
9843 if (old_object == VME_OBJECT(copy_entry) &&
9844 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
9845 vm_map_copy_entry_unlink(copy, copy_entry);
9846 vm_map_copy_entry_dispose(copy, copy_entry);
9847
0a7de745 9848 if (old_object != VM_OBJECT_NULL) {
1c79356b 9849 vm_object_deallocate(old_object);
0a7de745 9850 }
1c79356b
A
9851
9852 start = tmp_entry->vme_end;
9853 tmp_entry = tmp_entry->vme_next;
9854 continue;
9855 }
9856
5ba3f43e 9857#if !CONFIG_EMBEDDED
0a7de745
A
9858#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9859#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
9860 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9861 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
9862 copy_size <= __TRADEOFF1_COPY_SIZE) {
9863 /*
9864 * Virtual vs. Physical copy tradeoff #1.
9865 *
9866 * Copying only a few pages out of a large
9867 * object: do a physical copy instead of
9868 * a virtual copy, to avoid possibly keeping
9869 * the entire large object alive because of
9870 * those few copy-on-write pages.
9871 */
9872 vm_map_copy_overwrite_aligned_src_large++;
9873 goto slow_copy;
9874 }
5ba3f43e 9875#endif /* !CONFIG_EMBEDDED */
e2d2fc5c 9876
3e170ce0
A
9877 if ((dst_map->pmap != kernel_pmap) &&
9878 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
0a7de745 9879 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
ebb1b9f4
A
9880 vm_object_t new_object, new_shadow;
9881
9882 /*
9883 * We're about to map something over a mapping
9884 * established by malloc()...
9885 */
3e170ce0 9886 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
9887 if (new_object != VM_OBJECT_NULL) {
9888 vm_object_lock_shared(new_object);
9889 }
9890 while (new_object != VM_OBJECT_NULL &&
5ba3f43e 9891#if !CONFIG_EMBEDDED
0a7de745
A
9892 !new_object->true_share &&
9893 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
5ba3f43e 9894#endif /* !CONFIG_EMBEDDED */
0a7de745 9895 new_object->internal) {
ebb1b9f4
A
9896 new_shadow = new_object->shadow;
9897 if (new_shadow == VM_OBJECT_NULL) {
9898 break;
9899 }
9900 vm_object_lock_shared(new_shadow);
9901 vm_object_unlock(new_object);
9902 new_object = new_shadow;
9903 }
9904 if (new_object != VM_OBJECT_NULL) {
9905 if (!new_object->internal) {
9906 /*
9907 * The new mapping is backed
9908 * by an external object. We
9909 * don't want malloc'ed memory
9910 * to be replaced with such a
9911 * non-anonymous mapping, so
9912 * let's go off the optimized
9913 * path...
9914 */
e2d2fc5c 9915 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
9916 vm_object_unlock(new_object);
9917 goto slow_copy;
9918 }
5ba3f43e 9919#if !CONFIG_EMBEDDED
e2d2fc5c
A
9920 if (new_object->true_share ||
9921 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9922 /*
9923 * Same if there's a "true_share"
9924 * object in the shadow chain, or
9925 * an object with a non-default
9926 * (SYMMETRIC) copy strategy.
9927 */
9928 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9929 vm_object_unlock(new_object);
9930 goto slow_copy;
9931 }
5ba3f43e 9932#endif /* !CONFIG_EMBEDDED */
ebb1b9f4
A
9933 vm_object_unlock(new_object);
9934 }
9935 /*
9936 * The new mapping is still backed by
9937 * anonymous (internal) memory, so it's
9938 * OK to substitute it for the original
9939 * malloc() mapping.
9940 */
9941 }
9942
1c79356b 9943 if (old_object != VM_OBJECT_NULL) {
0a7de745
A
9944 if (entry->is_sub_map) {
9945 if (entry->use_pmap) {
0c530ab8 9946#ifndef NO_NESTED_PMAP
5ba3f43e 9947 pmap_unnest(dst_map->pmap,
0a7de745
A
9948 (addr64_t)entry->vme_start,
9949 entry->vme_end - entry->vme_start);
9950#endif /* NO_NESTED_PMAP */
9951 if (dst_map->mapped_in_other_pmaps) {
9bccf70c
A
9952 /* clean up parent */
9953 /* map/maps */
2d21ac55
A
9954 vm_map_submap_pmap_clean(
9955 dst_map, entry->vme_start,
9956 entry->vme_end,
3e170ce0
A
9957 VME_SUBMAP(entry),
9958 VME_OFFSET(entry));
9bccf70c
A
9959 }
9960 } else {
9961 vm_map_submap_pmap_clean(
5ba3f43e 9962 dst_map, entry->vme_start,
9bccf70c 9963 entry->vme_end,
3e170ce0
A
9964 VME_SUBMAP(entry),
9965 VME_OFFSET(entry));
9bccf70c 9966 }
0a7de745
A
9967 vm_map_deallocate(VME_SUBMAP(entry));
9968 } else {
9969 if (dst_map->mapped_in_other_pmaps) {
39236c6e 9970 vm_object_pmap_protect_options(
3e170ce0
A
9971 VME_OBJECT(entry),
9972 VME_OFFSET(entry),
5ba3f43e 9973 entry->vme_end
2d21ac55 9974 - entry->vme_start,
9bccf70c
A
9975 PMAP_NULL,
9976 entry->vme_start,
39236c6e
A
9977 VM_PROT_NONE,
9978 PMAP_OPTIONS_REMOVE);
9bccf70c 9979 } else {
39236c6e 9980 pmap_remove_options(
5ba3f43e
A
9981 dst_map->pmap,
9982 (addr64_t)(entry->vme_start),
39236c6e
A
9983 (addr64_t)(entry->vme_end),
9984 PMAP_OPTIONS_REMOVE);
9bccf70c 9985 }
1c79356b 9986 vm_object_deallocate(old_object);
0a7de745 9987 }
1c79356b
A
9988 }
9989
a39ff7e2
A
9990 if (entry->iokit_acct) {
9991 /* keep using iokit accounting */
9992 entry->use_pmap = FALSE;
9993 } else {
9994 /* use pmap accounting */
9995 entry->use_pmap = TRUE;
9996 }
1c79356b 9997 entry->is_sub_map = FALSE;
3e170ce0
A
9998 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9999 object = VME_OBJECT(entry);
1c79356b
A
10000 entry->needs_copy = copy_entry->needs_copy;
10001 entry->wired_count = 0;
10002 entry->user_wired_count = 0;
3e170ce0 10003 offset = VME_OFFSET(copy_entry);
5ba3f43e 10004 VME_OFFSET_SET(entry, offset);
1c79356b
A
10005
10006 vm_map_copy_entry_unlink(copy, copy_entry);
10007 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 10008
1c79356b 10009 /*
2d21ac55 10010 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
10011 * this optimization only saved on average 2 us per page if ALL
10012 * the pages in the source were currently mapped
 10013 * and ALL the pages in the dest were touched. If fewer than
 10014 * 2/3 of the pages were touched, this optimization actually cost more cycles;
2d21ac55 10015 * it also puts a lot of pressure on the pmap layer w/r/t mapping structures
1c79356b
A
10016 */
10017
1c79356b
A
10018 /*
10019 * Set up for the next iteration. The map
10020 * has not been unlocked, so the next
10021 * address should be at the end of this
10022 * entry, and the next map entry should be
10023 * the one following it.
10024 */
10025
10026 start = tmp_entry->vme_end;
10027 tmp_entry = tmp_entry->vme_next;
10028 } else {
0a7de745
A
10029 vm_map_version_t version;
10030 vm_object_t dst_object;
10031 vm_object_offset_t dst_offset;
10032 kern_return_t r;
1c79356b 10033
0a7de745 10034slow_copy:
e2d2fc5c 10035 if (entry->needs_copy) {
3e170ce0 10036 VME_OBJECT_SHADOW(entry,
0a7de745
A
10037 (entry->vme_end -
10038 entry->vme_start));
e2d2fc5c
A
10039 entry->needs_copy = FALSE;
10040 }
10041
3e170ce0
A
10042 dst_object = VME_OBJECT(entry);
10043 dst_offset = VME_OFFSET(entry);
ebb1b9f4 10044
1c79356b
A
10045 /*
10046 * Take an object reference, and record
10047 * the map version information so that the
10048 * map can be safely unlocked.
10049 */
10050
ebb1b9f4
A
10051 if (dst_object == VM_OBJECT_NULL) {
10052 /*
10053 * We would usually have just taken the
10054 * optimized path above if the destination
10055 * object has not been allocated yet. But we
10056 * now disable that optimization if the copy
10057 * entry's object is not backed by anonymous
10058 * memory to avoid replacing malloc'ed
10059 * (i.e. re-usable) anonymous memory with a
10060 * not-so-anonymous mapping.
10061 * So we have to handle this case here and
10062 * allocate a new VM object for this map entry.
10063 */
10064 dst_object = vm_object_allocate(
10065 entry->vme_end - entry->vme_start);
10066 dst_offset = 0;
3e170ce0
A
10067 VME_OBJECT_SET(entry, dst_object);
10068 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 10069 assert(entry->use_pmap);
ebb1b9f4
A
10070 }
10071
1c79356b
A
10072 vm_object_reference(dst_object);
10073
9bccf70c
A
10074 /* account for unlock bumping up timestamp */
10075 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
10076
10077 vm_map_unlock(dst_map);
10078
10079 /*
10080 * Copy as much as possible in one pass
10081 */
10082
10083 copy_size = size;
10084 r = vm_fault_copy(
3e170ce0
A
10085 VME_OBJECT(copy_entry),
10086 VME_OFFSET(copy_entry),
2d21ac55
A
10087 &copy_size,
10088 dst_object,
10089 dst_offset,
10090 dst_map,
10091 &version,
10092 THREAD_UNINT );
1c79356b
A
10093
10094 /*
10095 * Release the object reference
10096 */
10097
10098 vm_object_deallocate(dst_object);
10099
10100 /*
10101 * If a hard error occurred, return it now
10102 */
10103
0a7de745
A
10104 if (r != KERN_SUCCESS) {
10105 return r;
10106 }
1c79356b
A
10107
10108 if (copy_size != 0) {
10109 /*
10110 * Dispose of the copied region
10111 */
10112
10113 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 10114 copy_entry->vme_start + copy_size);
1c79356b 10115 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 10116 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
10117 vm_map_copy_entry_dispose(copy, copy_entry);
10118 }
10119
10120 /*
10121 * Pick up in the destination map where we left off.
10122 *
10123 * Use the version information to avoid a lookup
10124 * in the normal case.
10125 */
10126
10127 start += copy_size;
10128 vm_map_lock(dst_map);
e2d2fc5c
A
10129 if (version.main_timestamp == dst_map->timestamp &&
10130 copy_size != 0) {
1c79356b
A
10131 /* We can safely use saved tmp_entry value */
10132
fe8ab488
A
10133 if (tmp_entry->map_aligned &&
10134 !VM_MAP_PAGE_ALIGNED(
10135 start,
10136 VM_MAP_PAGE_MASK(dst_map))) {
10137 /* no longer map-aligned */
10138 tmp_entry->map_aligned = FALSE;
10139 }
1c79356b
A
10140 vm_map_clip_end(dst_map, tmp_entry, start);
10141 tmp_entry = tmp_entry->vme_next;
10142 } else {
10143 /* Must do lookup of tmp_entry */
10144
10145 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10146 vm_map_unlock(dst_map);
0a7de745 10147 return KERN_INVALID_ADDRESS;
1c79356b 10148 }
fe8ab488
A
10149 if (tmp_entry->map_aligned &&
10150 !VM_MAP_PAGE_ALIGNED(
10151 start,
10152 VM_MAP_PAGE_MASK(dst_map))) {
10153 /* no longer map-aligned */
10154 tmp_entry->map_aligned = FALSE;
10155 }
1c79356b
A
10156 vm_map_clip_start(dst_map, tmp_entry, start);
10157 }
10158 }
10159 }/* while */
10160
0a7de745 10161 return KERN_SUCCESS;
1c79356b
A
10162}/* vm_map_copy_overwrite_aligned */
10163
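/*
 * A minimal sketch of the test used in vm_map_copy_overwrite_aligned() above
 * to choose between the two strategies: if the destination entry only holds
 * temporary, unshared (anonymous) memory, its object can simply be thrown
 * away and replaced with the source object; otherwise the data is physically
 * copied with vm_fault_copy().  The struct below is a simplified stand-in,
 * and the real routine applies further tradeoffs (object size, malloc tags)
 * before actually taking the replacement path.
 */
struct sketch_dst_entry {
	int is_shared;            /* entry->is_shared */
	int needs_copy;           /* entry->needs_copy */
	int has_object;           /* VME_OBJECT(entry) != VM_OBJECT_NULL */
	int object_internal;      /* object->internal */
	int object_true_share;    /* object->true_share */
};

static inline int
sketch_can_replace_entry(const struct sketch_dst_entry *e)
{
	if (e->needs_copy) {
		return 1;         /* still copy-on-write: safe to substitute */
	}
	if (e->is_shared) {
		return 0;         /* someone else sees this mapping: copy in place */
	}
	/* no object yet, or purely anonymous memory not forced to be shared */
	return !e->has_object ||
	       (e->object_internal && !e->object_true_share);
}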
10164/*
91447636 10165 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
10166 *
10167 * Description:
10168 * Copy in data to a kernel buffer from space in the
91447636 10169 * source map. The original space may be optionally
1c79356b
A
10170 * deallocated.
10171 *
10172 * If successful, returns a new copy object.
10173 */
91447636 10174static kern_return_t
1c79356b 10175vm_map_copyin_kernel_buffer(
0a7de745
A
10176 vm_map_t src_map,
10177 vm_map_offset_t src_addr,
10178 vm_map_size_t len,
10179 boolean_t src_destroy,
10180 vm_map_copy_t *copy_result)
1c79356b 10181{
91447636 10182 kern_return_t kr;
1c79356b 10183 vm_map_copy_t copy;
b0d623f7
A
10184 vm_size_t kalloc_size;
10185
0a7de745 10186 if (len > msg_ool_size_small) {
3e170ce0 10187 return KERN_INVALID_ARGUMENT;
0a7de745 10188 }
1c79356b 10189
3e170ce0
A
10190 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10191
10192 copy = (vm_map_copy_t)kalloc(kalloc_size);
0a7de745 10193 if (copy == VM_MAP_COPY_NULL) {
1c79356b 10194 return KERN_RESOURCE_SHORTAGE;
0a7de745 10195 }
1c79356b
A
10196 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10197 copy->size = len;
10198 copy->offset = 0;
1c79356b 10199
3e170ce0 10200 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
10201 if (kr != KERN_SUCCESS) {
10202 kfree(copy, kalloc_size);
10203 return kr;
1c79356b
A
10204 }
10205 if (src_destroy) {
39236c6e
A
10206 (void) vm_map_remove(
10207 src_map,
10208 vm_map_trunc_page(src_addr,
0a7de745 10209 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10210 vm_map_round_page(src_addr + len,
0a7de745 10211 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10212 (VM_MAP_REMOVE_INTERRUPTIBLE |
0a7de745
A
10213 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10214 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
1c79356b
A
10215 }
10216 *copy_result = copy;
10217 return KERN_SUCCESS;
10218}
10219
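/*
 * A minimal sketch of the sizing rule used above: only "small" out-of-line
 * data is staged through a kernel buffer, and the allocation covers the
 * vm_map_copy header plus the payload.  The two constants below are
 * illustrative placeholders; the kernel uses msg_ool_size_small and
 * cpy_kdata_hdr_sz, whose actual values are configuration-dependent.
 */
#define SKETCH_OOL_SIZE_SMALL  (4 * 1024)   /* placeholder threshold */
#define SKETCH_KDATA_HDR_SZ    64           /* placeholder header size */

static inline int
sketch_kernel_buffer_alloc_size(uint64_t len, uint64_t *alloc_size)
{
	if (len > SKETCH_OOL_SIZE_SMALL) {
		return 0;                       /* too big: use the entry-list path */
	}
	*alloc_size = SKETCH_KDATA_HDR_SZ + len;   /* header + copied bytes */
	return 1;
}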
10220/*
91447636 10221 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
10222 *
10223 * Description:
10224 * Copy out data from a kernel buffer into space in the
 10225 * destination map. The space may be optionally dynamically
10226 * allocated.
10227 *
10228 * If successful, consumes the copy object.
10229 * Otherwise, the caller is responsible for it.
10230 */
91447636
A
10231static int vm_map_copyout_kernel_buffer_failures = 0;
10232static kern_return_t
1c79356b 10233vm_map_copyout_kernel_buffer(
0a7de745
A
10234 vm_map_t map,
10235 vm_map_address_t *addr, /* IN/OUT */
10236 vm_map_copy_t copy,
10237 vm_map_size_t copy_size,
10238 boolean_t overwrite,
10239 boolean_t consume_on_success)
1c79356b
A
10240{
10241 kern_return_t kr = KERN_SUCCESS;
91447636 10242 thread_t thread = current_thread();
1c79356b 10243
39037602
A
10244 assert(copy->size == copy_size);
10245
3e170ce0
A
10246 /*
10247 * check for corrupted vm_map_copy structure
10248 */
0a7de745 10249 if (copy_size > msg_ool_size_small || copy->offset) {
3e170ce0 10250 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
10251 (long long)copy->size, (long long)copy->offset);
10252 }
3e170ce0 10253
1c79356b 10254 if (!overwrite) {
1c79356b
A
10255 /*
10256 * Allocate space in the target map for the data
10257 */
10258 *addr = 0;
5ba3f43e 10259 kr = vm_map_enter(map,
0a7de745
A
10260 addr,
10261 vm_map_round_page(copy_size,
10262 VM_MAP_PAGE_MASK(map)),
10263 (vm_map_offset_t) 0,
10264 VM_FLAGS_ANYWHERE,
10265 VM_MAP_KERNEL_FLAGS_NONE,
10266 VM_KERN_MEMORY_NONE,
10267 VM_OBJECT_NULL,
10268 (vm_object_offset_t) 0,
10269 FALSE,
10270 VM_PROT_DEFAULT,
10271 VM_PROT_ALL,
10272 VM_INHERIT_DEFAULT);
10273 if (kr != KERN_SUCCESS) {
91447636 10274 return kr;
0a7de745 10275 }
5ba3f43e
A
10276#if KASAN
10277 if (map->pmap == kernel_pmap) {
10278 kasan_notify_address(*addr, copy->size);
10279 }
10280#endif
1c79356b
A
10281 }
10282
10283 /*
10284 * Copyout the data from the kernel buffer to the target map.
5ba3f43e 10285 */
91447636 10286 if (thread->map == map) {
1c79356b
A
10287 /*
10288 * If the target map is the current map, just do
10289 * the copy.
10290 */
39037602
A
10291 assert((vm_size_t)copy_size == copy_size);
10292 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 10293 kr = KERN_INVALID_ADDRESS;
1c79356b 10294 }
0a7de745 10295 } else {
1c79356b
A
10296 vm_map_t oldmap;
10297
10298 /*
10299 * If the target map is another map, assume the
10300 * target's address space identity for the duration
10301 * of the copy.
10302 */
10303 vm_map_reference(map);
10304 oldmap = vm_map_switch(map);
10305
39037602
A
10306 assert((vm_size_t)copy_size == copy_size);
10307 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
10308 vm_map_copyout_kernel_buffer_failures++;
10309 kr = KERN_INVALID_ADDRESS;
1c79356b 10310 }
5ba3f43e 10311
1c79356b
A
10312 (void) vm_map_switch(oldmap);
10313 vm_map_deallocate(map);
10314 }
10315
91447636
A
10316 if (kr != KERN_SUCCESS) {
10317 /* the copy failed, clean up */
10318 if (!overwrite) {
10319 /*
10320 * Deallocate the space we allocated in the target map.
10321 */
39236c6e
A
10322 (void) vm_map_remove(
10323 map,
10324 vm_map_trunc_page(*addr,
0a7de745 10325 VM_MAP_PAGE_MASK(map)),
39236c6e 10326 vm_map_round_page((*addr +
0a7de745
A
10327 vm_map_round_page(copy_size,
10328 VM_MAP_PAGE_MASK(map))),
10329 VM_MAP_PAGE_MASK(map)),
d9a64523 10330 VM_MAP_REMOVE_NO_FLAGS);
91447636
A
10331 *addr = 0;
10332 }
10333 } else {
 10334 /* copy was successful, discard the copy structure */
39236c6e 10335 if (consume_on_success) {
39037602 10336 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 10337 }
91447636 10338 }
1c79356b 10339
91447636 10340 return kr;
1c79356b 10341}
5ba3f43e 10342
1c79356b 10343/*
0a7de745 10344 * Routine: vm_map_copy_insert [internal use only]
5ba3f43e 10345 *
1c79356b
A
10346 * Description:
10347 * Link a copy chain ("copy") into a map at the
10348 * specified location (after "where").
10349 * Side effects:
10350 * The copy chain is destroyed.
1c79356b 10351 */
d9a64523
A
10352static void
10353vm_map_copy_insert(
0a7de745
A
10354 vm_map_t map,
10355 vm_map_entry_t after_where,
10356 vm_map_copy_t copy)
d9a64523 10357{
0a7de745 10358 vm_map_entry_t entry;
d9a64523
A
10359
10360 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10361 entry = vm_map_copy_first_entry(copy);
10362 vm_map_copy_entry_unlink(copy, entry);
10363 vm_map_store_entry_link(map, after_where, entry,
0a7de745 10364 VM_MAP_KERNEL_FLAGS_NONE);
d9a64523
A
10365 after_where = entry;
10366 }
10367 zfree(vm_map_copy_zone, copy);
10368}
1c79356b 10369
39236c6e
A
10370void
10371vm_map_copy_remap(
0a7de745
A
10372 vm_map_t map,
10373 vm_map_entry_t where,
10374 vm_map_copy_t copy,
10375 vm_map_offset_t adjustment,
10376 vm_prot_t cur_prot,
10377 vm_prot_t max_prot,
10378 vm_inherit_t inheritance)
39236c6e 10379{
0a7de745 10380 vm_map_entry_t copy_entry, new_entry;
39236c6e
A
10381
10382 for (copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
10383 copy_entry != vm_map_copy_to_entry(copy);
10384 copy_entry = copy_entry->vme_next) {
39236c6e
A
10385 /* get a new VM map entry for the map */
10386 new_entry = vm_map_entry_create(map,
0a7de745 10387 !map->hdr.entries_pageable);
39236c6e
A
10388 /* copy the "copy entry" to the new entry */
10389 vm_map_entry_copy(new_entry, copy_entry);
10390 /* adjust "start" and "end" */
10391 new_entry->vme_start += adjustment;
10392 new_entry->vme_end += adjustment;
10393 /* clear some attributes */
10394 new_entry->inheritance = inheritance;
10395 new_entry->protection = cur_prot;
10396 new_entry->max_protection = max_prot;
10397 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10398 /* take an extra reference on the entry's "object" */
10399 if (new_entry->is_sub_map) {
fe8ab488 10400 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
10401 vm_map_lock(VME_SUBMAP(new_entry));
10402 vm_map_reference(VME_SUBMAP(new_entry));
10403 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 10404 } else {
3e170ce0 10405 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
10406 }
10407 /* insert the new entry in the map */
d9a64523 10408 vm_map_store_entry_link(map, where, new_entry,
0a7de745 10409 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
10410 /* continue inserting the "copy entries" after the new entry */
10411 where = new_entry;
10412 }
10413}
10414
2dced7af 10415
39037602
A
10416/*
10417 * Returns true if *size matches (or is in the range of) copy->size.
10418 * Upon returning true, the *size field is updated with the actual size of the
10419 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10420 */
2dced7af
A
10421boolean_t
10422vm_map_copy_validate_size(
0a7de745
A
10423 vm_map_t dst_map,
10424 vm_map_copy_t copy,
10425 vm_map_size_t *size)
2dced7af 10426{
0a7de745 10427 if (copy == VM_MAP_COPY_NULL) {
2dced7af 10428 return FALSE;
0a7de745 10429 }
39037602
A
10430 vm_map_size_t copy_sz = copy->size;
10431 vm_map_size_t sz = *size;
2dced7af
A
10432 switch (copy->type) {
10433 case VM_MAP_COPY_OBJECT:
10434 case VM_MAP_COPY_KERNEL_BUFFER:
0a7de745 10435 if (sz == copy_sz) {
2dced7af 10436 return TRUE;
0a7de745 10437 }
2dced7af
A
10438 break;
10439 case VM_MAP_COPY_ENTRY_LIST:
10440 /*
10441 * potential page-size rounding prevents us from exactly
10442 * validating this flavor of vm_map_copy, but we can at least
10443 * assert that it's within a range.
10444 */
39037602
A
10445 if (copy_sz >= sz &&
10446 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10447 *size = copy_sz;
2dced7af 10448 return TRUE;
39037602 10449 }
2dced7af
A
10450 break;
10451 default:
10452 break;
10453 }
10454 return FALSE;
10455}
10456
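/*
 * A minimal sketch of the acceptance rule above for entry-list copies:
 * object and kernel-buffer copies must match the expected size exactly,
 * while an entry-list copy may have been rounded up to the destination
 * map's page size, so any value in [expected, round_page(expected)] is
 * accepted.  The round-up below assumes a power-of-two page size; the
 * helper name and plain types are illustrative.
 */
static inline int
sketch_copy_size_ok_entry_list(
	uint64_t copy_size,     /* copy->size */
	uint64_t expected,      /* caller-supplied *size */
	uint64_t page_mask)     /* destination map's page mask */
{
	uint64_t rounded = (expected + page_mask) & ~page_mask;

	return copy_size >= expected && copy_size <= rounded;
}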
39037602
A
10457/*
10458 * Routine: vm_map_copyout_size
10459 *
10460 * Description:
10461 * Copy out a copy chain ("copy") into newly-allocated
10462 * space in the destination map. Uses a prevalidated
10463 * size for the copy object (vm_map_copy_validate_size).
10464 *
10465 * If successful, consumes the copy object.
10466 * Otherwise, the caller is responsible for it.
10467 */
10468kern_return_t
10469vm_map_copyout_size(
0a7de745
A
10470 vm_map_t dst_map,
10471 vm_map_address_t *dst_addr, /* OUT */
10472 vm_map_copy_t copy,
10473 vm_map_size_t copy_size)
39037602
A
10474{
10475 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
0a7de745
A
10476 TRUE, /* consume_on_success */
10477 VM_PROT_DEFAULT,
10478 VM_PROT_ALL,
10479 VM_INHERIT_DEFAULT);
39037602 10480}
2dced7af 10481
1c79356b
A
10482/*
10483 * Routine: vm_map_copyout
10484 *
10485 * Description:
10486 * Copy out a copy chain ("copy") into newly-allocated
10487 * space in the destination map.
10488 *
10489 * If successful, consumes the copy object.
10490 * Otherwise, the caller is responsible for it.
10491 */
10492kern_return_t
10493vm_map_copyout(
0a7de745
A
10494 vm_map_t dst_map,
10495 vm_map_address_t *dst_addr, /* OUT */
10496 vm_map_copy_t copy)
39236c6e 10497{
39037602 10498 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
0a7de745
A
10499 TRUE, /* consume_on_success */
10500 VM_PROT_DEFAULT,
10501 VM_PROT_ALL,
10502 VM_INHERIT_DEFAULT);
39236c6e
A
10503}
10504
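/*
 * A minimal sketch of the space calculation done in vm_map_copyout_internal()
 * below for an entry-list copy: the reservation in the destination map starts
 * at the page boundary containing copy->offset and ends at the page boundary
 * after the last byte, and each copied entry is later shifted by
 * (chosen start - vm_copy_start).  A power-of-two page size is assumed and
 * the helper name is illustrative.
 */
static inline uint64_t
sketch_copyout_reservation_size(
	uint64_t copy_offset,   /* copy->offset */
	uint64_t copy_size,     /* copy->size */
	uint64_t page_mask)     /* copy/destination page mask */
{
	uint64_t vm_copy_start = copy_offset & ~page_mask;                           /* trunc_page */
	uint64_t end = (copy_offset + copy_size + page_mask) & ~page_mask;           /* round_page */

	return end - vm_copy_start;
	/*
	 * Example: copy_offset = 0x1200, copy_size = 0x2100, 4K pages
	 * => the reservation covers [0x1000, 0x4000), i.e. 0x3000 bytes.
	 */
}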
10505kern_return_t
10506vm_map_copyout_internal(
0a7de745
A
10507 vm_map_t dst_map,
10508 vm_map_address_t *dst_addr, /* OUT */
10509 vm_map_copy_t copy,
10510 vm_map_size_t copy_size,
10511 boolean_t consume_on_success,
10512 vm_prot_t cur_protection,
10513 vm_prot_t max_protection,
10514 vm_inherit_t inheritance)
1c79356b 10515{
0a7de745
A
10516 vm_map_size_t size;
10517 vm_map_size_t adjustment;
10518 vm_map_offset_t start;
10519 vm_object_offset_t vm_copy_start;
10520 vm_map_entry_t last;
10521 vm_map_entry_t entry;
10522 vm_map_entry_t hole_entry;
1c79356b
A
10523
10524 /*
10525 * Check for null copy object.
10526 */
10527
10528 if (copy == VM_MAP_COPY_NULL) {
10529 *dst_addr = 0;
0a7de745 10530 return KERN_SUCCESS;
1c79356b
A
10531 }
10532
39037602
A
10533 if (copy->size != copy_size) {
10534 *dst_addr = 0;
10535 return KERN_FAILURE;
10536 }
10537
1c79356b
A
10538 /*
10539 * Check for special copy object, created
10540 * by vm_map_copyin_object.
10541 */
10542
10543 if (copy->type == VM_MAP_COPY_OBJECT) {
0a7de745
A
10544 vm_object_t object = copy->cpy_object;
10545 kern_return_t kr;
10546 vm_object_offset_t offset;
1c79356b 10547
91447636 10548 offset = vm_object_trunc_page(copy->offset);
39037602 10549 size = vm_map_round_page((copy_size +
0a7de745
A
10550 (vm_map_size_t)(copy->offset -
10551 offset)),
10552 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
10553 *dst_addr = 0;
10554 kr = vm_map_enter(dst_map, dst_addr, size,
0a7de745
A
10555 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10556 VM_MAP_KERNEL_FLAGS_NONE,
10557 VM_KERN_MEMORY_NONE,
10558 object, offset, FALSE,
10559 VM_PROT_DEFAULT, VM_PROT_ALL,
10560 VM_INHERIT_DEFAULT);
10561 if (kr != KERN_SUCCESS) {
10562 return kr;
10563 }
1c79356b 10564 /* Account for non-pagealigned copy object */
91447636 10565 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
0a7de745 10566 if (consume_on_success) {
39236c6e 10567 zfree(vm_map_copy_zone, copy);
0a7de745
A
10568 }
10569 return KERN_SUCCESS;
1c79356b
A
10570 }
10571
10572 /*
10573 * Check for special kernel buffer allocated
10574 * by new_ipc_kmsg_copyin.
10575 */
10576
10577 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602 10578 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
0a7de745
A
10579 copy, copy_size, FALSE,
10580 consume_on_success);
1c79356b
A
10581 }
10582
39236c6e 10583
1c79356b
A
10584 /*
10585 * Find space for the data
10586 */
10587
39236c6e 10588 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
0a7de745 10589 VM_MAP_COPY_PAGE_MASK(copy));
39037602 10590 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
0a7de745
A
10591 VM_MAP_COPY_PAGE_MASK(copy))
10592 - vm_copy_start;
1c79356b 10593
39236c6e 10594
0a7de745 10595StartAgain:;
1c79356b
A
10596
10597 vm_map_lock(dst_map);
0a7de745 10598 if (dst_map->disable_vmentry_reuse == TRUE) {
6d2010ae
A
10599 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10600 last = entry;
10601 } else {
3e170ce0 10602 if (dst_map->holelistenabled) {
d9a64523 10603 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
3e170ce0
A
10604
10605 if (hole_entry == NULL) {
10606 /*
10607 * No more space in the map?
10608 */
10609 vm_map_unlock(dst_map);
0a7de745 10610 return KERN_NO_SPACE;
3e170ce0
A
10611 }
10612
10613 last = hole_entry;
10614 start = last->vme_start;
10615 } else {
10616 assert(first_free_is_valid(dst_map));
10617 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
0a7de745 10618 vm_map_min(dst_map) : last->vme_end;
3e170ce0 10619 }
39236c6e 10620 start = vm_map_round_page(start,
0a7de745 10621 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 10622 }
1c79356b
A
10623
10624 while (TRUE) {
0a7de745
A
10625 vm_map_entry_t next = last->vme_next;
10626 vm_map_offset_t end = start + size;
1c79356b
A
10627
10628 if ((end > dst_map->max_offset) || (end < start)) {
10629 if (dst_map->wait_for_space) {
10630 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10631 assert_wait((event_t) dst_map,
0a7de745 10632 THREAD_INTERRUPTIBLE);
1c79356b 10633 vm_map_unlock(dst_map);
91447636 10634 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
10635 goto StartAgain;
10636 }
10637 }
10638 vm_map_unlock(dst_map);
0a7de745 10639 return KERN_NO_SPACE;
1c79356b
A
10640 }
10641
3e170ce0 10642 if (dst_map->holelistenabled) {
0a7de745 10643 if (last->vme_end >= end) {
3e170ce0 10644 break;
0a7de745 10645 }
3e170ce0
A
10646 } else {
10647 /*
10648 * If there are no more entries, we must win.
10649 *
10650 * OR
10651 *
10652 * If there is another entry, it must be
10653 * after the end of the potential new region.
10654 */
10655
0a7de745 10656 if (next == vm_map_to_entry(dst_map)) {
3e170ce0 10657 break;
0a7de745 10658 }
3e170ce0 10659
0a7de745 10660 if (next->vme_start >= end) {
3e170ce0 10661 break;
0a7de745 10662 }
3e170ce0 10663 }
1c79356b
A
10664
10665 last = next;
3e170ce0
A
10666
10667 if (dst_map->holelistenabled) {
d9a64523 10668 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
3e170ce0
A
10669 /*
10670 * Wrapped around
10671 */
10672 vm_map_unlock(dst_map);
0a7de745 10673 return KERN_NO_SPACE;
3e170ce0
A
10674 }
10675 start = last->vme_start;
10676 } else {
10677 start = last->vme_end;
10678 }
39236c6e 10679 start = vm_map_round_page(start,
0a7de745 10680 VM_MAP_PAGE_MASK(dst_map));
39236c6e
A
10681 }
10682
3e170ce0
A
10683 if (dst_map->holelistenabled) {
10684 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10685 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10686 }
10687 }
10688
10689
39236c6e 10690 adjustment = start - vm_copy_start;
0a7de745 10691 if (!consume_on_success) {
39236c6e
A
10692 /*
10693 * We're not allowed to consume "copy", so we'll have to
10694 * copy its map entries into the destination map below.
10695 * No need to re-allocate map entries from the correct
10696 * (pageable or not) zone, since we'll get new map entries
10697 * during the transfer.
10698 * We'll also adjust the map entries's "start" and "end"
10699 * during the transfer, to keep "copy"'s entries consistent
10700 * with its "offset".
10701 */
10702 goto after_adjustments;
1c79356b
A
10703 }
10704
10705 /*
10706 * Since we're going to just drop the map
10707 * entries from the copy into the destination
10708 * map, they must come from the same pool.
10709 */
10710
10711 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
10712 /*
10713 * Mismatches occur when dealing with the default
10714 * pager.
10715 */
0a7de745
A
10716 zone_t old_zone;
10717 vm_map_entry_t next, new;
2d21ac55
A
10718
10719 /*
10720 * Find the zone that the copies were allocated from
10721 */
7ddcb079 10722
2d21ac55
A
10723 entry = vm_map_copy_first_entry(copy);
10724
10725 /*
10726 * Reinitialize the copy so that vm_map_copy_entry_link
10727 * will work.
10728 */
6d2010ae 10729 vm_map_store_copy_reset(copy, entry);
2d21ac55 10730 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
10731
10732 /*
10733 * Copy each entry.
10734 */
10735 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 10736 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 10737 vm_map_entry_copy_full(new, entry);
fe8ab488
A
10738 assert(!new->iokit_acct);
10739 if (new->is_sub_map) {
10740 /* clr address space specifics */
10741 new->use_pmap = FALSE;
10742 }
2d21ac55 10743 vm_map_copy_entry_link(copy,
0a7de745
A
10744 vm_map_copy_last_entry(copy),
10745 new);
2d21ac55 10746 next = entry->vme_next;
7ddcb079 10747 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
10748 zfree(old_zone, entry);
10749 entry = next;
10750 }
1c79356b
A
10751 }
10752
10753 /*
10754 * Adjust the addresses in the copy chain, and
10755 * reset the region attributes.
10756 */
10757
1c79356b 10758 for (entry = vm_map_copy_first_entry(copy);
0a7de745
A
10759 entry != vm_map_copy_to_entry(copy);
10760 entry = entry->vme_next) {
39236c6e
A
10761 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10762 /*
10763 * We're injecting this copy entry into a map that
10764 * has the standard page alignment, so clear
10765 * "map_aligned" (which might have been inherited
10766 * from the original map entry).
10767 */
10768 entry->map_aligned = FALSE;
10769 }
10770
1c79356b
A
10771 entry->vme_start += adjustment;
10772 entry->vme_end += adjustment;
10773
39236c6e
A
10774 if (entry->map_aligned) {
10775 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
0a7de745 10776 VM_MAP_PAGE_MASK(dst_map)));
39236c6e 10777 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
0a7de745 10778 VM_MAP_PAGE_MASK(dst_map)));
39236c6e
A
10779 }
10780
1c79356b
A
10781 entry->inheritance = VM_INHERIT_DEFAULT;
10782 entry->protection = VM_PROT_DEFAULT;
10783 entry->max_protection = VM_PROT_ALL;
10784 entry->behavior = VM_BEHAVIOR_DEFAULT;
10785
10786 /*
10787 * If the entry is now wired,
10788 * map the pages into the destination map.
10789 */
10790 if (entry->wired_count != 0) {
39037602 10791 vm_map_offset_t va;
0a7de745 10792 vm_object_offset_t offset;
39037602 10793 vm_object_t object;
2d21ac55 10794 vm_prot_t prot;
0a7de745 10795 int type_of_fault;
1c79356b 10796
3e170ce0
A
10797 object = VME_OBJECT(entry);
10798 offset = VME_OFFSET(entry);
2d21ac55 10799 va = entry->vme_start;
1c79356b 10800
2d21ac55 10801 pmap_pageable(dst_map->pmap,
0a7de745
A
10802 entry->vme_start,
10803 entry->vme_end,
10804 TRUE);
1c79356b 10805
2d21ac55 10806 while (va < entry->vme_end) {
0a7de745 10807 vm_page_t m;
d9a64523 10808 struct vm_object_fault_info fault_info = {};
1c79356b 10809
2d21ac55
A
10810 /*
10811 * Look up the page in the object.
10812 * Assert that the page will be found in the
10813 * top object:
10814 * either
10815 * the object was newly created by
10816 * vm_object_copy_slowly, and has
10817 * copies of all of the pages from
10818 * the source object
10819 * or
10820 * the object was moved from the old
10821 * map entry; because the old map
10822 * entry was wired, all of the pages
10823 * were in the top-level object.
10824 * (XXX not true if we wire pages for
10825 * reading)
10826 */
10827 vm_object_lock(object);
91447636 10828
2d21ac55 10829 m = vm_page_lookup(object, offset);
b0d623f7 10830 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
0a7de745 10831 m->vmp_absent) {
2d21ac55 10832 panic("vm_map_copyout: wiring %p", m);
0a7de745 10833 }
1c79356b 10834
2d21ac55 10835 prot = entry->protection;
1c79356b 10836
3e170ce0 10837 if (override_nx(dst_map, VME_ALIAS(entry)) &&
0a7de745
A
10838 prot) {
10839 prot |= VM_PROT_EXECUTE;
10840 }
1c79356b 10841
2d21ac55 10842 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 10843
d9a64523
A
10844 fault_info.user_tag = VME_ALIAS(entry);
10845 fault_info.pmap_options = 0;
10846 if (entry->iokit_acct ||
10847 (!entry->is_sub_map && !entry->use_pmap)) {
10848 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10849 }
10850
10851 vm_fault_enter(m,
0a7de745
A
10852 dst_map->pmap,
10853 va,
10854 prot,
10855 prot,
10856 VM_PAGE_WIRED(m),
10857 FALSE, /* change_wiring */
10858 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10859 &fault_info,
10860 NULL, /* need_retry */
10861 &type_of_fault);
1c79356b 10862
2d21ac55 10863 vm_object_unlock(object);
1c79356b 10864
2d21ac55
A
10865 offset += PAGE_SIZE_64;
10866 va += PAGE_SIZE;
1c79356b
A
10867 }
10868 }
10869 }
10870
39236c6e
A
10871after_adjustments:
10872
1c79356b
A
10873 /*
10874 * Correct the page alignment for the result
10875 */
10876
10877 *dst_addr = start + (copy->offset - vm_copy_start);
10878
5ba3f43e
A
10879#if KASAN
10880 kasan_notify_address(*dst_addr, size);
10881#endif
10882
1c79356b
A
10883 /*
10884 * Update the hints and the map size
10885 */
10886
39236c6e
A
10887 if (consume_on_success) {
10888 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10889 } else {
10890 SAVE_HINT_MAP_WRITE(dst_map, last);
10891 }
1c79356b
A
10892
10893 dst_map->size += size;
10894
10895 /*
10896 * Link in the copy
10897 */
10898
39236c6e
A
10899 if (consume_on_success) {
10900 vm_map_copy_insert(dst_map, last, copy);
10901 } else {
10902 vm_map_copy_remap(dst_map, last, copy, adjustment,
0a7de745
A
10903 cur_protection, max_protection,
10904 inheritance);
39236c6e 10905 }
1c79356b
A
10906
10907 vm_map_unlock(dst_map);
10908
10909 /*
10910 * XXX If wiring_required, call vm_map_pageable
10911 */
10912
0a7de745 10913 return KERN_SUCCESS;
1c79356b
A
10914}
10915
1c79356b
A
10916/*
10917 * Routine: vm_map_copyin
10918 *
10919 * Description:
2d21ac55
A
10920 * see vm_map_copyin_common. Exported via Unsupported.exports.
10921 *
10922 */
10923
10924#undef vm_map_copyin
10925
10926kern_return_t
10927vm_map_copyin(
0a7de745
A
10928 vm_map_t src_map,
10929 vm_map_address_t src_addr,
10930 vm_map_size_t len,
10931 boolean_t src_destroy,
10932 vm_map_copy_t *copy_result) /* OUT */
2d21ac55 10933{
0a7de745
A
10934 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10935 FALSE, copy_result, FALSE);
2d21ac55
A
10936}
10937
10938/*
10939 * Routine: vm_map_copyin_common
10940 *
10941 * Description:
1c79356b
A
10942 * Copy the specified region (src_addr, len) from the
10943 * source address space (src_map), possibly removing
10944 * the region from the source address space (src_destroy).
10945 *
10946 * Returns:
10947 * A vm_map_copy_t object (copy_result), suitable for
10948 * insertion into another address space (using vm_map_copyout),
10949 * copying over another address space region (using
10950 * vm_map_copy_overwrite). If the copy is unused, it
10951 * should be destroyed (using vm_map_copy_discard).
10952 *
10953 * In/out conditions:
10954 * The source map should not be locked on entry.
10955 */
10956
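/*
 * Illustrative sketch (not part of the original source): one way a caller
 * might move a range between two maps using the vm_map_copyin() wrapper
 * defined above together with vm_map_copyout(). The maps, addresses and
 * error handling below are assumptions; the unused copy is destroyed with
 * vm_map_copy_discard() on failure, as the description above recommends
 * for unused copies.
 */
#if 0   /* illustrative sketch only -- not compiled */
static kern_return_t
example_move_range(
	vm_map_t         src_map,    /* placeholder source map */
	vm_map_address_t src_addr,   /* placeholder source address */
	vm_map_size_t    len,
	vm_map_t         dst_map,    /* placeholder destination map */
	vm_map_address_t *dst_addr)  /* OUT: where the copy landed */
{
	vm_map_copy_t copy;
	kern_return_t kr;

	/* capture the source range; FALSE: leave the source mapping intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* insert the copy into the destination map at a kernel-chosen address */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* the copy was not consumed; discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif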
10957typedef struct submap_map {
0a7de745
A
10958 vm_map_t parent_map;
10959 vm_map_offset_t base_start;
10960 vm_map_offset_t base_end;
10961 vm_map_size_t base_len;
1c79356b
A
10962 struct submap_map *next;
10963} submap_map_t;
10964
10965kern_return_t
10966vm_map_copyin_common(
0a7de745 10967 vm_map_t src_map,
91447636 10968 vm_map_address_t src_addr,
0a7de745
A
10969 vm_map_size_t len,
10970 boolean_t src_destroy,
10971 __unused boolean_t src_volatile,
10972 vm_map_copy_t *copy_result, /* OUT */
10973 boolean_t use_maxprot)
4bd07ac2
A
10974{
10975 int flags;
10976
10977 flags = 0;
10978 if (src_destroy) {
10979 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10980 }
10981 if (use_maxprot) {
10982 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10983 }
10984 return vm_map_copyin_internal(src_map,
0a7de745
A
10985 src_addr,
10986 len,
10987 flags,
10988 copy_result);
4bd07ac2
A
10989}
10990kern_return_t
10991vm_map_copyin_internal(
0a7de745 10992 vm_map_t src_map,
4bd07ac2 10993 vm_map_address_t src_addr,
0a7de745
A
10994 vm_map_size_t len,
10995 int flags,
10996 vm_map_copy_t *copy_result) /* OUT */
1c79356b 10997{
0a7de745
A
10998 vm_map_entry_t tmp_entry; /* Result of last map lookup --
10999 * in multi-level lookup, this
11000 * entry contains the actual
11001 * vm_object/offset.
11002 */
11003 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11004
11005 vm_map_offset_t src_start; /* Start of current entry --
11006 * where copy is taking place now
11007 */
11008 vm_map_offset_t src_end; /* End of entire region to be
11009 * copied */
2d21ac55 11010 vm_map_offset_t src_base;
0a7de745
A
11011 vm_map_t base_map = src_map;
11012 boolean_t map_share = FALSE;
11013 submap_map_t *parent_maps = NULL;
1c79356b 11014
0a7de745 11015 vm_map_copy_t copy; /* Resulting copy */
fe8ab488 11016 vm_map_address_t copy_addr;
0a7de745
A
11017 vm_map_size_t copy_size;
11018 boolean_t src_destroy;
11019 boolean_t use_maxprot;
11020 boolean_t preserve_purgeable;
11021 boolean_t entry_was_shared;
11022 vm_map_entry_t saved_src_entry;
4bd07ac2
A
11023
11024 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11025 return KERN_INVALID_ARGUMENT;
11026 }
5ba3f43e 11027
4bd07ac2
A
11028 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11029 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602 11030 preserve_purgeable =
0a7de745 11031 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
11032
11033 /*
11034 * Check for copies of zero bytes.
11035 */
11036
11037 if (len == 0) {
11038 *copy_result = VM_MAP_COPY_NULL;
0a7de745 11039 return KERN_SUCCESS;
1c79356b
A
11040 }
11041
4a249263
A
11042 /*
11043 * Check that the end address doesn't overflow
11044 */
11045 src_end = src_addr + len;
0a7de745 11046 if (src_end < src_addr) {
4a249263 11047 return KERN_INVALID_ADDRESS;
0a7de745 11048 }
4a249263 11049
39037602
A
11050 /*
11051 * Compute (page aligned) start and end of region
11052 */
11053 src_start = vm_map_trunc_page(src_addr,
0a7de745 11054 VM_MAP_PAGE_MASK(src_map));
39037602 11055 src_end = vm_map_round_page(src_end,
0a7de745 11056 VM_MAP_PAGE_MASK(src_map));
39037602 11057
1c79356b
A
11058 /*
11059 * If the copy is sufficiently small, use a kernel buffer instead
11060 * of making a virtual copy. The theory being that the cost of
11061 * setting up VM (and taking C-O-W faults) dominates the copy costs
11062 * for small regions.
11063 */
4bd07ac2
A
11064 if ((len < msg_ool_size_small) &&
11065 !use_maxprot &&
39037602
A
11066 !preserve_purgeable &&
11067 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11068 /*
11069 * Since the "msg_ool_size_small" threshold was increased and
11070 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11071 * address space limits, we revert to doing a virtual copy if the
11072 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11073 * of the commpage would now fail when it used to work.
11074 */
11075 (src_start >= vm_map_min(src_map) &&
0a7de745
A
11076 src_start < vm_map_max(src_map) &&
11077 src_end >= vm_map_min(src_map) &&
11078 src_end < vm_map_max(src_map))) {
2d21ac55 11079 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
0a7de745
A
11080 src_destroy, copy_result);
11081 }
1c79356b 11082
b0d623f7 11083 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 11084
1c79356b
A
11085 /*
11086 * Allocate a header element for the list.
11087 *
5ba3f43e 11088 * Use the start and end in the header to
1c79356b
A
11089 * remember the endpoints prior to rounding.
11090 */
11091
d9a64523 11092 copy = vm_map_copy_allocate();
1c79356b 11093 copy->type = VM_MAP_COPY_ENTRY_LIST;
1c79356b 11094 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
11095#if 00
11096 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11097#else
11098 /*
11099 * The copy entries can be broken down for a variety of reasons,
11100 * so we can't guarantee that they will remain map-aligned...
11101 * Will need to adjust the first copy_entry's "vme_start" and
11102 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11103 * rather than the original map's alignment.
11104 */
11105 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11106#endif
1c79356b 11107
0a7de745 11108 vm_map_store_init( &(copy->cpy_hdr));
6d2010ae 11109
1c79356b
A
11110 copy->offset = src_addr;
11111 copy->size = len;
5ba3f43e 11112
7ddcb079 11113 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11114
0a7de745
A
11115#define RETURN(x) \
11116 MACRO_BEGIN \
11117 vm_map_unlock(src_map); \
11118 if(src_map != base_map) \
11119 vm_map_deallocate(src_map); \
11120 if (new_entry != VM_MAP_ENTRY_NULL) \
11121 vm_map_copy_entry_dispose(copy,new_entry); \
11122 vm_map_copy_discard(copy); \
11123 { \
11124 submap_map_t *_ptr; \
11125 \
11126 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11127 parent_maps=parent_maps->next; \
11128 if (_ptr->parent_map != base_map) \
11129 vm_map_deallocate(_ptr->parent_map); \
11130 kfree(_ptr, sizeof(submap_map_t)); \
11131 } \
11132 } \
11133 MACRO_RETURN(x); \
1c79356b
A
11134 MACRO_END
11135
11136 /*
11137 * Find the beginning of the region.
11138 */
11139
0a7de745 11140 vm_map_lock(src_map);
1c79356b 11141
fe8ab488
A
11142 /*
11143 * Lookup the original "src_addr" rather than the truncated
11144 * "src_start", in case "src_start" falls in a non-map-aligned
11145 * map entry *before* the map entry that contains "src_addr"...
11146 */
0a7de745 11147 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
1c79356b 11148 RETURN(KERN_INVALID_ADDRESS);
0a7de745
A
11149 }
11150 if (!tmp_entry->is_sub_map) {
fe8ab488
A
11151 /*
11152 * ... but clip to the map-rounded "src_start" rather than
11153 * "src_addr" to preserve map-alignment. We'll adjust the
11154 * first copy entry at the end, if needed.
11155 */
1c79356b
A
11156 vm_map_clip_start(src_map, tmp_entry, src_start);
11157 }
fe8ab488
A
11158 if (src_start < tmp_entry->vme_start) {
11159 /*
11160 * Move "src_start" up to the start of the
11161 * first map entry to copy.
11162 */
11163 src_start = tmp_entry->vme_start;
11164 }
1c79356b
A
11165 /* set for later submap fix-up */
11166 copy_addr = src_start;
11167
11168 /*
11169 * Go through entries until we get to the end.
11170 */
11171
11172 while (TRUE) {
0a7de745
A
11173 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11174 vm_map_size_t src_size; /* Size of source
11175 * map entry (in both
11176 * maps)
11177 */
11178
11179 vm_object_t src_object; /* Object to copy */
11180 vm_object_offset_t src_offset;
11181
11182 boolean_t src_needs_copy; /* Should source map
11183 * be made read-only
11184 * for copy-on-write?
11185 */
11186
11187 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11188
11189 boolean_t was_wired; /* Was source wired? */
11190 vm_map_version_t version; /* Version before locks
11191 * dropped to make copy
11192 */
11193 kern_return_t result; /* Return value from
11194 * copy_strategically.
11195 */
11196 while (tmp_entry->is_sub_map) {
91447636 11197 vm_map_size_t submap_len;
1c79356b
A
11198 submap_map_t *ptr;
11199
11200 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11201 ptr->next = parent_maps;
11202 parent_maps = ptr;
11203 ptr->parent_map = src_map;
11204 ptr->base_start = src_start;
11205 ptr->base_end = src_end;
11206 submap_len = tmp_entry->vme_end - src_start;
0a7de745
A
11207 if (submap_len > (src_end - src_start)) {
11208 submap_len = src_end - src_start;
11209 }
2d21ac55 11210 ptr->base_len = submap_len;
5ba3f43e 11211
1c79356b 11212 src_start -= tmp_entry->vme_start;
3e170ce0 11213 src_start += VME_OFFSET(tmp_entry);
1c79356b 11214 src_end = src_start + submap_len;
3e170ce0 11215 src_map = VME_SUBMAP(tmp_entry);
1c79356b 11216 vm_map_lock(src_map);
9bccf70c
A
11217 /* keep an outstanding reference for all maps in */
11218 /* the parents tree except the base map */
11219 vm_map_reference(src_map);
1c79356b
A
11220 vm_map_unlock(ptr->parent_map);
11221 if (!vm_map_lookup_entry(
0a7de745 11222 src_map, src_start, &tmp_entry)) {
1c79356b 11223 RETURN(KERN_INVALID_ADDRESS);
0a7de745 11224 }
1c79356b 11225 map_share = TRUE;
0a7de745 11226 if (!tmp_entry->is_sub_map) {
2d21ac55 11227 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11228 }
1c79356b
A
11229 src_entry = tmp_entry;
11230 }
2d21ac55
A
11231 /* we are now in the lowest level submap... */
11232
5ba3f43e 11233 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
3e170ce0 11234 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
11235 /* This is not, supported for now.In future */
11236 /* we will need to detect the phys_contig */
11237 /* condition and then upgrade copy_slowly */
11238 /* to do physical copy from the device mem */
11239 /* based object. We can piggy-back off of */
11240 /* the was wired boolean to set-up the */
11241 /* proper handling */
0b4e3aa0
A
11242 RETURN(KERN_PROTECTION_FAILURE);
11243 }
1c79356b 11244 /*
5ba3f43e 11245 * Create a new address map entry to hold the result.
1c79356b
A
11246 * Fill in the fields from the appropriate source entries.
11247 * We must unlock the source map to do this if we need
11248 * to allocate a map entry.
11249 */
11250 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
11251 version.main_timestamp = src_map->timestamp;
11252 vm_map_unlock(src_map);
1c79356b 11253
7ddcb079 11254 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11255
2d21ac55
A
11256 vm_map_lock(src_map);
11257 if ((version.main_timestamp + 1) != src_map->timestamp) {
11258 if (!vm_map_lookup_entry(src_map, src_start,
0a7de745 11259 &tmp_entry)) {
2d21ac55
A
11260 RETURN(KERN_INVALID_ADDRESS);
11261 }
0a7de745 11262 if (!tmp_entry->is_sub_map) {
2d21ac55 11263 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11264 }
2d21ac55 11265 continue; /* restart w/ new tmp_entry */
1c79356b 11266 }
1c79356b
A
11267 }
11268
11269 /*
11270 * Verify that the region can be read.
11271 */
11272 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
0a7de745
A
11273 !use_maxprot) ||
11274 (src_entry->max_protection & VM_PROT_READ) == 0) {
1c79356b 11275 RETURN(KERN_PROTECTION_FAILURE);
0a7de745 11276 }
1c79356b
A
11277
11278 /*
11279 * Clip against the endpoints of the entire region.
11280 */
11281
11282 vm_map_clip_end(src_map, src_entry, src_end);
11283
11284 src_size = src_entry->vme_end - src_start;
3e170ce0
A
11285 src_object = VME_OBJECT(src_entry);
11286 src_offset = VME_OFFSET(src_entry);
1c79356b
A
11287 was_wired = (src_entry->wired_count != 0);
11288
11289 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
11290 if (new_entry->is_sub_map) {
11291 /* clr address space specifics */
11292 new_entry->use_pmap = FALSE;
a39ff7e2
A
11293 } else {
11294 /*
11295 * We're dealing with a copy-on-write operation,
11296 * so the resulting mapping should not inherit the
11297 * original mapping's accounting settings.
11298 * "iokit_acct" should have been cleared in
11299 * vm_map_entry_copy().
11300 * "use_pmap" should be reset to its default (TRUE)
11301 * so that the new mapping gets accounted for in
11302 * the task's memory footprint.
11303 */
11304 assert(!new_entry->iokit_acct);
11305 new_entry->use_pmap = TRUE;
fe8ab488 11306 }
1c79356b
A
11307
11308 /*
11309 * Attempt non-blocking copy-on-write optimizations.
11310 */
11311
813fb2f6
A
11312 if (src_destroy &&
11313 (src_object == VM_OBJECT_NULL ||
0a7de745
A
11314 (src_object->internal &&
11315 src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11316 src_entry->vme_start <= src_addr &&
11317 src_entry->vme_end >= src_end &&
11318 !map_share))) {
2d21ac55
A
11319 /*
11320 * If we are destroying the source, and the object
11321 * is internal, we can move the object reference
11322 * from the source to the copy. The copy is
11323 * copy-on-write only if the source is.
11324 * We make another reference to the object, because
11325 * destroying the source entry will deallocate it.
b226f5e5
A
11326 *
11327 * This memory transfer has to be atomic (to prevent
11328 * the VM object from being shared or copied while
11329 * it's being moved here), so we can only do this
11330 * if we won't have to unlock the VM map, i.e. the
11331 * entire range must be covered by this map entry.
2d21ac55
A
11332 */
11333 vm_object_reference(src_object);
1c79356b 11334
2d21ac55
A
11335 /*
11336 * Copy is always unwired. vm_map_copy_entry
11337 * set its wired count to zero.
11338 */
1c79356b 11339
2d21ac55 11340 goto CopySuccessful;
1c79356b
A
11341 }
11342
11343
0a7de745 11344RestartCopy:
1c79356b 11345 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 11346 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 11347 was_wired, 0);
55e303ae 11348 if ((src_object == VM_OBJECT_NULL ||
0a7de745 11349 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
2d21ac55 11350 vm_object_copy_quickly(
3e170ce0 11351 &VME_OBJECT(new_entry),
2d21ac55
A
11352 src_offset,
11353 src_size,
11354 &src_needs_copy,
11355 &new_entry_needs_copy)) {
1c79356b
A
11356 new_entry->needs_copy = new_entry_needs_copy;
11357
11358 /*
11359 * Handle copy-on-write obligations
11360 */
11361
11362 if (src_needs_copy && !tmp_entry->needs_copy) {
0a7de745 11363 vm_prot_t prot;
0c530ab8
A
11364
11365 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11366
3e170ce0 11367 if (override_nx(src_map, VME_ALIAS(src_entry))
0a7de745
A
11368 && prot) {
11369 prot |= VM_PROT_EXECUTE;
11370 }
2d21ac55 11371
55e303ae
A
11372 vm_object_pmap_protect(
11373 src_object,
11374 src_offset,
11375 src_size,
0a7de745
A
11376 (src_entry->is_shared ?
11377 PMAP_NULL
11378 : src_map->pmap),
55e303ae 11379 src_entry->vme_start,
0c530ab8
A
11380 prot);
11381
3e170ce0 11382 assert(tmp_entry->wired_count == 0);
55e303ae 11383 tmp_entry->needs_copy = TRUE;
1c79356b
A
11384 }
11385
11386 /*
11387 * The map has never been unlocked, so it's safe
11388 * to move to the next entry rather than doing
11389 * another lookup.
11390 */
11391
11392 goto CopySuccessful;
11393 }
11394
5ba3f43e
A
11395 entry_was_shared = tmp_entry->is_shared;
11396
1c79356b
A
11397 /*
11398 * Take an object reference, so that we may
11399 * release the map lock(s).
11400 */
11401
11402 assert(src_object != VM_OBJECT_NULL);
11403 vm_object_reference(src_object);
11404
11405 /*
11406 * Record the timestamp for later verification.
11407 * Unlock the map.
11408 */
11409
11410 version.main_timestamp = src_map->timestamp;
0a7de745 11411 vm_map_unlock(src_map); /* Increments timestamp once! */
5ba3f43e
A
11412 saved_src_entry = src_entry;
11413 tmp_entry = VM_MAP_ENTRY_NULL;
11414 src_entry = VM_MAP_ENTRY_NULL;
1c79356b
A
11415
11416 /*
11417 * Perform the copy
11418 */
11419
11420 if (was_wired) {
0a7de745 11421CopySlowly:
1c79356b
A
11422 vm_object_lock(src_object);
11423 result = vm_object_copy_slowly(
2d21ac55
A
11424 src_object,
11425 src_offset,
11426 src_size,
11427 THREAD_UNINT,
3e170ce0
A
11428 &VME_OBJECT(new_entry));
11429 VME_OFFSET_SET(new_entry, 0);
1c79356b 11430 new_entry->needs_copy = FALSE;
0a7de745
A
11431 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11432 (entry_was_shared || map_share)) {
11433 vm_object_t new_object;
55e303ae 11434
2d21ac55 11435 vm_object_lock_shared(src_object);
55e303ae 11436 new_object = vm_object_copy_delayed(
2d21ac55 11437 src_object,
5ba3f43e 11438 src_offset,
2d21ac55
A
11439 src_size,
11440 TRUE);
0a7de745
A
11441 if (new_object == VM_OBJECT_NULL) {
11442 goto CopySlowly;
11443 }
55e303ae 11444
3e170ce0
A
11445 VME_OBJECT_SET(new_entry, new_object);
11446 assert(new_entry->wired_count == 0);
55e303ae 11447 new_entry->needs_copy = TRUE;
fe8ab488
A
11448 assert(!new_entry->iokit_acct);
11449 assert(new_object->purgable == VM_PURGABLE_DENY);
a39ff7e2 11450 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
55e303ae 11451 result = KERN_SUCCESS;
1c79356b 11452 } else {
3e170ce0
A
11453 vm_object_offset_t new_offset;
11454 new_offset = VME_OFFSET(new_entry);
1c79356b 11455 result = vm_object_copy_strategically(src_object,
0a7de745
A
11456 src_offset,
11457 src_size,
11458 &VME_OBJECT(new_entry),
11459 &new_offset,
11460 &new_entry_needs_copy);
3e170ce0
A
11461 if (new_offset != VME_OFFSET(new_entry)) {
11462 VME_OFFSET_SET(new_entry, new_offset);
11463 }
1c79356b
A
11464
11465 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
11466 }
11467
39037602
A
11468 if (result == KERN_SUCCESS &&
11469 preserve_purgeable &&
11470 src_object->purgable != VM_PURGABLE_DENY) {
0a7de745 11471 vm_object_t new_object;
39037602
A
11472
11473 new_object = VME_OBJECT(new_entry);
11474 assert(new_object != src_object);
11475 vm_object_lock(new_object);
11476 assert(new_object->ref_count == 1);
11477 assert(new_object->shadow == VM_OBJECT_NULL);
11478 assert(new_object->copy == VM_OBJECT_NULL);
d9a64523 11479 assert(new_object->vo_owner == NULL);
39037602
A
11480
11481 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11482 new_object->true_share = TRUE;
11483 /* start as non-volatile with no owner... */
11484 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11485 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11486 /* ... and move to src_object's purgeable state */
11487 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11488 int state;
11489 state = src_object->purgable;
11490 vm_object_purgable_control(
11491 new_object,
5ba3f43e 11492 VM_PURGABLE_SET_STATE_FROM_KERNEL,
39037602
A
11493 &state);
11494 }
11495 vm_object_unlock(new_object);
11496 new_object = VM_OBJECT_NULL;
a39ff7e2
A
11497 /* no pmap accounting for purgeable objects */
11498 new_entry->use_pmap = FALSE;
39037602
A
11499 }
11500
1c79356b
A
11501 if (result != KERN_SUCCESS &&
11502 result != KERN_MEMORY_RESTART_COPY) {
11503 vm_map_lock(src_map);
11504 RETURN(result);
11505 }
11506
11507 /*
11508 * Throw away the extra reference
11509 */
11510
11511 vm_object_deallocate(src_object);
11512
11513 /*
11514 * Verify that the map has not substantially
11515 * changed while the copy was being made.
11516 */
11517
9bccf70c 11518 vm_map_lock(src_map);
1c79356b 11519
5ba3f43e
A
11520 if ((version.main_timestamp + 1) == src_map->timestamp) {
11521 /* src_map hasn't changed: src_entry is still valid */
11522 src_entry = saved_src_entry;
1c79356b 11523 goto VerificationSuccessful;
5ba3f43e 11524 }
1c79356b
A
11525
11526 /*
11527 * Simple version comparison failed.
11528 *
11529 * Retry the lookup and verify that the
11530 * same object/offset are still present.
11531 *
11532 * [Note: a memory manager that colludes with
11533 * the calling task can detect that we have
11534 * cheated. While the map was unlocked, the
11535 * mapping could have been changed and restored.]
11536 */
11537
11538 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 11539 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
11540 vm_object_deallocate(VME_OBJECT(new_entry));
11541 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
a39ff7e2
A
11542 /* reset accounting state */
11543 new_entry->iokit_acct = FALSE;
fe8ab488
A
11544 new_entry->use_pmap = TRUE;
11545 }
1c79356b
A
11546 RETURN(KERN_INVALID_ADDRESS);
11547 }
11548
11549 src_entry = tmp_entry;
11550 vm_map_clip_start(src_map, src_entry, src_start);
11551
91447636 11552 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
0a7de745
A
11553 !use_maxprot) ||
11554 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
1c79356b 11555 goto VerificationFailed;
0a7de745 11556 }
1c79356b 11557
39236c6e 11558 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
11559 /*
11560 * This entry might have been shortened
11561 * (vm_map_clip_end) or been replaced with
11562 * an entry that ends closer to "src_start"
11563 * than before.
11564 * Adjust "new_entry" accordingly; copying
11565 * less memory would be correct but we also
11566 * redo the copy (see below) if the new entry
11567 * no longer points at the same object/offset.
11568 */
39236c6e 11569 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
0a7de745 11570 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e
A
11571 new_entry->vme_end = src_entry->vme_end;
11572 src_size = new_entry->vme_end - src_start;
39037602
A
11573 } else if (src_entry->vme_end > new_entry->vme_end) {
11574 /*
11575 * This entry might have been extended
11576 * (vm_map_entry_simplify() or coalesce)
11577 * or been replaced with an entry that ends farther
5ba3f43e 11578 * from "src_start" than before.
39037602
A
11579 *
11580 * We've called vm_object_copy_*() only on
11581 * the previous <start:end> range, so we can't
11582 * just extend new_entry. We have to re-do
11583 * the copy based on the new entry as if it was
11584 * pointing at a different object/offset (see
11585 * "Verification failed" below).
11586 */
39236c6e 11587 }
1c79356b 11588
3e170ce0 11589 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
11590 (VME_OFFSET(src_entry) != src_offset) ||
11591 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
11592 /*
11593 * Verification failed.
11594 *
11595 * Start over with this top-level entry.
11596 */
11597
0a7de745 11598VerificationFailed: ;
1c79356b 11599
3e170ce0 11600 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
11601 tmp_entry = src_entry;
11602 continue;
11603 }
11604
11605 /*
11606 * Verification succeeded.
11607 */
11608
0a7de745 11609VerificationSuccessful:;
1c79356b 11610
0a7de745 11611 if (result == KERN_MEMORY_RESTART_COPY) {
1c79356b 11612 goto RestartCopy;
0a7de745 11613 }
1c79356b
A
11614
11615 /*
11616 * Copy succeeded.
11617 */
11618
0a7de745 11619CopySuccessful: ;
1c79356b
A
11620
11621 /*
11622 * Link in the new copy entry.
11623 */
11624
11625 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
0a7de745 11626 new_entry);
5ba3f43e 11627
1c79356b
A
11628 /*
11629 * Determine whether the entire region
11630 * has been copied.
11631 */
2d21ac55 11632 src_base = src_start;
1c79356b
A
11633 src_start = new_entry->vme_end;
11634 new_entry = VM_MAP_ENTRY_NULL;
11635 while ((src_start >= src_end) && (src_end != 0)) {
0a7de745 11636 submap_map_t *ptr;
fe8ab488
A
11637
11638 if (src_map == base_map) {
11639 /* back to the top */
1c79356b 11640 break;
fe8ab488
A
11641 }
11642
11643 ptr = parent_maps;
11644 assert(ptr != NULL);
11645 parent_maps = parent_maps->next;
11646
11647 /* fix up the damage we did in that submap */
11648 vm_map_simplify_range(src_map,
0a7de745
A
11649 src_base,
11650 src_end);
fe8ab488
A
11651
11652 vm_map_unlock(src_map);
11653 vm_map_deallocate(src_map);
11654 vm_map_lock(ptr->parent_map);
11655 src_map = ptr->parent_map;
11656 src_base = ptr->base_start;
11657 src_start = ptr->base_start + ptr->base_len;
11658 src_end = ptr->base_end;
11659 if (!vm_map_lookup_entry(src_map,
0a7de745
A
11660 src_start,
11661 &tmp_entry) &&
fe8ab488
A
11662 (src_end > src_start)) {
11663 RETURN(KERN_INVALID_ADDRESS);
11664 }
11665 kfree(ptr, sizeof(submap_map_t));
0a7de745 11666 if (parent_maps == NULL) {
fe8ab488 11667 map_share = FALSE;
0a7de745 11668 }
fe8ab488
A
11669 src_entry = tmp_entry->vme_prev;
11670 }
11671
11672 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11673 (src_start >= src_addr + len) &&
11674 (src_addr + len != 0)) {
11675 /*
11676 * Stop copying now, even though we haven't reached
11677 * "src_end". We'll adjust the end of the last copy
11678 * entry at the end, if needed.
11679 *
11680 * If src_map's alignment is different from the
11681 * system's page-alignment, there could be
11682 * extra non-map-aligned map entries between
11683 * the original (non-rounded) "src_addr + len"
11684 * and the rounded "src_end".
11685 * We do not want to copy those map entries since
11686 * they're not part of the copied range.
11687 */
11688 break;
1c79356b 11689 }
fe8ab488 11690
0a7de745 11691 if ((src_start >= src_end) && (src_end != 0)) {
1c79356b 11692 break;
0a7de745 11693 }
1c79356b
A
11694
11695 /*
11696 * Verify that there are no gaps in the region
11697 */
11698
11699 tmp_entry = src_entry->vme_next;
fe8ab488 11700 if ((tmp_entry->vme_start != src_start) ||
39236c6e 11701 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 11702 RETURN(KERN_INVALID_ADDRESS);
39236c6e 11703 }
1c79356b
A
11704 }
11705
11706 /*
11707 * If the source should be destroyed, do it now, since the
5ba3f43e 11708 * copy was successful.
1c79356b
A
11709 */
11710 if (src_destroy) {
39236c6e
A
11711 (void) vm_map_delete(
11712 src_map,
11713 vm_map_trunc_page(src_addr,
0a7de745 11714 VM_MAP_PAGE_MASK(src_map)),
39236c6e
A
11715 src_end,
11716 ((src_map == kernel_map) ?
0a7de745
A
11717 VM_MAP_REMOVE_KUNWIRE :
11718 VM_MAP_REMOVE_NO_FLAGS),
39236c6e 11719 VM_MAP_NULL);
2d21ac55
A
11720 } else {
11721 /* fix up the damage we did in the base map */
39236c6e
A
11722 vm_map_simplify_range(
11723 src_map,
11724 vm_map_trunc_page(src_addr,
0a7de745 11725 VM_MAP_PAGE_MASK(src_map)),
39236c6e 11726 vm_map_round_page(src_end,
0a7de745 11727 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
11728 }
11729
11730 vm_map_unlock(src_map);
5ba3f43e 11731 tmp_entry = VM_MAP_ENTRY_NULL;
1c79356b 11732
39236c6e 11733 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488 11734 vm_map_offset_t original_start, original_offset, original_end;
5ba3f43e 11735
39236c6e
A
11736 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11737
11738 /* adjust alignment of first copy_entry's "vme_start" */
11739 tmp_entry = vm_map_copy_first_entry(copy);
11740 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11741 vm_map_offset_t adjustment;
fe8ab488
A
11742
11743 original_start = tmp_entry->vme_start;
3e170ce0 11744 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
11745
11746 /* map-align the start of the first copy entry... */
11747 adjustment = (tmp_entry->vme_start -
0a7de745
A
11748 vm_map_trunc_page(
11749 tmp_entry->vme_start,
11750 VM_MAP_PAGE_MASK(src_map)));
fe8ab488 11751 tmp_entry->vme_start -= adjustment;
3e170ce0 11752 VME_OFFSET_SET(tmp_entry,
0a7de745 11753 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
11754 copy_addr -= adjustment;
11755 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11756 /* ... adjust for mis-aligned start of copy range */
39236c6e 11757 adjustment =
0a7de745
A
11758 (vm_map_trunc_page(copy->offset,
11759 PAGE_MASK) -
11760 vm_map_trunc_page(copy->offset,
11761 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11762 if (adjustment) {
11763 assert(page_aligned(adjustment));
11764 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11765 tmp_entry->vme_start += adjustment;
3e170ce0 11766 VME_OFFSET_SET(tmp_entry,
0a7de745
A
11767 (VME_OFFSET(tmp_entry) +
11768 adjustment));
39236c6e
A
11769 copy_addr += adjustment;
11770 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11771 }
fe8ab488
A
11772
11773 /*
11774 * Assert that the adjustments haven't exposed
11775 * more than was originally copied...
11776 */
11777 assert(tmp_entry->vme_start >= original_start);
3e170ce0 11778 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
11779 /*
11780 * ... and that it did not adjust outside of
11781 * a single 16K page.
11782 */
11783 assert(vm_map_trunc_page(tmp_entry->vme_start,
0a7de745
A
11784 VM_MAP_PAGE_MASK(src_map)) ==
11785 vm_map_trunc_page(original_start,
11786 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11787 }
11788
11789 /* adjust alignment of last copy_entry's "vme_end" */
11790 tmp_entry = vm_map_copy_last_entry(copy);
11791 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11792 vm_map_offset_t adjustment;
fe8ab488
A
11793
11794 original_end = tmp_entry->vme_end;
11795
11796 /* map-align the end of the last copy entry... */
11797 tmp_entry->vme_end =
0a7de745
A
11798 vm_map_round_page(tmp_entry->vme_end,
11799 VM_MAP_PAGE_MASK(src_map));
fe8ab488 11800 /* ... adjust for mis-aligned end of copy range */
39236c6e 11801 adjustment =
0a7de745
A
11802 (vm_map_round_page((copy->offset +
11803 copy->size),
11804 VM_MAP_PAGE_MASK(src_map)) -
11805 vm_map_round_page((copy->offset +
11806 copy->size),
11807 PAGE_MASK));
39236c6e
A
11808 if (adjustment) {
11809 assert(page_aligned(adjustment));
11810 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11811 tmp_entry->vme_end -= adjustment;
11812 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11813 }
fe8ab488
A
11814
11815 /*
11816 * Assert that the adjustments haven't exposed
11817 * more than was originally copied...
11818 */
11819 assert(tmp_entry->vme_end <= original_end);
11820 /*
11821 * ... and that it did not adjust outside of
11822 * a single 16K page.
11823 */
11824 assert(vm_map_round_page(tmp_entry->vme_end,
0a7de745
A
11825 VM_MAP_PAGE_MASK(src_map)) ==
11826 vm_map_round_page(original_end,
11827 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11828 }
11829 }
11830
1c79356b
A
11831 /* Fix-up start and end points in copy. This is necessary */
11832 /* when the various entries in the copy object were picked */
11833 /* up from different sub-maps */
11834
11835 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 11836 copy_size = 0; /* compute actual size */
1c79356b 11837 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e 11838 assert(VM_MAP_PAGE_ALIGNED(
0a7de745
A
11839 copy_addr + (tmp_entry->vme_end -
11840 tmp_entry->vme_start),
11841 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e 11842 assert(VM_MAP_PAGE_ALIGNED(
0a7de745
A
11843 copy_addr,
11844 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e
A
11845
11846 /*
11847 * The copy_entries will be injected directly into the
11848 * destination map and might not be "map aligned" there...
11849 */
11850 tmp_entry->map_aligned = FALSE;
11851
5ba3f43e 11852 tmp_entry->vme_end = copy_addr +
0a7de745 11853 (tmp_entry->vme_end - tmp_entry->vme_start);
1c79356b 11854 tmp_entry->vme_start = copy_addr;
e2d2fc5c 11855 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 11856 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 11857 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
11858 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11859 }
11860
fe8ab488
A
11861 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11862 copy_size < copy->size) {
11863 /*
11864 * The actual size of the VM map copy is smaller than what
11865 * was requested by the caller. This must be because some
11866 * PAGE_SIZE-sized pages are missing at the end of the last
11867 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11868 * The caller might not have been aware of those missing
11869 * pages and might not want to be aware of it, which is
11870 * fine as long as they don't try to access (and crash on)
11871 * those missing pages.
11872 * Let's adjust the size of the "copy", to avoid failing
11873 * in vm_map_copyout() or vm_map_copy_overwrite().
11874 */
11875 assert(vm_map_round_page(copy_size,
0a7de745
A
11876 VM_MAP_PAGE_MASK(src_map)) ==
11877 vm_map_round_page(copy->size,
11878 VM_MAP_PAGE_MASK(src_map)));
fe8ab488
A
11879 copy->size = copy_size;
11880 }
11881
1c79356b 11882 *copy_result = copy;
0a7de745 11883 return KERN_SUCCESS;
1c79356b 11884
0a7de745 11885#undef RETURN
1c79356b
A
11886}
11887
39236c6e
A
11888kern_return_t
11889vm_map_copy_extract(
0a7de745
A
11890 vm_map_t src_map,
11891 vm_map_address_t src_addr,
11892 vm_map_size_t len,
11893 vm_map_copy_t *copy_result, /* OUT */
11894 vm_prot_t *cur_prot, /* OUT */
11895 vm_prot_t *max_prot)
39236c6e 11896{
0a7de745
A
11897 vm_map_offset_t src_start, src_end;
11898 vm_map_copy_t copy;
11899 kern_return_t kr;
39236c6e
A
11900
11901 /*
11902 * Check for copies of zero bytes.
11903 */
11904
11905 if (len == 0) {
11906 *copy_result = VM_MAP_COPY_NULL;
0a7de745 11907 return KERN_SUCCESS;
39236c6e
A
11908 }
11909
11910 /*
11911 * Check that the end address doesn't overflow
11912 */
11913 src_end = src_addr + len;
0a7de745 11914 if (src_end < src_addr) {
39236c6e 11915 return KERN_INVALID_ADDRESS;
0a7de745 11916 }
39236c6e
A
11917
11918 /*
11919 * Compute (page aligned) start and end of region
11920 */
11921 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11922 src_end = vm_map_round_page(src_end, PAGE_MASK);
11923
11924 /*
11925 * Allocate a header element for the list.
11926 *
5ba3f43e 11927 * Use the start and end in the header to
39236c6e
A
11928 * remember the endpoints prior to rounding.
11929 */
11930
d9a64523 11931 copy = vm_map_copy_allocate();
39236c6e 11932 copy->type = VM_MAP_COPY_ENTRY_LIST;
39236c6e
A
11933 copy->cpy_hdr.entries_pageable = TRUE;
11934
11935 vm_map_store_init(&copy->cpy_hdr);
11936
11937 copy->offset = 0;
11938 copy->size = len;
11939
11940 kr = vm_map_remap_extract(src_map,
0a7de745
A
11941 src_addr,
11942 len,
11943 FALSE, /* copy */
11944 &copy->cpy_hdr,
11945 cur_prot,
11946 max_prot,
11947 VM_INHERIT_SHARE,
11948 TRUE, /* pageable */
11949 FALSE, /* same_map */
11950 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
11951 if (kr != KERN_SUCCESS) {
11952 vm_map_copy_discard(copy);
11953 return kr;
11954 }
11955
11956 *copy_result = copy;
11957 return KERN_SUCCESS;
11958}
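/*
 * Illustrative sketch (not part of the original source): using
 * vm_map_copy_extract(), defined above, to pull out a copy of a range
 * together with its current and maximum protections, rejecting ranges
 * that are not currently readable. Names and the readability check are
 * assumptions.
 */
#if 0   /* illustrative sketch only -- not compiled */
static kern_return_t
example_extract_readable(
	vm_map_t         map,        /* placeholder source map */
	vm_map_address_t addr,       /* placeholder address */
	vm_map_size_t    len,
	vm_map_copy_t    *copy_p)    /* OUT */
{
	vm_prot_t     cur_prot, max_prot;
	kern_return_t kr;

	kr = vm_map_copy_extract(map, addr, len, copy_p,
	    &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	if (!(cur_prot & VM_PROT_READ)) {
		/* range is not currently readable: give the copy back */
		vm_map_copy_discard(*copy_p);
		*copy_p = VM_MAP_COPY_NULL;
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif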
11959
1c79356b
A
11960/*
11961 * vm_map_copyin_object:
11962 *
11963 * Create a copy object from an object.
11964 * Our caller donates an object reference.
11965 */
11966
11967kern_return_t
11968vm_map_copyin_object(
0a7de745
A
11969 vm_object_t object,
11970 vm_object_offset_t offset, /* offset of region in object */
11971 vm_object_size_t size, /* size of region in object */
11972 vm_map_copy_t *copy_result) /* OUT */
1c79356b 11973{
0a7de745 11974 vm_map_copy_t copy; /* Resulting copy */
1c79356b
A
11975
11976 /*
11977 * We drop the object into a special copy object
11978 * that contains the object directly.
11979 */
11980
d9a64523 11981 copy = vm_map_copy_allocate();
1c79356b
A
11982 copy->type = VM_MAP_COPY_OBJECT;
11983 copy->cpy_object = object;
1c79356b
A
11984 copy->offset = offset;
11985 copy->size = size;
11986
11987 *copy_result = copy;
0a7de745 11988 return KERN_SUCCESS;
1c79356b
A
11989}
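/*
 * Illustrative sketch (not part of the original source): wrapping a newly
 * allocated anonymous VM object in a copy object. The reference taken by
 * vm_object_allocate() is donated to vm_map_copyin_object(), as the
 * routine's comment above requires. The size parameter and the defensive
 * NULL check are assumptions.
 */
#if 0   /* illustrative sketch only -- not compiled */
static kern_return_t
example_wrap_object(
	vm_object_size_t size,       /* placeholder size, page aligned */
	vm_map_copy_t    *copy_p)    /* OUT */
{
	vm_object_t object;

	object = vm_object_allocate((vm_map_size_t) size);
	if (object == VM_OBJECT_NULL) {
		/* defensive only; allocation is normally assumed to succeed */
		return KERN_RESOURCE_SHORTAGE;
	}

	/* the copy object takes over our reference on "object" */
	return vm_map_copyin_object(object, 0, size, copy_p);
}
#endif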
11990
91447636 11991static void
1c79356b 11992vm_map_fork_share(
0a7de745
A
11993 vm_map_t old_map,
11994 vm_map_entry_t old_entry,
11995 vm_map_t new_map)
1c79356b 11996{
0a7de745
A
11997 vm_object_t object;
11998 vm_map_entry_t new_entry;
1c79356b
A
11999
12000 /*
12001 * New sharing code. New map entry
12002 * references original object. Internal
12003 * objects use asynchronous copy algorithm for
12004 * future copies. First make sure we have
12005 * the right object. If we need a shadow,
12006 * or someone else already has one, then
12007 * make a new shadow and share it.
12008 */
5ba3f43e 12009
3e170ce0 12010 object = VME_OBJECT(old_entry);
1c79356b
A
12011 if (old_entry->is_sub_map) {
12012 assert(old_entry->wired_count == 0);
0c530ab8 12013#ifndef NO_NESTED_PMAP
0a7de745
A
12014 if (old_entry->use_pmap) {
12015 kern_return_t result;
91447636 12016
5ba3f43e 12017 result = pmap_nest(new_map->pmap,
0a7de745
A
12018 (VME_SUBMAP(old_entry))->pmap,
12019 (addr64_t)old_entry->vme_start,
12020 (addr64_t)old_entry->vme_start,
12021 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12022 if (result) {
1c79356b 12023 panic("vm_map_fork_share: pmap_nest failed!");
0a7de745 12024 }
1c79356b 12025 }
0a7de745 12026#endif /* NO_NESTED_PMAP */
1c79356b 12027 } else if (object == VM_OBJECT_NULL) {
91447636 12028 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
0a7de745 12029 old_entry->vme_start));
3e170ce0
A
12030 VME_OFFSET_SET(old_entry, 0);
12031 VME_OBJECT_SET(old_entry, object);
fe8ab488 12032 old_entry->use_pmap = TRUE;
a39ff7e2 12033// assert(!old_entry->needs_copy);
1c79356b 12034 } else if (object->copy_strategy !=
0a7de745 12035 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
12036 /*
12037 * We are already using an asymmetric
12038 * copy, and therefore we already have
12039 * the right object.
12040 */
5ba3f43e 12041
0a7de745
A
12042 assert(!old_entry->needs_copy);
12043 } else if (old_entry->needs_copy || /* case 1 */
12044 object->shadowed || /* case 2 */
12045 (!object->true_share && /* case 3 */
12046 !old_entry->is_shared &&
12047 (object->vo_size >
12048 (vm_map_size_t)(old_entry->vme_end -
12049 old_entry->vme_start)))) {
1c79356b
A
12050 /*
12051 * We need to create a shadow.
12052 * There are three cases here.
12053 * In the first case, we need to
12054 * complete a deferred symmetrical
12055 * copy that we participated in.
12056 * In the second and third cases,
12057 * we need to create the shadow so
12058 * that changes that we make to the
12059 * object do not interfere with
12060 * any symmetrical copies which
12061 * have occurred (case 2) or which
12062 * might occur (case 3).
12063 *
12064 * The first case is when we had
12065 * deferred shadow object creation
12066 * via the entry->needs_copy mechanism.
12067 * This mechanism only works when
12068 * only one entry points to the source
12069 * object, and we are about to create
12070 * a second entry pointing to the
12071 * same object. The problem is that
12072 * there is no way of mapping from
12073 * an object to the entries pointing
12074 * to it. (Deferred shadow creation
12075 * works with one entry because it occurs
12076 * at fault time, and we walk from the
12077 * entry to the object when handling
12078 * the fault.)
12079 *
12080 * The second case is when the object
12081 * to be shared has already been copied
12082 * with a symmetric copy, but we point
12083 * directly to the object without
12084 * needs_copy set in our entry. (This
12085 * can happen because different ranges
12086 * of an object can be pointed to by
12087 * different entries. In particular,
12088 * a single entry pointing to an object
12089 * can be split by a call to vm_inherit,
12090 * which, combined with task_create, can
12091 * result in the different entries
12092 * having different needs_copy values.)
12093 * The shadowed flag in the object allows
12094 * us to detect this case. The problem
12095 * with this case is that if this object
12096 * has or will have shadows, then we
12097 * must not perform an asymmetric copy
12098 * of this object, since such a copy
12099 * allows the object to be changed, which
12100 * will break the previous symmetrical
12101 * copies (which rely upon the object
12102 * not changing). In a sense, the shadowed
12103 * flag says "don't change this object".
12104 * We fix this by creating a shadow
12105 * object for this object, and sharing
12106 * that. This works because we are free
12107 * to change the shadow object (and thus
12108 * to use an asymmetric copy strategy);
12109 * this is also semantically correct,
12110 * since this object is temporary, and
12111 * therefore a copy of the object is
12112 * as good as the object itself. (This
12113 * is not true for permanent objects,
12114 * since the pager needs to see changes,
12115 * which won't happen if the changes
12116 * are made to a copy.)
12117 *
12118 * The third case is when the object
12119 * to be shared has parts sticking
12120 * outside of the entry we're working
12121 * with, and thus may in the future
12122 * be subject to a symmetrical copy.
12123 * (This is a preemptive version of
12124 * case 2.)
12125 */
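		/*
		 * In terms of the predicate above:
		 *   case 1: old_entry->needs_copy
		 *   case 2: object->shadowed
		 *   case 3: !object->true_share && !old_entry->is_shared &&
		 *           object->vo_size > (vme_end - vme_start)
		 * In all three cases the object is shadowed here before
		 * being shared.
		 */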
3e170ce0 12126 VME_OBJECT_SHADOW(old_entry,
0a7de745
A
12127 (vm_map_size_t) (old_entry->vme_end -
12128 old_entry->vme_start));
5ba3f43e 12129
1c79356b
A
12130 /*
12131 * If we're making a shadow for other than
12132 * copy on write reasons, then we have
12133 * to remove write permission.
12134 */
12135
1c79356b
A
12136 if (!old_entry->needs_copy &&
12137 (old_entry->protection & VM_PROT_WRITE)) {
0a7de745 12138 vm_prot_t prot;
0c530ab8 12139
5ba3f43e
A
12140 assert(!pmap_has_prot_policy(old_entry->protection));
12141
0c530ab8 12142 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12143
5ba3f43e
A
12144 assert(!pmap_has_prot_policy(prot));
12145
0a7de745
A
12146 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12147 prot |= VM_PROT_EXECUTE;
12148 }
2d21ac55 12149
5ba3f43e 12150
316670eb 12151 if (old_map->mapped_in_other_pmaps) {
9bccf70c 12152 vm_object_pmap_protect(
3e170ce0
A
12153 VME_OBJECT(old_entry),
12154 VME_OFFSET(old_entry),
9bccf70c 12155 (old_entry->vme_end -
0a7de745 12156 old_entry->vme_start),
9bccf70c
A
12157 PMAP_NULL,
12158 old_entry->vme_start,
0c530ab8 12159 prot);
1c79356b 12160 } else {
9bccf70c 12161 pmap_protect(old_map->pmap,
0a7de745
A
12162 old_entry->vme_start,
12163 old_entry->vme_end,
12164 prot);
1c79356b
A
12165 }
12166 }
5ba3f43e 12167
1c79356b 12168 old_entry->needs_copy = FALSE;
3e170ce0 12169 object = VME_OBJECT(old_entry);
1c79356b 12170 }
6d2010ae 12171
5ba3f43e 12172
1c79356b
A
12173 /*
12174 * If object was using a symmetric copy strategy,
12175 * change its copy strategy to the default
12176 * asymmetric copy strategy, which is copy_delay
12177 * in the non-norma case and copy_call in the
12178 * norma case. Bump the reference count for the
12179 * new entry.
12180 */
5ba3f43e 12181
0a7de745 12182 if (old_entry->is_sub_map) {
3e170ce0
A
12183 vm_map_lock(VME_SUBMAP(old_entry));
12184 vm_map_reference(VME_SUBMAP(old_entry));
12185 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
12186 } else {
12187 vm_object_lock(object);
2d21ac55 12188 vm_object_reference_locked(object);
1c79356b
A
12189 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12190 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12191 }
12192 vm_object_unlock(object);
12193 }
5ba3f43e 12194
1c79356b
A
12195 /*
12196 * Clone the entry, using object ref from above.
12197 * Mark both entries as shared.
12198 */
5ba3f43e 12199
7ddcb079 12200 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
0a7de745 12201 * map or descendants */
1c79356b
A
12202 vm_map_entry_copy(new_entry, old_entry);
12203 old_entry->is_shared = TRUE;
12204 new_entry->is_shared = TRUE;
39037602 12205
a39ff7e2
A
12206 /*
12207 * We're dealing with a shared mapping, so the resulting mapping
12208 * should inherit some of the original mapping's accounting settings.
12209 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12210 * "use_pmap" should stay the same as before (if it hasn't been reset
12211 * to TRUE when we cleared "iokit_acct").
12212 */
12213 assert(!new_entry->iokit_acct);
12214
39037602
A
12215 /*
12216 * If the old entry's inheritance is VM_INHERIT_NONE
12217 * and the new entry is for a corpse fork, remove the
12218 * write permission from the new entry.
12219 */
12220 if (old_entry->inheritance == VM_INHERIT_NONE) {
39037602
A
12221 new_entry->protection &= ~VM_PROT_WRITE;
12222 new_entry->max_protection &= ~VM_PROT_WRITE;
12223 }
5ba3f43e 12224
1c79356b
A
12225 /*
12226 * Insert the entry into the new map -- we
12227 * know we're inserting at the end of the new
12228 * map.
12229 */
5ba3f43e 12230
d9a64523 12231 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
0a7de745 12232 VM_MAP_KERNEL_FLAGS_NONE);
5ba3f43e 12233
1c79356b
A
12234 /*
12235 * Update the physical map
12236 */
5ba3f43e 12237
1c79356b
A
12238 if (old_entry->is_sub_map) {
12239 /* Bill Angell pmap support goes here */
12240 } else {
12241 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
0a7de745
A
12242 old_entry->vme_end - old_entry->vme_start,
12243 old_entry->vme_start);
1c79356b
A
12244 }
12245}
12246
91447636 12247static boolean_t
1c79356b 12248vm_map_fork_copy(
0a7de745
A
12249 vm_map_t old_map,
12250 vm_map_entry_t *old_entry_p,
12251 vm_map_t new_map,
12252 int vm_map_copyin_flags)
1c79356b
A
12253{
12254 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
12255 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12256 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
12257 vm_map_copy_t copy;
12258 vm_map_entry_t last = vm_map_last_entry(new_map);
12259
12260 vm_map_unlock(old_map);
12261 /*
12262 * Use maxprot version of copyin because we
12263 * care about whether this memory can ever
12264 * be accessed, not just whether it's accessible
12265 * right now.
12266 */
39037602
A
12267 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12268 if (vm_map_copyin_internal(old_map, start, entry_size,
0a7de745 12269 vm_map_copyin_flags, &copy)
1c79356b
A
12270 != KERN_SUCCESS) {
12271 /*
12272 * The map might have changed while it
12273 * was unlocked, check it again. Skip
12274 * any blank space or permanently
12275 * unreadable region.
12276 */
12277 vm_map_lock(old_map);
12278 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 12279 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
12280 last = last->vme_next;
12281 }
12282 *old_entry_p = last;
12283
12284 /*
12285 * XXX For some error returns, want to
12286 * XXX skip to the next element. Note
12287 * that INVALID_ADDRESS and
12288 * PROTECTION_FAILURE are handled above.
12289 */
5ba3f43e 12290
1c79356b
A
12291 return FALSE;
12292 }
5ba3f43e 12293
1c79356b
A
12294 /*
12295 * Insert the copy into the new map
12296 */
5ba3f43e 12297
1c79356b 12298 vm_map_copy_insert(new_map, last, copy);
5ba3f43e 12299
1c79356b
A
12300 /*
12301 * Pick up the traversal at the end of
12302 * the copied region.
12303 */
5ba3f43e 12304
1c79356b
A
12305 vm_map_lock(old_map);
12306 start += entry_size;
0a7de745 12307 if (!vm_map_lookup_entry(old_map, start, &last)) {
1c79356b
A
12308 last = last->vme_next;
12309 } else {
2d21ac55
A
12310 if (last->vme_start == start) {
12311 /*
12312 * No need to clip here and we don't
12313 * want to cause any unnecessary
12314 * unnesting...
12315 */
12316 } else {
12317 vm_map_clip_start(old_map, last, start);
12318 }
1c79356b
A
12319 }
12320 *old_entry_p = last;
12321
12322 return TRUE;
12323}
12324
12325/*
12326 * vm_map_fork:
12327 *
12328 * Create and return a new map based on the old
12329 * map, according to the inheritance values on the
39037602 12330 * regions in that map and the options.
1c79356b
A
12331 *
12332 * The source map must not be locked.
12333 */
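/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller of the routine defined below. "ledger" and "parent_map" are
 * placeholders; passing 0 for the options means every region simply
 * follows its inheritance value (share, copy, or none), as handled in
 * the loop below.
 */
#if 0   /* illustrative sketch only -- not compiled */
static vm_map_t
example_fork_address_space(
	ledger_t ledger,             /* placeholder ledger */
	vm_map_t parent_map)         /* placeholder parent map */
{
	vm_map_t child_map;

	child_map = vm_map_fork(ledger, parent_map, 0);
	if (child_map == VM_MAP_NULL) {
		/* VM_MAP_NULL is returned only for unsupported option bits */
	}
	return child_map;
}
#endif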
12334vm_map_t
12335vm_map_fork(
0a7de745
A
12336 ledger_t ledger,
12337 vm_map_t old_map,
12338 int options)
1c79356b 12339{
0a7de745
A
12340 pmap_t new_pmap;
12341 vm_map_t new_map;
12342 vm_map_entry_t old_entry;
12343 vm_map_size_t new_size = 0, entry_size;
12344 vm_map_entry_t new_entry;
12345 boolean_t src_needs_copy;
12346 boolean_t new_entry_needs_copy;
12347 boolean_t pmap_is64bit;
12348 int vm_map_copyin_flags;
12349 vm_inherit_t old_entry_inheritance;
12350 int map_create_options;
12351 kern_return_t footprint_collect_kr;
39037602
A
12352
12353 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
0a7de745
A
12354 VM_MAP_FORK_PRESERVE_PURGEABLE |
12355 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
39037602
A
12356 /* unsupported option */
12357 return VM_MAP_NULL;
12358 }
1c79356b 12359
3e170ce0 12360 pmap_is64bit =
b0d623f7 12361#if defined(__i386__) || defined(__x86_64__)
0a7de745 12362 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
5ba3f43e 12363#elif defined(__arm64__)
0a7de745 12364 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
5ba3f43e 12365#elif defined(__arm__)
0a7de745 12366 FALSE;
b0d623f7 12367#else
316670eb 12368#error Unknown architecture.
b0d623f7 12369#endif
3e170ce0
A
12370
12371 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 12372
1c79356b
A
12373 vm_map_reference_swap(old_map);
12374 vm_map_lock(old_map);
12375
d9a64523
A
12376 map_create_options = 0;
12377 if (old_map->hdr.entries_pageable) {
12378 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12379 }
12380 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12381 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12382 footprint_collect_kr = KERN_SUCCESS;
12383 }
12384 new_map = vm_map_create_options(new_pmap,
0a7de745
A
12385 old_map->min_offset,
12386 old_map->max_offset,
12387 map_create_options);
5ba3f43e 12388 vm_map_lock(new_map);
39037602 12389 vm_commit_pagezero_status(new_map);
39236c6e
A
12390 /* inherit the parent map's page size */
12391 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 12392 for (
2d21ac55
A
12393 old_entry = vm_map_first_entry(old_map);
12394 old_entry != vm_map_to_entry(old_map);
12395 ) {
1c79356b
A
12396 entry_size = old_entry->vme_end - old_entry->vme_start;
12397
d9a64523
A
12398 old_entry_inheritance = old_entry->inheritance;
12399 /*
12400 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12401 * share VM_INHERIT_NONE entries that are not backed by a
12402 * device pager.
12403 */
12404 if (old_entry_inheritance == VM_INHERIT_NONE &&
12405 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12406 !(!old_entry->is_sub_map &&
0a7de745
A
12407 VME_OBJECT(old_entry) != NULL &&
12408 VME_OBJECT(old_entry)->pager != NULL &&
12409 is_device_pager_ops(
12410 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
d9a64523
A
12411 old_entry_inheritance = VM_INHERIT_SHARE;
12412 }
12413
12414 if (old_entry_inheritance != VM_INHERIT_NONE &&
12415 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12416 footprint_collect_kr == KERN_SUCCESS) {
39037602 12417 /*
d9a64523
A
12418 * The corpse won't have old_map->pmap to query
12419 * footprint information, so collect that data now
12420 * and store it in new_map->vmmap_corpse_footprint
12421 * for later autopsy.
39037602 12422 */
d9a64523 12423 footprint_collect_kr =
0a7de745
A
12424 vm_map_corpse_footprint_collect(old_map,
12425 old_entry,
12426 new_map);
d9a64523
A
12427 }
12428
12429 switch (old_entry_inheritance) {
12430 case VM_INHERIT_NONE:
12431 break;
1c79356b
A
12432
12433 case VM_INHERIT_SHARE:
12434 vm_map_fork_share(old_map, old_entry, new_map);
12435 new_size += entry_size;
12436 break;
12437
12438 case VM_INHERIT_COPY:
12439
12440 /*
12441 * Inline the copy_quickly case;
12442 * upon failure, fall back on call
12443 * to vm_map_fork_copy.
12444 */
12445
0a7de745 12446 if (old_entry->is_sub_map) {
1c79356b 12447 break;
0a7de745 12448 }
9bccf70c 12449 if ((old_entry->wired_count != 0) ||
3e170ce0 12450 ((VME_OBJECT(old_entry) != NULL) &&
0a7de745 12451 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
12452 goto slow_vm_map_fork_copy;
12453 }
12454
7ddcb079 12455 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 12456 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
12457 if (new_entry->is_sub_map) {
12458 /* clear address space specifics */
12459 new_entry->use_pmap = FALSE;
a39ff7e2
A
12460 } else {
12461 /*
12462 * We're dealing with a copy-on-write operation,
12463 * so the resulting mapping should not inherit
12464 * the original mapping's accounting settings.
12465 * "iokit_acct" should have been cleared in
12466 * vm_map_entry_copy().
12467 * "use_pmap" should be reset to its default
12468 * (TRUE) so that the new mapping gets
12469 * accounted for in the task's memory footprint.
12470 */
12471 assert(!new_entry->iokit_acct);
12472 new_entry->use_pmap = TRUE;
fe8ab488 12473 }
1c79356b 12474
0a7de745 12475 if (!vm_object_copy_quickly(
3e170ce0
A
12476 &VME_OBJECT(new_entry),
12477 VME_OFFSET(old_entry),
2d21ac55 12478 (old_entry->vme_end -
0a7de745 12479 old_entry->vme_start),
2d21ac55
A
12480 &src_needs_copy,
12481 &new_entry_needs_copy)) {
1c79356b
A
12482 vm_map_entry_dispose(new_map, new_entry);
12483 goto slow_vm_map_fork_copy;
12484 }
12485
12486 /*
12487 * Handle copy-on-write obligations
12488 */
5ba3f43e 12489
1c79356b 12490 if (src_needs_copy && !old_entry->needs_copy) {
0a7de745 12491 vm_prot_t prot;
0c530ab8 12492
5ba3f43e
A
12493 assert(!pmap_has_prot_policy(old_entry->protection));
12494
0c530ab8 12495 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12496
3e170ce0 12497 if (override_nx(old_map, VME_ALIAS(old_entry))
0a7de745
A
12498 && prot) {
12499 prot |= VM_PROT_EXECUTE;
12500 }
2d21ac55 12501
5ba3f43e
A
12502 assert(!pmap_has_prot_policy(prot));
12503
1c79356b 12504 vm_object_pmap_protect(
3e170ce0
A
12505 VME_OBJECT(old_entry),
12506 VME_OFFSET(old_entry),
1c79356b 12507 (old_entry->vme_end -
0a7de745 12508 old_entry->vme_start),
5ba3f43e 12509 ((old_entry->is_shared
0a7de745
A
12510 || old_map->mapped_in_other_pmaps)
12511 ? PMAP_NULL :
12512 old_map->pmap),
1c79356b 12513 old_entry->vme_start,
0c530ab8 12514 prot);
1c79356b 12515
3e170ce0 12516 assert(old_entry->wired_count == 0);
1c79356b
A
12517 old_entry->needs_copy = TRUE;
12518 }
12519 new_entry->needs_copy = new_entry_needs_copy;
5ba3f43e 12520
1c79356b
A
12521 /*
12522 * Insert the entry at the end
12523 * of the map.
12524 */
5ba3f43e 12525
d9a64523 12526 vm_map_store_entry_link(new_map,
0a7de745
A
12527 vm_map_last_entry(new_map),
12528 new_entry,
12529 VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
12530 new_size += entry_size;
12531 break;
12532
0a7de745 12533slow_vm_map_fork_copy:
39037602
A
12534 vm_map_copyin_flags = 0;
12535 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12536 vm_map_copyin_flags |=
0a7de745 12537 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
39037602
A
12538 }
12539 if (vm_map_fork_copy(old_map,
0a7de745
A
12540 &old_entry,
12541 new_map,
12542 vm_map_copyin_flags)) {
1c79356b
A
12543 new_size += entry_size;
12544 }
12545 continue;
12546 }
12547 old_entry = old_entry->vme_next;
12548 }
12549
5ba3f43e
A
12550#if defined(__arm64__)
12551 pmap_insert_sharedpage(new_map->pmap);
12552#endif
fe8ab488 12553
1c79356b 12554 new_map->size = new_size;
d9a64523
A
12555
12556 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12557 vm_map_corpse_footprint_collect_done(new_map);
12558 }
12559
5ba3f43e 12560 vm_map_unlock(new_map);
1c79356b
A
12561 vm_map_unlock(old_map);
12562 vm_map_deallocate(old_map);
12563
0a7de745 12564 return new_map;
1c79356b
A
12565}
12566
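/*
 * Illustrative sketch (not part of the original source): a minimal
 * user-space example of how the inheritance values handled by the
 * switch above (VM_INHERIT_NONE / SHARE / COPY) behave across fork().
 * It assumes only the public Mach user APIs mach_vm_allocate() and
 * mach_vm_inherit(); the function name is hypothetical.
 */
#if 0	/* compiled out: user-space code, kept only as an example */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdint.h>
#include <unistd.h>

static void
demo_fork_inheritance(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t    size = 0x4000;

	/*
	 * Allocate a region and mark it VM_INHERIT_SHARE: at fork() time
	 * vm_map_fork() runs vm_map_fork_share() for this entry, so the
	 * parent and the child keep referencing the same pages.
	 */
	mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
	mach_vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);

	/*
	 * Ordinary anonymous memory defaults to VM_INHERIT_COPY and takes
	 * the copy_quickly / vm_map_fork_copy path above (copy-on-write).
	 * A region set to VM_INHERIT_NONE would simply be absent from the
	 * child's address space.
	 */
	if (fork() == 0) {
		*(volatile int *)(uintptr_t)addr = 42;	/* visible to the parent (SHARE) */
		_exit(0);
	}
}
#endif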
2d21ac55
A
12567/*
12568 * vm_map_exec:
12569 *
0a7de745 12570 * Set up the "new_map" with the proper execution environment according
2d21ac55
A
12571 * to the type of executable (platform, 64bit, chroot environment).
12572 * Map the comm page and shared region, etc...
12573 */
12574kern_return_t
12575vm_map_exec(
0a7de745
A
12576 vm_map_t new_map,
12577 task_t task,
12578 boolean_t is64bit,
12579 void *fsroot,
12580 cpu_type_t cpu,
12581 cpu_subtype_t cpu_subtype)
2d21ac55
A
12582{
12583 SHARED_REGION_TRACE_DEBUG(
d9a64523 12584 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
0a7de745
A
12585 (void *)VM_KERNEL_ADDRPERM(current_task()),
12586 (void *)VM_KERNEL_ADDRPERM(new_map),
12587 (void *)VM_KERNEL_ADDRPERM(task),
12588 (void *)VM_KERNEL_ADDRPERM(fsroot),
12589 cpu,
12590 cpu_subtype));
39037602 12591 (void) vm_commpage_enter(new_map, task, is64bit);
d9a64523 12592 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
2d21ac55 12593 SHARED_REGION_TRACE_DEBUG(
d9a64523 12594 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
0a7de745
A
12595 (void *)VM_KERNEL_ADDRPERM(current_task()),
12596 (void *)VM_KERNEL_ADDRPERM(new_map),
12597 (void *)VM_KERNEL_ADDRPERM(task),
12598 (void *)VM_KERNEL_ADDRPERM(fsroot),
12599 cpu,
12600 cpu_subtype));
2d21ac55
A
12601 return KERN_SUCCESS;
12602}
1c79356b
A
12603
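/*
 * Illustrative sketch (not part of the original source): because
 * vm_map_exec() enters the comm page before the new image runs, user
 * code can read comm-page fields at their fixed addresses without any
 * prior mapping call.  This assumes the private _COMM_PAGE_VERSION
 * macro from <machine/cpu_capabilities.h>; the exact comm-page layout
 * is architecture- and release-dependent.
 */
#if 0	/* compiled out: user-space code, kept only as an example */
#include <machine/cpu_capabilities.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* mapped for us at exec time by vm_map_exec()/vm_commpage_enter() */
	uint16_t version = *(volatile uint16_t *)_COMM_PAGE_VERSION;

	printf("comm page version: %u\n", (unsigned) version);
	return 0;
}
#endif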
12604/*
12605 * vm_map_lookup_locked:
12606 *
12607 * Finds the VM object, offset, and
12608 * protection for a given virtual address in the
12609 * specified map, assuming a page fault of the
12610 * type specified.
12611 *
12612 * Returns the (object, offset, protection) for
12613 * this address, whether it is wired down, and whether
12614 * this map has the only reference to the data in question.
12615 * In order to later verify this lookup, a "version"
12616 * is returned.
12617 *
12618 * The map MUST be locked by the caller and WILL be
12619 * locked on exit. In order to guarantee the
12620 * existence of the returned object, it is returned
12621 * locked.
12622 *
12623 * If a lookup is requested with "write protection"
12624 * specified, the map may be changed to perform virtual
12625 * copying operations, although the data referenced will
12626 * remain the same.
12627 */
12628kern_return_t
12629vm_map_lookup_locked(
0a7de745
A
12630 vm_map_t *var_map, /* IN/OUT */
12631 vm_map_offset_t vaddr,
12632 vm_prot_t fault_type,
12633 int object_lock_type,
12634 vm_map_version_t *out_version, /* OUT */
12635 vm_object_t *object, /* OUT */
12636 vm_object_offset_t *offset, /* OUT */
12637 vm_prot_t *out_prot, /* OUT */
12638 boolean_t *wired, /* OUT */
12639 vm_object_fault_info_t fault_info, /* OUT */
12640 vm_map_t *real_map)
1c79356b 12641{
0a7de745
A
12642 vm_map_entry_t entry;
12643 vm_map_t map = *var_map;
12644 vm_map_t old_map = *var_map;
12645 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12646 vm_map_offset_t cow_parent_vaddr = 0;
12647 vm_map_offset_t old_start = 0;
12648 vm_map_offset_t old_end = 0;
12649 vm_prot_t prot;
12650 boolean_t mask_protections;
12651 boolean_t force_copy;
12652 vm_prot_t original_fault_type;
6d2010ae
A
12653
12654 /*
12655	 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
12656 * as a mask against the mapping's actual protections, not as an
12657 * absolute value.
12658 */
12659 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
12660 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12661 fault_type &= VM_PROT_ALL;
6d2010ae 12662 original_fault_type = fault_type;
1c79356b 12663
91447636 12664 *real_map = map;
6d2010ae
A
12665
12666RetryLookup:
12667 fault_type = original_fault_type;
1c79356b
A
12668
12669 /*
12670 * If the map has an interesting hint, try it before calling
12671 * full blown lookup routine.
12672 */
1c79356b 12673 entry = map->hint;
1c79356b
A
12674
12675 if ((entry == vm_map_to_entry(map)) ||
12676 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
0a7de745 12677 vm_map_entry_t tmp_entry;
1c79356b
A
12678
12679 /*
12680 * Entry was either not a valid hint, or the vaddr
12681 * was not contained in the entry, so do a full lookup.
12682 */
12683 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
0a7de745 12684 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
1c79356b 12685 vm_map_unlock(cow_sub_map_parent);
0a7de745
A
12686 }
12687 if ((*real_map != map)
12688 && (*real_map != cow_sub_map_parent)) {
91447636 12689 vm_map_unlock(*real_map);
0a7de745 12690 }
1c79356b
A
12691 return KERN_INVALID_ADDRESS;
12692 }
12693
12694 entry = tmp_entry;
12695 }
0a7de745 12696 if (map == old_map) {
1c79356b
A
12697 old_start = entry->vme_start;
12698 old_end = entry->vme_end;
12699 }
12700
12701 /*
12702 * Handle submaps. Drop lock on upper map, submap is
12703 * returned locked.
12704 */
12705
12706submap_recurse:
12707 if (entry->is_sub_map) {
0a7de745
A
12708 vm_map_offset_t local_vaddr;
12709 vm_map_offset_t end_delta;
12710 vm_map_offset_t start_delta;
12711 vm_map_entry_t submap_entry;
12712 vm_prot_t subentry_protection;
12713 vm_prot_t subentry_max_protection;
12714 boolean_t mapped_needs_copy = FALSE;
1c79356b
A
12715
12716 local_vaddr = vaddr;
12717
39037602 12718 if ((entry->use_pmap &&
0a7de745
A
12719 !((fault_type & VM_PROT_WRITE) ||
12720 force_copy))) {
91447636 12721 /* if real_map equals map we unlock below */
5ba3f43e 12722 if ((*real_map != map) &&
0a7de745 12723 (*real_map != cow_sub_map_parent)) {
91447636 12724 vm_map_unlock(*real_map);
0a7de745 12725 }
3e170ce0 12726 *real_map = VME_SUBMAP(entry);
1c79356b
A
12727 }
12728
0a7de745
A
12729 if (entry->needs_copy &&
12730 ((fault_type & VM_PROT_WRITE) ||
39037602 12731 force_copy)) {
1c79356b
A
12732 if (!mapped_needs_copy) {
12733 if (vm_map_lock_read_to_write(map)) {
12734 vm_map_lock_read(map);
99c3a104 12735 *real_map = map;
1c79356b
A
12736 goto RetryLookup;
12737 }
3e170ce0
A
12738 vm_map_lock_read(VME_SUBMAP(entry));
12739 *var_map = VME_SUBMAP(entry);
1c79356b
A
12740 cow_sub_map_parent = map;
12741 /* reset base to map before cow object */
12742 /* this is the map which will accept */
12743 /* the new cow object */
12744 old_start = entry->vme_start;
12745 old_end = entry->vme_end;
12746 cow_parent_vaddr = vaddr;
12747 mapped_needs_copy = TRUE;
12748 } else {
3e170ce0
A
12749 vm_map_lock_read(VME_SUBMAP(entry));
12750 *var_map = VME_SUBMAP(entry);
0a7de745
A
12751 if ((cow_sub_map_parent != map) &&
12752 (*real_map != map)) {
1c79356b 12753 vm_map_unlock(map);
0a7de745 12754 }
1c79356b
A
12755 }
12756 } else {
3e170ce0 12757 vm_map_lock_read(VME_SUBMAP(entry));
5ba3f43e 12758 *var_map = VME_SUBMAP(entry);
1c79356b
A
12759			/* leave map locked if it is a target */
12760			/* cow sub_map above; otherwise, just */
12761			/* follow the maps down to the object. */
12762			/* Here we unlock knowing we are not */
12763			/* revisiting the map. */
0a7de745 12764 if ((*real_map != map) && (map != cow_sub_map_parent)) {
1c79356b 12765 vm_map_unlock_read(map);
0a7de745 12766 }
1c79356b
A
12767 }
12768
99c3a104 12769 map = *var_map;
1c79356b
A
12770
12771 /* calculate the offset in the submap for vaddr */
3e170ce0 12772 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 12773
0a7de745
A
12774RetrySubMap:
12775 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12776 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
1c79356b
A
12777 vm_map_unlock(cow_sub_map_parent);
12778 }
0a7de745
A
12779 if ((*real_map != map)
12780 && (*real_map != cow_sub_map_parent)) {
91447636 12781 vm_map_unlock(*real_map);
1c79356b 12782 }
91447636 12783 *real_map = map;
1c79356b
A
12784 return KERN_INVALID_ADDRESS;
12785 }
2d21ac55 12786
1c79356b
A
12787 /* find the attenuated shadow of the underlying object */
12788 /* on our target map */
12789
12790		/* In plain English: the submap object may extend beyond the */
12791		/* region mapped by the entry, or may only fill a portion */
12792		/* of it. For our purposes, we only care about the case */
12793		/* where the object does not fill the entry. In that case, */
12794		/* the area that will ultimately be clipped in the top map */
12795		/* only needs to be as big as the portion of the underlying */
12796		/* entry that is actually mapped. */
3e170ce0 12797 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
0a7de745 12798 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b 12799
5ba3f43e 12800 end_delta =
0a7de745
A
12801 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12802 submap_entry->vme_end ?
12803 0 : (VME_OFFSET(entry) +
12804 (old_end - old_start))
12805 - submap_entry->vme_end;
1c79356b
A
12806
12807 old_start += start_delta;
12808 old_end -= end_delta;
12809
0a7de745 12810 if (submap_entry->is_sub_map) {
1c79356b
A
12811 entry = submap_entry;
12812 vaddr = local_vaddr;
12813 goto submap_recurse;
12814 }
12815
39037602 12816 if (((fault_type & VM_PROT_WRITE) ||
0a7de745 12817 force_copy)
39037602 12818 && cow_sub_map_parent) {
0a7de745 12819 vm_object_t sub_object, copy_object;
2d21ac55 12820 vm_object_offset_t copy_offset;
0a7de745
A
12821 vm_map_offset_t local_start;
12822 vm_map_offset_t local_end;
12823 boolean_t copied_slowly = FALSE;
1c79356b
A
12824
12825 if (vm_map_lock_read_to_write(map)) {
12826 vm_map_lock_read(map);
12827 old_start -= start_delta;
12828 old_end += end_delta;
12829 goto RetrySubMap;
12830 }
0b4e3aa0
A
12831
12832
3e170ce0 12833 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
12834 if (sub_object == VM_OBJECT_NULL) {
12835 sub_object =
0a7de745
A
12836 vm_object_allocate(
12837 (vm_map_size_t)
12838 (submap_entry->vme_end -
12839 submap_entry->vme_start));
3e170ce0
A
12840 VME_OBJECT_SET(submap_entry, sub_object);
12841 VME_OFFSET_SET(submap_entry, 0);
a39ff7e2
A
12842 assert(!submap_entry->is_sub_map);
12843 assert(submap_entry->use_pmap);
1c79356b 12844 }
5ba3f43e 12845 local_start = local_vaddr -
0a7de745 12846 (cow_parent_vaddr - old_start);
5ba3f43e 12847 local_end = local_vaddr +
0a7de745 12848 (old_end - cow_parent_vaddr);
1c79356b
A
12849 vm_map_clip_start(map, submap_entry, local_start);
12850 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
12851 if (submap_entry->is_sub_map) {
12852 /* unnesting was done when clipping */
12853 assert(!submap_entry->use_pmap);
12854 }
1c79356b
A
12855
12856			/* This is the COW case; let's connect */
12857 /* an entry in our space to the underlying */
12858 /* object in the submap, bypassing the */
12859 /* submap. */
0b4e3aa0
A
12860
12861
0a7de745
A
12862 if (submap_entry->wired_count != 0 ||
12863 (sub_object->copy_strategy ==
4a3eedf9 12864 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
12865 vm_object_lock(sub_object);
12866 vm_object_copy_slowly(sub_object,
0a7de745
A
12867 VME_OFFSET(submap_entry),
12868 (submap_entry->vme_end -
12869 submap_entry->vme_start),
12870 FALSE,
12871 &copy_object);
2d21ac55 12872 copied_slowly = TRUE;
0b4e3aa0 12873 } else {
0b4e3aa0 12874 /* set up shadow object */
2d21ac55 12875 copy_object = sub_object;
39037602
A
12876 vm_object_lock(sub_object);
12877 vm_object_reference_locked(sub_object);
2d21ac55 12878 sub_object->shadowed = TRUE;
39037602
A
12879 vm_object_unlock(sub_object);
12880
3e170ce0 12881 assert(submap_entry->wired_count == 0);
0b4e3aa0 12882 submap_entry->needs_copy = TRUE;
0c530ab8 12883
5ba3f43e
A
12884 prot = submap_entry->protection;
12885 assert(!pmap_has_prot_policy(prot));
12886 prot = prot & ~VM_PROT_WRITE;
12887 assert(!pmap_has_prot_policy(prot));
2d21ac55 12888
3e170ce0 12889 if (override_nx(old_map,
0a7de745
A
12890 VME_ALIAS(submap_entry))
12891 && prot) {
12892 prot |= VM_PROT_EXECUTE;
12893 }
2d21ac55 12894
0b4e3aa0 12895 vm_object_pmap_protect(
2d21ac55 12896 sub_object,
3e170ce0 12897 VME_OFFSET(submap_entry),
5ba3f43e 12898 submap_entry->vme_end -
2d21ac55 12899 submap_entry->vme_start,
5ba3f43e 12900 (submap_entry->is_shared
0a7de745 12901 || map->mapped_in_other_pmaps) ?
2d21ac55 12902 PMAP_NULL : map->pmap,
1c79356b 12903 submap_entry->vme_start,
0c530ab8 12904 prot);
0b4e3aa0 12905 }
5ba3f43e 12906
2d21ac55
A
12907 /*
12908 * Adjust the fault offset to the submap entry.
12909 */
12910 copy_offset = (local_vaddr -
0a7de745
A
12911 submap_entry->vme_start +
12912 VME_OFFSET(submap_entry));
1c79356b
A
12913
12914			/* This works differently from the */
12915			/* normal submap case. We go back */
12916			/* to the parent of the cow map and */
12917			/* clip out the target portion of */
12918			/* the sub_map, substituting the */
12919			/* new copy object. */
12920
5ba3f43e
A
12921 subentry_protection = submap_entry->protection;
12922 subentry_max_protection = submap_entry->max_protection;
1c79356b 12923 vm_map_unlock(map);
5ba3f43e
A
12924 submap_entry = NULL; /* not valid after map unlock */
12925
1c79356b
A
12926 local_start = old_start;
12927 local_end = old_end;
12928 map = cow_sub_map_parent;
12929 *var_map = cow_sub_map_parent;
12930 vaddr = cow_parent_vaddr;
12931 cow_sub_map_parent = NULL;
12932
0a7de745
A
12933 if (!vm_map_lookup_entry(map,
12934 vaddr, &entry)) {
2d21ac55
A
12935 vm_object_deallocate(
12936 copy_object);
12937 vm_map_lock_write_to_read(map);
12938 return KERN_INVALID_ADDRESS;
12939 }
5ba3f43e 12940
2d21ac55
A
12941 /* clip out the portion of space */
12942 /* mapped by the sub map which */
12943 /* corresponds to the underlying */
12944 /* object */
12945
12946 /*
12947 * Clip (and unnest) the smallest nested chunk
12948 * possible around the faulting address...
12949 */
12950 local_start = vaddr & ~(pmap_nesting_size_min - 1);
12951 local_end = local_start + pmap_nesting_size_min;
12952 /*
12953 * ... but don't go beyond the "old_start" to "old_end"
12954 * range, to avoid spanning over another VM region
12955 * with a possibly different VM object and/or offset.
12956 */
12957 if (local_start < old_start) {
12958 local_start = old_start;
12959 }
12960 if (local_end > old_end) {
12961 local_end = old_end;
12962 }
12963 /*
12964 * Adjust copy_offset to the start of the range.
12965 */
12966 copy_offset -= (vaddr - local_start);
12967
1c79356b
A
12968 vm_map_clip_start(map, entry, local_start);
12969 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
12970 if (entry->is_sub_map) {
12971 /* unnesting was done when clipping */
12972 assert(!entry->use_pmap);
12973 }
1c79356b
A
12974
12975 /* substitute copy object for */
12976 /* shared map entry */
3e170ce0 12977 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 12978 assert(!entry->iokit_acct);
1c79356b 12979 entry->is_sub_map = FALSE;
fe8ab488 12980 entry->use_pmap = TRUE;
3e170ce0 12981 VME_OBJECT_SET(entry, copy_object);
1c79356b 12982
2d21ac55 12983 /* propagate the submap entry's protections */
d9a64523
A
12984 if (entry->protection != VM_PROT_READ) {
12985 /*
12986 * Someone has already altered the top entry's
12987 * protections via vm_protect(VM_PROT_COPY).
12988 * Respect these new values and ignore the
12989 * submap entry's protections.
12990 */
12991 } else {
12992 /*
12993 * Regular copy-on-write: propagate the submap
12994 * entry's protections to the top map entry.
12995 */
12996 entry->protection |= subentry_protection;
12997 }
5ba3f43e
A
12998 entry->max_protection |= subentry_max_protection;
12999
d9a64523
A
13000 if ((entry->protection & VM_PROT_WRITE) &&
13001 (entry->protection & VM_PROT_EXECUTE) &&
13002#if !CONFIG_EMBEDDED
13003 map != kernel_map &&
13004 cs_process_enforcement(NULL) &&
13005#endif /* !CONFIG_EMBEDDED */
13006 !(entry->used_for_jit)) {
13007 DTRACE_VM3(cs_wx,
0a7de745
A
13008 uint64_t, (uint64_t)entry->vme_start,
13009 uint64_t, (uint64_t)entry->vme_end,
13010 vm_prot_t, entry->protection);
d9a64523 13011 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
13012 proc_selfpid(),
13013 (current_task()->bsd_info
13014 ? proc_name_address(current_task()->bsd_info)
13015 : "?"),
13016 __FUNCTION__);
d9a64523 13017 entry->protection &= ~VM_PROT_EXECUTE;
5ba3f43e 13018 }
2d21ac55 13019
0a7de745 13020 if (copied_slowly) {
3e170ce0 13021 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
13022 entry->needs_copy = FALSE;
13023 entry->is_shared = FALSE;
13024 } else {
3e170ce0
A
13025 VME_OFFSET_SET(entry, copy_offset);
13026 assert(entry->wired_count == 0);
0b4e3aa0 13027 entry->needs_copy = TRUE;
0a7de745 13028 if (entry->inheritance == VM_INHERIT_SHARE) {
0b4e3aa0 13029 entry->inheritance = VM_INHERIT_COPY;
0a7de745
A
13030 }
13031 if (map != old_map) {
0b4e3aa0 13032 entry->is_shared = TRUE;
0a7de745 13033 }
0b4e3aa0 13034 }
0a7de745 13035 if (entry->inheritance == VM_INHERIT_SHARE) {
0b4e3aa0 13036 entry->inheritance = VM_INHERIT_COPY;
0a7de745 13037 }
1c79356b
A
13038
13039 vm_map_lock_write_to_read(map);
13040 } else {
0a7de745
A
13041 if ((cow_sub_map_parent)
13042 && (cow_sub_map_parent != *real_map)
13043 && (cow_sub_map_parent != map)) {
1c79356b
A
13044 vm_map_unlock(cow_sub_map_parent);
13045 }
13046 entry = submap_entry;
13047 vaddr = local_vaddr;
13048 }
13049 }
5ba3f43e 13050
1c79356b
A
13051 /*
13052 * Check whether this task is allowed to have
13053 * this page.
13054 */
2d21ac55 13055
6601e61a 13056 prot = entry->protection;
0c530ab8 13057
3e170ce0 13058 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0a7de745 13059 /*
2d21ac55 13060 * HACK -- if not a stack, then allow execution
0c530ab8 13061 */
0a7de745 13062 prot |= VM_PROT_EXECUTE;
2d21ac55
A
13063 }
13064
6d2010ae
A
13065 if (mask_protections) {
13066 fault_type &= prot;
13067 if (fault_type == VM_PROT_NONE) {
13068 goto protection_failure;
13069 }
13070 }
39037602 13071 if (((fault_type & prot) != fault_type)
5ba3f43e
A
13072#if __arm64__
13073 /* prefetch abort in execute-only page */
13074 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13075#endif
39037602 13076 ) {
0a7de745 13077protection_failure:
2d21ac55
A
13078 if (*real_map != map) {
13079 vm_map_unlock(*real_map);
0c530ab8
A
13080 }
13081 *real_map = map;
13082
0a7de745
A
13083 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13084 log_stack_execution_failure((addr64_t)vaddr, prot);
13085 }
0c530ab8 13086
2d21ac55 13087 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 13088 return KERN_PROTECTION_FAILURE;
1c79356b
A
13089 }
13090
13091 /*
13092 * If this page is not pageable, we have to get
13093 * it for all possible accesses.
13094 */
13095
91447636 13096 *wired = (entry->wired_count != 0);
0a7de745
A
13097 if (*wired) {
13098 fault_type = prot;
13099 }
1c79356b
A
13100
13101 /*
13102 * If the entry was copy-on-write, we either ...
13103 */
13104
13105 if (entry->needs_copy) {
0a7de745 13106 /*
1c79356b
A
13107 * If we want to write the page, we may as well
13108 * handle that now since we've got the map locked.
13109 *
13110 * If we don't need to write the page, we just
13111 * demote the permissions allowed.
13112 */
13113
fe8ab488 13114 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
13115 /*
13116 * Make a new object, and place it in the
13117 * object chain. Note that no new references
13118 * have appeared -- one just moved from the
13119 * map to the new object.
13120 */
13121
13122 if (vm_map_lock_read_to_write(map)) {
13123 vm_map_lock_read(map);
13124 goto RetryLookup;
13125 }
39037602
A
13126
13127 if (VME_OBJECT(entry)->shadowed == FALSE) {
13128 vm_object_lock(VME_OBJECT(entry));
13129 VME_OBJECT(entry)->shadowed = TRUE;
13130 vm_object_unlock(VME_OBJECT(entry));
13131 }
3e170ce0 13132 VME_OBJECT_SHADOW(entry,
0a7de745
A
13133 (vm_map_size_t) (entry->vme_end -
13134 entry->vme_start));
1c79356b 13135 entry->needs_copy = FALSE;
39037602 13136
1c79356b
A
13137 vm_map_lock_write_to_read(map);
13138 }
39037602 13139 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
13140 /*
13141 * We're attempting to read a copy-on-write
13142 * page -- don't allow writes.
13143 */
13144
13145 prot &= (~VM_PROT_WRITE);
13146 }
13147 }
13148
13149 /*
13150 * Create an object if necessary.
13151 */
3e170ce0 13152 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
13153 if (vm_map_lock_read_to_write(map)) {
13154 vm_map_lock_read(map);
13155 goto RetryLookup;
13156 }
13157
3e170ce0 13158 VME_OBJECT_SET(entry,
0a7de745
A
13159 vm_object_allocate(
13160 (vm_map_size_t)(entry->vme_end -
13161 entry->vme_start)));
3e170ce0 13162 VME_OFFSET_SET(entry, 0);
a39ff7e2 13163 assert(entry->use_pmap);
1c79356b
A
13164 vm_map_lock_write_to_read(map);
13165 }
13166
13167 /*
13168 * Return the object/offset from this entry. If the entry
13169 * was copy-on-write or empty, it has been fixed up. Also
13170 * return the protection.
13171 */
13172
0a7de745
A
13173 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13174 *object = VME_OBJECT(entry);
1c79356b 13175 *out_prot = prot;
2d21ac55
A
13176
13177 if (fault_info) {
13178 fault_info->interruptible = THREAD_UNINT; /* for now... */
13179 /* ... the caller will change "interruptible" if needed */
0a7de745 13180 fault_info->cluster_size = 0;
3e170ce0 13181 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
13182 fault_info->pmap_options = 0;
13183 if (entry->iokit_acct ||
13184 (!entry->is_sub_map && !entry->use_pmap)) {
13185 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13186 }
0a7de745 13187 fault_info->behavior = entry->behavior;
3e170ce0
A
13188 fault_info->lo_offset = VME_OFFSET(entry);
13189 fault_info->hi_offset =
0a7de745 13190 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 13191 fault_info->no_cache = entry->no_cache;
b0d623f7 13192 fault_info->stealth = FALSE;
6d2010ae 13193 fault_info->io_sync = FALSE;
3e170ce0
A
13194 if (entry->used_for_jit ||
13195 entry->vme_resilient_codesign) {
13196 fault_info->cs_bypass = TRUE;
13197 } else {
13198 fault_info->cs_bypass = FALSE;
13199 }
d9a64523
A
13200 fault_info->pmap_cs_associated = FALSE;
13201#if CONFIG_PMAP_CS
13202 if (entry->pmap_cs_associated) {
13203 /*
13204 * The pmap layer will validate this page
13205 * before allowing it to be executed from.
13206 */
13207 fault_info->pmap_cs_associated = TRUE;
13208 }
13209#endif /* CONFIG_PMAP_CS */
0b4c1975 13210 fault_info->mark_zf_absent = FALSE;
316670eb 13211 fault_info->batch_pmap_op = FALSE;
2d21ac55 13212 }
1c79356b
A
13213
13214 /*
13215 * Lock the object to prevent it from disappearing
13216 */
0a7de745
A
13217 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13218 vm_object_lock(*object);
13219 } else {
13220 vm_object_lock_shared(*object);
13221 }
5ba3f43e 13222
1c79356b
A
13223 /*
13224 * Save the version number
13225 */
13226
13227 out_version->main_timestamp = map->timestamp;
13228
13229 return KERN_SUCCESS;
13230}
13231
13232
13233/*
13234 * vm_map_verify:
13235 *
13236 * Verifies that the map in question has not changed
5ba3f43e
A
13237 * since the given version. The map has to be locked
13238 * ("shared" mode is fine) before calling this function
13239 * and it will be returned locked too.
1c79356b
A
13240 */
13241boolean_t
13242vm_map_verify(
0a7de745
A
13243 vm_map_t map,
13244 vm_map_version_t *version) /* REF */
1c79356b 13245{
0a7de745 13246 boolean_t result;
1c79356b 13247
5ba3f43e 13248 vm_map_lock_assert_held(map);
1c79356b
A
13249 result = (map->timestamp == version->main_timestamp);
13250
0a7de745 13251 return result;
1c79356b
A
13252}
13253
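/*
 * Illustrative sketch (not part of the original source): the lookup /
 * unlock / re-lock / verify pattern described in the two comment blocks
 * above, in the style of a fault handler.  Submap and error handling
 * are simplified, and the function name is hypothetical.
 */
#if 0	/* compiled out: kept only as an example */
static kern_return_t
lookup_then_verify_example(
	vm_map_t        map,
	vm_map_offset_t vaddr)
{
	vm_map_version_t        version;
	vm_object_t             object;
	vm_object_offset_t      offset;
	vm_prot_t               prot;
	boolean_t               wired;
	vm_map_t                real_map;
	kern_return_t           kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
	    &prot, &wired, NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		/* the map is still locked on failure */
		vm_map_unlock_read(map);
		return kr;
	}
	(void) prot;
	(void) wired;

	/* drop the map lock(s) but keep the locked object */
	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock_read(map);

	/* ... operate on "object" at "offset" ... */
	vm_object_unlock(object);

	/* re-take the map lock and check that nothing changed meanwhile */
	vm_map_lock_read(map);
	if (!vm_map_verify(map, &version)) {
		kr = KERN_ABORTED;      /* caller would normally retry */
	}
	vm_map_unlock_read(map);
	return kr;
}
#endif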
91447636
A
13254/*
13255 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13256 * Goes away after regular vm_region_recurse function migrates to
13257 * 64 bits
13258 * vm_region_recurse: A form of vm_region which follows the
13259 * submaps in a target map
13260 *
13261 */
13262
13263kern_return_t
13264vm_map_region_recurse_64(
0a7de745
A
13265 vm_map_t map,
13266 vm_map_offset_t *address, /* IN/OUT */
13267 vm_map_size_t *size, /* OUT */
13268 natural_t *nesting_depth, /* IN/OUT */
13269 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13270 mach_msg_type_number_t *count) /* IN/OUT */
91447636 13271{
0a7de745
A
13272 mach_msg_type_number_t original_count;
13273 vm_region_extended_info_data_t extended;
13274 vm_map_entry_t tmp_entry;
13275 vm_map_offset_t user_address;
13276 unsigned int user_max_depth;
91447636
A
13277
13278 /*
13279 * "curr_entry" is the VM map entry preceding or including the
13280 * address we're looking for.
13281 * "curr_map" is the map or sub-map containing "curr_entry".
5ba3f43e 13282 * "curr_address" is the equivalent of the top map's "user_address"
6d2010ae 13283 * in the current map.
91447636
A
13284 * "curr_offset" is the cumulated offset of "curr_map" in the
13285 * target task's address space.
13286 * "curr_depth" is the depth of "curr_map" in the chain of
13287 * sub-maps.
5ba3f43e 13288 *
6d2010ae
A
13289 * "curr_max_below" and "curr_max_above" limit the range (around
13290 * "curr_address") we should take into account in the current (sub)map.
13291 * They limit the range to what's visible through the map entries
13292 * we've traversed from the top map to the current map.
0a7de745 13293 *
91447636 13294 */
0a7de745
A
13295 vm_map_entry_t curr_entry;
13296 vm_map_address_t curr_address;
13297 vm_map_offset_t curr_offset;
13298 vm_map_t curr_map;
13299 unsigned int curr_depth;
13300 vm_map_offset_t curr_max_below, curr_max_above;
13301 vm_map_offset_t curr_skip;
91447636
A
13302
13303 /*
13304 * "next_" is the same as "curr_" but for the VM region immediately
13305 * after the address we're looking for. We need to keep track of this
13306 * too because we want to return info about that region if the
13307 * address we're looking for is not mapped.
13308 */
0a7de745
A
13309 vm_map_entry_t next_entry;
13310 vm_map_offset_t next_offset;
13311 vm_map_offset_t next_address;
13312 vm_map_t next_map;
13313 unsigned int next_depth;
13314 vm_map_offset_t next_max_below, next_max_above;
13315 vm_map_offset_t next_skip;
13316
13317 boolean_t look_for_pages;
2d21ac55 13318 vm_region_submap_short_info_64_t short_info;
0a7de745 13319 boolean_t do_region_footprint;
2d21ac55 13320
91447636
A
13321 if (map == VM_MAP_NULL) {
13322 /* no address space to work on */
13323 return KERN_INVALID_ARGUMENT;
13324 }
13325
5ba3f43e 13326
39236c6e
A
13327 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13328 /*
13329 * "info" structure is not big enough and
13330 * would overflow
13331 */
13332 return KERN_INVALID_ARGUMENT;
13333 }
5ba3f43e 13334
a39ff7e2 13335 do_region_footprint = task_self_region_footprint();
39236c6e 13336 original_count = *count;
5ba3f43e 13337
39236c6e
A
13338 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13339 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13340 look_for_pages = FALSE;
13341 short_info = (vm_region_submap_short_info_64_t) submap_info;
13342 submap_info = NULL;
2d21ac55
A
13343 } else {
13344 look_for_pages = TRUE;
39236c6e 13345 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 13346 short_info = NULL;
5ba3f43e 13347
39236c6e
A
13348 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13349 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13350 }
91447636 13351 }
5ba3f43e 13352
91447636
A
13353 user_address = *address;
13354 user_max_depth = *nesting_depth;
5ba3f43e 13355
3e170ce0
A
13356 if (not_in_kdp) {
13357 vm_map_lock_read(map);
13358 }
13359
13360recurse_again:
91447636
A
13361 curr_entry = NULL;
13362 curr_map = map;
6d2010ae 13363 curr_address = user_address;
91447636 13364 curr_offset = 0;
6d2010ae 13365 curr_skip = 0;
91447636 13366 curr_depth = 0;
6d2010ae
A
13367 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13368 curr_max_below = curr_address;
91447636
A
13369
13370 next_entry = NULL;
13371 next_map = NULL;
6d2010ae 13372 next_address = 0;
91447636 13373 next_offset = 0;
6d2010ae 13374 next_skip = 0;
91447636 13375 next_depth = 0;
6d2010ae
A
13376 next_max_above = (vm_map_offset_t) -1;
13377 next_max_below = (vm_map_offset_t) -1;
91447636 13378
91447636
A
13379 for (;;) {
13380 if (vm_map_lookup_entry(curr_map,
0a7de745
A
13381 curr_address,
13382 &tmp_entry)) {
91447636
A
13383 /* tmp_entry contains the address we're looking for */
13384 curr_entry = tmp_entry;
13385 } else {
6d2010ae 13386 vm_map_offset_t skip;
91447636
A
13387 /*
13388 * The address is not mapped. "tmp_entry" is the
13389 * map entry preceding the address. We want the next
13390 * one, if it exists.
13391 */
13392 curr_entry = tmp_entry->vme_next;
6d2010ae 13393
91447636 13394 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae 13395 (curr_entry->vme_start >=
0a7de745 13396 curr_address + curr_max_above)) {
91447636
A
13397 /* no next entry at this level: stop looking */
13398 if (not_in_kdp) {
13399 vm_map_unlock_read(curr_map);
13400 }
13401 curr_entry = NULL;
13402 curr_map = NULL;
3e170ce0 13403 curr_skip = 0;
91447636
A
13404 curr_offset = 0;
13405 curr_depth = 0;
6d2010ae
A
13406 curr_max_above = 0;
13407 curr_max_below = 0;
91447636
A
13408 break;
13409 }
6d2010ae
A
13410
13411 /* adjust current address and offset */
13412 skip = curr_entry->vme_start - curr_address;
13413 curr_address = curr_entry->vme_start;
3e170ce0 13414 curr_skip += skip;
6d2010ae
A
13415 curr_offset += skip;
13416 curr_max_above -= skip;
13417 curr_max_below = 0;
91447636
A
13418 }
13419
13420 /*
13421 * Is the next entry at this level closer to the address (or
13422 * deeper in the submap chain) than the one we had
13423		 * so far?
13424 */
13425 tmp_entry = curr_entry->vme_next;
13426 if (tmp_entry == vm_map_to_entry(curr_map)) {
13427 /* no next entry at this level */
6d2010ae 13428 } else if (tmp_entry->vme_start >=
0a7de745 13429 curr_address + curr_max_above) {
91447636
A
13430 /*
13431 * tmp_entry is beyond the scope of what we mapped of
13432 * this submap in the upper level: ignore it.
13433 */
13434 } else if ((next_entry == NULL) ||
0a7de745
A
13435 (tmp_entry->vme_start + curr_offset <=
13436 next_entry->vme_start + next_offset)) {
91447636
A
13437 /*
13438 * We didn't have a "next_entry" or this one is
13439 * closer to the address we're looking for:
13440 * use this "tmp_entry" as the new "next_entry".
13441 */
13442 if (next_entry != NULL) {
13443 /* unlock the last "next_map" */
13444 if (next_map != curr_map && not_in_kdp) {
13445 vm_map_unlock_read(next_map);
13446 }
13447 }
13448 next_entry = tmp_entry;
13449 next_map = curr_map;
91447636 13450 next_depth = curr_depth;
6d2010ae
A
13451 next_address = next_entry->vme_start;
13452 next_skip = curr_skip;
3e170ce0 13453 next_skip += (next_address - curr_address);
6d2010ae
A
13454 next_offset = curr_offset;
13455 next_offset += (next_address - curr_address);
13456 next_max_above = MIN(next_max_above, curr_max_above);
13457 next_max_above = MIN(next_max_above,
0a7de745 13458 next_entry->vme_end - next_address);
6d2010ae
A
13459 next_max_below = MIN(next_max_below, curr_max_below);
13460 next_max_below = MIN(next_max_below,
0a7de745 13461 next_address - next_entry->vme_start);
91447636
A
13462 }
13463
6d2010ae
A
13464 /*
13465 * "curr_max_{above,below}" allow us to keep track of the
13466 * portion of the submap that is actually mapped at this level:
13467 * the rest of that submap is irrelevant to us, since it's not
13468 * mapped here.
13469 * The relevant portion of the map starts at
3e170ce0 13470 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
13471 */
13472 curr_max_above = MIN(curr_max_above,
0a7de745 13473 curr_entry->vme_end - curr_address);
6d2010ae 13474 curr_max_below = MIN(curr_max_below,
0a7de745 13475 curr_address - curr_entry->vme_start);
6d2010ae 13476
91447636
A
13477 if (!curr_entry->is_sub_map ||
13478 curr_depth >= user_max_depth) {
13479 /*
13480 * We hit a leaf map or we reached the maximum depth
13481 * we could, so stop looking. Keep the current map
13482 * locked.
13483 */
13484 break;
13485 }
13486
13487 /*
13488 * Get down to the next submap level.
13489 */
13490
13491 /*
13492 * Lock the next level and unlock the current level,
13493 * unless we need to keep it locked to access the "next_entry"
13494 * later.
13495 */
13496 if (not_in_kdp) {
3e170ce0 13497 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
13498 }
13499 if (curr_map == next_map) {
13500 /* keep "next_map" locked in case we need it */
13501 } else {
13502 /* release this map */
0a7de745 13503 if (not_in_kdp) {
b0d623f7 13504 vm_map_unlock_read(curr_map);
0a7de745 13505 }
91447636
A
13506 }
13507
13508 /*
13509 * Adjust the offset. "curr_entry" maps the submap
13510 * at relative address "curr_entry->vme_start" in the
3e170ce0 13511 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
13512 * bytes of the submap.
13513 * "curr_offset" always represents the offset of a virtual
13514 * address in the curr_map relative to the absolute address
13515 * space (i.e. the top-level VM map).
13516 */
13517 curr_offset +=
0a7de745 13518 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 13519 curr_address = user_address + curr_offset;
91447636 13520 /* switch to the submap */
3e170ce0 13521 curr_map = VME_SUBMAP(curr_entry);
91447636 13522 curr_depth++;
91447636
A
13523 curr_entry = NULL;
13524 }
13525
a39ff7e2
A
13526// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13527// so probably should be a real 32b ID vs. ptr.
13528// Current users just check for equality
13529
91447636
A
13530 if (curr_entry == NULL) {
13531 /* no VM region contains the address... */
a39ff7e2
A
13532
13533 if (do_region_footprint && /* we want footprint numbers */
39037602
A
13534 next_entry == NULL && /* & there are no more regions */
13535 /* & we haven't already provided our fake region: */
a39ff7e2 13536 user_address <= vm_map_last_entry(map)->vme_end) {
39037602
A
13537 ledger_amount_t nonvol, nonvol_compressed;
13538 /*
13539 * Add a fake memory region to account for
13540 * purgeable memory that counts towards this
13541 * task's memory footprint, i.e. the resident
13542 * compressed pages of non-volatile objects
13543 * owned by that task.
13544 */
13545 ledger_get_balance(
13546 map->pmap->ledger,
13547 task_ledgers.purgeable_nonvolatile,
13548 &nonvol);
13549 ledger_get_balance(
13550 map->pmap->ledger,
13551 task_ledgers.purgeable_nonvolatile_compressed,
13552 &nonvol_compressed);
13553 if (nonvol + nonvol_compressed == 0) {
13554 /* no purgeable memory usage to report */
a39ff7e2 13555 return KERN_INVALID_ADDRESS;
39037602
A
13556 }
13557 /* fake region to show nonvolatile footprint */
a39ff7e2
A
13558 if (look_for_pages) {
13559 submap_info->protection = VM_PROT_DEFAULT;
13560 submap_info->max_protection = VM_PROT_DEFAULT;
13561 submap_info->inheritance = VM_INHERIT_DEFAULT;
13562 submap_info->offset = 0;
13563 submap_info->user_tag = -1;
13564 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
13565 submap_info->pages_shared_now_private = 0;
13566 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
13567 submap_info->pages_dirtied = submap_info->pages_resident;
13568 submap_info->ref_count = 1;
13569 submap_info->shadow_depth = 0;
13570 submap_info->external_pager = 0;
13571 submap_info->share_mode = SM_PRIVATE;
13572 submap_info->is_submap = 0;
13573 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13574 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13575 submap_info->user_wired_count = 0;
13576 submap_info->pages_reusable = 0;
13577 } else {
13578 short_info->user_tag = -1;
13579 short_info->offset = 0;
13580 short_info->protection = VM_PROT_DEFAULT;
13581 short_info->inheritance = VM_INHERIT_DEFAULT;
13582 short_info->max_protection = VM_PROT_DEFAULT;
13583 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13584 short_info->user_wired_count = 0;
13585 short_info->is_submap = 0;
13586 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13587 short_info->external_pager = 0;
13588 short_info->shadow_depth = 0;
13589 short_info->share_mode = SM_PRIVATE;
13590 short_info->ref_count = 1;
13591 }
39037602
A
13592 *nesting_depth = 0;
13593 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
a39ff7e2
A
13594// *address = user_address;
13595 *address = vm_map_last_entry(map)->vme_end;
39037602
A
13596 return KERN_SUCCESS;
13597 }
a39ff7e2 13598
91447636
A
13599 if (next_entry == NULL) {
13600 /* ... and no VM region follows it either */
13601 return KERN_INVALID_ADDRESS;
13602 }
13603 /* ... gather info about the next VM region */
13604 curr_entry = next_entry;
0a7de745 13605 curr_map = next_map; /* still locked ... */
6d2010ae
A
13606 curr_address = next_address;
13607 curr_skip = next_skip;
91447636
A
13608 curr_offset = next_offset;
13609 curr_depth = next_depth;
6d2010ae
A
13610 curr_max_above = next_max_above;
13611 curr_max_below = next_max_below;
91447636
A
13612 } else {
13613 /* we won't need "next_entry" after all */
13614 if (next_entry != NULL) {
13615 /* release "next_map" */
13616 if (next_map != curr_map && not_in_kdp) {
13617 vm_map_unlock_read(next_map);
13618 }
13619 }
13620 }
13621 next_entry = NULL;
13622 next_map = NULL;
13623 next_offset = 0;
6d2010ae 13624 next_skip = 0;
91447636 13625 next_depth = 0;
6d2010ae
A
13626 next_max_below = -1;
13627 next_max_above = -1;
91447636 13628
3e170ce0
A
13629 if (curr_entry->is_sub_map &&
13630 curr_depth < user_max_depth) {
13631 /*
13632 * We're not as deep as we could be: we must have
13633 * gone back up after not finding anything mapped
13634		 * below the original top-level map entry's range.
13635 * Let's move "curr_address" forward and recurse again.
13636 */
13637 user_address = curr_address;
13638 goto recurse_again;
13639 }
13640
91447636 13641 *nesting_depth = curr_depth;
6d2010ae
A
13642 *size = curr_max_above + curr_max_below;
13643 *address = user_address + curr_skip - curr_max_below;
91447636 13644
b0d623f7
A
13645// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13646// so probably should be a real 32b ID vs. ptr.
13647// Current users just check for equality
0a7de745 13648#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 13649
2d21ac55 13650 if (look_for_pages) {
3e170ce0 13651 submap_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 13652 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
13653 submap_info->protection = curr_entry->protection;
13654 submap_info->inheritance = curr_entry->inheritance;
13655 submap_info->max_protection = curr_entry->max_protection;
13656 submap_info->behavior = curr_entry->behavior;
13657 submap_info->user_wired_count = curr_entry->user_wired_count;
13658 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 13659 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 13660 } else {
3e170ce0 13661 short_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 13662 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
13663 short_info->protection = curr_entry->protection;
13664 short_info->inheritance = curr_entry->inheritance;
13665 short_info->max_protection = curr_entry->max_protection;
13666 short_info->behavior = curr_entry->behavior;
13667 short_info->user_wired_count = curr_entry->user_wired_count;
13668 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 13669 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 13670 }
91447636
A
13671
13672 extended.pages_resident = 0;
13673 extended.pages_swapped_out = 0;
13674 extended.pages_shared_now_private = 0;
13675 extended.pages_dirtied = 0;
39236c6e 13676 extended.pages_reusable = 0;
91447636
A
13677 extended.external_pager = 0;
13678 extended.shadow_depth = 0;
3e170ce0
A
13679 extended.share_mode = SM_EMPTY;
13680 extended.ref_count = 0;
91447636
A
13681
13682 if (not_in_kdp) {
13683 if (!curr_entry->is_sub_map) {
6d2010ae
A
13684 vm_map_offset_t range_start, range_end;
13685 range_start = MAX((curr_address - curr_max_below),
0a7de745 13686 curr_entry->vme_start);
6d2010ae 13687 range_end = MIN((curr_address + curr_max_above),
0a7de745 13688 curr_entry->vme_end);
91447636 13689 vm_map_region_walk(curr_map,
0a7de745
A
13690 range_start,
13691 curr_entry,
13692 (VME_OFFSET(curr_entry) +
13693 (range_start -
13694 curr_entry->vme_start)),
13695 range_end - range_start,
13696 &extended,
13697 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
13698 if (extended.external_pager &&
13699 extended.ref_count == 2 &&
13700 extended.share_mode == SM_SHARED) {
2d21ac55 13701 extended.share_mode = SM_PRIVATE;
91447636 13702 }
91447636
A
13703 } else {
13704 if (curr_entry->use_pmap) {
2d21ac55 13705 extended.share_mode = SM_TRUESHARED;
91447636 13706 } else {
2d21ac55 13707 extended.share_mode = SM_PRIVATE;
91447636 13708 }
d9a64523 13709 extended.ref_count = VME_SUBMAP(curr_entry)->map_refcnt;
91447636
A
13710 }
13711 }
13712
2d21ac55
A
13713 if (look_for_pages) {
13714 submap_info->pages_resident = extended.pages_resident;
13715 submap_info->pages_swapped_out = extended.pages_swapped_out;
13716 submap_info->pages_shared_now_private =
0a7de745 13717 extended.pages_shared_now_private;
2d21ac55
A
13718 submap_info->pages_dirtied = extended.pages_dirtied;
13719 submap_info->external_pager = extended.external_pager;
13720 submap_info->shadow_depth = extended.shadow_depth;
13721 submap_info->share_mode = extended.share_mode;
13722 submap_info->ref_count = extended.ref_count;
5ba3f43e 13723
39236c6e
A
13724 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13725 submap_info->pages_reusable = extended.pages_reusable;
13726 }
2d21ac55
A
13727 } else {
13728 short_info->external_pager = extended.external_pager;
13729 short_info->shadow_depth = extended.shadow_depth;
13730 short_info->share_mode = extended.share_mode;
13731 short_info->ref_count = extended.ref_count;
13732 }
91447636
A
13733
13734 if (not_in_kdp) {
13735 vm_map_unlock_read(curr_map);
13736 }
13737
13738 return KERN_SUCCESS;
13739}
13740
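/*
 * Illustrative sketch (not part of the original source): a vmmap-style
 * walk over a task's regions, descending into submaps, through the MIG
 * wrapper mach_vm_region_recurse(), which is serviced by
 * vm_map_region_recurse_64() above.  The function name is hypothetical.
 */
#if 0	/* compiled out: user-space code, kept only as an example */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_regions(task_t task)
{
	mach_vm_address_t addr = 0;

	for (;;) {
		mach_vm_size_t                   size = 0;
		natural_t                        depth = 64;	/* max submap depth to follow */
		vm_region_submap_info_data_64_t  info;
		mach_msg_type_number_t           count =
		    VM_REGION_SUBMAP_INFO_COUNT_64;

		if (mach_vm_region_recurse(task, &addr, &size, &depth,
		    (vm_region_recurse_info_t)&info, &count) != KERN_SUCCESS) {
			break;	/* KERN_INVALID_ADDRESS: no more regions */
		}
		printf("0x%llx-0x%llx depth=%u prot=%d share_mode=%d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.protection, (int)info.share_mode);
		addr += size;
	}
}
#endif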
1c79356b
A
13741/*
13742 * vm_region:
13743 *
13744 * User call to obtain information about a region in
13745 * a task's address map. Several info flavors are
13746 * supported (basic, basic 64-bit, extended, top).
13747 *
13748 * XXX The reserved and behavior fields cannot be filled
13749 * in until the vm merge from the IK is completed, and
13750 * vm_reserve is implemented.
1c79356b
A
13751 */
13752
13753kern_return_t
91447636 13754vm_map_region(
0a7de745
A
13755 vm_map_t map,
13756 vm_map_offset_t *address, /* IN/OUT */
13757 vm_map_size_t *size, /* OUT */
13758 vm_region_flavor_t flavor, /* IN */
13759 vm_region_info_t info, /* OUT */
13760 mach_msg_type_number_t *count, /* IN/OUT */
13761 mach_port_t *object_name) /* OUT */
1c79356b 13762{
0a7de745
A
13763 vm_map_entry_t tmp_entry;
13764 vm_map_entry_t entry;
13765 vm_map_offset_t start;
1c79356b 13766
0a7de745
A
13767 if (map == VM_MAP_NULL) {
13768 return KERN_INVALID_ARGUMENT;
13769 }
1c79356b
A
13770
13771 switch (flavor) {
1c79356b 13772 case VM_REGION_BASIC_INFO:
2d21ac55 13773 /* legacy for old 32-bit objects info */
1c79356b 13774 {
0a7de745 13775 vm_region_basic_info_t basic;
91447636 13776
0a7de745
A
13777 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13778 return KERN_INVALID_ARGUMENT;
13779 }
1c79356b 13780
2d21ac55
A
13781 basic = (vm_region_basic_info_t) info;
13782 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 13783
2d21ac55 13784 vm_map_lock_read(map);
1c79356b 13785
2d21ac55
A
13786 start = *address;
13787 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13788 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13789 vm_map_unlock_read(map);
0a7de745 13790 return KERN_INVALID_ADDRESS;
2d21ac55
A
13791 }
13792 } else {
13793 entry = tmp_entry;
1c79356b 13794 }
1c79356b 13795
2d21ac55 13796 start = entry->vme_start;
1c79356b 13797
3e170ce0 13798 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
13799 basic->protection = entry->protection;
13800 basic->inheritance = entry->inheritance;
13801 basic->max_protection = entry->max_protection;
13802 basic->behavior = entry->behavior;
13803 basic->user_wired_count = entry->user_wired_count;
13804 basic->reserved = entry->is_sub_map;
13805 *address = start;
13806 *size = (entry->vme_end - start);
91447636 13807
0a7de745
A
13808 if (object_name) {
13809 *object_name = IP_NULL;
13810 }
2d21ac55
A
13811 if (entry->is_sub_map) {
13812 basic->shared = FALSE;
13813 } else {
13814 basic->shared = entry->is_shared;
13815 }
91447636 13816
2d21ac55 13817 vm_map_unlock_read(map);
0a7de745 13818 return KERN_SUCCESS;
91447636
A
13819 }
13820
13821 case VM_REGION_BASIC_INFO_64:
13822 {
0a7de745 13823 vm_region_basic_info_64_t basic;
91447636 13824
0a7de745
A
13825 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13826 return KERN_INVALID_ARGUMENT;
13827 }
2d21ac55
A
13828
13829 basic = (vm_region_basic_info_64_t) info;
13830 *count = VM_REGION_BASIC_INFO_COUNT_64;
13831
13832 vm_map_lock_read(map);
13833
13834 start = *address;
13835 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13836 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13837 vm_map_unlock_read(map);
0a7de745 13838 return KERN_INVALID_ADDRESS;
2d21ac55
A
13839 }
13840 } else {
13841 entry = tmp_entry;
13842 }
91447636 13843
2d21ac55 13844 start = entry->vme_start;
91447636 13845
3e170ce0 13846 basic->offset = VME_OFFSET(entry);
2d21ac55
A
13847 basic->protection = entry->protection;
13848 basic->inheritance = entry->inheritance;
13849 basic->max_protection = entry->max_protection;
13850 basic->behavior = entry->behavior;
13851 basic->user_wired_count = entry->user_wired_count;
13852 basic->reserved = entry->is_sub_map;
13853 *address = start;
13854 *size = (entry->vme_end - start);
91447636 13855
0a7de745
A
13856 if (object_name) {
13857 *object_name = IP_NULL;
13858 }
2d21ac55
A
13859 if (entry->is_sub_map) {
13860 basic->shared = FALSE;
13861 } else {
13862 basic->shared = entry->is_shared;
91447636 13863 }
2d21ac55
A
13864
13865 vm_map_unlock_read(map);
0a7de745 13866 return KERN_SUCCESS;
1c79356b
A
13867 }
13868 case VM_REGION_EXTENDED_INFO:
0a7de745
A
13869 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13870 return KERN_INVALID_ARGUMENT;
13871 }
13872 /*fallthru*/
39236c6e 13873 case VM_REGION_EXTENDED_INFO__legacy:
0a7de745 13874 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
39236c6e 13875 return KERN_INVALID_ARGUMENT;
0a7de745 13876 }
39236c6e 13877
0a7de745
A
13878 {
13879 vm_region_extended_info_t extended;
13880 mach_msg_type_number_t original_count;
1c79356b 13881
0a7de745 13882 extended = (vm_region_extended_info_t) info;
1c79356b 13883
0a7de745 13884 vm_map_lock_read(map);
1c79356b 13885
0a7de745
A
13886 start = *address;
13887 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13888 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13889 vm_map_unlock_read(map);
13890 return KERN_INVALID_ADDRESS;
13891 }
13892 } else {
13893 entry = tmp_entry;
13894 }
13895 start = entry->vme_start;
1c79356b 13896
0a7de745
A
13897 extended->protection = entry->protection;
13898 extended->user_tag = VME_ALIAS(entry);
13899 extended->pages_resident = 0;
13900 extended->pages_swapped_out = 0;
13901 extended->pages_shared_now_private = 0;
13902 extended->pages_dirtied = 0;
13903 extended->external_pager = 0;
13904 extended->shadow_depth = 0;
13905
13906 original_count = *count;
13907 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13908 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13909 } else {
13910 extended->pages_reusable = 0;
13911 *count = VM_REGION_EXTENDED_INFO_COUNT;
13912 }
39236c6e 13913
0a7de745 13914 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 13915
0a7de745
A
13916 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
13917 extended->share_mode = SM_PRIVATE;
13918 }
1c79356b 13919
0a7de745
A
13920 if (object_name) {
13921 *object_name = IP_NULL;
13922 }
13923 *address = start;
13924 *size = (entry->vme_end - start);
1c79356b 13925
0a7de745
A
13926 vm_map_unlock_read(map);
13927 return KERN_SUCCESS;
13928 }
1c79356b 13929 case VM_REGION_TOP_INFO:
5ba3f43e 13930 {
0a7de745 13931 vm_region_top_info_t top;
1c79356b 13932
0a7de745
A
13933 if (*count < VM_REGION_TOP_INFO_COUNT) {
13934 return KERN_INVALID_ARGUMENT;
13935 }
1c79356b 13936
2d21ac55
A
13937 top = (vm_region_top_info_t) info;
13938 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 13939
2d21ac55 13940 vm_map_lock_read(map);
1c79356b 13941
2d21ac55
A
13942 start = *address;
13943 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13944 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13945 vm_map_unlock_read(map);
0a7de745 13946 return KERN_INVALID_ADDRESS;
2d21ac55
A
13947 }
13948 } else {
13949 entry = tmp_entry;
2d21ac55
A
13950 }
13951 start = entry->vme_start;
1c79356b 13952
2d21ac55
A
13953 top->private_pages_resident = 0;
13954 top->shared_pages_resident = 0;
1c79356b 13955
2d21ac55 13956 vm_map_region_top_walk(entry, top);
1c79356b 13957
0a7de745 13958 if (object_name) {
2d21ac55 13959 *object_name = IP_NULL;
0a7de745 13960 }
2d21ac55
A
13961 *address = start;
13962 *size = (entry->vme_end - start);
1c79356b 13963
2d21ac55 13964 vm_map_unlock_read(map);
0a7de745 13965 return KERN_SUCCESS;
1c79356b
A
13966 }
13967 default:
0a7de745 13968 return KERN_INVALID_ARGUMENT;
1c79356b
A
13969 }
13970}
13971
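/*
 * Illustrative sketch (not part of the original source): querying the
 * region containing an address with the simplest flavor,
 * VM_REGION_BASIC_INFO_64, through mach_vm_region(), which is serviced
 * by vm_map_region() above.  The function name is hypothetical.
 */
#if 0	/* compiled out: user-space code, kept only as an example */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
show_region(mach_vm_address_t where)
{
	mach_vm_address_t               addr = where;
	mach_vm_size_t                  size = 0;
	vm_region_basic_info_data_64_t  info;
	mach_msg_type_number_t          count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t                     object_name = MACH_PORT_NULL;	/* always IP_NULL */

	if (mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
	    &count, &object_name) == KERN_SUCCESS) {
		printf("0x%llx-0x%llx prot=%d shared=%d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    info.protection, (int)info.shared);
	}
}
#endif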
0a7de745
A
13972#define OBJ_RESIDENT_COUNT(obj, entry_size) \
13973 MIN((entry_size), \
13974 ((obj)->all_reusable ? \
13975 (obj)->wired_page_count : \
b0d623f7 13976 (obj)->resident_page_count - (obj)->reusable_page_count))
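/*
 * Worked example (not part of the original source) for the macro above:
 * for an entry spanning 100 pages whose object has
 * resident_page_count = 80 and reusable_page_count = 30, it reports
 * MIN(100, 80 - 30) = 50 resident pages; if the object were marked
 * all_reusable with wired_page_count = 10, it would report
 * MIN(100, 10) = 10.
 */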
2d21ac55 13977
0c530ab8 13978void
91447636 13979vm_map_region_top_walk(
0a7de745 13980 vm_map_entry_t entry,
91447636 13981 vm_region_top_info_t top)
1c79356b 13982{
3e170ce0 13983 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
13984 top->share_mode = SM_EMPTY;
13985 top->ref_count = 0;
13986 top->obj_id = 0;
13987 return;
1c79356b 13988 }
2d21ac55 13989
91447636 13990 {
0a7de745
A
13991 struct vm_object *obj, *tmp_obj;
13992 int ref_count;
13993 uint32_t entry_size;
1c79356b 13994
b0d623f7 13995 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 13996
3e170ce0 13997 obj = VME_OBJECT(entry);
1c79356b 13998
2d21ac55
A
13999 vm_object_lock(obj);
14000
0a7de745 14001 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14002 ref_count--;
0a7de745 14003 }
2d21ac55 14004
b0d623f7 14005 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55 14006 if (obj->shadow) {
0a7de745 14007 if (ref_count == 1) {
b0d623f7 14008 top->private_pages_resident =
0a7de745
A
14009 OBJ_RESIDENT_COUNT(obj, entry_size);
14010 } else {
b0d623f7 14011 top->shared_pages_resident =
0a7de745
A
14012 OBJ_RESIDENT_COUNT(obj, entry_size);
14013 }
2d21ac55
A
14014 top->ref_count = ref_count;
14015 top->share_mode = SM_COW;
5ba3f43e 14016
2d21ac55
A
14017 while ((tmp_obj = obj->shadow)) {
14018 vm_object_lock(tmp_obj);
14019 vm_object_unlock(obj);
14020 obj = tmp_obj;
1c79356b 14021
0a7de745 14022 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14023 ref_count--;
0a7de745 14024 }
1c79356b 14025
b0d623f7
A
14026 assert(obj->reusable_page_count <= obj->resident_page_count);
14027 top->shared_pages_resident +=
0a7de745 14028 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14029 top->ref_count += ref_count - 1;
14030 }
1c79356b 14031 } else {
6d2010ae
A
14032 if (entry->superpage_size) {
14033 top->share_mode = SM_LARGE_PAGE;
14034 top->shared_pages_resident = 0;
14035 top->private_pages_resident = entry_size;
14036 } else if (entry->needs_copy) {
2d21ac55 14037 top->share_mode = SM_COW;
b0d623f7 14038 top->shared_pages_resident =
0a7de745 14039 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14040 } else {
14041 if (ref_count == 1 ||
14042 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
14043 top->share_mode = SM_PRIVATE;
0a7de745
A
14044 top->private_pages_resident =
14045 OBJ_RESIDENT_COUNT(obj,
14046 entry_size);
2d21ac55
A
14047 } else {
14048 top->share_mode = SM_SHARED;
b0d623f7 14049 top->shared_pages_resident =
0a7de745
A
14050 OBJ_RESIDENT_COUNT(obj,
14051 entry_size);
2d21ac55
A
14052 }
14053 }
14054 top->ref_count = ref_count;
1c79356b 14055 }
b0d623f7 14056 /* XXX K64: obj_id will be truncated */
39236c6e 14057 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 14058
2d21ac55 14059 vm_object_unlock(obj);
1c79356b 14060 }
91447636
A
14061}
14062
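/*
 * Illustrative sketch (not part of the original source): fetching the
 * top(1)-style private/shared split computed by vm_map_region_top_walk()
 * above, via the VM_REGION_TOP_INFO flavor of mach_vm_region().  The
 * function name is hypothetical.
 */
#if 0	/* compiled out: user-space code, kept only as an example */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
show_top_info(mach_vm_address_t where)
{
	mach_vm_address_t           addr = where;
	mach_vm_size_t              size = 0;
	vm_region_top_info_data_t   info;
	mach_msg_type_number_t      count = VM_REGION_TOP_INFO_COUNT;
	mach_port_t                 object_name = MACH_PORT_NULL;

	if (mach_vm_region(mach_task_self(), &addr, &size, VM_REGION_TOP_INFO,
	    (vm_region_info_t)&info, &count, &object_name) == KERN_SUCCESS) {
		printf("private=%u shared=%u share_mode=%d ref_count=%d\n",
		    (unsigned)info.private_pages_resident,
		    (unsigned)info.shared_pages_resident,
		    (int)info.share_mode, (int)info.ref_count);
	}
}
#endif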
0c530ab8 14063void
91447636 14064vm_map_region_walk(
0a7de745
A
14065 vm_map_t map,
14066 vm_map_offset_t va,
14067 vm_map_entry_t entry,
14068 vm_object_offset_t offset,
14069 vm_object_size_t range,
14070 vm_region_extended_info_t extended,
14071 boolean_t look_for_pages,
39236c6e 14072 mach_msg_type_number_t count)
91447636 14073{
0a7de745 14074 struct vm_object *obj, *tmp_obj;
39037602
A
14075 vm_map_offset_t last_offset;
14076 int i;
14077 int ref_count;
0a7de745
A
14078 struct vm_object *shadow_object;
14079 int shadow_depth;
14080 boolean_t do_region_footprint;
a39ff7e2
A
14081
14082 do_region_footprint = task_self_region_footprint();
91447636 14083
3e170ce0 14084 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 14085 (entry->is_sub_map) ||
3e170ce0 14086 (VME_OBJECT(entry)->phys_contiguous &&
0a7de745 14087 !entry->superpage_size)) {
2d21ac55
A
14088 extended->share_mode = SM_EMPTY;
14089 extended->ref_count = 0;
14090 return;
1c79356b 14091 }
6d2010ae
A
14092
14093 if (entry->superpage_size) {
14094 extended->shadow_depth = 0;
14095 extended->share_mode = SM_LARGE_PAGE;
14096 extended->ref_count = 1;
14097 extended->external_pager = 0;
14098 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14100 return;
14101 }
14102
39037602 14103 obj = VME_OBJECT(entry);
2d21ac55 14104
39037602 14105 vm_object_lock(obj);
2d21ac55 14106
0a7de745 14107 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14108 ref_count--;
0a7de745 14109 }
2d21ac55 14110
39037602
A
14111 if (look_for_pages) {
14112 for (last_offset = offset + range;
0a7de745
A
14113 offset < last_offset;
14114 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
a39ff7e2
A
14115 if (do_region_footprint) {
14116 int disp;
14117
14118 disp = 0;
d9a64523
A
14119 if (map->has_corpse_footprint) {
14120 /*
14121 * Query the page info data we saved
14122 * while forking the corpse.
14123 */
14124 vm_map_corpse_footprint_query_page_info(
14125 map,
14126 va,
14127 &disp);
14128 } else {
14129 /*
14130 * Query the pmap.
14131 */
14132 pmap_query_page_info(map->pmap,
0a7de745
A
14133 va,
14134 &disp);
d9a64523 14135 }
a39ff7e2 14136 if (disp & PMAP_QUERY_PAGE_PRESENT) {
d9a64523
A
14137 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14138 extended->pages_resident++;
14139 }
a39ff7e2
A
14140 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14141 extended->pages_reusable++;
14142 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
0a7de745 14143 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
a39ff7e2
A
14144 /* alternate accounting */
14145 } else {
14146 extended->pages_dirtied++;
14147 }
14148 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14149 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14150 /* alternate accounting */
14151 } else {
14152 extended->pages_swapped_out++;
14153 }
14154 }
14155 /* deal with alternate accounting */
d9a64523
A
14156 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14157 /* && not tagged as no-footprint? */
14158 VM_OBJECT_OWNER(obj) != NULL &&
14159 VM_OBJECT_OWNER(obj)->map == map) {
14160 if ((((va
0a7de745
A
14161 - entry->vme_start
14162 + VME_OFFSET(entry))
14163 / PAGE_SIZE) <
14164 (obj->resident_page_count +
14165 vm_compressor_pager_get_count(obj->pager)))) {
d9a64523
A
14166 /*
14167 * Non-volatile purgeable object owned
14168 * by this task: report the first
14169 * "#resident + #compressed" pages as
14170 * "resident" (to show that they
14171 * contribute to the footprint) but not
14172 * "dirty" (to avoid double-counting
14173 * with the fake "non-volatile" region
14174 * we'll report at the end of the
14175 * address space to account for all
14176 * (mapped or not) non-volatile memory
14177 * owned by this task.
14178 */
14179 extended->pages_resident++;
14180 }
14181 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
0a7de745
A
14182 obj->purgable == VM_PURGABLE_EMPTY) &&
14183 /* && not tagged as no-footprint? */
14184 VM_OBJECT_OWNER(obj) != NULL &&
14185 VM_OBJECT_OWNER(obj)->map == map) {
d9a64523 14186 if ((((va
0a7de745
A
14187 - entry->vme_start
14188 + VME_OFFSET(entry))
14189 / PAGE_SIZE) <
14190 obj->wired_page_count)) {
d9a64523
A
14191 /*
14192 * Volatile|empty purgeable object owned
14193 * by this task: report the first
14194 * "#wired" pages as "resident" (to
14195 * show that they contribute to the
14196 * footprint) but not "dirty" (to avoid
14197 * double-counting with the fake
14198 * "non-volatile" region we'll report
14199 * at the end of the address space to
14200 * account for all (mapped or not)
14201 * non-volatile memory owned by this
14202 * task.
14203 */
14204 extended->pages_resident++;
14205 }
14206 } else if (obj->purgable != VM_PURGABLE_DENY) {
a39ff7e2
A
14207 /*
14208 * Pages from purgeable objects
0a7de745 14209 * will be reported as dirty
a39ff7e2
A
14210 * appropriately in an extra
14211 * fake memory region at the end of
14212 * the address space.
14213 */
39037602 14214 } else if (entry->iokit_acct) {
a39ff7e2
A
14215 /*
14216 * IOKit mappings are considered
14217 * as fully dirty for footprint's
14218 * sake.
14219 */
39037602 14220 extended->pages_dirtied++;
2d21ac55 14221 }
39037602 14222 continue;
2d21ac55 14223 }
a39ff7e2 14224
39037602 14225 vm_map_region_look_for_page(map, va, obj,
0a7de745
A
14226 offset, ref_count,
14227 0, extended, count);
2d21ac55 14228 }
a39ff7e2
A
14229
14230 if (do_region_footprint) {
39037602
A
14231 goto collect_object_info;
14232 }
39037602 14233 } else {
0a7de745 14234collect_object_info:
39037602
A
14235 shadow_object = obj->shadow;
14236 shadow_depth = 0;
2d21ac55 14237
0a7de745 14238 if (!(obj->pager_trusted) && !(obj->internal)) {
39037602 14239 extended->external_pager = 1;
0a7de745 14240 }
39037602
A
14241
14242 if (shadow_object != VM_OBJECT_NULL) {
14243 vm_object_lock(shadow_object);
14244 for (;
0a7de745
A
14245 shadow_object != VM_OBJECT_NULL;
14246 shadow_depth++) {
14247 vm_object_t next_shadow;
39037602 14248
0a7de745
A
14249 if (!(shadow_object->pager_trusted) &&
14250 !(shadow_object->internal)) {
39037602 14251 extended->external_pager = 1;
0a7de745 14252 }
39037602
A
14253
14254 next_shadow = shadow_object->shadow;
14255 if (next_shadow) {
14256 vm_object_lock(next_shadow);
14257 }
14258 vm_object_unlock(shadow_object);
14259 shadow_object = next_shadow;
2d21ac55 14260 }
91447636 14261 }
39037602
A
14262 extended->shadow_depth = shadow_depth;
14263 }
1c79356b 14264
0a7de745 14265 if (extended->shadow_depth || entry->needs_copy) {
39037602 14266 extended->share_mode = SM_COW;
0a7de745
A
14267 } else {
14268 if (ref_count == 1) {
39037602 14269 extended->share_mode = SM_PRIVATE;
0a7de745
A
14270 } else {
14271 if (obj->true_share) {
39037602 14272 extended->share_mode = SM_TRUESHARED;
0a7de745 14273 } else {
39037602 14274 extended->share_mode = SM_SHARED;
0a7de745 14275 }
2d21ac55 14276 }
39037602
A
14277 }
14278 extended->ref_count = ref_count - extended->shadow_depth;
5ba3f43e 14279
39037602 14280 for (i = 0; i < extended->shadow_depth; i++) {
0a7de745 14281 if ((tmp_obj = obj->shadow) == 0) {
39037602 14282 break;
0a7de745 14283 }
39037602 14284 vm_object_lock(tmp_obj);
2d21ac55 14285 vm_object_unlock(obj);
1c79356b 14286
0a7de745 14287 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
39037602 14288 ref_count--;
0a7de745 14289 }
39037602
A
14290
14291 extended->ref_count += ref_count;
14292 obj = tmp_obj;
14293 }
14294 vm_object_unlock(obj);
91447636 14295
39037602 14296 if (extended->share_mode == SM_SHARED) {
0a7de745
A
14297 vm_map_entry_t cur;
14298 vm_map_entry_t last;
39037602 14299 int my_refs;
91447636 14300
39037602
A
14301 obj = VME_OBJECT(entry);
14302 last = vm_map_to_entry(map);
14303 my_refs = 0;
91447636 14304
0a7de745 14305 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14306 ref_count--;
0a7de745
A
14307 }
14308 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
39037602 14309 my_refs += vm_map_region_count_obj_refs(cur, obj);
0a7de745 14310 }
39037602 14311
0a7de745 14312 if (my_refs == ref_count) {
39037602 14313 extended->share_mode = SM_PRIVATE_ALIASED;
0a7de745 14314 } else if (my_refs > 1) {
39037602 14315 extended->share_mode = SM_SHARED_ALIASED;
0a7de745 14316 }
91447636 14317 }
1c79356b
A
14318}
14319
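/*
 * vm_map_region_walk() supplies the data behind the VM_REGION_EXTENDED_INFO
 * flavor of mach_vm_region(), including the aliasing verdict computed at the
 * end of the walk (SM_PRIVATE_ALIASED / SM_SHARED_ALIASED).  A sketch of a
 * caller, assuming "addr" is mapped in the calling task:
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
show_extended_info(mach_vm_address_t addr)
{
	vm_region_extended_info_data_t info;
	mach_msg_type_number_t         count = VM_REGION_EXTENDED_INFO_COUNT;
	mach_vm_size_t                 size = 0;
	mach_port_t                    object_name = MACH_PORT_NULL;

	if (mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_EXTENDED_INFO, (vm_region_info_t)&info,
	    &count, &object_name) != KERN_SUCCESS) {
		return;
	}
	printf("resident=%u dirtied=%u swapped=%u shadow_depth=%u share_mode=%d\n",
	    info.pages_resident, info.pages_dirtied, info.pages_swapped_out,
	    (unsigned)info.shadow_depth, (int)info.share_mode);
}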
1c79356b 14320
91447636
A
14321/* object is locked on entry and locked on return */
14322
14323
14324static void
14325vm_map_region_look_for_page(
0a7de745
A
14326 __unused vm_map_t map,
14327 __unused vm_map_offset_t va,
14328 vm_object_t object,
14329 vm_object_offset_t offset,
14330 int max_refcnt,
14331 int depth,
14332 vm_region_extended_info_t extended,
39236c6e 14333 mach_msg_type_number_t count)
1c79356b 14334{
0a7de745
A
14335 vm_page_t p;
14336 vm_object_t shadow;
14337 int ref_count;
14338 vm_object_t caller_object;
39037602 14339
91447636
A
14340 shadow = object->shadow;
14341 caller_object = object;
1c79356b 14342
5ba3f43e 14343
91447636 14344 while (TRUE) {
0a7de745 14345 if (!(object->pager_trusted) && !(object->internal)) {
2d21ac55 14346 extended->external_pager = 1;
0a7de745 14347 }
1c79356b 14348
91447636 14349 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
0a7de745
A
14350 if (shadow && (max_refcnt == 1)) {
14351 extended->pages_shared_now_private++;
14352 }
1c79356b 14353
d9a64523 14354 if (!p->vmp_fictitious &&
0a7de745
A
14355 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14356 extended->pages_dirtied++;
14357 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
d9a64523 14358 if (p->vmp_reusable || object->all_reusable) {
39236c6e
A
14359 extended->pages_reusable++;
14360 }
14361 }
1c79356b 14362
39236c6e 14363 extended->pages_resident++;
91447636 14364
0a7de745 14365 if (object != caller_object) {
2d21ac55 14366 vm_object_unlock(object);
0a7de745 14367 }
91447636
A
14368
14369 return;
1c79356b 14370 }
39236c6e
A
14371 if (object->internal &&
14372 object->alive &&
14373 !object->terminating &&
14374 object->pager_ready) {
39037602
A
14375 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14376 == VM_EXTERNAL_STATE_EXISTS) {
14377 /* the pager has that page */
14378 extended->pages_swapped_out++;
0a7de745 14379 if (object != caller_object) {
39037602 14380 vm_object_unlock(object);
0a7de745 14381 }
39037602 14382 return;
2d21ac55 14383 }
1c79356b 14384 }
2d21ac55 14385
91447636 14386 if (shadow) {
2d21ac55 14387 vm_object_lock(shadow);
1c79356b 14388
0a7de745
A
14389 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14390 ref_count--;
14391 }
1c79356b 14392
0a7de745
A
14393 if (++depth > extended->shadow_depth) {
14394 extended->shadow_depth = depth;
14395 }
1c79356b 14396
0a7de745
A
14397 if (ref_count > max_refcnt) {
14398 max_refcnt = ref_count;
14399 }
5ba3f43e 14400
0a7de745 14401 if (object != caller_object) {
2d21ac55 14402 vm_object_unlock(object);
0a7de745 14403 }
91447636 14404
6d2010ae 14405 offset = offset + object->vo_shadow_offset;
91447636
A
14406 object = shadow;
14407 shadow = object->shadow;
14408 continue;
1c79356b 14409 }
0a7de745 14410 if (object != caller_object) {
2d21ac55 14411 vm_object_unlock(object);
0a7de745 14412 }
91447636
A
14413 break;
14414 }
14415}
1c79356b 14416
91447636
A
14417static int
14418vm_map_region_count_obj_refs(
0a7de745 14419 vm_map_entry_t entry,
91447636
A
14420 vm_object_t object)
14421{
0a7de745 14422 int ref_count;
39037602
A
14423 vm_object_t chk_obj;
14424 vm_object_t tmp_obj;
1c79356b 14425
0a7de745
A
14426 if (VME_OBJECT(entry) == 0) {
14427 return 0;
14428 }
1c79356b 14429
0a7de745
A
14430 if (entry->is_sub_map) {
14431 return 0;
14432 } else {
2d21ac55 14433 ref_count = 0;
1c79356b 14434
3e170ce0 14435 chk_obj = VME_OBJECT(entry);
2d21ac55 14436 vm_object_lock(chk_obj);
1c79356b 14437
2d21ac55 14438 while (chk_obj) {
0a7de745 14439 if (chk_obj == object) {
2d21ac55 14440 ref_count++;
0a7de745 14441 }
2d21ac55 14442 tmp_obj = chk_obj->shadow;
0a7de745 14443 if (tmp_obj) {
2d21ac55 14444 vm_object_lock(tmp_obj);
0a7de745 14445 }
2d21ac55 14446 vm_object_unlock(chk_obj);
1c79356b 14447
2d21ac55
A
14448 chk_obj = tmp_obj;
14449 }
1c79356b 14450 }
0a7de745 14451 return ref_count;
1c79356b
A
14452}
14453
14454
14455/*
91447636
A
14456 * Routine: vm_map_simplify
14457 *
14458 * Description:
14459 * Attempt to simplify the map representation in
14460 * the vicinity of the given starting address.
14461 * Note:
14462 * This routine is intended primarily to keep the
14463 * kernel maps more compact -- they generally don't
14464 * benefit from the "expand a map entry" technology
14465 * at allocation time because the adjacent entry
14466 * is often wired down.
1c79356b 14467 */
91447636
A
14468void
14469vm_map_simplify_entry(
0a7de745
A
14470 vm_map_t map,
14471 vm_map_entry_t this_entry)
1c79356b 14472{
0a7de745 14473 vm_map_entry_t prev_entry;
1c79356b 14474
91447636 14475 counter(c_vm_map_simplify_entry_called++);
1c79356b 14476
91447636 14477 prev_entry = this_entry->vme_prev;
1c79356b 14478
91447636 14479 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 14480 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 14481
91447636 14482 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 14483
2d21ac55 14484 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
14485 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14486 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
0a7de745
A
14487 prev_entry->vme_start))
14488 == VME_OFFSET(this_entry)) &&
1c79356b 14489
fe8ab488
A
14490 (prev_entry->behavior == this_entry->behavior) &&
14491 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
14492 (prev_entry->protection == this_entry->protection) &&
14493 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
14494 (prev_entry->inheritance == this_entry->inheritance) &&
14495 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 14496 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 14497 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
14498 (prev_entry->permanent == this_entry->permanent) &&
14499 (prev_entry->map_aligned == this_entry->map_aligned) &&
14500 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14501 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
d9a64523 14502 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
fe8ab488
A
14503 /* from_reserved_zone: OK if that field doesn't match */
14504 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0 14505 (prev_entry->vme_resilient_codesign ==
0a7de745 14506 this_entry->vme_resilient_codesign) &&
3e170ce0 14507 (prev_entry->vme_resilient_media ==
0a7de745 14508 this_entry->vme_resilient_media) &&
fe8ab488 14509
91447636
A
14510 (prev_entry->wired_count == this_entry->wired_count) &&
14511 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 14512
39037602 14513 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
14514 (prev_entry->in_transition == FALSE) &&
14515 (this_entry->in_transition == FALSE) &&
14516 (prev_entry->needs_wakeup == FALSE) &&
14517 (this_entry->needs_wakeup == FALSE) &&
14518 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
14519 (this_entry->is_shared == FALSE) &&
14520 (prev_entry->superpage_size == FALSE) &&
14521 (this_entry->superpage_size == FALSE)
0a7de745 14522 ) {
316670eb 14523 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 14524 assert(prev_entry->vme_start < this_entry->vme_end);
0a7de745 14525 if (prev_entry->map_aligned) {
39236c6e 14526 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
0a7de745
A
14527 VM_MAP_PAGE_MASK(map)));
14528 }
91447636 14529 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
14530 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14531
14532 if (map->holelistenabled) {
14533 vm_map_store_update_first_free(map, this_entry, TRUE);
14534 }
14535
2d21ac55 14536 if (prev_entry->is_sub_map) {
3e170ce0 14537 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 14538 } else {
3e170ce0 14539 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 14540 }
91447636 14541 vm_map_entry_dispose(map, prev_entry);
0c530ab8 14542 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 14543 counter(c_vm_map_simplified++);
1c79356b 14544 }
91447636 14545}
1c79356b 14546
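/*
 * The adjacency test above boils down to: the two entries must abut, map the
 * same object, and the first entry's offset plus its length must equal the
 * second entry's offset (plus the long list of matching attributes).  A toy,
 * self-contained illustration of just that arithmetic -- the struct and the
 * sample values below are hypothetical, not kernel types:
 */
#include <stdbool.h>
#include <stdint.h>
#include <assert.h>

struct toy_entry {
	uint64_t    start;      /* like vme_start */
	uint64_t    end;        /* like vme_end */
	uint64_t    offset;     /* like VME_OFFSET() */
	const void *object;     /* like VME_OBJECT() */
};

static bool
toy_can_coalesce(const struct toy_entry *prev, const struct toy_entry *cur)
{
	return prev->end == cur->start &&
	       prev->object == cur->object &&
	       prev->offset + (prev->end - prev->start) == cur->offset;
}

static void
toy_example(void)
{
	int obj;    /* stands in for a shared VM object */
	struct toy_entry prev = { 0x1000, 0x2000, 0x0000, &obj };
	struct toy_entry cur  = { 0x2000, 0x3000, 0x1000, &obj };

	assert(toy_can_coalesce(&prev, &cur));
	/* The merged entry keeps prev's start and offset: [0x1000,0x3000) @ 0. */
}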
91447636
A
14547void
14548vm_map_simplify(
0a7de745
A
14549 vm_map_t map,
14550 vm_map_offset_t start)
91447636 14551{
0a7de745 14552 vm_map_entry_t this_entry;
1c79356b 14553
91447636
A
14554 vm_map_lock(map);
14555 if (vm_map_lookup_entry(map, start, &this_entry)) {
14556 vm_map_simplify_entry(map, this_entry);
14557 vm_map_simplify_entry(map, this_entry->vme_next);
14558 }
14559 counter(c_vm_map_simplify_called++);
14560 vm_map_unlock(map);
14561}
1c79356b 14562
91447636
A
14563static void
14564vm_map_simplify_range(
0a7de745
A
14565 vm_map_t map,
14566 vm_map_offset_t start,
14567 vm_map_offset_t end)
91447636 14568{
0a7de745 14569 vm_map_entry_t entry;
1c79356b 14570
91447636
A
14571 /*
14572 * The map should be locked (for "write") by the caller.
14573 */
1c79356b 14574
91447636
A
14575 if (start >= end) {
14576 /* invalid address range */
14577 return;
14578 }
1c79356b 14579
39236c6e 14580 start = vm_map_trunc_page(start,
0a7de745 14581 VM_MAP_PAGE_MASK(map));
39236c6e 14582 end = vm_map_round_page(end,
0a7de745 14583 VM_MAP_PAGE_MASK(map));
2d21ac55 14584
91447636
A
14585 if (!vm_map_lookup_entry(map, start, &entry)) {
14586 /* "start" is not mapped and "entry" ends before "start" */
14587 if (entry == vm_map_to_entry(map)) {
14588 /* start with first entry in the map */
14589 entry = vm_map_first_entry(map);
14590 } else {
14591 /* start with next entry */
14592 entry = entry->vme_next;
14593 }
14594 }
5ba3f43e 14595
91447636 14596 while (entry != vm_map_to_entry(map) &&
0a7de745 14597 entry->vme_start <= end) {
91447636
A
14598 /* try and coalesce "entry" with its previous entry */
14599 vm_map_simplify_entry(map, entry);
14600 entry = entry->vme_next;
14601 }
14602}
1c79356b 14603
1c79356b 14604
91447636
A
14605/*
14606 * Routine: vm_map_machine_attribute
14607 * Purpose:
14608 * Provide machine-specific attributes to mappings,
14609 * such as cacheability etc. for machines that provide
14610 * them. NUMA architectures and machines with big/strange
14611 * caches will use this.
14612 * Note:
14613 * Responsibilities for locking and checking are handled here,
14614 * everything else in the pmap module. If any non-volatile
14615 * information must be kept, the pmap module should handle
14616 * it itself. [This assumes that attributes do not
14617 * need to be inherited, which seems ok to me]
14618 */
14619kern_return_t
14620vm_map_machine_attribute(
0a7de745
A
14621 vm_map_t map,
14622 vm_map_offset_t start,
14623 vm_map_offset_t end,
14624 vm_machine_attribute_t attribute,
14625 vm_machine_attribute_val_t* value) /* IN/OUT */
91447636 14626{
0a7de745 14627 kern_return_t ret;
91447636
A
14628 vm_map_size_t sync_size;
14629 vm_map_entry_t entry;
5ba3f43e 14630
0a7de745 14631 if (start < vm_map_min(map) || end > vm_map_max(map)) {
91447636 14632 return KERN_INVALID_ADDRESS;
0a7de745 14633 }
1c79356b 14634
91447636
A
14635 /* Figure how much memory we need to flush (in page increments) */
14636 sync_size = end - start;
1c79356b 14637
91447636 14638 vm_map_lock(map);
5ba3f43e
A
14639
14640 if (attribute != MATTR_CACHE) {
91447636
A
14641 /* If we don't have to find physical addresses, we */
14642 /* don't have to do an explicit traversal here. */
0a7de745
A
14643 ret = pmap_attribute(map->pmap, start, end - start,
14644 attribute, value);
91447636
A
14645 vm_map_unlock(map);
14646 return ret;
14647 }
1c79356b 14648
0a7de745 14649 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 14650
0a7de745 14651 while (sync_size) {
91447636 14652 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745
A
14653 vm_map_size_t sub_size;
14654 if ((entry->vme_end - start) > sync_size) {
91447636
A
14655 sub_size = sync_size;
14656 sync_size = 0;
14657 } else {
14658 sub_size = entry->vme_end - start;
2d21ac55 14659 sync_size -= sub_size;
91447636 14660 }
0a7de745 14661 if (entry->is_sub_map) {
91447636
A
14662 vm_map_offset_t sub_start;
14663 vm_map_offset_t sub_end;
1c79356b 14664
5ba3f43e 14665 sub_start = (start - entry->vme_start)
0a7de745 14666 + VME_OFFSET(entry);
91447636
A
14667 sub_end = sub_start + sub_size;
14668 vm_map_machine_attribute(
5ba3f43e 14669 VME_SUBMAP(entry),
91447636
A
14670 sub_start,
14671 sub_end,
14672 attribute, value);
14673 } else {
3e170ce0 14674 if (VME_OBJECT(entry)) {
0a7de745
A
14675 vm_page_t m;
14676 vm_object_t object;
14677 vm_object_t base_object;
14678 vm_object_t last_object;
14679 vm_object_offset_t offset;
14680 vm_object_offset_t base_offset;
14681 vm_map_size_t range;
91447636
A
14682 range = sub_size;
14683 offset = (start - entry->vme_start)
0a7de745 14684 + VME_OFFSET(entry);
91447636 14685 base_offset = offset;
3e170ce0 14686 object = VME_OBJECT(entry);
91447636
A
14687 base_object = object;
14688 last_object = NULL;
1c79356b 14689
91447636 14690 vm_object_lock(object);
1c79356b 14691
91447636
A
14692 while (range) {
14693 m = vm_page_lookup(
14694 object, offset);
1c79356b 14695
d9a64523 14696 if (m && !m->vmp_fictitious) {
0a7de745
A
14697 ret =
14698 pmap_attribute_cache_sync(
14699 VM_PAGE_GET_PHYS_PAGE(m),
14700 PAGE_SIZE,
14701 attribute, value);
91447636 14702 } else if (object->shadow) {
0a7de745 14703 offset = offset + object->vo_shadow_offset;
91447636
A
14704 last_object = object;
14705 object = object->shadow;
14706 vm_object_lock(last_object->shadow);
14707 vm_object_unlock(last_object);
14708 continue;
14709 }
14710 range -= PAGE_SIZE;
1c79356b 14711
91447636 14712 if (base_object != object) {
0a7de745 14713 vm_object_unlock(object);
91447636
A
14714 vm_object_lock(base_object);
14715 object = base_object;
14716 }
14717 /* Bump to the next page */
14718 base_offset += PAGE_SIZE;
14719 offset = base_offset;
14720 }
14721 vm_object_unlock(object);
14722 }
14723 }
14724 start += sub_size;
14725 } else {
14726 vm_map_unlock(map);
14727 return KERN_FAILURE;
14728 }
1c79356b 14729 }
e5568f75 14730
91447636 14731 vm_map_unlock(map);
e5568f75 14732
91447636
A
14733 return ret;
14734}
e5568f75 14735
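/*
 * User-space side of the routine above is the vm_machine_attribute() Mach
 * call.  A sketch that requests a cache flush over a range; it assumes the
 * platform's pmap actually implements MATTR_CACHE (otherwise the call is
 * expected to just return an error):
 */
#include <mach/mach.h>

static kern_return_t
flush_data_cache(vm_address_t addr, vm_size_t len)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	/* Ask the kernel to synchronize caches for [addr, addr + len). */
	return vm_machine_attribute(mach_task_self(), addr, len,
	    MATTR_CACHE, &value);
}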
91447636
A
14736/*
14737 * vm_map_behavior_set:
14738 *
14739 * Sets the paging reference behavior of the specified address
14740 * range in the target map. Paging reference behavior affects
5ba3f43e 14741 * how pagein operations resulting from faults on the map will be
91447636
A
14742 * clustered.
14743 */
5ba3f43e 14744kern_return_t
91447636 14745vm_map_behavior_set(
0a7de745
A
14746 vm_map_t map,
14747 vm_map_offset_t start,
14748 vm_map_offset_t end,
14749 vm_behavior_t new_behavior)
91447636 14750{
0a7de745
A
14751 vm_map_entry_t entry;
14752 vm_map_entry_t temp_entry;
e5568f75 14753
91447636 14754 XPR(XPR_VM_MAP,
2d21ac55 14755 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 14756 map, start, end, new_behavior, 0);
e5568f75 14757
6d2010ae
A
14758 if (start > end ||
14759 start < vm_map_min(map) ||
14760 end > vm_map_max(map)) {
14761 return KERN_NO_SPACE;
14762 }
14763
91447636 14764 switch (new_behavior) {
b0d623f7
A
14765 /*
14766 * This first block of behaviors all set a persistent state on the specified
14767 * memory range. All we have to do here is to record the desired behavior
14768 * in the vm_map_entry_t's.
14769 */
14770
91447636
A
14771 case VM_BEHAVIOR_DEFAULT:
14772 case VM_BEHAVIOR_RANDOM:
14773 case VM_BEHAVIOR_SEQUENTIAL:
14774 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
14775 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14776 vm_map_lock(map);
5ba3f43e 14777
b0d623f7
A
14778 /*
14779 * The entire address range must be valid for the map.
0a7de745 14780 * Note that vm_map_range_check() does a
b0d623f7
A
14781 * vm_map_lookup_entry() internally and returns the
14782 * entry containing the start of the address range if
14783 * the entire range is valid.
14784 */
14785 if (vm_map_range_check(map, start, end, &temp_entry)) {
14786 entry = temp_entry;
14787 vm_map_clip_start(map, entry, start);
0a7de745 14788 } else {
b0d623f7 14789 vm_map_unlock(map);
0a7de745 14790 return KERN_INVALID_ADDRESS;
b0d623f7 14791 }
5ba3f43e 14792
b0d623f7
A
14793 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14794 vm_map_clip_end(map, entry, end);
fe8ab488
A
14795 if (entry->is_sub_map) {
14796 assert(!entry->use_pmap);
14797 }
5ba3f43e 14798
0a7de745 14799 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
b0d623f7
A
14800 entry->zero_wired_pages = TRUE;
14801 } else {
14802 entry->behavior = new_behavior;
14803 }
14804 entry = entry->vme_next;
14805 }
5ba3f43e 14806
b0d623f7 14807 vm_map_unlock(map);
91447636 14808 break;
b0d623f7
A
14809
14810 /*
14811 * The rest of these are different from the above in that they cause
5ba3f43e 14812 * an immediate action to take place as opposed to setting a behavior that
b0d623f7
A
14813 * affects future actions.
14814 */
14815
91447636 14816 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
14817 return vm_map_willneed(map, start, end);
14818
91447636 14819 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
14820 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14821
14822 case VM_BEHAVIOR_FREE:
14823 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14824
14825 case VM_BEHAVIOR_REUSABLE:
14826 return vm_map_reusable_pages(map, start, end);
14827
14828 case VM_BEHAVIOR_REUSE:
14829 return vm_map_reuse_pages(map, start, end);
14830
14831 case VM_BEHAVIOR_CAN_REUSE:
14832 return vm_map_can_reuse(map, start, end);
14833
3e170ce0
A
14834#if MACH_ASSERT
14835 case VM_BEHAVIOR_PAGEOUT:
14836 return vm_map_pageout(map, start, end);
14837#endif /* MACH_ASSERT */
14838
1c79356b 14839 default:
0a7de745 14840 return KERN_INVALID_ARGUMENT;
1c79356b 14841 }
1c79356b 14842
0a7de745 14843 return KERN_SUCCESS;
b0d623f7
A
14844}
14845
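/*
 * These behaviors are reachable from user space either through the Mach
 * vm_behavior_set() call or through madvise(), whose MADV_* advice values
 * the BSD layer translates into the VM_BEHAVIOR_* cases handled above.
 * A sketch hinting a sequential streaming pattern over a buffer ("buf" and
 * "len" are placeholders):
 */
#include <mach/mach.h>

static void
hint_sequential(vm_address_t buf, vm_size_t len)
{
	/* Purely advisory: influences read-ahead clustering on faults. */
	(void)vm_behavior_set(mach_task_self(), buf, len,
	    VM_BEHAVIOR_SEQUENTIAL);
}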
14846
14847/*
14848 * Internals for madvise(MADV_WILLNEED) system call.
14849 *
14850 * The present implementation is to do a read-ahead if the mapping corresponds
14851 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
14852 * and basically ignore the "advice" (which we are always free to do).
14853 */
14854
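/*
 * User-space view of the code below: madvise(MADV_WILLNEED) on a file-backed
 * mapping kicks off the asynchronous read-ahead implemented here, while on
 * anonymous memory the advice is accepted and ignored.  "fd" and "len" are
 * placeholders in this sketch:
 */
#include <sys/mman.h>
#include <stddef.h>

static void *
map_and_prefetch(int fd, size_t len)
{
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);

	if (p == MAP_FAILED) {
		return NULL;
	}
	/* Advisory only: start paging the file in ahead of first use. */
	(void)madvise(p, len, MADV_WILLNEED);
	return p;
}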
14855
14856static kern_return_t
14857vm_map_willneed(
0a7de745
A
14858 vm_map_t map,
14859 vm_map_offset_t start,
14860 vm_map_offset_t end
14861 )
b0d623f7 14862{
0a7de745
A
14863 vm_map_entry_t entry;
14864 vm_object_t object;
14865 memory_object_t pager;
14866 struct vm_object_fault_info fault_info = {};
14867 kern_return_t kr;
14868 vm_object_size_t len;
14869 vm_object_offset_t offset;
14870
14871 fault_info.interruptible = THREAD_UNINT; /* ignored value */
b0d623f7 14872 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
0a7de745 14873 fault_info.stealth = TRUE;
b0d623f7
A
14874
14875 /*
14876 * The MADV_WILLNEED operation doesn't require any changes to the
14877 * vm_map_entry_t's, so the read lock is sufficient.
14878 */
14879
14880 vm_map_lock_read(map);
14881
14882 /*
14883 * The madvise semantics require that the address range be fully
14884 * allocated with no holes. Otherwise, we're required to return
14885 * an error.
14886 */
14887
0a7de745 14888 if (!vm_map_range_check(map, start, end, &entry)) {
6d2010ae
A
14889 vm_map_unlock_read(map);
14890 return KERN_INVALID_ADDRESS;
14891 }
b0d623f7 14892
6d2010ae
A
14893 /*
14894 * Examine each vm_map_entry_t in the range.
14895 */
0a7de745 14896 for (; entry != vm_map_to_entry(map) && start < end;) {
b0d623f7 14897 /*
6d2010ae
A
14898 * The first time through, the start address could be anywhere
14899 * within the vm_map_entry we found. So adjust the offset to
14900 * correspond. After that, the offset will always be zero to
14901 * correspond to the beginning of the current vm_map_entry.
b0d623f7 14902 */
3e170ce0 14903 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 14904
6d2010ae
A
14905 /*
14906 * Set the length so we don't go beyond the end of the
14907 * map_entry or beyond the end of the range we were given.
14908 * This range could span also multiple map entries all of which
14909 * map different files, so make sure we only do the right amount
14910 * of I/O for each object. Note that it's possible for there
14911 * to be multiple map entries all referring to the same object
14912 * but with different page permissions, but it's not worth
14913 * trying to optimize that case.
14914 */
14915 len = MIN(entry->vme_end - start, end - start);
b0d623f7 14916
6d2010ae
A
14917 if ((vm_size_t) len != len) {
14918 /* 32-bit overflow */
14919 len = (vm_size_t) (0 - PAGE_SIZE);
14920 }
14921 fault_info.cluster_size = (vm_size_t) len;
5ba3f43e 14922 fault_info.lo_offset = offset;
6d2010ae 14923 fault_info.hi_offset = offset + len;
3e170ce0 14924 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
14925 fault_info.pmap_options = 0;
14926 if (entry->iokit_acct ||
14927 (!entry->is_sub_map && !entry->use_pmap)) {
14928 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14929 }
b0d623f7 14930
6d2010ae
A
14931 /*
14932 * If there's no read permission to this mapping, then just
14933 * skip it.
14934 */
14935 if ((entry->protection & VM_PROT_READ) == 0) {
14936 entry = entry->vme_next;
14937 start = entry->vme_start;
14938 continue;
14939 }
b0d623f7 14940
6d2010ae
A
14941 /*
14942 * Find the file object backing this map entry. If there is
14943 * none, then we simply ignore the "will need" advice for this
14944 * entry and go on to the next one.
14945 */
14946 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14947 entry = entry->vme_next;
14948 start = entry->vme_start;
14949 continue;
14950 }
b0d623f7 14951
6d2010ae
A
14952 /*
14953 * The data_request() could take a long time, so let's
14954 * release the map lock to avoid blocking other threads.
14955 */
14956 vm_map_unlock_read(map);
b0d623f7 14957
6d2010ae
A
14958 vm_object_paging_begin(object);
14959 pager = object->pager;
14960 vm_object_unlock(object);
b0d623f7 14961
6d2010ae
A
14962 /*
14963 * Get the data from the object asynchronously.
14964 *
14965 * Note that memory_object_data_request() places limits on the
14966 * amount of I/O it will do. Regardless of the len we
fe8ab488 14967 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
14968 * silently truncates the len to that size. This isn't
14969 * necessarily bad since madvise shouldn't really be used to
14970 * page in unlimited amounts of data. Other Unix variants
14971 * limit the willneed case as well. If this turns out to be an
14972 * issue for developers, then we can always adjust the policy
14973 * here and still be backwards compatible since this is all
14974 * just "advice".
14975 */
14976 kr = memory_object_data_request(
14977 pager,
14978 offset + object->paging_offset,
0a7de745 14979 0, /* ignored */
6d2010ae
A
14980 VM_PROT_READ,
14981 (memory_object_fault_info_t)&fault_info);
b0d623f7 14982
6d2010ae
A
14983 vm_object_lock(object);
14984 vm_object_paging_end(object);
14985 vm_object_unlock(object);
b0d623f7 14986
6d2010ae
A
14987 /*
14988 * If we couldn't do the I/O for some reason, just give up on
14989 * the madvise. We still return success to the user since
14990 * madvise isn't supposed to fail when the advice can't be
14991 * taken.
14992 */
14993 if (kr != KERN_SUCCESS) {
14994 return KERN_SUCCESS;
14995 }
b0d623f7 14996
6d2010ae
A
14997 start += len;
14998 if (start >= end) {
14999 /* done */
15000 return KERN_SUCCESS;
15001 }
b0d623f7 15002
6d2010ae
A
15003 /* look up next entry */
15004 vm_map_lock_read(map);
0a7de745 15005 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 15006 /*
6d2010ae 15007 * There's a new hole in the address range.
b0d623f7 15008 */
6d2010ae
A
15009 vm_map_unlock_read(map);
15010 return KERN_INVALID_ADDRESS;
b0d623f7 15011 }
6d2010ae 15012 }
b0d623f7
A
15013
15014 vm_map_unlock_read(map);
6d2010ae 15015 return KERN_SUCCESS;
b0d623f7
A
15016}
15017
15018static boolean_t
15019vm_map_entry_is_reusable(
15020 vm_map_entry_t entry)
15021{
3e170ce0
A
15022 /* Only user map entries */
15023
b0d623f7
A
15024 vm_object_t object;
15025
2dced7af
A
15026 if (entry->is_sub_map) {
15027 return FALSE;
15028 }
15029
3e170ce0 15030 switch (VME_ALIAS(entry)) {
39236c6e
A
15031 case VM_MEMORY_MALLOC:
15032 case VM_MEMORY_MALLOC_SMALL:
15033 case VM_MEMORY_MALLOC_LARGE:
15034 case VM_MEMORY_REALLOC:
15035 case VM_MEMORY_MALLOC_TINY:
15036 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15037 case VM_MEMORY_MALLOC_LARGE_REUSED:
15038 /*
15039 * This is a malloc() memory region: check if it's still
15040 * in its original state and can be re-used for more
15041 * malloc() allocations.
15042 */
15043 break;
15044 default:
15045 /*
15046 * Not a malloc() memory region: let the caller decide if
15047 * it's re-usable.
15048 */
15049 return TRUE;
15050 }
15051
d9a64523 15052 if (/*entry->is_shared ||*/
0a7de745
A
15053 entry->is_sub_map ||
15054 entry->in_transition ||
15055 entry->protection != VM_PROT_DEFAULT ||
15056 entry->max_protection != VM_PROT_ALL ||
15057 entry->inheritance != VM_INHERIT_DEFAULT ||
15058 entry->no_cache ||
15059 entry->permanent ||
15060 entry->superpage_size != FALSE ||
15061 entry->zero_wired_pages ||
15062 entry->wired_count != 0 ||
15063 entry->user_wired_count != 0) {
b0d623f7 15064 return FALSE;
91447636 15065 }
b0d623f7 15066
3e170ce0 15067 object = VME_OBJECT(entry);
b0d623f7
A
15068 if (object == VM_OBJECT_NULL) {
15069 return TRUE;
15070 }
316670eb
A
15071 if (
15072#if 0
15073 /*
15074 * Let's proceed even if the VM object is potentially
15075 * shared.
15076 * We check for this later when processing the actual
15077 * VM pages, so the contents will be safe if shared.
5ba3f43e 15078 *
316670eb
A
15079 * But we can still mark this memory region as "reusable" to
15080 * acknowledge that the caller did let us know that the memory
15081 * could be re-used and should not be penalized for holding
15082 * on to it. This allows its "resident size" to not include
15083 * the reusable range.
15084 */
0a7de745 15085 object->ref_count == 1 &&
316670eb 15086#endif
0a7de745
A
15087 object->wired_page_count == 0 &&
15088 object->copy == VM_OBJECT_NULL &&
15089 object->shadow == VM_OBJECT_NULL &&
15090 object->internal &&
15091 object->purgable == VM_PURGABLE_DENY &&
15092 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15093 !object->true_share &&
15094 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15095 !object->code_signed) {
b0d623f7 15096 return TRUE;
1c79356b 15097 }
b0d623f7 15098 return FALSE;
b0d623f7 15099}
1c79356b 15100
b0d623f7
A
15101static kern_return_t
15102vm_map_reuse_pages(
0a7de745
A
15103 vm_map_t map,
15104 vm_map_offset_t start,
15105 vm_map_offset_t end)
b0d623f7 15106{
0a7de745
A
15107 vm_map_entry_t entry;
15108 vm_object_t object;
15109 vm_object_offset_t start_offset, end_offset;
b0d623f7
A
15110
15111 /*
15112 * The MADV_REUSE operation doesn't require any changes to the
15113 * vm_map_entry_t's, so the read lock is sufficient.
15114 */
0b4e3aa0 15115
b0d623f7 15116 vm_map_lock_read(map);
0a7de745 15117 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 15118
b0d623f7
A
15119 /*
15120 * The madvise semantics require that the address range be fully
15121 * allocated with no holes. Otherwise, we're required to return
15122 * an error.
15123 */
15124
15125 if (!vm_map_range_check(map, start, end, &entry)) {
15126 vm_map_unlock_read(map);
15127 vm_page_stats_reusable.reuse_pages_failure++;
15128 return KERN_INVALID_ADDRESS;
1c79356b 15129 }
91447636 15130
b0d623f7
A
15131 /*
15132 * Examine each vm_map_entry_t in the range.
15133 */
15134 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15135 entry = entry->vme_next) {
b0d623f7
A
15136 /*
15137 * Sanity check on the VM map entry.
15138 */
0a7de745 15139 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15140 vm_map_unlock_read(map);
15141 vm_page_stats_reusable.reuse_pages_failure++;
15142 return KERN_INVALID_ADDRESS;
15143 }
15144
15145 /*
15146 * The first time through, the start address could be anywhere
15147 * within the vm_map_entry we found. So adjust the offset to
15148 * correspond.
15149 */
15150 if (entry->vme_start < start) {
15151 start_offset = start - entry->vme_start;
15152 } else {
15153 start_offset = 0;
15154 }
15155 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15156 start_offset += VME_OFFSET(entry);
15157 end_offset += VME_OFFSET(entry);
b0d623f7 15158
2dced7af 15159 assert(!entry->is_sub_map);
3e170ce0 15160 object = VME_OBJECT(entry);
b0d623f7
A
15161 if (object != VM_OBJECT_NULL) {
15162 vm_object_lock(object);
15163 vm_object_reuse_pages(object, start_offset, end_offset,
0a7de745 15164 TRUE);
b0d623f7
A
15165 vm_object_unlock(object);
15166 }
15167
3e170ce0 15168 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
15169 /*
15170 * XXX
15171 * We do not hold the VM map exclusively here.
15172 * The "alias" field is not that critical, so it's
15173 * safe to update it here, as long as it is the only
15174 * one that can be modified while holding the VM map
15175 * "shared".
15176 */
3e170ce0 15177 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
15178 }
15179 }
5ba3f43e 15180
b0d623f7
A
15181 vm_map_unlock_read(map);
15182 vm_page_stats_reusable.reuse_pages_success++;
15183 return KERN_SUCCESS;
1c79356b
A
15184}
15185
1c79356b 15186
b0d623f7
A
15187static kern_return_t
15188vm_map_reusable_pages(
0a7de745
A
15189 vm_map_t map,
15190 vm_map_offset_t start,
15191 vm_map_offset_t end)
b0d623f7 15192{
0a7de745
A
15193 vm_map_entry_t entry;
15194 vm_object_t object;
15195 vm_object_offset_t start_offset, end_offset;
15196 vm_map_offset_t pmap_offset;
b0d623f7
A
15197
15198 /*
15199 * The MADV_REUSABLE operation doesn't require any changes to the
15200 * vm_map_entry_t's, so the read lock is sufficient.
15201 */
15202
15203 vm_map_lock_read(map);
0a7de745 15204 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15205
15206 /*
15207 * The madvise semantics require that the address range be fully
15208 * allocated with no holes. Otherwise, we're required to return
15209 * an error.
15210 */
15211
15212 if (!vm_map_range_check(map, start, end, &entry)) {
15213 vm_map_unlock_read(map);
15214 vm_page_stats_reusable.reusable_pages_failure++;
15215 return KERN_INVALID_ADDRESS;
15216 }
15217
15218 /*
15219 * Examine each vm_map_entry_t in the range.
15220 */
15221 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15222 entry = entry->vme_next) {
b0d623f7
A
15223 int kill_pages = 0;
15224
15225 /*
15226 * Sanity check on the VM map entry.
15227 */
0a7de745 15228 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15229 vm_map_unlock_read(map);
15230 vm_page_stats_reusable.reusable_pages_failure++;
15231 return KERN_INVALID_ADDRESS;
15232 }
15233
0a7de745 15234 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
39037602
A
15235 /* not writable: can't discard contents */
15236 vm_map_unlock_read(map);
15237 vm_page_stats_reusable.reusable_nonwritable++;
15238 vm_page_stats_reusable.reusable_pages_failure++;
15239 return KERN_PROTECTION_FAILURE;
15240 }
15241
b0d623f7
A
15242 /*
15243 * The first time through, the start address could be anywhere
15244 * within the vm_map_entry we found. So adjust the offset to
15245 * correspond.
15246 */
15247 if (entry->vme_start < start) {
15248 start_offset = start - entry->vme_start;
3e170ce0 15249 pmap_offset = start;
b0d623f7
A
15250 } else {
15251 start_offset = 0;
3e170ce0 15252 pmap_offset = entry->vme_start;
b0d623f7
A
15253 }
15254 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15255 start_offset += VME_OFFSET(entry);
15256 end_offset += VME_OFFSET(entry);
b0d623f7 15257
2dced7af 15258 assert(!entry->is_sub_map);
3e170ce0 15259 object = VME_OBJECT(entry);
0a7de745 15260 if (object == VM_OBJECT_NULL) {
b0d623f7 15261 continue;
0a7de745 15262 }
b0d623f7
A
15263
15264
15265 vm_object_lock(object);
39037602 15266 if (((object->ref_count == 1) ||
0a7de745
A
15267 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15268 object->copy == VM_OBJECT_NULL)) &&
39037602 15269 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
15270 /*
15271 * "iokit_acct" entries are billed for their virtual size
15272 * (rather than for their resident pages only), so they
15273 * wouldn't benefit from making pages reusable, and it
15274 * would be hard to keep track of pages that are both
39037602
A
15275 * "iokit_acct" and "reusable" in the pmap stats and
15276 * ledgers.
fe8ab488
A
15277 */
15278 !(entry->iokit_acct ||
0a7de745 15279 (!entry->is_sub_map && !entry->use_pmap))) {
39037602
A
15280 if (object->ref_count != 1) {
15281 vm_page_stats_reusable.reusable_shared++;
15282 }
b0d623f7 15283 kill_pages = 1;
39037602 15284 } else {
b0d623f7 15285 kill_pages = -1;
39037602 15286 }
b0d623f7
A
15287 if (kill_pages != -1) {
15288 vm_object_deactivate_pages(object,
0a7de745
A
15289 start_offset,
15290 end_offset - start_offset,
15291 kill_pages,
15292 TRUE /*reusable_pages*/,
15293 map->pmap,
15294 pmap_offset);
b0d623f7
A
15295 } else {
15296 vm_page_stats_reusable.reusable_pages_shared++;
15297 }
15298 vm_object_unlock(object);
15299
3e170ce0
A
15300 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15301 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
15302 /*
15303 * XXX
15304 * We do not hold the VM map exclusively here.
15305 * The "alias" field is not that critical, so it's
15306 * safe to update it here, as long as it is the only
15307 * one that can be modified while holding the VM map
15308 * "shared".
15309 */
3e170ce0 15310 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
15311 }
15312 }
5ba3f43e 15313
b0d623f7
A
15314 vm_map_unlock_read(map);
15315 vm_page_stats_reusable.reusable_pages_success++;
15316 return KERN_SUCCESS;
15317}
15318
15319
15320static kern_return_t
15321vm_map_can_reuse(
0a7de745
A
15322 vm_map_t map,
15323 vm_map_offset_t start,
15324 vm_map_offset_t end)
b0d623f7 15325{
0a7de745 15326 vm_map_entry_t entry;
b0d623f7
A
15327
15328 /*
15329 * The MADV_REUSABLE operation doesn't require any changes to the
15330 * vm_map_entry_t's, so the read lock is sufficient.
15331 */
15332
15333 vm_map_lock_read(map);
0a7de745 15334 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15335
15336 /*
15337 * The madvise semantics require that the address range be fully
15338 * allocated with no holes. Otherwise, we're required to return
15339 * an error.
15340 */
15341
15342 if (!vm_map_range_check(map, start, end, &entry)) {
15343 vm_map_unlock_read(map);
15344 vm_page_stats_reusable.can_reuse_failure++;
15345 return KERN_INVALID_ADDRESS;
15346 }
15347
15348 /*
15349 * Examine each vm_map_entry_t in the range.
15350 */
15351 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15352 entry = entry->vme_next) {
b0d623f7
A
15353 /*
15354 * Sanity check on the VM map entry.
15355 */
0a7de745 15356 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15357 vm_map_unlock_read(map);
15358 vm_page_stats_reusable.can_reuse_failure++;
15359 return KERN_INVALID_ADDRESS;
15360 }
15361 }
5ba3f43e 15362
b0d623f7
A
15363 vm_map_unlock_read(map);
15364 vm_page_stats_reusable.can_reuse_success++;
15365 return KERN_SUCCESS;
15366}
15367
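/*
 * The reuse / reusable / can_reuse routines above back the madvise() advice
 * values MADV_FREE_REUSE, MADV_FREE_REUSABLE and MADV_CAN_REUSE, as used by
 * malloc-style allocators for their large regions.  A sketch of that
 * protocol on a private anonymous region ("p" and "len" are placeholders):
 */
#include <sys/mman.h>
#include <stddef.h>

static void
park_and_recycle(void *p, size_t len)
{
	/*
	 * Contents no longer needed: let the VM reclaim the pages lazily while
	 * keeping the range mapped and not counting them as resident.
	 */
	(void)madvise(p, len, MADV_FREE_REUSABLE);

	/* ... later, immediately before the region is handed out again ... */
	(void)madvise(p, len, MADV_FREE_REUSE);
}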
15368
3e170ce0
A
15369#if MACH_ASSERT
15370static kern_return_t
15371vm_map_pageout(
0a7de745
A
15372 vm_map_t map,
15373 vm_map_offset_t start,
15374 vm_map_offset_t end)
3e170ce0 15375{
0a7de745 15376 vm_map_entry_t entry;
3e170ce0
A
15377
15378 /*
15379 * The MADV_PAGEOUT operation doesn't require any changes to the
15380 * vm_map_entry_t's, so the read lock is sufficient.
15381 */
15382
15383 vm_map_lock_read(map);
15384
15385 /*
15386 * The madvise semantics require that the address range be fully
15387 * allocated with no holes. Otherwise, we're required to return
15388 * an error.
15389 */
15390
15391 if (!vm_map_range_check(map, start, end, &entry)) {
15392 vm_map_unlock_read(map);
15393 return KERN_INVALID_ADDRESS;
15394 }
15395
15396 /*
15397 * Examine each vm_map_entry_t in the range.
15398 */
15399 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745
A
15400 entry = entry->vme_next) {
15401 vm_object_t object;
3e170ce0
A
15402
15403 /*
15404 * Sanity check on the VM map entry.
15405 */
15406 if (entry->is_sub_map) {
15407 vm_map_t submap;
15408 vm_map_offset_t submap_start;
15409 vm_map_offset_t submap_end;
15410 vm_map_entry_t submap_entry;
15411
15412 submap = VME_SUBMAP(entry);
15413 submap_start = VME_OFFSET(entry);
5ba3f43e 15414 submap_end = submap_start + (entry->vme_end -
0a7de745 15415 entry->vme_start);
3e170ce0
A
15416
15417 vm_map_lock_read(submap);
15418
0a7de745
A
15419 if (!vm_map_range_check(submap,
15420 submap_start,
15421 submap_end,
15422 &submap_entry)) {
3e170ce0
A
15423 vm_map_unlock_read(submap);
15424 vm_map_unlock_read(map);
15425 return KERN_INVALID_ADDRESS;
15426 }
15427
15428 object = VME_OBJECT(submap_entry);
15429 if (submap_entry->is_sub_map ||
15430 object == VM_OBJECT_NULL ||
15431 !object->internal) {
15432 vm_map_unlock_read(submap);
15433 continue;
15434 }
15435
15436 vm_object_pageout(object);
15437
15438 vm_map_unlock_read(submap);
15439 submap = VM_MAP_NULL;
15440 submap_entry = VM_MAP_ENTRY_NULL;
15441 continue;
15442 }
15443
15444 object = VME_OBJECT(entry);
15445 if (entry->is_sub_map ||
15446 object == VM_OBJECT_NULL ||
15447 !object->internal) {
15448 continue;
15449 }
15450
15451 vm_object_pageout(object);
15452 }
5ba3f43e 15453
3e170ce0
A
15454 vm_map_unlock_read(map);
15455 return KERN_SUCCESS;
15456}
15457#endif /* MACH_ASSERT */
15458
15459
1c79356b 15460/*
91447636
A
15461 * Routine: vm_map_entry_insert
15462 *
d9a64523 15463 * Description: This routine inserts a new vm_entry in a locked map.
1c79356b 15464 */
91447636
A
15465vm_map_entry_t
15466vm_map_entry_insert(
0a7de745
A
15467 vm_map_t map,
15468 vm_map_entry_t insp_entry,
15469 vm_map_offset_t start,
15470 vm_map_offset_t end,
15471 vm_object_t object,
15472 vm_object_offset_t offset,
15473 boolean_t needs_copy,
15474 boolean_t is_shared,
15475 boolean_t in_transition,
15476 vm_prot_t cur_protection,
15477 vm_prot_t max_protection,
15478 vm_behavior_t behavior,
15479 vm_inherit_t inheritance,
15480 unsigned wired_count,
15481 boolean_t no_cache,
15482 boolean_t permanent,
15483 unsigned int superpage_size,
15484 boolean_t clear_map_aligned,
15485 boolean_t is_submap,
15486 boolean_t used_for_jit,
15487 int alias)
1c79356b 15488{
0a7de745 15489 vm_map_entry_t new_entry;
1c79356b 15490
91447636 15491 assert(insp_entry != (vm_map_entry_t)0);
d9a64523 15492 vm_map_lock_assert_exclusive(map);
1c79356b 15493
a39ff7e2 15494#if DEVELOPMENT || DEBUG
0a7de745 15495 vm_object_offset_t end_offset = 0;
a39ff7e2
A
15496 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15497#endif /* DEVELOPMENT || DEBUG */
15498
7ddcb079 15499 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 15500
39236c6e
A
15501 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15502 new_entry->map_aligned = TRUE;
15503 } else {
15504 new_entry->map_aligned = FALSE;
15505 }
15506 if (clear_map_aligned &&
0a7de745
A
15507 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15508 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
15509 new_entry->map_aligned = FALSE;
15510 }
15511
91447636
A
15512 new_entry->vme_start = start;
15513 new_entry->vme_end = end;
15514 assert(page_aligned(new_entry->vme_start));
15515 assert(page_aligned(new_entry->vme_end));
39236c6e 15516 if (new_entry->map_aligned) {
fe8ab488 15517 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 15518 VM_MAP_PAGE_MASK(map)));
39236c6e 15519 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 15520 VM_MAP_PAGE_MASK(map)));
39236c6e 15521 }
e2d2fc5c 15522 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 15523
3e170ce0
A
15524 VME_OBJECT_SET(new_entry, object);
15525 VME_OFFSET_SET(new_entry, offset);
91447636 15526 new_entry->is_shared = is_shared;
fe8ab488 15527 new_entry->is_sub_map = is_submap;
91447636
A
15528 new_entry->needs_copy = needs_copy;
15529 new_entry->in_transition = in_transition;
15530 new_entry->needs_wakeup = FALSE;
15531 new_entry->inheritance = inheritance;
15532 new_entry->protection = cur_protection;
15533 new_entry->max_protection = max_protection;
15534 new_entry->behavior = behavior;
15535 new_entry->wired_count = wired_count;
15536 new_entry->user_wired_count = 0;
fe8ab488
A
15537 if (is_submap) {
15538 /*
15539 * submap: "use_pmap" means "nested".
15540 * default: false.
15541 */
15542 new_entry->use_pmap = FALSE;
15543 } else {
15544 /*
15545 * object: "use_pmap" means "use pmap accounting" for footprint.
15546 * default: true.
15547 */
15548 new_entry->use_pmap = TRUE;
15549 }
5ba3f43e 15550 VME_ALIAS_SET(new_entry, alias);
b0d623f7 15551 new_entry->zero_wired_pages = FALSE;
2d21ac55 15552 new_entry->no_cache = no_cache;
b0d623f7 15553 new_entry->permanent = permanent;
0a7de745 15554 if (superpage_size) {
39236c6e 15555 new_entry->superpage_size = TRUE;
0a7de745 15556 } else {
39236c6e 15557 new_entry->superpage_size = FALSE;
0a7de745
A
15558 }
15559 if (used_for_jit) {
d9a64523
A
15560#if CONFIG_EMBEDDED
15561 if (!(map->jit_entry_exists))
15562#endif /* CONFIG_EMBEDDED */
15563 {
5ba3f43e
A
15564 new_entry->used_for_jit = TRUE;
15565 map->jit_entry_exists = TRUE;
15566
15567 /* Tell the pmap that it supports JIT. */
15568 pmap_set_jit_entitled(map->pmap);
15569 }
15570 } else {
15571 new_entry->used_for_jit = FALSE;
15572 }
d9a64523 15573 new_entry->pmap_cs_associated = FALSE;
fe8ab488 15574 new_entry->iokit_acct = FALSE;
3e170ce0
A
15575 new_entry->vme_resilient_codesign = FALSE;
15576 new_entry->vme_resilient_media = FALSE;
39037602 15577 new_entry->vme_atomic = FALSE;
1c79356b 15578
91447636
A
15579 /*
15580 * Insert the new entry into the list.
15581 */
1c79356b 15582
d9a64523 15583 vm_map_store_entry_link(map, insp_entry, new_entry,
0a7de745 15584 VM_MAP_KERNEL_FLAGS_NONE);
91447636
A
15585 map->size += end - start;
15586
15587 /*
15588 * Update the free space hint and the lookup hint.
15589 */
15590
0c530ab8 15591 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 15592 return new_entry;
1c79356b
A
15593}
15594
15595/*
91447636
A
15596 * Routine: vm_map_remap_extract
15597 *
15598 * Description: This routine returns a vm_entry list from a map.
1c79356b 15599 */
91447636
A
15600static kern_return_t
15601vm_map_remap_extract(
0a7de745
A
15602 vm_map_t map,
15603 vm_map_offset_t addr,
15604 vm_map_size_t size,
15605 boolean_t copy,
15606 struct vm_map_header *map_header,
15607 vm_prot_t *cur_protection,
15608 vm_prot_t *max_protection,
91447636 15609 /* What, no behavior? */
0a7de745
A
15610 vm_inherit_t inheritance,
15611 boolean_t pageable,
15612 boolean_t same_map,
15613 vm_map_kernel_flags_t vmk_flags)
1c79356b 15614{
0a7de745
A
15615 kern_return_t result;
15616 vm_map_size_t mapped_size;
15617 vm_map_size_t tmp_size;
15618 vm_map_entry_t src_entry; /* result of last map lookup */
15619 vm_map_entry_t new_entry;
15620 vm_object_offset_t offset;
15621 vm_map_offset_t map_address;
15622 vm_map_offset_t src_start; /* start of entry to map */
15623 vm_map_offset_t src_end; /* end of region to be mapped */
15624 vm_object_t object;
15625 vm_map_version_t version;
15626 boolean_t src_needs_copy;
15627 boolean_t new_entry_needs_copy;
15628 vm_map_entry_t saved_src_entry;
15629 boolean_t src_entry_was_wired;
15630 vm_prot_t max_prot_for_prot_copy;
1c79356b 15631
91447636 15632 assert(map != VM_MAP_NULL);
39236c6e
A
15633 assert(size != 0);
15634 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636 15635 assert(inheritance == VM_INHERIT_NONE ||
0a7de745
A
15636 inheritance == VM_INHERIT_COPY ||
15637 inheritance == VM_INHERIT_SHARE);
1c79356b 15638
91447636
A
15639 /*
15640 * Compute start and end of region.
15641 */
39236c6e
A
15642 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15643 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15644
1c79356b 15645
91447636
A
15646 /*
15647 * Initialize map_header.
15648 */
d9a64523
A
15649 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15650 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
91447636
A
15651 map_header->nentries = 0;
15652 map_header->entries_pageable = pageable;
39236c6e 15653 map_header->page_shift = PAGE_SHIFT;
1c79356b 15654
6d2010ae
A
15655 vm_map_store_init( map_header );
15656
d9a64523
A
15657 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15658 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15659 } else {
15660 max_prot_for_prot_copy = VM_PROT_NONE;
15661 }
91447636
A
15662 *cur_protection = VM_PROT_ALL;
15663 *max_protection = VM_PROT_ALL;
1c79356b 15664
91447636
A
15665 map_address = 0;
15666 mapped_size = 0;
15667 result = KERN_SUCCESS;
1c79356b 15668
5ba3f43e 15669 /*
91447636
A
15670 * The specified source virtual space might correspond to
15671 * multiple map entries, need to loop on them.
15672 */
15673 vm_map_lock(map);
15674 while (mapped_size != size) {
0a7de745 15675 vm_map_size_t entry_size;
1c79356b 15676
91447636
A
15677 /*
15678 * Find the beginning of the region.
5ba3f43e 15679 */
0a7de745 15680 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
91447636
A
15681 result = KERN_INVALID_ADDRESS;
15682 break;
15683 }
1c79356b 15684
91447636
A
15685 if (src_start < src_entry->vme_start ||
15686 (mapped_size && src_start != src_entry->vme_start)) {
15687 result = KERN_INVALID_ADDRESS;
15688 break;
15689 }
1c79356b 15690
91447636 15691 tmp_size = size - mapped_size;
0a7de745 15692 if (src_end > src_entry->vme_end) {
91447636 15693 tmp_size -= (src_end - src_entry->vme_end);
0a7de745 15694 }
1c79356b 15695
91447636 15696 entry_size = (vm_map_size_t)(src_entry->vme_end -
0a7de745 15697 src_entry->vme_start);
1c79356b 15698
0a7de745 15699 if (src_entry->is_sub_map) {
3e170ce0 15700 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
15701 object = VM_OBJECT_NULL;
15702 } else {
3e170ce0 15703 object = VME_OBJECT(src_entry);
fe8ab488
A
15704 if (src_entry->iokit_acct) {
15705 /*
15706 * This entry uses "IOKit accounting".
15707 */
15708 } else if (object != VM_OBJECT_NULL &&
0a7de745 15709 object->purgable != VM_PURGABLE_DENY) {
fe8ab488
A
15710 /*
15711 * Purgeable objects have their own accounting:
15712 * no pmap accounting for them.
15713 */
a39ff7e2 15714 assertf(!src_entry->use_pmap,
0a7de745
A
15715 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15716 map,
15717 src_entry,
15718 (uint64_t)src_entry->vme_start,
15719 (uint64_t)src_entry->vme_end,
15720 src_entry->protection,
15721 src_entry->max_protection,
15722 VME_ALIAS(src_entry));
fe8ab488
A
15723 } else {
15724 /*
15725 * Not IOKit or purgeable:
15726 * must be accounted by pmap stats.
15727 */
a39ff7e2 15728 assertf(src_entry->use_pmap,
0a7de745
A
15729 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15730 map,
15731 src_entry,
15732 (uint64_t)src_entry->vme_start,
15733 (uint64_t)src_entry->vme_end,
15734 src_entry->protection,
15735 src_entry->max_protection,
15736 VME_ALIAS(src_entry));
fe8ab488 15737 }
55e303ae 15738
91447636
A
15739 if (object == VM_OBJECT_NULL) {
15740 object = vm_object_allocate(entry_size);
3e170ce0
A
15741 VME_OFFSET_SET(src_entry, 0);
15742 VME_OBJECT_SET(src_entry, object);
a39ff7e2 15743 assert(src_entry->use_pmap);
91447636 15744 } else if (object->copy_strategy !=
0a7de745 15745 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
15746 /*
15747 * We are already using an asymmetric
15748 * copy, and therefore we already have
15749 * the right object.
15750 */
15751 assert(!src_entry->needs_copy);
15752 } else if (src_entry->needs_copy || object->shadowed ||
0a7de745
A
15753 (object->internal && !object->true_share &&
15754 !src_entry->is_shared &&
15755 object->vo_size > entry_size)) {
3e170ce0 15756 VME_OBJECT_SHADOW(src_entry, entry_size);
a39ff7e2 15757 assert(src_entry->use_pmap);
1c79356b 15758
91447636
A
15759 if (!src_entry->needs_copy &&
15760 (src_entry->protection & VM_PROT_WRITE)) {
0a7de745 15761 vm_prot_t prot;
0c530ab8 15762
5ba3f43e
A
15763 assert(!pmap_has_prot_policy(src_entry->protection));
15764
0a7de745 15765 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 15766
3e170ce0 15767 if (override_nx(map,
0a7de745
A
15768 VME_ALIAS(src_entry))
15769 && prot) {
15770 prot |= VM_PROT_EXECUTE;
15771 }
2d21ac55 15772
5ba3f43e
A
15773 assert(!pmap_has_prot_policy(prot));
15774
0a7de745 15775 if (map->mapped_in_other_pmaps) {
2d21ac55 15776 vm_object_pmap_protect(
3e170ce0
A
15777 VME_OBJECT(src_entry),
15778 VME_OFFSET(src_entry),
2d21ac55
A
15779 entry_size,
15780 PMAP_NULL,
0c530ab8 15781 src_entry->vme_start,
0c530ab8 15782 prot);
2d21ac55
A
15783 } else {
15784 pmap_protect(vm_map_pmap(map),
0a7de745
A
15785 src_entry->vme_start,
15786 src_entry->vme_end,
15787 prot);
91447636
A
15788 }
15789 }
1c79356b 15790
3e170ce0 15791 object = VME_OBJECT(src_entry);
91447636
A
15792 src_entry->needs_copy = FALSE;
15793 }
1c79356b 15794
1c79356b 15795
91447636 15796 vm_object_lock(object);
2d21ac55 15797 vm_object_reference_locked(object); /* object ref. for new entry */
5ba3f43e 15798 if (object->copy_strategy ==
2d21ac55 15799 MEMORY_OBJECT_COPY_SYMMETRIC) {
5ba3f43e 15800 object->copy_strategy =
0a7de745 15801 MEMORY_OBJECT_COPY_DELAY;
91447636
A
15802 }
15803 vm_object_unlock(object);
15804 }
1c79356b 15805
3e170ce0 15806 offset = (VME_OFFSET(src_entry) +
0a7de745 15807 (src_start - src_entry->vme_start));
1c79356b 15808
7ddcb079 15809 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 15810 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
15811 if (new_entry->is_sub_map) {
 15812 /* clear address space specifics */
15813 new_entry->use_pmap = FALSE;
a39ff7e2
A
15814 } else if (copy) {
15815 /*
15816 * We're dealing with a copy-on-write operation,
15817 * so the resulting mapping should not inherit the
15818 * original mapping's accounting settings.
15819 * "use_pmap" should be reset to its default (TRUE)
15820 * so that the new mapping gets accounted for in
15821 * the task's memory footprint.
15822 */
15823 new_entry->use_pmap = TRUE;
fe8ab488 15824 }
a39ff7e2
A
15825 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15826 assert(!new_entry->iokit_acct);
1c79356b 15827
39236c6e
A
15828 new_entry->map_aligned = FALSE;
15829
91447636
A
15830 new_entry->vme_start = map_address;
15831 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 15832 assert(new_entry->vme_start < new_entry->vme_end);
5c9f4661
A
15833 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15834 /*
15835 * Remapping for vm_map_protect(VM_PROT_COPY)
15836 * to convert a read-only mapping into a
15837 * copy-on-write version of itself but
15838 * with write access:
0a7de745 15839 * keep the original inheritance and add
5c9f4661
A
15840 * VM_PROT_WRITE to the max protection.
15841 */
15842 new_entry->inheritance = src_entry->inheritance;
d9a64523 15843 new_entry->protection &= max_prot_for_prot_copy;
5c9f4661
A
15844 new_entry->max_protection |= VM_PROT_WRITE;
15845 } else {
15846 new_entry->inheritance = inheritance;
15847 }
3e170ce0 15848 VME_OFFSET_SET(new_entry, offset);
0a7de745 15849
91447636
A
15850 /*
15851 * The new region has to be copied now if required.
15852 */
0a7de745 15853RestartCopy:
91447636 15854 if (!copy) {
316670eb
A
15855 /*
15856 * Cannot allow an entry describing a JIT
15857 * region to be shared across address spaces.
15858 */
39037602 15859 if (src_entry->used_for_jit == TRUE && !same_map) {
d9a64523 15860#if CONFIG_EMBEDDED
316670eb
A
15861 result = KERN_INVALID_ARGUMENT;
15862 break;
d9a64523 15863#endif /* CONFIG_EMBEDDED */
316670eb 15864 }
91447636
A
15865 src_entry->is_shared = TRUE;
15866 new_entry->is_shared = TRUE;
0a7de745 15867 if (!(new_entry->is_sub_map)) {
91447636 15868 new_entry->needs_copy = FALSE;
0a7de745 15869 }
91447636
A
15870 } else if (src_entry->is_sub_map) {
15871 /* make this a COW sub_map if not already */
3e170ce0 15872 assert(new_entry->wired_count == 0);
91447636
A
15873 new_entry->needs_copy = TRUE;
15874 object = VM_OBJECT_NULL;
15875 } else if (src_entry->wired_count == 0 &&
0a7de745
A
15876 vm_object_copy_quickly(&VME_OBJECT(new_entry),
15877 VME_OFFSET(new_entry),
15878 (new_entry->vme_end -
15879 new_entry->vme_start),
15880 &src_needs_copy,
15881 &new_entry_needs_copy)) {
91447636
A
15882 new_entry->needs_copy = new_entry_needs_copy;
15883 new_entry->is_shared = FALSE;
a39ff7e2 15884 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
1c79356b 15885
91447636
A
15886 /*
 15887 * Handle copy-on-write semantics.
15888 */
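/*
 * Added note (not in the original source): a sketch of what the block
 * below appears to do.  Once vm_object_copy_quickly() has set up a
 * symmetric copy, the source entry's existing mappings are downgraded
 * to read (and possibly execute) only, so that the first write through
 * either mapping faults and copies the page, and the source entry is
 * marked needs_copy to remember that it is now in that state.
 */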
15889 if (src_needs_copy && !src_entry->needs_copy) {
0a7de745 15890 vm_prot_t prot;
0c530ab8 15891
5ba3f43e
A
15892 assert(!pmap_has_prot_policy(src_entry->protection));
15893
0c530ab8 15894 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 15895
3e170ce0 15896 if (override_nx(map,
0a7de745
A
15897 VME_ALIAS(src_entry))
15898 && prot) {
15899 prot |= VM_PROT_EXECUTE;
15900 }
2d21ac55 15901
5ba3f43e
A
15902 assert(!pmap_has_prot_policy(prot));
15903
91447636 15904 vm_object_pmap_protect(object,
0a7de745
A
15905 offset,
15906 entry_size,
15907 ((src_entry->is_shared
15908 || map->mapped_in_other_pmaps) ?
15909 PMAP_NULL : map->pmap),
15910 src_entry->vme_start,
15911 prot);
1c79356b 15912
3e170ce0 15913 assert(src_entry->wired_count == 0);
91447636
A
15914 src_entry->needs_copy = TRUE;
15915 }
15916 /*
15917 * Throw away the old object reference of the new entry.
15918 */
15919 vm_object_deallocate(object);
91447636
A
15920 } else {
15921 new_entry->is_shared = FALSE;
a39ff7e2
A
15922 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15923
15924 src_entry_was_wired = (src_entry->wired_count > 0);
15925 saved_src_entry = src_entry;
15926 src_entry = VM_MAP_ENTRY_NULL;
1c79356b 15927
91447636
A
15928 /*
15929 * The map can be safely unlocked since we
15930 * already hold a reference on the object.
15931 *
15932 * Record the timestamp of the map for later
15933 * verification, and unlock the map.
15934 */
15935 version.main_timestamp = map->timestamp;
0a7de745 15936 vm_map_unlock(map); /* Increments timestamp once! */
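/*
 * Added note: since vm_map_unlock() bumps the map timestamp exactly
 * once, finding "version.main_timestamp + 1 == map->timestamp" after
 * relocking means no one else modified the map in the meantime; that
 * is what the verification step after the copy below relies on.
 */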
55e303ae 15937
91447636
A
15938 /*
15939 * Perform the copy.
15940 */
a39ff7e2 15941 if (src_entry_was_wired > 0) {
91447636
A
15942 vm_object_lock(object);
15943 result = vm_object_copy_slowly(
2d21ac55
A
15944 object,
15945 offset,
5ba3f43e
A
15946 (new_entry->vme_end -
15947 new_entry->vme_start),
2d21ac55 15948 THREAD_UNINT,
3e170ce0 15949 &VME_OBJECT(new_entry));
1c79356b 15950
3e170ce0 15951 VME_OFFSET_SET(new_entry, 0);
91447636
A
15952 new_entry->needs_copy = FALSE;
15953 } else {
3e170ce0
A
15954 vm_object_offset_t new_offset;
15955
15956 new_offset = VME_OFFSET(new_entry);
91447636 15957 result = vm_object_copy_strategically(
2d21ac55
A
15958 object,
15959 offset,
5ba3f43e
A
15960 (new_entry->vme_end -
15961 new_entry->vme_start),
3e170ce0
A
15962 &VME_OBJECT(new_entry),
15963 &new_offset,
2d21ac55 15964 &new_entry_needs_copy);
3e170ce0
A
15965 if (new_offset != VME_OFFSET(new_entry)) {
15966 VME_OFFSET_SET(new_entry, new_offset);
15967 }
1c79356b 15968
91447636
A
15969 new_entry->needs_copy = new_entry_needs_copy;
15970 }
1c79356b 15971
91447636
A
15972 /*
15973 * Throw away the old object reference of the new entry.
15974 */
15975 vm_object_deallocate(object);
1c79356b 15976
91447636
A
15977 if (result != KERN_SUCCESS &&
15978 result != KERN_MEMORY_RESTART_COPY) {
15979 _vm_map_entry_dispose(map_header, new_entry);
39037602 15980 vm_map_lock(map);
91447636
A
15981 break;
15982 }
1c79356b 15983
91447636
A
15984 /*
15985 * Verify that the map has not substantially
15986 * changed while the copy was being made.
15987 */
1c79356b 15988
91447636
A
15989 vm_map_lock(map);
15990 if (version.main_timestamp + 1 != map->timestamp) {
15991 /*
15992 * Simple version comparison failed.
15993 *
15994 * Retry the lookup and verify that the
15995 * same object/offset are still present.
15996 */
a39ff7e2 15997 saved_src_entry = VM_MAP_ENTRY_NULL;
3e170ce0 15998 vm_object_deallocate(VME_OBJECT(new_entry));
91447636 15999 _vm_map_entry_dispose(map_header, new_entry);
0a7de745 16000 if (result == KERN_MEMORY_RESTART_COPY) {
91447636 16001 result = KERN_SUCCESS;
0a7de745 16002 }
91447636
A
16003 continue;
16004 }
a39ff7e2
A
16005 /* map hasn't changed: src_entry is still valid */
16006 src_entry = saved_src_entry;
16007 saved_src_entry = VM_MAP_ENTRY_NULL;
1c79356b 16008
91447636
A
16009 if (result == KERN_MEMORY_RESTART_COPY) {
16010 vm_object_reference(object);
16011 goto RestartCopy;
16012 }
16013 }
1c79356b 16014
6d2010ae 16015 _vm_map_store_entry_link(map_header,
0a7de745 16016 map_header->links.prev, new_entry);
1c79356b 16017
6d2010ae 16018 /* Protections for submap mappings are irrelevant here */
0a7de745 16019 if (!src_entry->is_sub_map) {
6d2010ae
A
16020 *cur_protection &= src_entry->protection;
16021 *max_protection &= src_entry->max_protection;
16022 }
91447636
A
16023 map_address += tmp_size;
16024 mapped_size += tmp_size;
16025 src_start += tmp_size;
91447636 16026 } /* end while */
1c79356b 16027
91447636
A
16028 vm_map_unlock(map);
16029 if (result != KERN_SUCCESS) {
16030 /*
16031 * Free all allocated elements.
16032 */
16033 for (src_entry = map_header->links.next;
0a7de745
A
16034 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16035 src_entry = new_entry) {
91447636 16036 new_entry = src_entry->vme_next;
6d2010ae 16037 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 16038 if (src_entry->is_sub_map) {
3e170ce0 16039 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 16040 } else {
3e170ce0 16041 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 16042 }
91447636
A
16043 _vm_map_entry_dispose(map_header, src_entry);
16044 }
16045 }
16046 return result;
1c79356b
A
16047}
16048
16049/*
91447636 16050 * Routine: vm_map_remap
1c79356b 16051 *
91447636
A
 16052 * Map a portion of a task's address space.
 16053 * The mapped region must not overlap more than
 16054 * one VM memory object. Protections and
 16055 * inheritance attributes remain the same
 16056 * as in the original task and are out parameters.
 16057 * Source and target tasks can be identical.
 16058 * Other attributes are identical as for vm_map().
1c79356b
A
16059 */
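/*
 * Illustrative sketch (added, not part of the original source): a
 * hypothetical in-kernel caller remapping "size" bytes from src_map
 * into target_map anywhere in the target's address space, sharing the
 * pages rather than copying them.  VM_MAP_KERNEL_FLAGS_NONE and
 * VM_KERN_MEMORY_NONE are assumed to be the usual "no special kernel
 * flags" / "no tag" values.
 *
 *	vm_map_offset_t target_addr = 0;
 *	vm_prot_t cur_prot, max_prot;	// out parameters
 *	kern_return_t kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size, 0,
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	    src_map, memory_address, FALSE,	// copy == FALSE: share pages
 *	    &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */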
16060kern_return_t
91447636 16061vm_map_remap(
0a7de745
A
16062 vm_map_t target_map,
16063 vm_map_address_t *address,
16064 vm_map_size_t size,
16065 vm_map_offset_t mask,
16066 int flags,
16067 vm_map_kernel_flags_t vmk_flags,
16068 vm_tag_t tag,
16069 vm_map_t src_map,
16070 vm_map_offset_t memory_address,
16071 boolean_t copy,
16072 vm_prot_t *cur_protection,
16073 vm_prot_t *max_protection,
16074 vm_inherit_t inheritance)
1c79356b 16075{
0a7de745
A
16076 kern_return_t result;
16077 vm_map_entry_t entry;
16078 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16079 vm_map_entry_t new_entry;
16080 struct vm_map_header map_header;
16081 vm_map_offset_t offset_in_mapping;
16082
16083 if (target_map == VM_MAP_NULL) {
91447636 16084 return KERN_INVALID_ARGUMENT;
0a7de745 16085 }
1c79356b 16086
91447636 16087 switch (inheritance) {
2d21ac55
A
16088 case VM_INHERIT_NONE:
16089 case VM_INHERIT_COPY:
16090 case VM_INHERIT_SHARE:
0a7de745 16091 if (size != 0 && src_map != VM_MAP_NULL) {
91447636 16092 break;
0a7de745
A
16093 }
16094 /*FALL THRU*/
2d21ac55 16095 default:
91447636
A
16096 return KERN_INVALID_ARGUMENT;
16097 }
1c79356b 16098
5ba3f43e
A
16099 /*
16100 * If the user is requesting that we return the address of the
16101 * first byte of the data (rather than the base of the page),
16102 * then we use different rounding semantics: specifically,
39236c6e
A
16103 * we assume that (memory_address, size) describes a region
16104 * all of whose pages we must cover, rather than a base to be truncated
16105 * down and a size to be added to that base. So we figure out
16106 * the highest page that the requested region includes and make
16107 * sure that the size will cover it.
5ba3f43e 16108 *
0a7de745 16109 * The key example we're worried about is of the form:
39236c6e 16110 *
0a7de745 16111 * memory_address = 0x1ff0, size = 0x20
5ba3f43e
A
16112 *
16113 * With the old semantics, we round down the memory_address to 0x1000
39236c6e
A
16114 * and round up the size to 0x1000, resulting in our covering *only*
16115 * page 0x1000. With the new semantics, we'd realize that the region covers
5ba3f43e 16116 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
39236c6e
A
16117 * 0x1000 and page 0x2000 in the region we remap.
16118 */
16119 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16120 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16121 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16122 } else {
16123 size = vm_map_round_page(size, PAGE_MASK);
5ba3f43e
A
16124 }
16125 if (size == 0) {
16126 return KERN_INVALID_ARGUMENT;
16127 }
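/*
 * Worked example (added for clarity), using the numbers from the
 * comment above and assuming 4K pages (PAGE_MASK == 0xfff):
 *
 *	memory_address = 0x1ff0, size = 0x20
 *	offset_in_mapping = 0x1ff0 - trunc(0x1ff0) = 0x1ff0 - 0x1000 = 0xff0
 *	size = round(0x1ff0 + 0x20 - 0x1000) = round(0x1010) = 0x2000
 *
 * so the remapped region covers both page 0x1000 and page 0x2000, and
 * the caller eventually gets back *address + 0xff0, the address of the
 * first byte of the data.
 */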
1c79356b 16128
91447636 16129 result = vm_map_remap_extract(src_map, memory_address,
0a7de745
A
16130 size, copy, &map_header,
16131 cur_protection,
16132 max_protection,
16133 inheritance,
16134 target_map->hdr.entries_pageable,
16135 src_map == target_map,
16136 vmk_flags);
1c79356b 16137
91447636
A
16138 if (result != KERN_SUCCESS) {
16139 return result;
16140 }
1c79356b 16141
91447636
A
16142 /*
16143 * Allocate/check a range of free virtual address
16144 * space for the target
1c79356b 16145 */
39236c6e 16146 *address = vm_map_trunc_page(*address,
0a7de745 16147 VM_MAP_PAGE_MASK(target_map));
91447636
A
16148 vm_map_lock(target_map);
16149 result = vm_map_remap_range_allocate(target_map, address, size,
0a7de745
A
16150 mask, flags, vmk_flags, tag,
16151 &insp_entry);
1c79356b 16152
91447636 16153 for (entry = map_header.links.next;
0a7de745
A
16154 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16155 entry = new_entry) {
91447636 16156 new_entry = entry->vme_next;
6d2010ae 16157 _vm_map_store_entry_unlink(&map_header, entry);
91447636 16158 if (result == KERN_SUCCESS) {
3e170ce0
A
16159 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16160 /* no codesigning -> read-only access */
3e170ce0
A
16161 entry->max_protection = VM_PROT_READ;
16162 entry->protection = VM_PROT_READ;
16163 entry->vme_resilient_codesign = TRUE;
16164 }
91447636
A
16165 entry->vme_start += *address;
16166 entry->vme_end += *address;
39236c6e 16167 assert(!entry->map_aligned);
d9a64523 16168 vm_map_store_entry_link(target_map, insp_entry, entry,
0a7de745 16169 vmk_flags);
91447636
A
16170 insp_entry = entry;
16171 } else {
16172 if (!entry->is_sub_map) {
3e170ce0 16173 vm_object_deallocate(VME_OBJECT(entry));
91447636 16174 } else {
3e170ce0 16175 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 16176 }
91447636 16177 _vm_map_entry_dispose(&map_header, entry);
1c79356b 16178 }
91447636 16179 }
1c79356b 16180
3e170ce0
A
16181 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16182 *cur_protection = VM_PROT_READ;
16183 *max_protection = VM_PROT_READ;
16184 }
16185
0a7de745 16186 if (target_map->disable_vmentry_reuse == TRUE) {
39037602 16187 assert(!target_map->is_nested_map);
0a7de745 16188 if (target_map->highest_entry_end < insp_entry->vme_end) {
6d2010ae
A
16189 target_map->highest_entry_end = insp_entry->vme_end;
16190 }
16191 }
16192
91447636
A
16193 if (result == KERN_SUCCESS) {
16194 target_map->size += size;
0c530ab8 16195 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
5ba3f43e 16196
d9a64523
A
16197#if PMAP_CS
16198 if (*max_protection & VM_PROT_EXECUTE) {
16199 vm_map_address_t region_start = 0, region_size = 0;
16200 struct pmap_cs_code_directory *region_cd = NULL;
16201 vm_map_address_t base = 0;
16202 struct pmap_cs_lookup_results results = {};
16203 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16204 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16205
16206 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16207 region_size = results.region_size;
16208 region_start = results.region_start;
16209 region_cd = results.region_cd_entry;
16210 base = results.base;
16211
16212 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16213 *cur_protection = VM_PROT_READ;
16214 *max_protection = VM_PROT_READ;
16215 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
0a7de745
A
16216 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16217 page_addr, page_addr + assoc_size, *address,
16218 region_start, region_size,
16219 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16220 );
d9a64523
A
16221 }
16222 }
16223#endif
d9a64523
A
16224 }
16225 vm_map_unlock(target_map);
16226
0a7de745 16227 if (result == KERN_SUCCESS && target_map->wiring_required) {
5ba3f43e 16228 result = vm_map_wire_kernel(target_map, *address,
0a7de745
A
16229 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16230 TRUE);
16231 }
39236c6e 16232
5ba3f43e
A
16233 /*
16234 * If requested, return the address of the data pointed to by the
39236c6e
A
16235 * request, rather than the base of the resulting page.
16236 */
16237 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16238 *address += offset_in_mapping;
16239 }
16240
91447636
A
16241 return result;
16242}
1c79356b 16243
91447636
A
16244/*
16245 * Routine: vm_map_remap_range_allocate
16246 *
16247 * Description:
16248 * Allocate a range in the specified virtual address map.
16249 * returns the address and the map entry just before the allocated
16250 * range
16251 *
16252 * Map must be locked.
16253 */
1c79356b 16254
91447636
A
16255static kern_return_t
16256vm_map_remap_range_allocate(
0a7de745
A
16257 vm_map_t map,
16258 vm_map_address_t *address, /* IN/OUT */
16259 vm_map_size_t size,
16260 vm_map_offset_t mask,
16261 int flags,
16262 vm_map_kernel_flags_t vmk_flags,
5ba3f43e 16263 __unused vm_tag_t tag,
0a7de745 16264 vm_map_entry_t *map_entry) /* OUT */
91447636 16265{
0a7de745
A
16266 vm_map_entry_t entry;
16267 vm_map_offset_t start;
16268 vm_map_offset_t end;
16269 vm_map_offset_t desired_empty_end;
16270 kern_return_t kr;
16271 vm_map_entry_t hole_entry;
1c79356b 16272
0a7de745 16273StartAgain:;
1c79356b 16274
2d21ac55 16275 start = *address;
1c79356b 16276
0a7de745
A
16277 if (flags & VM_FLAGS_ANYWHERE) {
16278 if (flags & VM_FLAGS_RANDOM_ADDR) {
39037602
A
16279 /*
16280 * Get a random start address.
16281 */
16282 kr = vm_map_random_address_for_size(map, address, size);
16283 if (kr != KERN_SUCCESS) {
0a7de745 16284 return kr;
39037602
A
16285 }
16286 start = *address;
16287 }
16288
2d21ac55
A
16289 /*
16290 * Calculate the first possible address.
16291 */
1c79356b 16292
0a7de745 16293 if (start < map->min_offset) {
2d21ac55 16294 start = map->min_offset;
0a7de745
A
16295 }
16296 if (start > map->max_offset) {
16297 return KERN_NO_SPACE;
16298 }
5ba3f43e 16299
2d21ac55
A
16300 /*
16301 * Look for the first possible address;
16302 * if there's already something at this
16303 * address, we have to start after it.
16304 */
1c79356b 16305
0a7de745 16306 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 16307 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 16308 } else {
3e170ce0 16309 if (map->holelistenabled) {
d9a64523 16310 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
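/*
 * Added note: holes_list appears to be a circular list describing
 * the unallocated gaps in the map in ascending address order; the
 * do/while below walks it once, picking the first hole that starts
 * at or extends beyond "start", and fails with KERN_NO_SPACE if it
 * wraps back around to the head without finding one.
 */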
3e170ce0
A
16311
16312 if (hole_entry == NULL) {
16313 /*
16314 * No more space in the map?
16315 */
0a7de745 16316 return KERN_NO_SPACE;
3e170ce0 16317 } else {
3e170ce0
A
16318 boolean_t found_hole = FALSE;
16319
16320 do {
16321 if (hole_entry->vme_start >= start) {
16322 start = hole_entry->vme_start;
16323 found_hole = TRUE;
16324 break;
16325 }
16326
16327 if (hole_entry->vme_end > start) {
16328 found_hole = TRUE;
16329 break;
16330 }
16331 hole_entry = hole_entry->vme_next;
d9a64523 16332 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
16333
16334 if (found_hole == FALSE) {
0a7de745 16335 return KERN_NO_SPACE;
3e170ce0
A
16336 }
16337
16338 entry = hole_entry;
16339 }
6d2010ae 16340 } else {
3e170ce0
A
16341 assert(first_free_is_valid(map));
16342 if (start == map->min_offset) {
0a7de745 16343 if ((entry = map->first_free) != vm_map_to_entry(map)) {
3e170ce0 16344 start = entry->vme_end;
0a7de745 16345 }
3e170ce0 16346 } else {
0a7de745
A
16347 vm_map_entry_t tmp_entry;
16348 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
3e170ce0 16349 start = tmp_entry->vme_end;
0a7de745 16350 }
3e170ce0
A
16351 entry = tmp_entry;
16352 }
6d2010ae 16353 }
39236c6e 16354 start = vm_map_round_page(start,
0a7de745 16355 VM_MAP_PAGE_MASK(map));
2d21ac55 16356 }
5ba3f43e 16357
2d21ac55
A
16358 /*
16359 * In any case, the "entry" always precedes
16360 * the proposed new region throughout the
16361 * loop:
16362 */
1c79356b 16363
2d21ac55 16364 while (TRUE) {
0a7de745 16365 vm_map_entry_t next;
2d21ac55
A
16366
16367 /*
16368 * Find the end of the proposed new region.
16369 * Be sure we didn't go beyond the end, or
16370 * wrap around the address.
16371 */
16372
16373 end = ((start + mask) & ~mask);
39236c6e 16374 end = vm_map_round_page(end,
0a7de745
A
16375 VM_MAP_PAGE_MASK(map));
16376 if (end < start) {
16377 return KERN_NO_SPACE;
16378 }
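/*
 * Added note: "mask" is treated as an alignment mask (alignment - 1),
 * so the line above rounds "start" up to the requested alignment.
 * For example, with mask == 0xfff (4K alignment) and start == 0x1234:
 *
 *	end = (0x1234 + 0xfff) & ~0xfff = 0x2000
 *
 * before being rounded again to the map's page size and adopted as
 * the new "start".
 */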
2d21ac55
A
16379 start = end;
16380 end += size;
16381
d9a64523
A
16382 /* We want an entire page of empty space, but don't increase the allocation size. */
16383 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16384
16385 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
2d21ac55
A
16386 if (map->wait_for_space) {
16387 if (size <= (map->max_offset -
0a7de745 16388 map->min_offset)) {
2d21ac55
A
16389 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16390 vm_map_unlock(map);
16391 thread_block(THREAD_CONTINUE_NULL);
16392 vm_map_lock(map);
16393 goto StartAgain;
16394 }
16395 }
5ba3f43e 16396
0a7de745 16397 return KERN_NO_SPACE;
2d21ac55 16398 }
1c79356b 16399
2d21ac55 16400 next = entry->vme_next;
1c79356b 16401
3e170ce0 16402 if (map->holelistenabled) {
0a7de745 16403 if (entry->vme_end >= desired_empty_end) {
3e170ce0 16404 break;
0a7de745 16405 }
3e170ce0
A
16406 } else {
16407 /*
0a7de745 16408 * If there are no more entries, we must win.
3e170ce0
A
16409 *
16410 * OR
16411 *
16412 * If there is another entry, it must be
16413 * after the end of the potential new region.
16414 */
1c79356b 16415
0a7de745 16416 if (next == vm_map_to_entry(map)) {
3e170ce0 16417 break;
0a7de745 16418 }
3e170ce0 16419
0a7de745 16420 if (next->vme_start >= desired_empty_end) {
3e170ce0 16421 break;
0a7de745 16422 }
3e170ce0 16423 }
1c79356b 16424
2d21ac55
A
16425 /*
16426 * Didn't fit -- move to the next entry.
16427 */
1c79356b 16428
2d21ac55 16429 entry = next;
3e170ce0
A
16430
16431 if (map->holelistenabled) {
d9a64523 16432 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
16433 /*
16434 * Wrapped around
16435 */
0a7de745 16436 return KERN_NO_SPACE;
3e170ce0
A
16437 }
16438 start = entry->vme_start;
16439 } else {
16440 start = entry->vme_end;
16441 }
16442 }
16443
16444 if (map->holelistenabled) {
3e170ce0
A
16445 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16446 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16447 }
2d21ac55 16448 }
3e170ce0 16449
2d21ac55
A
16450 *address = start;
16451 } else {
0a7de745 16452 vm_map_entry_t temp_entry;
5ba3f43e 16453
2d21ac55
A
16454 /*
16455 * Verify that:
16456 * the address doesn't itself violate
16457 * the mask requirement.
16458 */
1c79356b 16459
0a7de745
A
16460 if ((start & mask) != 0) {
16461 return KERN_NO_SPACE;
16462 }
1c79356b 16463
1c79356b 16464
2d21ac55
A
16465 /*
16466 * ... the address is within bounds
16467 */
1c79356b 16468
2d21ac55 16469 end = start + size;
1c79356b 16470
2d21ac55
A
16471 if ((start < map->min_offset) ||
16472 (end > map->max_offset) ||
16473 (start >= end)) {
0a7de745 16474 return KERN_INVALID_ADDRESS;
2d21ac55 16475 }
1c79356b 16476
060df5ea
A
16477 /*
16478 * If we're asked to overwrite whatever was mapped in that
16479 * range, first deallocate that range.
16480 */
16481 if (flags & VM_FLAGS_OVERWRITE) {
16482 vm_map_t zap_map;
d9a64523 16483 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
060df5ea
A
16484
16485 /*
16486 * We use a "zap_map" to avoid having to unlock
16487 * the "map" in vm_map_delete(), which would compromise
16488 * the atomicity of the "deallocate" and then "remap"
16489 * combination.
16490 */
16491 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
16492 start,
16493 end,
16494 map->hdr.entries_pageable);
060df5ea
A
16495 if (zap_map == VM_MAP_NULL) {
16496 return KERN_RESOURCE_SHORTAGE;
16497 }
39236c6e 16498 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 16499 vm_map_disable_hole_optimization(zap_map);
060df5ea 16500
d9a64523
A
16501 if (vmk_flags.vmkf_overwrite_immutable) {
16502 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16503 }
060df5ea 16504 kr = vm_map_delete(map, start, end,
0a7de745
A
16505 remove_flags,
16506 zap_map);
060df5ea
A
16507 if (kr == KERN_SUCCESS) {
16508 vm_map_destroy(zap_map,
0a7de745 16509 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
060df5ea
A
16510 zap_map = VM_MAP_NULL;
16511 }
16512 }
16513
2d21ac55
A
16514 /*
16515 * ... the starting address isn't allocated
16516 */
91447636 16517
0a7de745
A
16518 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16519 return KERN_NO_SPACE;
16520 }
91447636 16521
2d21ac55 16522 entry = temp_entry;
91447636 16523
2d21ac55
A
16524 /*
16525 * ... the next region doesn't overlap the
16526 * end point.
16527 */
1c79356b 16528
2d21ac55 16529 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745
A
16530 (entry->vme_next->vme_start < end)) {
16531 return KERN_NO_SPACE;
16532 }
2d21ac55
A
16533 }
16534 *map_entry = entry;
0a7de745 16535 return KERN_SUCCESS;
91447636 16536}
1c79356b 16537
91447636
A
16538/*
16539 * vm_map_switch:
16540 *
16541 * Set the address map for the current thread to the specified map
16542 */
1c79356b 16543
91447636
A
16544vm_map_t
16545vm_map_switch(
0a7de745 16546 vm_map_t map)
91447636 16547{
0a7de745
A
16548 int mycpu;
16549 thread_t thread = current_thread();
16550 vm_map_t oldmap = thread->map;
1c79356b 16551
91447636
A
16552 mp_disable_preemption();
16553 mycpu = cpu_number();
1c79356b 16554
91447636
A
16555 /*
16556 * Deactivate the current map and activate the requested map
16557 */
16558 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 16559
91447636 16560 mp_enable_preemption();
0a7de745 16561 return oldmap;
91447636 16562}
1c79356b 16563
1c79356b 16564
91447636
A
16565/*
16566 * Routine: vm_map_write_user
16567 *
16568 * Description:
 16569 * Copy out data from kernel space into space in the
16570 * destination map. The space must already exist in the
16571 * destination map.
16572 * NOTE: This routine should only be called by threads
 16573 * which can block on a page fault, i.e. kernel-mode user
16574 * threads.
16575 *
16576 */
16577kern_return_t
16578vm_map_write_user(
0a7de745
A
16579 vm_map_t map,
16580 void *src_p,
16581 vm_map_address_t dst_addr,
16582 vm_size_t size)
91447636 16583{
0a7de745 16584 kern_return_t kr = KERN_SUCCESS;
1c79356b 16585
0a7de745 16586 if (current_map() == map) {
91447636
A
16587 if (copyout(src_p, dst_addr, size)) {
16588 kr = KERN_INVALID_ADDRESS;
16589 }
16590 } else {
0a7de745 16591 vm_map_t oldmap;
1c79356b 16592
91447636
A
16593 /* take on the identity of the target map while doing */
16594 /* the transfer */
1c79356b 16595
91447636
A
16596 vm_map_reference(map);
16597 oldmap = vm_map_switch(map);
16598 if (copyout(src_p, dst_addr, size)) {
16599 kr = KERN_INVALID_ADDRESS;
1c79356b 16600 }
91447636
A
16601 vm_map_switch(oldmap);
16602 vm_map_deallocate(map);
1c79356b 16603 }
91447636 16604 return kr;
1c79356b
A
16605}
16606
16607/*
91447636
A
16608 * Routine: vm_map_read_user
16609 *
16610 * Description:
16611 * Copy in data from a user space source map into the
16612 * kernel map. The space must already exist in the
16613 * kernel map.
16614 * NOTE: This routine should only be called by threads
 16615 * which can block on a page fault, i.e. kernel-mode user
16616 * threads.
1c79356b 16617 *
1c79356b
A
16618 */
16619kern_return_t
91447636 16620vm_map_read_user(
0a7de745
A
16621 vm_map_t map,
16622 vm_map_address_t src_addr,
16623 void *dst_p,
16624 vm_size_t size)
1c79356b 16625{
0a7de745 16626 kern_return_t kr = KERN_SUCCESS;
1c79356b 16627
0a7de745 16628 if (current_map() == map) {
91447636
A
16629 if (copyin(src_addr, dst_p, size)) {
16630 kr = KERN_INVALID_ADDRESS;
16631 }
16632 } else {
0a7de745 16633 vm_map_t oldmap;
1c79356b 16634
91447636
A
16635 /* take on the identity of the target map while doing */
16636 /* the transfer */
16637
16638 vm_map_reference(map);
16639 oldmap = vm_map_switch(map);
16640 if (copyin(src_addr, dst_p, size)) {
16641 kr = KERN_INVALID_ADDRESS;
16642 }
16643 vm_map_switch(oldmap);
16644 vm_map_deallocate(map);
1c79356b 16645 }
91447636
A
16646 return kr;
16647}
16648
1c79356b 16649
91447636
A
16650/*
16651 * vm_map_check_protection:
16652 *
16653 * Assert that the target map allows the specified
16654 * privilege on the entire address region given.
16655 * The entire region must be allocated.
16656 */
2d21ac55
A
16657boolean_t
16658vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
0a7de745 16659 vm_map_offset_t end, vm_prot_t protection)
91447636 16660{
2d21ac55
A
16661 vm_map_entry_t entry;
16662 vm_map_entry_t tmp_entry;
1c79356b 16663
91447636 16664 vm_map_lock(map);
1c79356b 16665
0a7de745 16666 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
2d21ac55 16667 vm_map_unlock(map);
0a7de745 16668 return FALSE;
1c79356b
A
16669 }
16670
91447636
A
16671 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16672 vm_map_unlock(map);
0a7de745 16673 return FALSE;
91447636 16674 }
1c79356b 16675
91447636
A
16676 entry = tmp_entry;
16677
16678 while (start < end) {
16679 if (entry == vm_map_to_entry(map)) {
16680 vm_map_unlock(map);
0a7de745 16681 return FALSE;
1c79356b 16682 }
1c79356b 16683
91447636
A
16684 /*
16685 * No holes allowed!
16686 */
1c79356b 16687
91447636
A
16688 if (start < entry->vme_start) {
16689 vm_map_unlock(map);
0a7de745 16690 return FALSE;
91447636
A
16691 }
16692
16693 /*
16694 * Check protection associated with entry.
16695 */
16696
16697 if ((entry->protection & protection) != protection) {
16698 vm_map_unlock(map);
0a7de745 16699 return FALSE;
91447636
A
16700 }
16701
16702 /* go to next entry */
16703
16704 start = entry->vme_end;
16705 entry = entry->vme_next;
16706 }
16707 vm_map_unlock(map);
0a7de745 16708 return TRUE;
1c79356b
A
16709}
16710
1c79356b 16711kern_return_t
91447636 16712vm_map_purgable_control(
0a7de745
A
16713 vm_map_t map,
16714 vm_map_offset_t address,
16715 vm_purgable_t control,
16716 int *state)
1c79356b 16717{
0a7de745
A
16718 vm_map_entry_t entry;
16719 vm_object_t object;
16720 kern_return_t kr;
16721 boolean_t was_nonvolatile;
1c79356b 16722
1c79356b 16723 /*
91447636
A
16724 * Vet all the input parameters and current type and state of the
 16725 * underlying object. Return with an error if anything is amiss.
1c79356b 16726 */
0a7de745
A
16727 if (map == VM_MAP_NULL) {
16728 return KERN_INVALID_ARGUMENT;
16729 }
1c79356b 16730
91447636 16731 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7 16732 control != VM_PURGABLE_GET_STATE &&
5ba3f43e 16733 control != VM_PURGABLE_PURGE_ALL &&
0a7de745
A
16734 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16735 return KERN_INVALID_ARGUMENT;
16736 }
1c79356b 16737
b0d623f7
A
16738 if (control == VM_PURGABLE_PURGE_ALL) {
16739 vm_purgeable_object_purge_all();
16740 return KERN_SUCCESS;
16741 }
16742
5ba3f43e 16743 if ((control == VM_PURGABLE_SET_STATE ||
0a7de745 16744 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
b0d623f7 16745 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
0a7de745
A
16746 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16747 return KERN_INVALID_ARGUMENT;
16748 }
91447636 16749
b0d623f7 16750 vm_map_lock_read(map);
91447636
A
16751
16752 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
91447636
A
16753 /*
16754 * Must pass a valid non-submap address.
16755 */
b0d623f7 16756 vm_map_unlock_read(map);
0a7de745 16757 return KERN_INVALID_ADDRESS;
91447636
A
16758 }
16759
16760 if ((entry->protection & VM_PROT_WRITE) == 0) {
16761 /*
16762 * Can't apply purgable controls to something you can't write.
16763 */
b0d623f7 16764 vm_map_unlock_read(map);
0a7de745 16765 return KERN_PROTECTION_FAILURE;
91447636
A
16766 }
16767
3e170ce0 16768 object = VME_OBJECT(entry);
fe8ab488
A
16769 if (object == VM_OBJECT_NULL ||
16770 object->purgable == VM_PURGABLE_DENY) {
91447636 16771 /*
fe8ab488 16772 * Object must already be present and be purgeable.
91447636 16773 */
b0d623f7 16774 vm_map_unlock_read(map);
91447636
A
16775 return KERN_INVALID_ARGUMENT;
16776 }
5ba3f43e 16777
91447636
A
16778 vm_object_lock(object);
16779
39236c6e 16780#if 00
5ba3f43e 16781 if (VME_OFFSET(entry) != 0 ||
6d2010ae 16782 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
16783 /*
16784 * Can only apply purgable controls to the whole (existing)
16785 * object at once.
16786 */
b0d623f7 16787 vm_map_unlock_read(map);
91447636
A
16788 vm_object_unlock(object);
16789 return KERN_INVALID_ARGUMENT;
1c79356b 16790 }
39236c6e 16791#endif
fe8ab488
A
16792
16793 assert(!entry->is_sub_map);
16794 assert(!entry->use_pmap); /* purgeable has its own accounting */
16795
b0d623f7 16796 vm_map_unlock_read(map);
1c79356b 16797
fe8ab488
A
16798 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16799
91447636 16800 kr = vm_object_purgable_control(object, control, state);
1c79356b 16801
fe8ab488
A
16802 if (was_nonvolatile &&
16803 object->purgable != VM_PURGABLE_NONVOLATILE &&
16804 map->pmap == kernel_pmap) {
16805#if DEBUG
16806 object->vo_purgeable_volatilizer = kernel_task;
16807#endif /* DEBUG */
16808 }
16809
91447636 16810 vm_object_unlock(object);
1c79356b 16811
91447636
A
16812 return kr;
16813}
1c79356b 16814
91447636 16815kern_return_t
b0d623f7 16816vm_map_page_query_internal(
0a7de745
A
16817 vm_map_t target_map,
16818 vm_map_offset_t offset,
16819 int *disposition,
16820 int *ref_count)
91447636 16821{
0a7de745
A
16822 kern_return_t kr;
16823 vm_page_info_basic_data_t info;
16824 mach_msg_type_number_t count;
b0d623f7
A
16825
16826 count = VM_PAGE_INFO_BASIC_COUNT;
16827 kr = vm_map_page_info(target_map,
0a7de745
A
16828 offset,
16829 VM_PAGE_INFO_BASIC,
16830 (vm_page_info_t) &info,
16831 &count);
b0d623f7
A
16832 if (kr == KERN_SUCCESS) {
16833 *disposition = info.disposition;
16834 *ref_count = info.ref_count;
16835 } else {
16836 *disposition = 0;
16837 *ref_count = 0;
16838 }
2d21ac55 16839
b0d623f7
A
16840 return kr;
16841}
5ba3f43e 16842
b0d623f7
A
16843kern_return_t
16844vm_map_page_info(
0a7de745
A
16845 vm_map_t map,
16846 vm_map_offset_t offset,
16847 vm_page_info_flavor_t flavor,
16848 vm_page_info_t info,
16849 mach_msg_type_number_t *count)
5ba3f43e 16850{
0a7de745
A
16851 return vm_map_page_range_info_internal(map,
16852 offset, /* start of range */
16853 (offset + 1), /* this will get rounded in the call to the page boundary */
16854 flavor,
16855 info,
16856 count);
5ba3f43e
A
16857}
16858
16859kern_return_t
16860vm_map_page_range_info_internal(
0a7de745
A
16861 vm_map_t map,
16862 vm_map_offset_t start_offset,
16863 vm_map_offset_t end_offset,
16864 vm_page_info_flavor_t flavor,
16865 vm_page_info_t info,
16866 mach_msg_type_number_t *count)
b0d623f7 16867{
0a7de745
A
16868 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16869 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16870 vm_page_t m = VM_PAGE_NULL;
16871 kern_return_t retval = KERN_SUCCESS;
16872 int disposition = 0;
16873 int ref_count = 0;
16874 int depth = 0, info_idx = 0;
16875 vm_page_info_basic_t basic_info = 0;
16876 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16877 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16878 boolean_t do_region_footprint;
2d21ac55 16879
b0d623f7
A
16880 switch (flavor) {
16881 case VM_PAGE_INFO_BASIC:
16882 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
16883 /*
16884 * The "vm_page_info_basic_data" structure was not
16885 * properly padded, so allow the size to be off by
16886 * one to maintain backwards binary compatibility...
16887 */
0a7de745 16888 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
6d2010ae 16889 return KERN_INVALID_ARGUMENT;
0a7de745 16890 }
b0d623f7
A
16891 }
16892 break;
16893 default:
16894 return KERN_INVALID_ARGUMENT;
91447636 16895 }
2d21ac55 16896
a39ff7e2 16897 do_region_footprint = task_self_region_footprint();
b0d623f7
A
16898 disposition = 0;
16899 ref_count = 0;
b0d623f7 16900 depth = 0;
5ba3f43e 16901 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
b0d623f7 16902 retval = KERN_SUCCESS;
5ba3f43e
A
16903
16904 offset_in_page = start_offset & PAGE_MASK;
16905 start = vm_map_trunc_page(start_offset, PAGE_MASK);
16906 end = vm_map_round_page(end_offset, PAGE_MASK);
16907
0a7de745
A
16908 if (end < start) {
16909 return KERN_INVALID_ARGUMENT;
16910 }
16911
16912 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
b0d623f7
A
16913
16914 vm_map_lock_read(map);
16915
5ba3f43e
A
16916 for (curr_s_offset = start; curr_s_offset < end;) {
16917 /*
16918 * New lookup needs reset of these variables.
16919 */
16920 curr_object = object = VM_OBJECT_NULL;
16921 offset_in_object = 0;
16922 ref_count = 0;
16923 depth = 0;
16924
a39ff7e2
A
16925 if (do_region_footprint &&
16926 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
16927 ledger_amount_t nonvol_compressed;
16928
16929 /*
16930 * Request for "footprint" info about a page beyond
16931 * the end of address space: this must be for
16932 * the fake region vm_map_region_recurse_64()
16933 * reported to account for non-volatile purgeable
16934 * memory owned by this task.
16935 */
16936 disposition = 0;
16937 nonvol_compressed = 0;
16938 ledger_get_balance(
16939 map->pmap->ledger,
16940 task_ledgers.purgeable_nonvolatile_compressed,
16941 &nonvol_compressed);
16942 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
16943 (unsigned) nonvol_compressed) {
16944 /*
16945 * We haven't reported all the "non-volatile
16946 * compressed" pages yet, so report this fake
16947 * page as "compressed".
16948 */
16949 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16950 } else {
16951 /*
16952 * We've reported all the non-volatile
 16953 * compressed pages but not all the non-volatile
 16954 * pages, so report this fake page as
16955 * "resident dirty".
16956 */
16957 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16958 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16959 disposition |= VM_PAGE_QUERY_PAGE_REF;
16960 }
16961 switch (flavor) {
16962 case VM_PAGE_INFO_BASIC:
16963 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16964 basic_info->disposition = disposition;
16965 basic_info->ref_count = 1;
16966 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16967 basic_info->offset = 0;
16968 basic_info->depth = 0;
16969
16970 info_idx++;
16971 break;
16972 }
16973 curr_s_offset += PAGE_SIZE;
16974 continue;
16975 }
16976
5ba3f43e
A
16977 /*
16978 * First, find the map entry covering "curr_s_offset", going down
16979 * submaps if necessary.
16980 */
16981 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16982 /* no entry -> no object -> no page */
16983
16984 if (curr_s_offset < vm_map_min(map)) {
16985 /*
16986 * Illegal address that falls below map min.
16987 */
16988 curr_e_offset = MIN(end, vm_map_min(map));
5ba3f43e
A
16989 } else if (curr_s_offset >= vm_map_max(map)) {
16990 /*
16991 * Illegal address that falls on/after map max.
16992 */
16993 curr_e_offset = end;
5ba3f43e
A
16994 } else if (map_entry == vm_map_to_entry(map)) {
16995 /*
16996 * Hit a hole.
16997 */
16998 if (map_entry->vme_next == vm_map_to_entry(map)) {
16999 /*
17000 * Empty map.
17001 */
17002 curr_e_offset = MIN(map->max_offset, end);
17003 } else {
17004 /*
0a7de745
A
17005 * Hole at start of the map.
17006 */
5ba3f43e
A
17007 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17008 }
17009 } else {
17010 if (map_entry->vme_next == vm_map_to_entry(map)) {
17011 /*
17012 * Hole at the end of the map.
17013 */
17014 curr_e_offset = MIN(map->max_offset, end);
17015 } else {
17016 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17017 }
17018 }
17019
17020 assert(curr_e_offset >= curr_s_offset);
17021
17022 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17023
17024 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17025
17026 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17027
17028 curr_s_offset = curr_e_offset;
17029
17030 info_idx += num_pages;
17031
17032 continue;
b0d623f7 17033 }
5ba3f43e 17034
b0d623f7 17035 /* compute offset from this map entry's start */
5ba3f43e
A
17036 offset_in_object = curr_s_offset - map_entry->vme_start;
17037
b0d623f7 17038 /* compute offset into this map entry's object (or submap) */
5ba3f43e 17039 offset_in_object += VME_OFFSET(map_entry);
b0d623f7
A
17040
17041 if (map_entry->is_sub_map) {
5ba3f43e
A
17042 vm_map_t sub_map = VM_MAP_NULL;
17043 vm_page_info_t submap_info = 0;
17044 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17045
17046 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17047
17048 submap_s_offset = offset_in_object;
17049 submap_e_offset = submap_s_offset + range_len;
2d21ac55 17050
3e170ce0 17051 sub_map = VME_SUBMAP(map_entry);
5ba3f43e
A
17052
17053 vm_map_reference(sub_map);
b0d623f7 17054 vm_map_unlock_read(map);
2d21ac55 17055
5ba3f43e
A
17056 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17057
17058 retval = vm_map_page_range_info_internal(sub_map,
0a7de745
A
17059 submap_s_offset,
17060 submap_e_offset,
17061 VM_PAGE_INFO_BASIC,
17062 (vm_page_info_t) submap_info,
17063 count);
5ba3f43e
A
17064
17065 assert(retval == KERN_SUCCESS);
17066
17067 vm_map_lock_read(map);
17068 vm_map_deallocate(sub_map);
17069
17070 /* Move the "info" index by the number of pages we inspected.*/
17071 info_idx += range_len >> PAGE_SHIFT;
17072
17073 /* Move our current offset by the size of the range we inspected.*/
17074 curr_s_offset += range_len;
b0d623f7 17075
b0d623f7 17076 continue;
1c79356b 17077 }
b0d623f7 17078
5ba3f43e
A
17079 object = VME_OBJECT(map_entry);
17080 if (object == VM_OBJECT_NULL) {
5ba3f43e
A
17081 /*
17082 * We don't have an object here and, hence,
17083 * no pages to inspect. We'll fill up the
17084 * info structure appropriately.
17085 */
17086
17087 curr_e_offset = MIN(map_entry->vme_end, end);
17088
17089 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17090
17091 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17092
17093 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17094
17095 curr_s_offset = curr_e_offset;
17096
17097 info_idx += num_pages;
17098
17099 continue;
17100 }
17101
a39ff7e2
A
17102 if (do_region_footprint) {
17103 int pmap_disp;
17104
17105 disposition = 0;
17106 pmap_disp = 0;
d9a64523
A
17107 if (map->has_corpse_footprint) {
17108 /*
17109 * Query the page info data we saved
17110 * while forking the corpse.
17111 */
17112 vm_map_corpse_footprint_query_page_info(
17113 map,
17114 curr_s_offset,
17115 &pmap_disp);
17116 } else {
17117 /*
17118 * Query the pmap.
17119 */
17120 pmap_query_page_info(map->pmap,
0a7de745
A
17121 curr_s_offset,
17122 &pmap_disp);
d9a64523
A
17123 }
17124 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17125 /* && not tagged as no-footprint? */
17126 VM_OBJECT_OWNER(object) != NULL &&
17127 VM_OBJECT_OWNER(object)->map == map) {
17128 if ((((curr_s_offset
0a7de745
A
17129 - map_entry->vme_start
17130 + VME_OFFSET(map_entry))
17131 / PAGE_SIZE) <
17132 (object->resident_page_count +
17133 vm_compressor_pager_get_count(object->pager)))) {
d9a64523
A
17134 /*
17135 * Non-volatile purgeable object owned
17136 * by this task: report the first
17137 * "#resident + #compressed" pages as
17138 * "resident" (to show that they
17139 * contribute to the footprint) but not
17140 * "dirty" (to avoid double-counting
17141 * with the fake "non-volatile" region
17142 * we'll report at the end of the
17143 * address space to account for all
17144 * (mapped or not) non-volatile memory
17145 * owned by this task.
17146 */
17147 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17148 }
17149 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
0a7de745
A
17150 object->purgable == VM_PURGABLE_EMPTY) &&
17151 /* && not tagged as no-footprint? */
17152 VM_OBJECT_OWNER(object) != NULL &&
17153 VM_OBJECT_OWNER(object)->map == map) {
d9a64523 17154 if ((((curr_s_offset
0a7de745
A
17155 - map_entry->vme_start
17156 + VME_OFFSET(map_entry))
17157 / PAGE_SIZE) <
17158 object->wired_page_count)) {
d9a64523
A
17159 /*
17160 * Volatile|empty purgeable object owned
17161 * by this task: report the first
17162 * "#wired" pages as "resident" (to
17163 * show that they contribute to the
17164 * footprint) but not "dirty" (to avoid
17165 * double-counting with the fake
17166 * "non-volatile" region we'll report
17167 * at the end of the address space to
17168 * account for all (mapped or not)
17169 * non-volatile memory owned by this
17170 * task.
17171 */
17172 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17173 }
17174 } else if (map_entry->iokit_acct &&
0a7de745
A
17175 object->internal &&
17176 object->purgable == VM_PURGABLE_DENY) {
a39ff7e2
A
17177 /*
17178 * Non-purgeable IOKit memory: phys_footprint
17179 * includes the entire virtual mapping.
17180 */
17181 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17182 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17183 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17184 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
0a7de745 17185 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
a39ff7e2 17186 /* alternate accounting */
d9a64523
A
17187#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17188 if (map->pmap->footprint_was_suspended ||
17189 /*
17190 * XXX corpse does not know if original
17191 * pmap had its footprint suspended...
17192 */
17193 map->has_corpse_footprint) {
17194 /*
17195 * The assertion below can fail if dyld
17196 * suspended footprint accounting
17197 * while doing some adjustments to
17198 * this page; the mapping would say
17199 * "use pmap accounting" but the page
17200 * would be marked "alternate
17201 * accounting".
17202 */
17203 } else
17204#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
0a7de745 17205 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
a39ff7e2
A
17206 pmap_disp = 0;
17207 } else {
17208 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17209 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17210 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17211 disposition |= VM_PAGE_QUERY_PAGE_REF;
17212 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17213 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17214 } else {
17215 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17216 }
17217 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17218 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17219 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17220 }
17221 }
17222 switch (flavor) {
17223 case VM_PAGE_INFO_BASIC:
17224 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17225 basic_info->disposition = disposition;
17226 basic_info->ref_count = 1;
17227 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17228 basic_info->offset = 0;
17229 basic_info->depth = 0;
17230
17231 info_idx++;
17232 break;
17233 }
17234 curr_s_offset += PAGE_SIZE;
17235 continue;
17236 }
17237
5ba3f43e
A
17238 vm_object_reference(object);
17239 /*
17240 * Shared mode -- so we can allow other readers
17241 * to grab the lock too.
17242 */
17243 vm_object_lock_shared(object);
17244
17245 curr_e_offset = MIN(map_entry->vme_end, end);
17246
b0d623f7 17247 vm_map_unlock_read(map);
b0d623f7 17248
5ba3f43e 17249 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
b0d623f7 17250
5ba3f43e 17251 curr_object = object;
2d21ac55 17252
5ba3f43e 17253 for (; curr_s_offset < curr_e_offset;) {
5ba3f43e
A
17254 if (object == curr_object) {
17255 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17256 } else {
17257 ref_count = curr_object->ref_count;
17258 }
17259
17260 curr_offset_in_object = offset_in_object;
17261
17262 for (;;) {
17263 m = vm_page_lookup(curr_object, curr_offset_in_object);
17264
17265 if (m != VM_PAGE_NULL) {
5ba3f43e 17266 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
39037602 17267 break;
5ba3f43e
A
17268 } else {
17269 if (curr_object->internal &&
17270 curr_object->alive &&
17271 !curr_object->terminating &&
17272 curr_object->pager_ready) {
5ba3f43e
A
17273 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17274 == VM_EXTERNAL_STATE_EXISTS) {
17275 /* the pager has that page */
17276 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17277 break;
17278 }
17279 }
0a7de745 17280
5ba3f43e
A
17281 /*
17282 * Go down the VM object shadow chain until we find the page
17283 * we're looking for.
17284 */
17285
17286 if (curr_object->shadow != VM_OBJECT_NULL) {
17287 vm_object_t shadow = VM_OBJECT_NULL;
17288
17289 curr_offset_in_object += curr_object->vo_shadow_offset;
17290 shadow = curr_object->shadow;
17291
17292 vm_object_lock_shared(shadow);
17293 vm_object_unlock(curr_object);
17294
17295 curr_object = shadow;
17296 depth++;
17297 continue;
17298 } else {
5ba3f43e
A
17299 break;
17300 }
2d21ac55
A
17301 }
17302 }
b0d623f7 17303
5ba3f43e
A
 17304 /* The ref_count is not strictly accurate: it measures the number */
 17305 /* of entities holding a ref on the object; they may not be mapping */
 17306 /* the object, or may not be mapping the section holding the */
 17307 /* target page, but it is still a ballpark number and, though an */
 17308 /* overcount, it picks up the copy-on-write cases. */
2d21ac55 17309
5ba3f43e
A
 17310 /* We could also get a picture of page sharing from pmap_attributes, */
 17311 /* but this would undercount, as only faulted-in mappings would */
17312 /* show up. */
2d21ac55 17313
0a7de745 17314 if ((curr_object == object) && curr_object->shadow) {
5ba3f43e 17315 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
0a7de745 17316 }
5ba3f43e 17317
0a7de745 17318 if (!curr_object->internal) {
5ba3f43e 17319 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
0a7de745 17320 }
5ba3f43e
A
17321
17322 if (m != VM_PAGE_NULL) {
d9a64523 17323 if (m->vmp_fictitious) {
5ba3f43e 17324 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
5ba3f43e 17325 } else {
0a7de745 17326 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 17327 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
0a7de745 17328 }
5ba3f43e 17329
0a7de745 17330 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 17331 disposition |= VM_PAGE_QUERY_PAGE_REF;
0a7de745 17332 }
5ba3f43e 17333
0a7de745 17334 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
5ba3f43e 17335 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
0a7de745 17336 }
5ba3f43e 17337
0a7de745 17338 if (m->vmp_cs_validated) {
5ba3f43e 17339 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
0a7de745
A
17340 }
17341 if (m->vmp_cs_tainted) {
5ba3f43e 17342 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
0a7de745
A
17343 }
17344 if (m->vmp_cs_nx) {
5ba3f43e 17345 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
0a7de745 17346 }
5ba3f43e 17347 }
91447636 17348 }
1c79356b 17349
5ba3f43e
A
17350 switch (flavor) {
17351 case VM_PAGE_INFO_BASIC:
17352 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17353 basic_info->disposition = disposition;
17354 basic_info->ref_count = ref_count;
17355 basic_info->object_id = (vm_object_id_t) (uintptr_t)
0a7de745 17356 VM_KERNEL_ADDRPERM(curr_object);
5ba3f43e 17357 basic_info->offset =
0a7de745 17358 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
5ba3f43e
A
17359 basic_info->depth = depth;
17360
17361 info_idx++;
17362 break;
17363 }
1c79356b 17364
5ba3f43e
A
17365 disposition = 0;
17366 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
b0d623f7 17367
5ba3f43e
A
17368 /*
17369 * Move to next offset in the range and in our object.
17370 */
0a7de745 17371 curr_s_offset += PAGE_SIZE;
5ba3f43e
A
17372 offset_in_object += PAGE_SIZE;
17373 curr_offset_in_object = offset_in_object;
2d21ac55 17374
5ba3f43e 17375 if (curr_object != object) {
5ba3f43e 17376 vm_object_unlock(curr_object);
1c79356b 17377
5ba3f43e 17378 curr_object = object;
1c79356b 17379
5ba3f43e
A
17380 vm_object_lock_shared(curr_object);
17381 } else {
5ba3f43e
A
17382 vm_object_lock_yield_shared(curr_object);
17383 }
17384 }
593a1d5f 17385
5ba3f43e
A
17386 vm_object_unlock(curr_object);
17387 vm_object_deallocate(curr_object);
b0d623f7 17388
5ba3f43e 17389 vm_map_lock_read(map);
b0d623f7 17390 }
0c530ab8 17391
5ba3f43e 17392 vm_map_unlock_read(map);
2d21ac55 17393 return retval;
91447636
A
17394}
17395
17396/*
17397 * vm_map_msync
17398 *
 17399 * Synchronizes the specified memory range with its backing store
 17400 * image by either flushing or cleaning the contents to the appropriate
 17401 * memory manager, engaging in a memory object synchronize dialog with
 17402 * the manager. The client doesn't return until the manager issues
 17403 * an m_o_s_completed message. MIG magically converts the user task
 17404 * parameter to the task's address map.
17405 *
17406 * interpretation of sync_flags
17407 * VM_SYNC_INVALIDATE - discard pages, only return precious
17408 * pages to manager.
17409 *
17410 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17411 * - discard pages, write dirty or precious
17412 * pages back to memory manager.
17413 *
17414 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17415 * - write dirty or precious pages back to
17416 * the memory manager.
17417 *
17418 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17419 * is a hole in the region, and we would
17420 * have returned KERN_SUCCESS, return
17421 * KERN_INVALID_ADDRESS instead.
17422 *
17423 * NOTE
 17424 * The memory object attributes have not yet been implemented; this
 17425 * function will have to deal with the invalidate attribute.
17426 *
17427 * RETURNS
17428 * KERN_INVALID_TASK Bad task parameter
17429 * KERN_INVALID_ARGUMENT both sync and async were specified.
17430 * KERN_SUCCESS The usual.
17431 * KERN_INVALID_ADDRESS There was a hole in the region.
17432 */
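/*
 * Illustrative sketch (added, not part of the original source): a
 * hypothetical caller flushing a range synchronously and discarding
 * the cached pages, per the flag combinations described above; "addr"
 * and "len" are assumed to describe a mapped range.
 *
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE);
 *
 * Passing both VM_SYNC_SYNCHRONOUS and VM_SYNC_ASYNCHRONOUS together is
 * rejected with KERN_INVALID_ARGUMENT, as the check at the top of the
 * routine below shows.
 */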
17433
17434kern_return_t
17435vm_map_msync(
0a7de745
A
17436 vm_map_t map,
17437 vm_map_address_t address,
17438 vm_map_size_t size,
17439 vm_sync_t sync_flags)
91447636 17440{
0a7de745
A
17441 vm_map_entry_t entry;
17442 vm_map_size_t amount_left;
17443 vm_object_offset_t offset;
17444 boolean_t do_sync_req;
17445 boolean_t had_hole = FALSE;
17446 vm_map_offset_t pmap_offset;
5ba3f43e 17447
91447636 17448 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
0a7de745
A
17449 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17450 return KERN_INVALID_ARGUMENT;
17451 }
1c79356b
A
17452
17453 /*
91447636 17454 * align address and size on page boundaries
1c79356b 17455 */
39236c6e 17456 size = (vm_map_round_page(address + size,
0a7de745
A
17457 VM_MAP_PAGE_MASK(map)) -
17458 vm_map_trunc_page(address,
17459 VM_MAP_PAGE_MASK(map)));
39236c6e 17460 address = vm_map_trunc_page(address,
0a7de745 17461 VM_MAP_PAGE_MASK(map));
1c79356b 17462
0a7de745
A
17463 if (map == VM_MAP_NULL) {
17464 return KERN_INVALID_TASK;
17465 }
1c79356b 17466
0a7de745
A
17467 if (size == 0) {
17468 return KERN_SUCCESS;
17469 }
1c79356b 17470
91447636 17471 amount_left = size;
1c79356b 17472
91447636 17473 while (amount_left > 0) {
0a7de745
A
17474 vm_object_size_t flush_size;
17475 vm_object_t object;
1c79356b 17476
91447636
A
17477 vm_map_lock(map);
17478 if (!vm_map_lookup_entry(map,
0a7de745
A
17479 address,
17480 &entry)) {
17481 vm_map_size_t skip;
91447636
A
17482
17483 /*
17484 * hole in the address map.
17485 */
17486 had_hole = TRUE;
17487
39037602
A
17488 if (sync_flags & VM_SYNC_KILLPAGES) {
17489 /*
17490 * For VM_SYNC_KILLPAGES, there should be
17491 * no holes in the range, since we couldn't
17492 * prevent someone else from allocating in
17493 * that hole and we wouldn't want to "kill"
17494 * their pages.
17495 */
17496 vm_map_unlock(map);
17497 break;
17498 }
17499
91447636
A
17500 /*
17501 * Check for empty map.
17502 */
17503 if (entry == vm_map_to_entry(map) &&
17504 entry->vme_next == entry) {
17505 vm_map_unlock(map);
17506 break;
17507 }
17508 /*
17509 * Check that we don't wrap and that
17510 * we have at least one real map entry.
17511 */
17512 if ((map->hdr.nentries == 0) ||
17513 (entry->vme_next->vme_start < address)) {
17514 vm_map_unlock(map);
17515 break;
17516 }
17517 /*
17518 * Move up to the next entry if needed
17519 */
17520 skip = (entry->vme_next->vme_start - address);
0a7de745 17521 if (skip >= amount_left) {
91447636 17522 amount_left = 0;
0a7de745 17523 } else {
91447636 17524 amount_left -= skip;
0a7de745 17525 }
91447636
A
17526 address = entry->vme_next->vme_start;
17527 vm_map_unlock(map);
17528 continue;
17529 }
1c79356b 17530
91447636 17531 offset = address - entry->vme_start;
3e170ce0 17532 pmap_offset = address;
1c79356b 17533
91447636
A
17534 /*
17535 * do we have more to flush than is contained in this
17536 * entry?
17537 */
17538 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17539 flush_size = entry->vme_end -
0a7de745 17540 (entry->vme_start + offset);
91447636
A
17541 } else {
17542 flush_size = amount_left;
17543 }
17544 amount_left -= flush_size;
17545 address += flush_size;
1c79356b 17546
91447636 17547 if (entry->is_sub_map == TRUE) {
0a7de745
A
17548 vm_map_t local_map;
17549 vm_map_offset_t local_offset;
1c79356b 17550
3e170ce0
A
17551 local_map = VME_SUBMAP(entry);
17552 local_offset = VME_OFFSET(entry);
91447636
A
17553 vm_map_unlock(map);
17554 if (vm_map_msync(
2d21ac55
A
17555 local_map,
17556 local_offset,
17557 flush_size,
17558 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
17559 had_hole = TRUE;
17560 }
17561 continue;
17562 }
3e170ce0 17563 object = VME_OBJECT(entry);
1c79356b 17564
91447636
A
17565 /*
17566 * We can't sync this object if the object has not been
17567 * created yet
17568 */
17569 if (object == VM_OBJECT_NULL) {
17570 vm_map_unlock(map);
17571 continue;
17572 }
3e170ce0 17573 offset += VME_OFFSET(entry);
1c79356b 17574
0a7de745 17575 vm_object_lock(object);
1c79356b 17576
91447636 17577 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
0a7de745 17578 int kill_pages = 0;
b0d623f7 17579 boolean_t reusable_pages = FALSE;
91447636
A
17580
17581 if (sync_flags & VM_SYNC_KILLPAGES) {
0a7de745
A
17582 if (((object->ref_count == 1) ||
17583 ((object->copy_strategy !=
17584 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17585 (object->copy == VM_OBJECT_NULL))) &&
39037602
A
17586 (object->shadow == VM_OBJECT_NULL)) {
17587 if (object->ref_count != 1) {
17588 vm_page_stats_reusable.free_shared++;
17589 }
0a7de745 17590 kill_pages = 1;
39037602 17591 } else {
0a7de745 17592 kill_pages = -1;
39037602 17593 }
91447636 17594 }
0a7de745
A
17595 if (kill_pages != -1) {
17596 vm_object_deactivate_pages(
3e170ce0
A
17597 object,
17598 offset,
17599 (vm_object_size_t) flush_size,
17600 kill_pages,
17601 reusable_pages,
17602 map->pmap,
17603 pmap_offset);
0a7de745 17604 }
91447636
A
17605 vm_object_unlock(object);
17606 vm_map_unlock(map);
17607 continue;
1c79356b 17608 }
91447636
A
17609 /*
17610 * We can't sync this object if there isn't a pager.
17611 * Don't bother to sync internal objects, since there can't
17612 * be any "permanent" storage for these objects anyway.
17613 */
17614 if ((object->pager == MEMORY_OBJECT_NULL) ||
17615 (object->internal) || (object->private)) {
17616 vm_object_unlock(object);
17617 vm_map_unlock(map);
17618 continue;
17619 }
17620 /*
17621 * keep a reference on the object until syncing is done
17622 */
2d21ac55 17623 vm_object_reference_locked(object);
91447636 17624 vm_object_unlock(object);
1c79356b 17625
91447636 17626 vm_map_unlock(map);
1c79356b 17627
91447636 17628 do_sync_req = vm_object_sync(object,
0a7de745
A
17629 offset,
17630 flush_size,
17631 sync_flags & VM_SYNC_INVALIDATE,
17632 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17633 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17634 sync_flags & VM_SYNC_SYNCHRONOUS);
2d21ac55 17635
5ba3f43e 17636 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
0a7de745 17637 /*
5ba3f43e
A
17638 * clear out the clustering and read-ahead hints
17639 */
0a7de745 17640 vm_object_lock(object);
2d21ac55 17641
5ba3f43e
A
17642 object->pages_created = 0;
17643 object->pages_used = 0;
17644 object->sequential = 0;
17645 object->last_alloc = 0;
2d21ac55 17646
2d21ac55 17647 vm_object_unlock(object);
2d21ac55 17648 }
5ba3f43e
A
17649 vm_object_deallocate(object);
17650 } /* while */
91447636
A
17651
17652 /* for proper msync() behaviour */
0a7de745
A
17653 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17654 return KERN_INVALID_ADDRESS;
17655 }
91447636 17656
0a7de745 17657 return KERN_SUCCESS;
91447636 17658}/* vm_map_msync */
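/*
 * Illustrative sketch (not part of the build): how a caller might drive
 * vm_map_msync() with the flags documented above. The "map", "addr" and
 * "len" values are hypothetical; only the flag semantics come from the
 * comment block at the top of this routine.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// a hole was found somewhere in [addr, addr + len)
 *	}
 */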
1c79356b 17659
1c79356b 17660/*
91447636
A
17661 * Routine: convert_port_entry_to_map
17662 * Purpose:
17663 * Convert from a port specifying an entry or a task
17664 * to a map. Doesn't consume the port ref; produces a map ref,
17665 * which may be null. Unlike convert_port_to_map, the
17666 * port may be backed by either a task or a named entry.
17667 * Conditions:
17668 * Nothing locked.
1c79356b 17669 */
1c79356b 17670
1c79356b 17671
91447636
A
17672vm_map_t
17673convert_port_entry_to_map(
0a7de745 17674 ipc_port_t port)
91447636
A
17675{
17676 vm_map_t map;
0a7de745
A
17677 vm_named_entry_t named_entry;
17678 uint32_t try_failed_count = 0;
1c79356b 17679
0a7de745
A
17680 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17681 while (TRUE) {
91447636 17682 ip_lock(port);
0a7de745
A
17683 if (ip_active(port) && (ip_kotype(port)
17684 == IKOT_NAMED_ENTRY)) {
91447636 17685 named_entry =
0a7de745 17686 (vm_named_entry_t)port->ip_kobject;
b0d623f7 17687 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
0a7de745 17688 ip_unlock(port);
2d21ac55
A
17689
17690 try_failed_count++;
0a7de745
A
17691 mutex_pause(try_failed_count);
17692 continue;
17693 }
91447636 17694 named_entry->ref_count++;
b0d623f7 17695 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
17696 ip_unlock(port);
17697 if ((named_entry->is_sub_map) &&
5ba3f43e 17698 (named_entry->protection
0a7de745 17699 & VM_PROT_WRITE)) {
91447636
A
17700 map = named_entry->backing.map;
17701 } else {
17702 mach_destroy_memory_entry(port);
17703 return VM_MAP_NULL;
17704 }
17705 vm_map_reference_swap(map);
17706 mach_destroy_memory_entry(port);
17707 break;
0a7de745 17708 } else {
91447636 17709 return VM_MAP_NULL;
0a7de745 17710 }
91447636 17711 }
0a7de745 17712 } else {
91447636 17713 map = convert_port_to_map(port);
0a7de745 17714 }
1c79356b 17715
91447636
A
17716 return map;
17717}
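/*
 * Illustrative sketch (assumed caller pattern, not code taken from this
 * file): because this routine produces a map reference, a caller that gets
 * a non-null map back is responsible for dropping that reference.
 *
 *	vm_map_t map;
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map != VM_MAP_NULL) {
 *		// ... use the map ...
 *		vm_map_deallocate(map);
 *	}
 */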
1c79356b 17718
91447636
A
17719/*
17720 * Routine: convert_port_entry_to_object
17721 * Purpose:
17722 * Convert from a port specifying a named entry to an
17723 * object. Doesn't consume the port ref; produces an object ref,
5ba3f43e 17724 * which may be null.
91447636
A
17725 * Conditions:
17726 * Nothing locked.
17727 */
1c79356b 17728
1c79356b 17729
91447636
A
17730vm_object_t
17731convert_port_entry_to_object(
0a7de745 17732 ipc_port_t port)
91447636 17733{
0a7de745
A
17734 vm_object_t object = VM_OBJECT_NULL;
17735 vm_named_entry_t named_entry;
17736 uint32_t try_failed_count = 0;
39236c6e
A
17737
17738 if (IP_VALID(port) &&
17739 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
0a7de745 17740try_again:
39236c6e
A
17741 ip_lock(port);
17742 if (ip_active(port) &&
17743 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17744 named_entry = (vm_named_entry_t)port->ip_kobject;
17745 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 17746 ip_unlock(port);
39236c6e
A
17747 try_failed_count++;
17748 mutex_pause(try_failed_count);
0a7de745 17749 goto try_again;
39236c6e
A
17750 }
17751 named_entry->ref_count++;
17752 lck_mtx_unlock(&(named_entry)->Lock);
17753 ip_unlock(port);
17754 if (!(named_entry->is_sub_map) &&
39236c6e
A
17755 !(named_entry->is_copy) &&
17756 (named_entry->protection & VM_PROT_WRITE)) {
17757 object = named_entry->backing.object;
17758 vm_object_reference(object);
91447636 17759 }
39236c6e 17760 mach_destroy_memory_entry(port);
1c79356b 17761 }
1c79356b 17762 }
91447636
A
17763
17764 return object;
1c79356b 17765}
9bccf70c
A
17766
17767/*
91447636
A
17768 * Export routines to other components for the things we access locally through
17769 * macros.
9bccf70c 17770 */
91447636
A
17771#undef current_map
17772vm_map_t
17773current_map(void)
9bccf70c 17774{
0a7de745 17775 return current_map_fast();
9bccf70c
A
17776}
17777
17778/*
17779 * vm_map_reference:
17780 *
17781 * Most code internal to the osfmk will go through a
17782 * macro defining this. This is always here for the
17783 * use of other kernel components.
17784 */
17785#undef vm_map_reference
17786void
17787vm_map_reference(
0a7de745 17788 vm_map_t map)
9bccf70c 17789{
0a7de745 17790 if (map == VM_MAP_NULL) {
9bccf70c 17791 return;
0a7de745 17792 }
9bccf70c 17793
b0d623f7 17794 lck_mtx_lock(&map->s_lock);
0a7de745 17795#if TASK_SWAPPER
9bccf70c 17796 assert(map->res_count > 0);
d9a64523 17797 assert(map->map_refcnt >= map->res_count);
9bccf70c
A
17798 map->res_count++;
17799#endif
d9a64523 17800 map->map_refcnt++;
b0d623f7 17801 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
17802}
17803
17804/*
17805 * vm_map_deallocate:
17806 *
17807 * Removes a reference from the specified map,
17808 * destroying it if no references remain.
17809 * The map should not be locked.
17810 */
17811void
17812vm_map_deallocate(
0a7de745 17813 vm_map_t map)
9bccf70c 17814{
0a7de745 17815 unsigned int ref;
9bccf70c 17816
0a7de745 17817 if (map == VM_MAP_NULL) {
9bccf70c 17818 return;
0a7de745 17819 }
9bccf70c 17820
b0d623f7 17821 lck_mtx_lock(&map->s_lock);
d9a64523 17822 ref = --map->map_refcnt;
9bccf70c
A
17823 if (ref > 0) {
17824 vm_map_res_deallocate(map);
b0d623f7 17825 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
17826 return;
17827 }
d9a64523 17828 assert(map->map_refcnt == 0);
b0d623f7 17829 lck_mtx_unlock(&map->s_lock);
9bccf70c 17830
0a7de745 17831#if TASK_SWAPPER
9bccf70c
A
17832 /*
17833 * The map residence count isn't decremented here because
5ba3f43e 17834 * the vm_map_delete below will traverse the entire map,
9bccf70c
A
17835 * deleting entries, and the residence counts on objects
17836 * and sharing maps will go away then.
17837 */
17838#endif
17839
d9a64523 17840 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
0c530ab8 17841}
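/*
 * Illustrative sketch: vm_map_reference() and vm_map_deallocate() must be
 * used as a balanced pair. This is a minimal usage pattern, not code from
 * this file.
 *
 *	vm_map_reference(map);          // map_refcnt++
 *	// ... hand "map" to another subsystem and use it ...
 *	vm_map_deallocate(map);         // map_refcnt--, map destroyed at zero
 */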
91447636 17842
91447636 17843
0c530ab8
A
17844void
17845vm_map_disable_NX(vm_map_t map)
17846{
0a7de745
A
17847 if (map == NULL) {
17848 return;
17849 }
17850 if (map->pmap == NULL) {
17851 return;
17852 }
0c530ab8 17853
0a7de745 17854 pmap_disable_NX(map->pmap);
0c530ab8
A
17855}
17856
6d2010ae
A
17857void
17858vm_map_disallow_data_exec(vm_map_t map)
17859{
0a7de745
A
17860 if (map == NULL) {
17861 return;
17862 }
6d2010ae 17863
0a7de745 17864 map->map_disallow_data_exec = TRUE;
6d2010ae
A
17865}
17866
0c530ab8
A
17867/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17868 * more descriptive.
17869 */
17870void
17871vm_map_set_32bit(vm_map_t map)
17872{
5ba3f43e
A
17873#if defined(__arm__) || defined(__arm64__)
17874 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17875#else
0c530ab8 17876 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e 17877#endif
0c530ab8
A
17878}
17879
17880
17881void
17882vm_map_set_64bit(vm_map_t map)
17883{
5ba3f43e
A
17884#if defined(__arm__) || defined(__arm64__)
17885 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
17886#else
0c530ab8 17887 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 17888#endif
0c530ab8
A
17889}
17890
813fb2f6 17891/*
d9a64523 17892 * Expand the maximum size of an existing map to the maximum supported.
813fb2f6
A
17893 */
17894void
17895vm_map_set_jumbo(vm_map_t map)
17896{
5ba3f43e 17897#if defined (__arm64__)
d9a64523
A
17898 vm_map_set_max_addr(map, ~0);
17899#else /* arm64 */
17900 (void) map;
17901#endif
17902}
17903
17904/*
17905 * Expand the maximum size of an existing map.
17906 */
17907void
17908vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
17909{
17910#if defined(__arm64__)
17911 vm_map_offset_t max_supported_offset = 0;
5ba3f43e 17912 vm_map_offset_t old_max_offset = map->max_offset;
d9a64523
A
17913 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
17914
17915 new_max_offset = trunc_page(new_max_offset);
17916
17917 /* The address space cannot be shrunk using this routine. */
17918 if (old_max_offset >= new_max_offset) {
17919 return;
17920 }
17921
17922 if (max_supported_offset < new_max_offset) {
17923 new_max_offset = max_supported_offset;
17924 }
17925
17926 map->max_offset = new_max_offset;
17927
17928 if (map->holes_list->prev->vme_end == old_max_offset) {
5ba3f43e
A
17929 /*
17930 * There is already a hole at the end of the map; simply make it bigger.
17931 */
17932 map->holes_list->prev->vme_end = map->max_offset;
17933 } else {
17934 /*
17935 * There is no hole at the end, so we need to create a new hole
17936 * for the new empty space we're creating.
17937 */
17938 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
17939 new_hole->start = old_max_offset;
17940 new_hole->end = map->max_offset;
17941 new_hole->prev = map->holes_list->prev;
17942 new_hole->next = (struct vm_map_entry *)map->holes_list;
17943 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
17944 map->holes_list->prev = (struct vm_map_entry *)new_hole;
17945 }
d9a64523
A
17946#else
17947 (void)map;
17948 (void)new_max_offset;
5ba3f43e 17949#endif
813fb2f6
A
17950}
17951
0c530ab8 17952vm_map_offset_t
3e170ce0 17953vm_compute_max_offset(boolean_t is64)
0c530ab8 17954{
5ba3f43e 17955#if defined(__arm__) || defined(__arm64__)
0a7de745 17956 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
5ba3f43e 17957#else
0a7de745 17958 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e
A
17959#endif
17960}
17961
17962void
17963vm_map_get_max_aslr_slide_section(
0a7de745
A
17964 vm_map_t map __unused,
17965 int64_t *max_sections,
17966 int64_t *section_size)
5ba3f43e
A
17967{
17968#if defined(__arm64__)
17969 *max_sections = 3;
17970 *section_size = ARM_TT_TWIG_SIZE;
17971#else
17972 *max_sections = 1;
17973 *section_size = 0;
17974#endif
0c530ab8
A
17975}
17976
39236c6e 17977uint64_t
5ba3f43e 17978vm_map_get_max_aslr_slide_pages(vm_map_t map)
39236c6e 17979{
5ba3f43e
A
17980#if defined(__arm64__)
17981 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
17982 * limited embedded address space; this is also meant to minimize pmap
17983 * memory usage on 16KB page systems.
17984 */
0a7de745 17985 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 17986#else
0a7de745 17987 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e
A
17988#endif
17989}
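/*
 * Worked example for the arm64 case above: with 16KB pages,
 * VM_MAP_PAGE_SHIFT(map) == 14, so the routine returns
 * 1 << (24 - 14) = 1024 pages, i.e. 1024 * 16KB = 16MB of slide.
 * With 4KB pages (shift 12) it returns 4096 pages = 16MB as well,
 * so the byte budget is the same either way.
 */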
17990
17991uint64_t
17992vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
17993{
17994#if defined(__arm64__)
17995 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
17996 * of independent entropy on 16KB page systems.
17997 */
0a7de745 17998 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 17999#else
0a7de745 18000 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e 18001#endif
39236c6e
A
18002}
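/*
 * Worked example for the loader slide above: with 16KB pages
 * (VM_MAP_PAGE_SHIFT(map) == 14) the routine returns
 * 1 << (22 - 14) = 256 pages = 4MB, matching the "at least 8 bits of
 * independent entropy" mentioned in the comment (2^8 == 256 possible
 * slide values).
 */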
18003
0a7de745 18004#ifndef __arm__
0c530ab8 18005boolean_t
2d21ac55 18006vm_map_is_64bit(
0a7de745 18007 vm_map_t map)
2d21ac55
A
18008{
18009 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18010}
5ba3f43e 18011#endif
2d21ac55
A
18012
18013boolean_t
316670eb 18014vm_map_has_hard_pagezero(
0a7de745
A
18015 vm_map_t map,
18016 vm_map_offset_t pagezero_size)
0c530ab8
A
18017{
18018 /*
18019 * XXX FBDP
18020 * We should lock the VM map (for read) here but we can get away
18021 * with it for now because there can't really be any race condition:
18022 * the VM map's min_offset is changed only when the VM map is created
18023 * and when the zero page is established (when the binary gets loaded),
18024 * and this routine gets called only when the task terminates and the
18025 * VM map is being torn down, and when a new map is created via
18026 * load_machfile()/execve().
18027 */
0a7de745 18028 return map->min_offset >= pagezero_size;
0c530ab8
A
18029}
18030
316670eb
A
18031/*
18032 * Raise a VM map's maximum offset.
18033 */
18034kern_return_t
18035vm_map_raise_max_offset(
0a7de745
A
18036 vm_map_t map,
18037 vm_map_offset_t new_max_offset)
316670eb 18038{
0a7de745 18039 kern_return_t ret;
316670eb
A
18040
18041 vm_map_lock(map);
18042 ret = KERN_INVALID_ADDRESS;
18043
18044 if (new_max_offset >= map->max_offset) {
5ba3f43e 18045 if (!vm_map_is_64bit(map)) {
316670eb
A
18046 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18047 map->max_offset = new_max_offset;
18048 ret = KERN_SUCCESS;
18049 }
18050 } else {
18051 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18052 map->max_offset = new_max_offset;
18053 ret = KERN_SUCCESS;
18054 }
18055 }
18056 }
18057
18058 vm_map_unlock(map);
18059 return ret;
18060}
18061
18062
0c530ab8
A
18063/*
18064 * Raise a VM map's minimum offset.
18065 * To strictly enforce "page zero" reservation.
18066 */
18067kern_return_t
18068vm_map_raise_min_offset(
0a7de745
A
18069 vm_map_t map,
18070 vm_map_offset_t new_min_offset)
0c530ab8 18071{
0a7de745 18072 vm_map_entry_t first_entry;
0c530ab8 18073
39236c6e 18074 new_min_offset = vm_map_round_page(new_min_offset,
0a7de745 18075 VM_MAP_PAGE_MASK(map));
0c530ab8
A
18076
18077 vm_map_lock(map);
18078
18079 if (new_min_offset < map->min_offset) {
18080 /*
18081 * Can't move min_offset backwards, as that would expose
18082 * a part of the address space that was previously, and for
18083 * possibly good reasons, inaccessible.
18084 */
18085 vm_map_unlock(map);
18086 return KERN_INVALID_ADDRESS;
18087 }
3e170ce0
A
18088 if (new_min_offset >= map->max_offset) {
18089 /* can't go beyond the end of the address space */
18090 vm_map_unlock(map);
18091 return KERN_INVALID_ADDRESS;
18092 }
0c530ab8
A
18093
18094 first_entry = vm_map_first_entry(map);
18095 if (first_entry != vm_map_to_entry(map) &&
18096 first_entry->vme_start < new_min_offset) {
18097 /*
18098 * Some memory was already allocated below the new
18099 * minimum offset. It's too late to change it now...
18100 */
18101 vm_map_unlock(map);
18102 return KERN_NO_SPACE;
18103 }
18104
18105 map->min_offset = new_min_offset;
18106
3e170ce0
A
18107 assert(map->holes_list);
18108 map->holes_list->start = new_min_offset;
18109 assert(new_min_offset < map->holes_list->end);
18110
0c530ab8
A
18111 vm_map_unlock(map);
18112
18113 return KERN_SUCCESS;
18114}
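/*
 * Illustrative sketch (hypothetical values): reserving a hard page-zero of
 * one page by raising the map's minimum offset. Per the checks above, this
 * fails with KERN_INVALID_ADDRESS if the new minimum would move backwards
 * or reach the end of the map, and with KERN_NO_SPACE if something is
 * already mapped below the new minimum.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
 *	if (kr != KERN_SUCCESS) {
 *		// page-zero reservation could not be established
 *	}
 */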
2d21ac55
A
18115
18116/*
18117 * Set the limit on the maximum amount of user wired memory allowed for this map.
18118 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18119 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
18120 * don't have to reach over to the BSD data structures.
18121 */
18122
18123void
0a7de745
A
18124vm_map_set_user_wire_limit(vm_map_t map,
18125 vm_size_t limit)
2d21ac55
A
18126{
18127 map->user_wire_limit = limit;
18128}
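/*
 * Illustrative sketch (the "memlock_bytes" value is hypothetical): the BSD
 * side mirrors its MEMLOCK rlimit into the VM map so that wiring checks can
 * stay entirely on the Mach side, as described above.
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t)memlock_bytes);
 */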
593a1d5f 18129
b0d623f7 18130
0a7de745
A
18131void
18132vm_map_switch_protect(vm_map_t map,
18133 boolean_t val)
593a1d5f
A
18134{
18135 vm_map_lock(map);
0a7de745 18136 map->switch_protect = val;
593a1d5f 18137 vm_map_unlock(map);
b0d623f7 18138}
b7266188 18139
39236c6e
A
18140/*
18141 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18142 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18143 * bump both counters.
18144 */
18145void
18146vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18147{
18148 pmap_t pmap = vm_map_pmap(map);
18149
fe8ab488 18150 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 18151 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
18152}
18153
18154void
18155vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18156{
18157 pmap_t pmap = vm_map_pmap(map);
18158
fe8ab488 18159 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 18160 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
18161}
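/*
 * Illustrative sketch: the two routines above are meant to be used as a
 * balanced pair around the lifetime of an IOKit mapping, so that both the
 * iokit_mapped and phys_footprint ledgers return to their prior values.
 * "bytes" is whatever size was originally mapped (hypothetical here).
 *
 *	vm_map_iokit_mapped_region(map, bytes);     // credit both ledgers
 *	// ... mapping is live ...
 *	vm_map_iokit_unmapped_region(map, bytes);   // debit both ledgers
 */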
18162
b7266188
A
18163/* Add (generate) code signature for memory range */
18164#if CONFIG_DYNAMIC_CODE_SIGNING
0a7de745
A
18165kern_return_t
18166vm_map_sign(vm_map_t map,
18167 vm_map_offset_t start,
18168 vm_map_offset_t end)
b7266188
A
18169{
18170 vm_map_entry_t entry;
18171 vm_page_t m;
18172 vm_object_t object;
5ba3f43e 18173
b7266188
A
18174 /*
18175 * Vet all the input parameters and current type and state of the
18176 * underlying object. Return with an error if anything is amiss.
18177 */
0a7de745
A
18178 if (map == VM_MAP_NULL) {
18179 return KERN_INVALID_ARGUMENT;
18180 }
5ba3f43e 18181
b7266188 18182 vm_map_lock_read(map);
5ba3f43e 18183
b7266188
A
18184 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18185 /*
18186 * Must pass a valid non-submap address.
18187 */
18188 vm_map_unlock_read(map);
0a7de745 18189 return KERN_INVALID_ADDRESS;
b7266188 18190 }
5ba3f43e 18191
0a7de745 18192 if ((entry->vme_start > start) || (entry->vme_end < end)) {
b7266188
A
18193 /*
18194 * Map entry doesn't cover the requested range. Not handling
18195 * this situation currently.
18196 */
18197 vm_map_unlock_read(map);
0a7de745 18198 return KERN_INVALID_ARGUMENT;
b7266188 18199 }
5ba3f43e 18200
3e170ce0 18201 object = VME_OBJECT(entry);
b7266188
A
18202 if (object == VM_OBJECT_NULL) {
18203 /*
18204 * Object must already be present or we can't sign.
18205 */
18206 vm_map_unlock_read(map);
18207 return KERN_INVALID_ARGUMENT;
18208 }
5ba3f43e 18209
b7266188
A
18210 vm_object_lock(object);
18211 vm_map_unlock_read(map);
5ba3f43e 18212
0a7de745 18213 while (start < end) {
b7266188 18214 uint32_t refmod;
5ba3f43e 18215
3e170ce0 18216 m = vm_page_lookup(object,
0a7de745
A
18217 start - entry->vme_start + VME_OFFSET(entry));
18218 if (m == VM_PAGE_NULL) {
5ba3f43e 18219 /* should we try to fault a page here? we can probably
b7266188
A
18220 * demand it exists and is locked for this request */
18221 vm_object_unlock(object);
18222 return KERN_FAILURE;
18223 }
18224 /* deal with special page status */
d9a64523
A
18225 if (m->vmp_busy ||
18226 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
b7266188
A
18227 vm_object_unlock(object);
18228 return KERN_FAILURE;
18229 }
5ba3f43e 18230
b7266188 18231 /* Page is OK... now "validate" it */
5ba3f43e 18232 /* This is the place where we'll call out to create a code
b7266188 18233 * directory, later */
d9a64523 18234 m->vmp_cs_validated = TRUE;
b7266188
A
18235
18236 /* The page is now "clean" for codesigning purposes. That means
5ba3f43e 18237 * we don't consider it as modified (wpmapped) anymore. But
b7266188
A
18238 * we'll disconnect the page so we note any future modification
18239 * attempts. */
d9a64523 18240 m->vmp_wpmapped = FALSE;
39037602 18241 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5ba3f43e
A
18242
18243 /* Pull the dirty status from the pmap, since we cleared the
b7266188 18244 * wpmapped bit */
d9a64523 18245 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
316670eb 18246 SET_PAGE_DIRTY(m, FALSE);
b7266188 18247 }
5ba3f43e 18248
b7266188
A
18249 /* On to the next page */
18250 start += PAGE_SIZE;
18251 }
18252 vm_object_unlock(object);
5ba3f43e 18253
b7266188
A
18254 return KERN_SUCCESS;
18255}
18256#endif
6d2010ae 18257
0a7de745
A
18258kern_return_t
18259vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
5ba3f43e 18260{
0a7de745 18261 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
fe8ab488 18262 vm_map_entry_t next_entry;
0a7de745
A
18263 kern_return_t kr = KERN_SUCCESS;
18264 vm_map_t zap_map;
fe8ab488
A
18265
18266 vm_map_lock(map);
18267
18268 /*
18269 * We use a "zap_map" to avoid having to unlock
18270 * the "map" in vm_map_delete().
18271 */
18272 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
18273 map->min_offset,
18274 map->max_offset,
18275 map->hdr.entries_pageable);
fe8ab488
A
18276
18277 if (zap_map == VM_MAP_NULL) {
18278 return KERN_RESOURCE_SHORTAGE;
18279 }
18280
5ba3f43e 18281 vm_map_set_page_shift(zap_map,
0a7de745 18282 VM_MAP_PAGE_SHIFT(map));
3e170ce0 18283 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
18284
18285 for (entry = vm_map_first_entry(map);
0a7de745
A
18286 entry != vm_map_to_entry(map);
18287 entry = next_entry) {
fe8ab488 18288 next_entry = entry->vme_next;
5ba3f43e 18289
3e170ce0
A
18290 if (VME_OBJECT(entry) &&
18291 !entry->is_sub_map &&
18292 (VME_OBJECT(entry)->internal == TRUE) &&
18293 (VME_OBJECT(entry)->ref_count == 1)) {
3e170ce0
A
18294 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18295 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488 18296
5ba3f43e 18297 (void)vm_map_delete(map,
0a7de745
A
18298 entry->vme_start,
18299 entry->vme_end,
18300 VM_MAP_REMOVE_SAVE_ENTRIES,
18301 zap_map);
fe8ab488
A
18302 }
18303 }
18304
18305 vm_map_unlock(map);
18306
0a7de745 18307 /*
fe8ab488 18308 * Get rid of the "zap_maps" and all the map entries that
0a7de745
A
18309 * they may still contain.
18310 */
18311 if (zap_map != VM_MAP_NULL) {
18312 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18313 zap_map = VM_MAP_NULL;
18314 }
fe8ab488
A
18315
18316 return kr;
18317}
18318
6d2010ae 18319
39037602
A
18320#if DEVELOPMENT || DEBUG
18321
18322int
18323vm_map_disconnect_page_mappings(
18324 vm_map_t map,
18325 boolean_t do_unnest)
6d2010ae
A
18326{
18327 vm_map_entry_t entry;
0a7de745 18328 int page_count = 0;
39037602
A
18329
18330 if (do_unnest == TRUE) {
18331#ifndef NO_NESTED_PMAP
18332 vm_map_lock(map);
18333
18334 for (entry = vm_map_first_entry(map);
0a7de745
A
18335 entry != vm_map_to_entry(map);
18336 entry = entry->vme_next) {
39037602
A
18337 if (entry->is_sub_map && entry->use_pmap) {
18338 /*
18339 * Make sure the range between the start of this entry and
18340 * the end of this entry is no longer nested, so that
18341 * we will only remove mappings from the pmap in use by
18342 * this task.
18343 */
18344 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18345 }
18346 }
18347 vm_map_unlock(map);
18348#endif
18349 }
6d2010ae 18350 vm_map_lock_read(map);
39037602
A
18351
18352 page_count = map->pmap->stats.resident_count;
18353
6d2010ae 18354 for (entry = vm_map_first_entry(map);
0a7de745
A
18355 entry != vm_map_to_entry(map);
18356 entry = entry->vme_next) {
39037602 18357 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
0a7de745 18358 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
18359 continue;
18360 }
0a7de745 18361 if (entry->is_sub_map) {
39037602 18362 assert(!entry->use_pmap);
0a7de745 18363 }
6d2010ae 18364
39037602 18365 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 18366 }
6d2010ae
A
18367 vm_map_unlock_read(map);
18368
39037602 18369 return page_count;
6d2010ae
A
18370}
18371
39037602
A
18372#endif
18373
18374
18375#if CONFIG_FREEZE
18376
18377
d9a64523 18378int c_freezer_swapout_page_count;
3e170ce0
A
18379int c_freezer_compression_count = 0;
18380AbsoluteTime c_freezer_last_yield_ts = 0;
18381
d9a64523
A
18382extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18383extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18384
18385kern_return_t
18386vm_map_freeze(
0a7de745
A
18387 vm_map_t map,
18388 unsigned int *purgeable_count,
18389 unsigned int *wired_count,
18390 unsigned int *clean_count,
18391 unsigned int *dirty_count,
18392 __unused unsigned int dirty_budget,
18393 unsigned int *shared_count,
18394 int *freezer_error_code,
18395 boolean_t eval_only)
5ba3f43e 18396{
0a7de745
A
18397 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18398 kern_return_t kr = KERN_SUCCESS;
18399 boolean_t evaluation_phase = TRUE;
18400 vm_object_t cur_shared_object = NULL;
18401 int cur_shared_obj_ref_cnt = 0;
18402 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
6d2010ae 18403
d9a64523 18404 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
6d2010ae 18405
6d2010ae
A
18406 /*
18407 * We need the exclusive lock here so that we can
18408 * block any page faults or lookups while we are
18409 * in the middle of freezing this vm map.
18410 */
18411 vm_map_lock(map);
18412
39037602
A
18413 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18414
18415 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
18416 if (vm_compressor_low_on_space()) {
18417 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18418 }
18419
18420 if (vm_swap_low_on_space()) {
18421 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18422 }
18423
39037602 18424 kr = KERN_NO_SPACE;
5ba3f43e 18425 goto done;
6d2010ae 18426 }
39037602 18427
d9a64523
A
18428 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18429 /*
18430 * In-memory compressor backing the freezer. No disk.
18431 * So no need to do the evaluation phase.
18432 */
18433 evaluation_phase = FALSE;
18434
18435 if (eval_only == TRUE) {
18436 /*
18437 * We don't support 'eval_only' mode
18438 * in this non-swap config.
18439 */
18440 *freezer_error_code = FREEZER_ERROR_GENERIC;
18441 kr = KERN_INVALID_ARGUMENT;
18442 goto done;
18443 }
18444
18445 c_freezer_compression_count = 0;
18446 clock_get_uptime(&c_freezer_last_yield_ts);
18447 }
18448again:
3e170ce0 18449
6d2010ae 18450 for (entry2 = vm_map_first_entry(map);
0a7de745
A
18451 entry2 != vm_map_to_entry(map);
18452 entry2 = entry2->vme_next) {
18453 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 18454
39037602 18455 if (src_object &&
3e170ce0 18456 !entry2->is_sub_map &&
39037602 18457 !src_object->phys_contiguous) {
39236c6e 18458 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 18459
39037602 18460 if (src_object->internal == TRUE) {
39037602
A
18461 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18462 /*
18463 * Pages belonging to this object could be swapped to disk.
18464 * Make sure it's not a shared object because we could end
18465 * up just bringing it back in again.
d9a64523
A
18466 *
18467 * We try to optimize somewhat by checking for objects that are mapped
18468 * more than once within our own map. But we don't do full searches;
18469 * we just look at the entries following our current entry.
39037602
A
18470 */
18471 if (src_object->ref_count > 1) {
d9a64523
A
18472 if (src_object != cur_shared_object) {
18473 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18474 dirty_shared_count += obj_pages_snapshot;
18475
18476 cur_shared_object = src_object;
18477 cur_shared_obj_ref_cnt = 1;
18478 continue;
18479 } else {
18480 cur_shared_obj_ref_cnt++;
18481 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18482 /*
18483 * Fall through to below and treat this object as private.
18484 * So deduct its pages from our shared total and add it to the
18485 * private total.
18486 */
18487
18488 dirty_shared_count -= obj_pages_snapshot;
18489 dirty_private_count += obj_pages_snapshot;
18490 } else {
18491 continue;
18492 }
18493 }
18494 }
18495
18496
18497 if (src_object->ref_count == 1) {
18498 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18499 }
18500
18501 if (evaluation_phase == TRUE) {
39037602 18502 continue;
3e170ce0 18503 }
3e170ce0 18504 }
d9a64523 18505
39037602 18506 vm_object_compressed_freezer_pageout(src_object);
3e170ce0 18507
d9a64523
A
18508 *wired_count += src_object->wired_page_count;
18509
3e170ce0 18510 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
18511 if (vm_compressor_low_on_space()) {
18512 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18513 }
18514
18515 if (vm_swap_low_on_space()) {
18516 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18517 }
18518
3e170ce0 18519 kr = KERN_NO_SPACE;
5ba3f43e 18520 break;
39236c6e 18521 }
6d2010ae
A
18522 }
18523 }
18524 }
d9a64523
A
18525
18526 if (evaluation_phase) {
d9a64523
A
18527 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18528
18529 if (dirty_shared_count > shared_pages_threshold) {
18530 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18531 kr = KERN_FAILURE;
18532 goto done;
18533 }
18534
18535 if (dirty_shared_count &&
0a7de745 18536 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
d9a64523
A
18537 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18538 kr = KERN_FAILURE;
18539 goto done;
18540 }
18541
18542 evaluation_phase = FALSE;
18543 dirty_shared_count = dirty_private_count = 0;
0a7de745 18544
d9a64523
A
18545 c_freezer_compression_count = 0;
18546 clock_get_uptime(&c_freezer_last_yield_ts);
18547
18548 if (eval_only) {
18549 kr = KERN_SUCCESS;
18550 goto done;
18551 }
18552
18553 goto again;
d9a64523 18554 } else {
d9a64523
A
18555 kr = KERN_SUCCESS;
18556 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18557 }
18558
6d2010ae
A
18559done:
18560 vm_map_unlock(map);
5ba3f43e 18561
d9a64523
A
18562 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18563 vm_object_compressed_freezer_done();
39037602 18564
d9a64523
A
18565 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18566 /*
18567 * reset the counter tracking the # of swapped compressed pages
18568 * because we are now done with this freeze session and task.
18569 */
18570
18571 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18572 c_freezer_swapout_page_count = 0;
18573 }
3e170ce0 18574 }
6d2010ae
A
18575 return kr;
18576}
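/*
 * Worked example for the evaluation phase above (numbers are hypothetical):
 * with 16KB pages and memorystatus_freeze_shared_mb_per_process_max == 8,
 * shared_pages_threshold = (8 * 1024 * 1024) / 16384 = 512 pages, so a task
 * with more than 512 dirty shared pages fails with
 * FREEZER_ERROR_EXCESS_SHARED_MEMORY. Similarly, if
 * memorystatus_freeze_private_shared_pages_ratio == 2, a task with 100
 * dirty shared pages needs at least 200 dirty private pages or the freeze
 * fails with FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO.
 */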
18577
6d2010ae 18578#endif
e2d2fc5c 18579
e2d2fc5c
A
18580/*
18581 * vm_map_entry_should_cow_for_true_share:
18582 *
18583 * Determines if the map entry should be clipped and setup for copy-on-write
18584 * to avoid applying "true_share" to a large VM object when only a subset is
18585 * targeted.
18586 *
18587 * For now, we target only the map entries created for the Objective C
18588 * Garbage Collector, which initially have the following properties:
18589 * - alias == VM_MEMORY_MALLOC
0a7de745
A
18590 * - wired_count == 0
18591 * - !needs_copy
e2d2fc5c 18592 * and a VM object with:
0a7de745
A
18593 * - internal
18594 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18595 * - !true_share
18596 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
18597 *
18598 * Only non-kernel map entries.
e2d2fc5c
A
18599 */
18600boolean_t
18601vm_map_entry_should_cow_for_true_share(
0a7de745 18602 vm_map_entry_t entry)
e2d2fc5c 18603{
0a7de745 18604 vm_object_t object;
e2d2fc5c
A
18605
18606 if (entry->is_sub_map) {
18607 /* entry does not point at a VM object */
18608 return FALSE;
18609 }
18610
18611 if (entry->needs_copy) {
18612 /* already set for copy_on_write: done! */
18613 return FALSE;
18614 }
18615
3e170ce0
A
18616 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18617 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 18618 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
18619 return FALSE;
18620 }
18621
18622 if (entry->wired_count) {
18623 /* wired: can't change the map entry... */
fe8ab488 18624 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
18625 return FALSE;
18626 }
18627
3e170ce0 18628 object = VME_OBJECT(entry);
e2d2fc5c
A
18629
18630 if (object == VM_OBJECT_NULL) {
18631 /* no object yet... */
18632 return FALSE;
18633 }
18634
18635 if (!object->internal) {
18636 /* not an internal object */
18637 return FALSE;
18638 }
18639
18640 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18641 /* not the default copy strategy */
18642 return FALSE;
18643 }
18644
18645 if (object->true_share) {
18646 /* already true_share: too late to avoid it */
18647 return FALSE;
18648 }
18649
3e170ce0 18650 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
18651 object->vo_size != ANON_CHUNK_SIZE) {
18652 /* ... not an object created for the ObjC Garbage Collector */
18653 return FALSE;
18654 }
18655
3e170ce0 18656 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
18657 object->vo_size != 2048 * 4096) {
18658 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
18659 return FALSE;
18660 }
18661
18662 /*
18663 * All the criteria match: we have a large object being targeted for "true_share".
18664 * To limit the adverse side-effects linked with "true_share", tell the caller to
18665 * try and avoid setting up the entire object for "true_share" by clipping the
18666 * targeted range and setting it up for copy-on-write.
18667 */
18668 return TRUE;
18669}
39236c6e 18670
5ba3f43e 18671vm_map_offset_t
39236c6e 18672vm_map_round_page_mask(
0a7de745
A
18673 vm_map_offset_t offset,
18674 vm_map_offset_t mask)
39236c6e
A
18675{
18676 return VM_MAP_ROUND_PAGE(offset, mask);
18677}
18678
5ba3f43e 18679vm_map_offset_t
39236c6e 18680vm_map_trunc_page_mask(
0a7de745
A
18681 vm_map_offset_t offset,
18682 vm_map_offset_t mask)
39236c6e
A
18683{
18684 return VM_MAP_TRUNC_PAGE(offset, mask);
18685}
18686
3e170ce0
A
18687boolean_t
18688vm_map_page_aligned(
0a7de745
A
18689 vm_map_offset_t offset,
18690 vm_map_offset_t mask)
3e170ce0
A
18691{
18692 return ((offset) & mask) == 0;
18693}
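/*
 * Worked example for the three helpers above (values are illustrative):
 * for a 16KB-page map, VM_MAP_PAGE_MASK(map) == 0x3FFF, so
 *
 *	vm_map_trunc_page_mask(0x4001, 0x3FFF) == 0x4000
 *	vm_map_round_page_mask(0x4001, 0x3FFF) == 0x8000
 *	vm_map_page_aligned(0x8000, 0x3FFF)    == TRUE
 */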
18694
39236c6e
A
18695int
18696vm_map_page_shift(
18697 vm_map_t map)
18698{
18699 return VM_MAP_PAGE_SHIFT(map);
18700}
18701
18702int
18703vm_map_page_size(
18704 vm_map_t map)
18705{
18706 return VM_MAP_PAGE_SIZE(map);
18707}
18708
3e170ce0 18709vm_map_offset_t
39236c6e
A
18710vm_map_page_mask(
18711 vm_map_t map)
18712{
18713 return VM_MAP_PAGE_MASK(map);
18714}
18715
18716kern_return_t
18717vm_map_set_page_shift(
0a7de745
A
18718 vm_map_t map,
18719 int pageshift)
39236c6e
A
18720{
18721 if (map->hdr.nentries != 0) {
18722 /* too late to change page size */
18723 return KERN_FAILURE;
18724 }
18725
18726 map->hdr.page_shift = pageshift;
18727
18728 return KERN_SUCCESS;
18729}
18730
18731kern_return_t
18732vm_map_query_volatile(
0a7de745
A
18733 vm_map_t map,
18734 mach_vm_size_t *volatile_virtual_size_p,
18735 mach_vm_size_t *volatile_resident_size_p,
18736 mach_vm_size_t *volatile_compressed_size_p,
18737 mach_vm_size_t *volatile_pmap_size_p,
18738 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e 18739{
0a7de745
A
18740 mach_vm_size_t volatile_virtual_size;
18741 mach_vm_size_t volatile_resident_count;
18742 mach_vm_size_t volatile_compressed_count;
18743 mach_vm_size_t volatile_pmap_count;
18744 mach_vm_size_t volatile_compressed_pmap_count;
18745 mach_vm_size_t resident_count;
18746 vm_map_entry_t entry;
18747 vm_object_t object;
39236c6e
A
18748
18749 /* map should be locked by caller */
18750
18751 volatile_virtual_size = 0;
18752 volatile_resident_count = 0;
3e170ce0 18753 volatile_compressed_count = 0;
39236c6e 18754 volatile_pmap_count = 0;
3e170ce0 18755 volatile_compressed_pmap_count = 0;
39236c6e
A
18756
18757 for (entry = vm_map_first_entry(map);
0a7de745
A
18758 entry != vm_map_to_entry(map);
18759 entry = entry->vme_next) {
18760 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
4bd07ac2 18761
39236c6e
A
18762 if (entry->is_sub_map) {
18763 continue;
18764 }
0a7de745 18765 if (!(entry->protection & VM_PROT_WRITE)) {
39236c6e
A
18766 continue;
18767 }
3e170ce0 18768 object = VME_OBJECT(entry);
39236c6e
A
18769 if (object == VM_OBJECT_NULL) {
18770 continue;
18771 }
3e170ce0
A
18772 if (object->purgable != VM_PURGABLE_VOLATILE &&
18773 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
18774 continue;
18775 }
3e170ce0 18776 if (VME_OFFSET(entry)) {
39236c6e
A
18777 /*
18778 * If the map entry has been split and the object now
18779 * appears several times in the VM map, we don't want
18780 * to count the object's resident_page_count more than
18781 * once. We count it only for the first one, starting
18782 * at offset 0 and ignore the other VM map entries.
18783 */
18784 continue;
18785 }
18786 resident_count = object->resident_page_count;
3e170ce0 18787 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
18788 resident_count = 0;
18789 } else {
3e170ce0 18790 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
18791 }
18792
18793 volatile_virtual_size += entry->vme_end - entry->vme_start;
18794 volatile_resident_count += resident_count;
3e170ce0
A
18795 if (object->pager) {
18796 volatile_compressed_count +=
0a7de745 18797 vm_compressor_pager_get_count(object->pager);
3e170ce0 18798 }
4bd07ac2
A
18799 pmap_compressed_bytes = 0;
18800 pmap_resident_bytes =
0a7de745
A
18801 pmap_query_resident(map->pmap,
18802 entry->vme_start,
18803 entry->vme_end,
18804 &pmap_compressed_bytes);
4bd07ac2
A
18805 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18806 volatile_compressed_pmap_count += (pmap_compressed_bytes
0a7de745 18807 / PAGE_SIZE);
39236c6e
A
18808 }
18809
18810 /* map is still locked on return */
18811
18812 *volatile_virtual_size_p = volatile_virtual_size;
18813 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 18814 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 18815 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 18816 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
18817
18818 return KERN_SUCCESS;
18819}
fe8ab488 18820
3e170ce0
A
18821void
18822vm_map_sizes(vm_map_t map,
0a7de745
A
18823 vm_map_size_t * psize,
18824 vm_map_size_t * pfree,
18825 vm_map_size_t * plargest_free)
3e170ce0 18826{
0a7de745
A
18827 vm_map_entry_t entry;
18828 vm_map_offset_t prev;
18829 vm_map_size_t free, total_free, largest_free;
18830 boolean_t end;
18831
18832 if (!map) {
18833 *psize = *pfree = *plargest_free = 0;
18834 return;
18835 }
18836 total_free = largest_free = 0;
18837
18838 vm_map_lock_read(map);
18839 if (psize) {
18840 *psize = map->max_offset - map->min_offset;
18841 }
18842
18843 prev = map->min_offset;
18844 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
18845 end = (entry == vm_map_to_entry(map));
18846
18847 if (end) {
18848 free = entry->vme_end - prev;
18849 } else {
18850 free = entry->vme_start - prev;
18851 }
18852
18853 total_free += free;
18854 if (free > largest_free) {
18855 largest_free = free;
18856 }
18857
18858 if (end) {
18859 break;
18860 }
18861 prev = entry->vme_end;
18862 }
18863 vm_map_unlock_read(map);
18864 if (pfree) {
18865 *pfree = total_free;
18866 }
18867 if (plargest_free) {
18868 *plargest_free = largest_free;
18869 }
3e170ce0
A
18870}
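/*
 * Illustrative sketch (hypothetical caller): using vm_map_sizes() to decide
 * whether a contiguous reservation of "len" bytes could possibly fit.
 *
 *	vm_map_size_t size, free, largest;
 *
 *	vm_map_sizes(map, &size, &free, &largest);
 *	if (largest < len) {
 *		// no single gap is big enough for a contiguous allocation
 *	}
 */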
18871
fe8ab488
A
18872#if VM_SCAN_FOR_SHADOW_CHAIN
18873int vm_map_shadow_max(vm_map_t map);
0a7de745
A
18874int
18875vm_map_shadow_max(
fe8ab488
A
18876 vm_map_t map)
18877{
0a7de745
A
18878 int shadows, shadows_max;
18879 vm_map_entry_t entry;
18880 vm_object_t object, next_object;
fe8ab488 18881
0a7de745 18882 if (map == NULL) {
fe8ab488 18883 return 0;
0a7de745 18884 }
fe8ab488
A
18885
18886 shadows_max = 0;
18887
18888 vm_map_lock_read(map);
5ba3f43e 18889
fe8ab488 18890 for (entry = vm_map_first_entry(map);
0a7de745
A
18891 entry != vm_map_to_entry(map);
18892 entry = entry->vme_next) {
fe8ab488
A
18893 if (entry->is_sub_map) {
18894 continue;
18895 }
3e170ce0 18896 object = VME_OBJECT(entry);
fe8ab488
A
18897 if (object == NULL) {
18898 continue;
18899 }
18900 vm_object_lock_shared(object);
18901 for (shadows = 0;
0a7de745
A
18902 object->shadow != NULL;
18903 shadows++, object = next_object) {
fe8ab488
A
18904 next_object = object->shadow;
18905 vm_object_lock_shared(next_object);
18906 vm_object_unlock(object);
18907 }
18908 vm_object_unlock(object);
18909 if (shadows > shadows_max) {
18910 shadows_max = shadows;
18911 }
18912 }
18913
18914 vm_map_unlock_read(map);
18915
18916 return shadows_max;
18917}
18918#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602 18919
0a7de745
A
18920void
18921vm_commit_pagezero_status(vm_map_t lmap)
18922{
39037602
A
18923 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
18924}
5ba3f43e
A
18925
18926#if __x86_64__
18927void
18928vm_map_set_high_start(
0a7de745
A
18929 vm_map_t map,
18930 vm_map_offset_t high_start)
5ba3f43e
A
18931{
18932 map->vmmap_high_start = high_start;
18933}
18934#endif /* __x86_64__ */
d9a64523
A
18935
18936#if PMAP_CS
18937kern_return_t
18938vm_map_entry_cs_associate(
0a7de745
A
18939 vm_map_t map,
18940 vm_map_entry_t entry,
18941 vm_map_kernel_flags_t vmk_flags)
d9a64523
A
18942{
18943 vm_object_t cs_object, cs_shadow;
18944 vm_object_offset_t cs_offset;
18945 void *cs_blobs;
18946 struct vnode *cs_vnode;
18947 kern_return_t cs_ret;
18948
18949 if (map->pmap == NULL ||
18950 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
18951 VME_OBJECT(entry) == VM_OBJECT_NULL ||
0a7de745 18952 !(entry->protection & VM_PROT_EXECUTE)) {
d9a64523
A
18953 return KERN_SUCCESS;
18954 }
18955
18956 vm_map_lock_assert_exclusive(map);
18957
18958 if (entry->used_for_jit) {
18959 cs_ret = pmap_cs_associate(map->pmap,
0a7de745
A
18960 PMAP_CS_ASSOCIATE_JIT,
18961 entry->vme_start,
18962 entry->vme_end - entry->vme_start);
d9a64523
A
18963 goto done;
18964 }
18965
18966 if (vmk_flags.vmkf_remap_prot_copy) {
18967 cs_ret = pmap_cs_associate(map->pmap,
0a7de745
A
18968 PMAP_CS_ASSOCIATE_COW,
18969 entry->vme_start,
18970 entry->vme_end - entry->vme_start);
d9a64523
A
18971 goto done;
18972 }
18973
18974 vm_object_lock_shared(VME_OBJECT(entry));
18975 cs_offset = VME_OFFSET(entry);
18976 for (cs_object = VME_OBJECT(entry);
0a7de745
A
18977 (cs_object != VM_OBJECT_NULL &&
18978 !cs_object->code_signed);
18979 cs_object = cs_shadow) {
d9a64523
A
18980 cs_shadow = cs_object->shadow;
18981 if (cs_shadow != VM_OBJECT_NULL) {
18982 cs_offset += cs_object->vo_shadow_offset;
18983 vm_object_lock_shared(cs_shadow);
18984 }
18985 vm_object_unlock(cs_object);
18986 }
18987 if (cs_object == VM_OBJECT_NULL) {
18988 return KERN_SUCCESS;
18989 }
18990
18991 cs_offset += cs_object->paging_offset;
18992 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
18993 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
0a7de745 18994 &cs_blobs);
d9a64523
A
18995 assert(cs_ret == KERN_SUCCESS);
18996 cs_ret = cs_associate_blob_with_mapping(map->pmap,
0a7de745
A
18997 entry->vme_start,
18998 (entry->vme_end -
18999 entry->vme_start),
19000 cs_offset,
19001 cs_blobs);
d9a64523
A
19002 vm_object_unlock(cs_object);
19003 cs_object = VM_OBJECT_NULL;
19004
0a7de745 19005done:
d9a64523
A
19006 if (cs_ret == KERN_SUCCESS) {
19007 DTRACE_VM2(vm_map_entry_cs_associate_success,
0a7de745
A
19008 vm_map_offset_t, entry->vme_start,
19009 vm_map_offset_t, entry->vme_end);
d9a64523
A
19010 if (vm_map_executable_immutable) {
19011 /*
19012 * Prevent this executable
19013 * mapping from being unmapped
19014 * or modified.
19015 */
19016 entry->permanent = TRUE;
19017 }
19018 /*
19019 * pmap says it will validate the
19020 * code-signing validity of pages
19021 * faulted in via this mapping, so
19022 * this map entry should be marked so
19023 * that vm_fault() bypasses code-signing
19024 * validation for faults coming through
19025 * this mapping.
19026 */
19027 entry->pmap_cs_associated = TRUE;
19028 } else if (cs_ret == KERN_NOT_SUPPORTED) {
19029 /*
19030 * pmap won't check the code-signing
19031 * validity of pages faulted in via
19032 * this mapping, so VM should keep
19033 * doing it.
19034 */
19035 DTRACE_VM3(vm_map_entry_cs_associate_off,
0a7de745
A
19036 vm_map_offset_t, entry->vme_start,
19037 vm_map_offset_t, entry->vme_end,
19038 int, cs_ret);
d9a64523
A
19039 } else {
19040 /*
19041 * A real error: do not allow
19042 * execution in this mapping.
19043 */
19044 DTRACE_VM3(vm_map_entry_cs_associate_failure,
0a7de745
A
19045 vm_map_offset_t, entry->vme_start,
19046 vm_map_offset_t, entry->vme_end,
19047 int, cs_ret);
d9a64523
A
19048 entry->protection &= ~VM_PROT_EXECUTE;
19049 entry->max_protection &= ~VM_PROT_EXECUTE;
19050 }
19051
19052 return cs_ret;
19053}
19054#endif /* PMAP_CS */
19055
19056/*
19057 * FORKED CORPSE FOOTPRINT
19058 *
19059 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19060 * empty since it never ran and never got to fault in any pages.
19061 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19062 * a forked corpse would therefore return very little information.
19063 *
19064 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19065 * to vm_map_fork() to collect footprint information from the original VM map
19066 * and its pmap, and store it in the forked corpse's VM map. That information
19067 * is stored in place of the VM map's "hole list" since we'll never need to
19068 * look up holes in the corpse's map.
19069 *
19070 * The corpse's footprint info looks like this:
19071 *
19072 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19073 * as follows:
19074 * +---------------------------------------+
19075 * header-> | cf_size |
19076 * +-------------------+-------------------+
19077 * | cf_last_region | cf_last_zeroes |
19078 * +-------------------+-------------------+
19079 * region1-> | cfr_vaddr |
19080 * +-------------------+-------------------+
19081 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19082 * +---------------------------------------+
19083 * | d4 | d5 | ... |
19084 * +---------------------------------------+
19085 * | ... |
19086 * +-------------------+-------------------+
19087 * | dy | dz | na | na | cfr_vaddr... | <-region2
19088 * +-------------------+-------------------+
19089 * | cfr_vaddr (ctd) | cfr_num_pages |
19090 * +---------------------------------------+
19091 * | d0 | d1 ... |
19092 * +---------------------------------------+
19093 * ...
19094 * +---------------------------------------+
19095 * last region-> | cfr_vaddr |
19096 * +---------------------------------------+
19097 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19098 * +---------------------------------------+
19099 * ...
19100 * +---------------------------------------+
19101 * | dx | dy | dz | na | na | na | na | na |
19102 * +---------------------------------------+
19103 *
19104 * where:
0a7de745
A
19105 * cf_size: total size of the buffer (rounded to page size)
19106 * cf_last_region: offset in the buffer of the last "region" sub-header
d9a64523
A
19107 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19108 * of last region
19109 * cfr_vaddr: virtual address of the start of the covered "region"
19110 * cfr_num_pages: number of pages in the covered "region"
19111 * d*: disposition of the page at that virtual address
19112 * Regions in the buffer are word-aligned.
19113 *
19114 * We estimate the size of the buffer based on the number of memory regions
19115 * and the virtual size of the address space. While copying each memory region
19116 * during vm_map_fork(), we also collect the footprint info for that region
19117 * and store it in the buffer, packing it as much as possible (coalescing
19118 * contiguous memory regions to avoid having too many region headers and
19119 * avoiding long streaks of "zero" page dispositions by splitting footprint
19120 * "regions"), so the number of regions in the footprint buffer might not match
19121 * the number of memory regions in the address space.
19122 *
19123 * We also have to copy the original task's "nonvolatile" ledgers since that's
19124 * part of the footprint and will need to be reported to any tool asking for
19125 * the footprint information of the forked corpse.
19126 */
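/*
 * Rough sketch of the two layouts the diagram above describes, as they are
 * used by the collection code below. The field widths shown here are
 * assumptions for illustration only; the authoritative definitions live in
 * the VM map headers.
 *
 *	struct vm_map_corpse_footprint_header {
 *		vm_size_t cf_size;         // total buffer size
 *		uint32_t  cf_last_region;  // offset of last region sub-header
 *		uint32_t  cf_last_zeroes;  // trailing "zero" dispositions
 *	};
 *	struct vm_map_corpse_footprint_region {
 *		vm_map_offset_t cfr_vaddr;      // start of covered range
 *		uint32_t        cfr_num_pages;  // pages covered
 *		unsigned char   cfr_disp[];     // one disposition byte per page
 *	};
 */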
19127
19128uint64_t vm_map_corpse_footprint_count = 0;
19129uint64_t vm_map_corpse_footprint_size_avg = 0;
19130uint64_t vm_map_corpse_footprint_size_max = 0;
19131uint64_t vm_map_corpse_footprint_full = 0;
19132uint64_t vm_map_corpse_footprint_no_buf = 0;
19133
19134/*
19135 * vm_map_corpse_footprint_new_region:
0a7de745 19136 * closes the current footprint "region" and creates a new one
d9a64523
A
19137 *
19138 * Returns NULL if there's not enough space in the buffer for a new region.
19139 */
19140static struct vm_map_corpse_footprint_region *
19141vm_map_corpse_footprint_new_region(
19142 struct vm_map_corpse_footprint_header *footprint_header)
19143{
0a7de745
A
19144 uintptr_t footprint_edge;
19145 uint32_t new_region_offset;
d9a64523
A
19146 struct vm_map_corpse_footprint_region *footprint_region;
19147 struct vm_map_corpse_footprint_region *new_footprint_region;
19148
19149 footprint_edge = ((uintptr_t)footprint_header +
0a7de745 19150 footprint_header->cf_size);
d9a64523 19151 footprint_region = ((struct vm_map_corpse_footprint_region *)
0a7de745
A
19152 ((char *)footprint_header +
19153 footprint_header->cf_last_region));
19154 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19155 footprint_edge);
d9a64523
A
19156
19157 /* get rid of trailing zeroes in the last region */
19158 assert(footprint_region->cfr_num_pages >=
0a7de745 19159 footprint_header->cf_last_zeroes);
d9a64523 19160 footprint_region->cfr_num_pages -=
0a7de745 19161 footprint_header->cf_last_zeroes;
d9a64523
A
19162 footprint_header->cf_last_zeroes = 0;
19163
19164 /* reuse this region if it's now empty */
19165 if (footprint_region->cfr_num_pages == 0) {
19166 return footprint_region;
19167 }
19168
19169 /* compute offset of new region */
19170 new_region_offset = footprint_header->cf_last_region;
0a7de745 19171 new_region_offset += sizeof(*footprint_region);
d9a64523 19172 new_region_offset += footprint_region->cfr_num_pages;
0a7de745 19173 new_region_offset = roundup(new_region_offset, sizeof(int));
d9a64523
A
19174
19175 /* check if we're going over the edge */
19176 if (((uintptr_t)footprint_header +
0a7de745
A
19177 new_region_offset +
19178 sizeof(*footprint_region)) >=
d9a64523
A
19179 footprint_edge) {
19180 /* over the edge: no new region */
19181 return NULL;
19182 }
19183
19184 /* adjust offset of last region in header */
19185 footprint_header->cf_last_region = new_region_offset;
19186
19187 new_footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
19188 ((char *)footprint_header +
19189 footprint_header->cf_last_region);
d9a64523
A
19190 new_footprint_region->cfr_vaddr = 0;
19191 new_footprint_region->cfr_num_pages = 0;
19192 /* caller needs to initialize new region */
19193
19194 return new_footprint_region;
19195}
19196
19197/*
19198 * vm_map_corpse_footprint_collect:
19199 * collect footprint information for "old_entry" in "old_map" and
19200 * stores it in "new_map"'s vmmap_footprint_info.
19201 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map)
{
	vm_map_offset_t	va;
	int		disp;
	kern_return_t	kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	unsigned char	*next_disp_p;
	uintptr_t	footprint_edge;
	uint32_t	num_pages_tmp;

	va = old_entry->vme_start;

	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t	buf;
		vm_size_t	buf_size;

		buf = 0;
		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3)) /* potential alignment for each region */
		    +
		    ((old_map->size / PAGE_SIZE)
		    *
		    sizeof(char))); /* disposition for each page */
//		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
//		buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if CONFIG_EMBEDDED
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE	(256*1024)	/* 256KB */
#else /* CONFIG_EMBEDDED */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE	(8*1024*1024)	/* 8MB */
#endif /* CONFIG_EMBEDDED */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
		    &buf,
		    (buf_size
		    + PAGE_SIZE), /* trailing guard page */
		    0, /* mask */
		    KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    PAGE_SIZE))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta;
		uint32_t region_offset_delta;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
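		/*
		 * Rough cost comparison for the two options below:
		 * padding the gap costs one "zero" disposition byte per
		 * missing page ("num_pages_delta"), while starting a new
		 * region costs a region header plus the alignment padding
		 * needed to close out the current region
		 * ("region_offset_delta").
		 */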
		/* size of gap in actual page dispositions */
		num_pages_delta = (((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / PAGE_SIZE)
		    - footprint_region->cfr_num_pages);
		/* size of gap as a new footprint region header */
		region_offset_delta =
		    (sizeof(*footprint_region) +
		    roundup((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes),
		    sizeof(int)) -
		    (footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes));
//		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta < num_pages_delta ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
//			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
//			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p =
				    ((unsigned char *) footprint_region +
				    sizeof(*footprint_region) +
				    footprint_region->cfr_num_pages);
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (unsigned char) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += PAGE_SIZE) {
		vm_object_t object;

		object = VME_OBJECT(old_entry);
		if (!old_entry->is_sub_map &&
		    old_entry->iokit_acct &&
		    object != VM_OBJECT_NULL &&
		    object->internal &&
		    object->purgable == VM_PURGABLE_DENY) {
			/*
			 * Non-purgeable IOKit memory: phys_footprint
			 * includes the entire virtual mapping.
			 * Since the forked corpse's VM map entry will not
			 * have "iokit_acct", pretend that this page's
			 * disposition is "present & internal", so that it
			 * shows up in the forked corpse's footprint.
			 */
			disp = (PMAP_QUERY_PAGE_PRESENT |
			    PMAP_QUERY_PAGE_INTERNAL);
		} else {
			disp = 0;
			pmap_query_page_info(old_map->pmap,
			    va,
			    &disp);
		}

//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += PAGE_SIZE;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = ((unsigned char *)footprint_region +
		    sizeof(*footprint_region) +
		    footprint_region->cfr_num_pages);
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this disposition */
		*next_disp_p = (unsigned char) disp;
		footprint_region->cfr_num_pages++;

		if (disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + PAGE_SIZE;
	}

	return KERN_SUCCESS;

over_the_edge:
//	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}

/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 */
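/*
 * In rough terms: "actual_size" below ends up as the offset of the last
 * region plus that region's header and its (now trimmed) page
 * dispositions.  Once rounded up to a page boundary, the page that
 * immediately follows the trimmed buffer is re-protected as the new
 * guard page (VM_PROT_NONE) and everything beyond it, including the
 * original trailing guard page, is deallocated.
 */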
void
vm_map_corpse_footprint_collect_done(
	vm_map_t	new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t buf_size, actual_size;
	kern_return_t kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    footprint_region->cfr_num_pages);

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE), /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE, /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}

/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "va"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of pmap_query_page_info() for a forked corpse.
 */
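/*
 * Minimal usage sketch (hypothetical caller, not taken from this file):
 * walk a range of the corpse's address space one page at a time and
 * check whether each page was resident and internal when the corpse
 * was forked.  "start", "end" and "pages_present_internal" are
 * placeholders supplied by the caller.
 *
 *	int disp;
 *	for (va = start; va < end; va += PAGE_SIZE) {
 *		if (vm_map_corpse_footprint_query_page_info(map, va,
 *		    &disp) != KERN_SUCCESS) {
 *			continue;
 *		}
 *		if ((disp & PMAP_QUERY_PAGE_PRESENT) &&
 *		    (disp & PMAP_QUERY_PAGE_INTERNAL)) {
 *			pages_present_internal++;
 *		}
 *	}
 */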
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t	map,
	vm_map_offset_t	va,
	int		*disp)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int disp_idx;
	kern_return_t kr;

	if (!map->has_corpse_footprint) {
		*disp = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    PAGE_SIZE));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += footprint_region->cfr_num_pages;
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    PAGE_SIZE));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
	*disp = (int) (footprint_region->cfr_disposition[disp_idx]);

	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disp,
	    kern_return_t, kr);

	return kr;
}


static void
vm_map_corpse_footprint_destroy(
	vm_map_t	map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header *footprint_header;
		vm_size_t buf_size;
		kern_return_t kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
		    (vm_offset_t) map->vmmap_corpse_footprint,
		    ((vm_size_t) buf_size
		    + PAGE_SIZE)); /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}

/*
 * vm_map_copy_footprint_ledgers:
 *	copies any ledger that's relevant to the memory footprint of "old_task"
 *	into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t	old_task,
	task_t	new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}

/*
 * vm_map_copy_ledger:
 *	copy a single ledger from "old_task" to "new_task"
 */
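/*
 * Illustrative example (made-up numbers): if "old_task"'s balance for a
 * given ledger is 120MB and "new_task"'s is 200MB, the code below debits
 * 80MB from "new_task"; if "old_task" is at 200MB and "new_task" at
 * 120MB, it credits 80MB instead, so the corpse's ledger ends up
 * matching the original task's balance.
 */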
void
vm_map_copy_ledger(
	task_t	old_task,
	task_t	new_task,
	int	ledger_entry)
{
	ledger_amount_t	old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint) {
		return;
	}

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
	    ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
	    ledger_entry,
	    &old_balance);
	ledger_get_balance(new_task->ledger,
	    ledger_entry,
	    &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
		    ledger_entry,
		    delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,
		    ledger_entry,
		    delta);
	}
}