[apple/xnu.git] / osfmk / vm / vm_map.c (xnu-4570.41.2)
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5ba3f43e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
5ba3f43e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
5ba3f43e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
5ba3f43e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
5ba3f43e 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
5ba3f43e 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
5ba3f43e 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5ba3f43e 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
5ba3f43e 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
5ba3f43e 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
91447636 87#include <kern/kalloc.h>
1c79356b 88#include <kern/zalloc.h>
91447636
A
89
90#include <vm/cpm.h>
39236c6e 91#include <vm/vm_compressor_pager.h>
1c79356b
A
92#include <vm/vm_init.h>
93#include <vm/vm_fault.h>
94#include <vm/vm_map.h>
95#include <vm/vm_object.h>
96#include <vm/vm_page.h>
b0d623f7 97#include <vm/vm_pageout.h>
1c79356b
A
98#include <vm/vm_kern.h>
99#include <ipc/ipc_port.h>
100#include <kern/sched_prim.h>
101#include <kern/misc_protos.h>
1c79356b
A
102#include <kern/xpr.h>
103
91447636
A
104#include <mach/vm_map_server.h>
105#include <mach/mach_host_server.h>
2d21ac55 106#include <vm/vm_protos.h>
b0d623f7 107#include <vm/vm_purgeable_internal.h>
91447636 108
91447636 109#include <vm/vm_protos.h>
2d21ac55 110#include <vm/vm_shared_region.h>
6d2010ae 111#include <vm/vm_map_store.h>
91447636 112
5ba3f43e
A
113#include <san/kasan.h>
114
115#if __arm64__
116extern int fourk_binary_compatibility_unsafe;
117extern int fourk_binary_compatibility_allow_wx;
118#endif /* __arm64__ */
39037602
A
119extern int proc_selfpid(void);
120extern char *proc_name_address(void *p);
121
122#if VM_MAP_DEBUG_APPLE_PROTECT
123int vm_map_debug_apple_protect = 0;
124#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
125#if VM_MAP_DEBUG_FOURK
126int vm_map_debug_fourk = 0;
127#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 128
5ba3f43e
A
129int vm_map_executable_immutable = 0;
130int vm_map_executable_immutable_no_log = 0;
131
316670eb 132extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
133/* Internal prototypes
134 */
2d21ac55 135
91447636
A
136static void vm_map_simplify_range(
137 vm_map_t map,
138 vm_map_offset_t start,
139 vm_map_offset_t end); /* forward */
140
141static boolean_t vm_map_range_check(
2d21ac55
A
142 vm_map_t map,
143 vm_map_offset_t start,
144 vm_map_offset_t end,
145 vm_map_entry_t *entry);
1c79356b 146
91447636 147static vm_map_entry_t _vm_map_entry_create(
7ddcb079 148 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 149
91447636 150static void _vm_map_entry_dispose(
2d21ac55
A
151 struct vm_map_header *map_header,
152 vm_map_entry_t entry);
1c79356b 153
91447636 154static void vm_map_pmap_enter(
2d21ac55
A
155 vm_map_t map,
156 vm_map_offset_t addr,
157 vm_map_offset_t end_addr,
158 vm_object_t object,
159 vm_object_offset_t offset,
160 vm_prot_t protection);
1c79356b 161
91447636 162static void _vm_map_clip_end(
2d21ac55
A
163 struct vm_map_header *map_header,
164 vm_map_entry_t entry,
165 vm_map_offset_t end);
91447636
A
166
167static void _vm_map_clip_start(
2d21ac55
A
168 struct vm_map_header *map_header,
169 vm_map_entry_t entry,
170 vm_map_offset_t start);
1c79356b 171
91447636 172static void vm_map_entry_delete(
2d21ac55
A
173 vm_map_t map,
174 vm_map_entry_t entry);
1c79356b 175
91447636 176static kern_return_t vm_map_delete(
2d21ac55
A
177 vm_map_t map,
178 vm_map_offset_t start,
179 vm_map_offset_t end,
180 int flags,
181 vm_map_t zap_map);
1c79356b 182
91447636 183static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
184 vm_map_t dst_map,
185 vm_map_entry_t entry,
186 vm_map_copy_t copy,
39236c6e
A
187 vm_map_address_t start,
188 boolean_t discard_on_success);
1c79356b 189
91447636 190static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
191 vm_map_t dst_map,
192 vm_map_entry_t tmp_entry,
193 vm_map_copy_t copy,
194 vm_map_offset_t start,
195 pmap_t pmap);
1c79356b 196
91447636 197static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
198 vm_map_t src_map,
199 vm_map_address_t src_addr,
200 vm_map_size_t len,
201 boolean_t src_destroy,
202 vm_map_copy_t *copy_result); /* OUT */
1c79356b 203
91447636 204static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
205 vm_map_t map,
206 vm_map_address_t *addr, /* IN/OUT */
207 vm_map_copy_t copy,
39037602 208 vm_map_size_t copy_size,
39236c6e
A
209 boolean_t overwrite,
210 boolean_t consume_on_success);
1c79356b 211
91447636 212static void vm_map_fork_share(
2d21ac55
A
213 vm_map_t old_map,
214 vm_map_entry_t old_entry,
215 vm_map_t new_map);
1c79356b 216
91447636 217static boolean_t vm_map_fork_copy(
2d21ac55
A
218 vm_map_t old_map,
219 vm_map_entry_t *old_entry_p,
39037602
A
220 vm_map_t new_map,
221 int vm_map_copyin_flags);
1c79356b 222
0c530ab8 223void vm_map_region_top_walk(
2d21ac55
A
224 vm_map_entry_t entry,
225 vm_region_top_info_t top);
1c79356b 226
0c530ab8 227void vm_map_region_walk(
2d21ac55
A
228 vm_map_t map,
229 vm_map_offset_t va,
230 vm_map_entry_t entry,
231 vm_object_offset_t offset,
232 vm_object_size_t range,
233 vm_region_extended_info_t extended,
39236c6e
A
234 boolean_t look_for_pages,
235 mach_msg_type_number_t count);
91447636
A
236
237static kern_return_t vm_map_wire_nested(
2d21ac55
A
238 vm_map_t map,
239 vm_map_offset_t start,
240 vm_map_offset_t end,
3e170ce0 241 vm_prot_t caller_prot,
5ba3f43e 242 vm_tag_t tag,
2d21ac55 243 boolean_t user_wire,
5ba3f43e 244 pmap_t map_pmap,
fe8ab488
A
245 vm_map_offset_t pmap_addr,
246 ppnum_t *physpage_p);
91447636
A
247
248static kern_return_t vm_map_unwire_nested(
2d21ac55
A
249 vm_map_t map,
250 vm_map_offset_t start,
251 vm_map_offset_t end,
252 boolean_t user_wire,
253 pmap_t map_pmap,
254 vm_map_offset_t pmap_addr);
91447636
A
255
256static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
257 vm_map_t dst_map,
258 vm_map_offset_t dst_addr,
259 vm_map_size_t dst_size);
91447636
A
260
261static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
262 vm_map_t dst_map,
263 vm_map_offset_t dst_addr,
264 vm_map_copy_t copy,
265 boolean_t interruptible,
6d2010ae
A
266 pmap_t pmap,
267 boolean_t discard_on_success);
91447636
A
268
269static kern_return_t vm_map_remap_extract(
2d21ac55
A
270 vm_map_t map,
271 vm_map_offset_t addr,
272 vm_map_size_t size,
273 boolean_t copy,
274 struct vm_map_header *map_header,
275 vm_prot_t *cur_protection,
276 vm_prot_t *max_protection,
277 vm_inherit_t inheritance,
39037602 278 boolean_t pageable,
5c9f4661
A
279 boolean_t same_map,
280 vm_map_kernel_flags_t vmk_flags);
91447636
A
281
282static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
283 vm_map_t map,
284 vm_map_address_t *address,
285 vm_map_size_t size,
286 vm_map_offset_t mask,
060df5ea 287 int flags,
5ba3f43e
A
288 vm_map_kernel_flags_t vmk_flags,
289 vm_tag_t tag,
2d21ac55 290 vm_map_entry_t *map_entry);
91447636
A
291
292static void vm_map_region_look_for_page(
2d21ac55
A
293 vm_map_t map,
294 vm_map_offset_t va,
295 vm_object_t object,
296 vm_object_offset_t offset,
297 int max_refcnt,
298 int depth,
39236c6e
A
299 vm_region_extended_info_t extended,
300 mach_msg_type_number_t count);
91447636
A
301
302static int vm_map_region_count_obj_refs(
2d21ac55
A
303 vm_map_entry_t entry,
304 vm_object_t object);
1c79356b 305
b0d623f7
A
306
307static kern_return_t vm_map_willneed(
308 vm_map_t map,
309 vm_map_offset_t start,
310 vm_map_offset_t end);
311
312static kern_return_t vm_map_reuse_pages(
313 vm_map_t map,
314 vm_map_offset_t start,
315 vm_map_offset_t end);
316
317static kern_return_t vm_map_reusable_pages(
318 vm_map_t map,
319 vm_map_offset_t start,
320 vm_map_offset_t end);
321
322static kern_return_t vm_map_can_reuse(
323 vm_map_t map,
324 vm_map_offset_t start,
325 vm_map_offset_t end);
326
3e170ce0
A
327#if MACH_ASSERT
328static kern_return_t vm_map_pageout(
329 vm_map_t map,
330 vm_map_offset_t start,
331 vm_map_offset_t end);
332#endif /* MACH_ASSERT */
6d2010ae 333
5ba3f43e
A
334pid_t find_largest_process_vm_map_entries(void);
335
1c79356b
A
336/*
337 * Macros to copy a vm_map_entry. We must be careful to correctly
338 * manage the wired page count. vm_map_entry_copy() creates a new
339 * map entry to the same memory - the wired count in the new entry
340 * must be set to zero. vm_map_entry_copy_full() creates a new
341 * entry that is identical to the old entry. This preserves the
342 * wire count; it's used for map splitting and zone changing in
343 * vm_map_copyout.
344 */
316670eb 345
7ddcb079
A
346#define vm_map_entry_copy(NEW,OLD) \
347MACRO_BEGIN \
348boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55
A
349 *(NEW) = *(OLD); \
350 (NEW)->is_shared = FALSE; \
351 (NEW)->needs_wakeup = FALSE; \
352 (NEW)->in_transition = FALSE; \
353 (NEW)->wired_count = 0; \
354 (NEW)->user_wired_count = 0; \
b0d623f7 355 (NEW)->permanent = FALSE; \
316670eb 356 (NEW)->used_for_jit = FALSE; \
fe8ab488 357 (NEW)->from_reserved_zone = _vmec_reserved; \
5c9f4661
A
358 if ((NEW)->iokit_acct) { \
359 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
360 (NEW)->iokit_acct = FALSE; \
361 (NEW)->use_pmap = TRUE; \
362 } \
3e170ce0
A
363 (NEW)->vme_resilient_codesign = FALSE; \
364 (NEW)->vme_resilient_media = FALSE; \
39037602 365 (NEW)->vme_atomic = FALSE; \
1c79356b
A
366MACRO_END
367
7ddcb079
A
368#define vm_map_entry_copy_full(NEW,OLD) \
369MACRO_BEGIN \
370boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
371(*(NEW) = *(OLD)); \
372(NEW)->from_reserved_zone = _vmecf_reserved; \
373MACRO_END
1c79356b 374
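#if 0	/* Illustrative sketch only -- not part of the original file. */
/*
 * A minimal example of what the two macros above guarantee, assuming a
 * hypothetical caller that already holds two map entries: a plain
 * vm_map_entry_copy() starts the new mapping unwired (and clears the
 * shared/in_transition/JIT state), while vm_map_entry_copy_full() is a
 * bit-for-bit duplicate that only preserves the entry's home zone.
 */
static void
example_entry_copy_semantics(vm_map_entry_t new_entry, vm_map_entry_t old_entry)
{
	vm_map_entry_copy(new_entry, old_entry);
	assert(new_entry->wired_count == 0);		/* wiring never carries over */
	assert(new_entry->user_wired_count == 0);

	vm_map_entry_copy_full(new_entry, old_entry);
	assert(new_entry->wired_count == old_entry->wired_count);
}
#endif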
2d21ac55
A
375/*
376 * Decide if we want to allow processes to execute from their data or stack areas.
5ba3f43e 377 * override_nx() returns true if we do. Data/stack execution can be enabled independently
2d21ac55
A
378 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
379 * or allow_stack_exec to enable data execution for that type of data area for that particular
380 * ABI (or both by or'ing the flags together). These are initialized in the architecture
5ba3f43e
A
381 * specific pmap files since the default behavior varies according to architecture. The
382 * main reason it varies is because of the need to provide binary compatibility with old
383 * applications that were written before these restrictions came into being. In the old
384 * days, an app could execute anything it could read, but this has slowly been tightened
2d21ac55
A
385 * up over time. The default behavior is:
386 *
387 * 32-bit PPC apps may execute from both stack and data areas
 388 * 32-bit Intel apps may execute from data areas but not stack
389 * 64-bit PPC/Intel apps may not execute from either data or stack
390 *
391 * An application on any architecture may override these defaults by explicitly
5ba3f43e 392 * adding PROT_EXEC permission to the page in question with the mprotect(2)
2d21ac55
A
393 * system call. This code here just determines what happens when an app tries to
394 * execute from a page that lacks execute permission.
395 *
396 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
397 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
398 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
399 * execution from data areas for a particular binary even if the arch normally permits it. As
400 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
401 * to support some complicated use cases, notably browsers with out-of-process plugins that
402 * are not all NX-safe.
2d21ac55
A
403 */
404
405extern int allow_data_exec, allow_stack_exec;
406
407int
408override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
409{
410 int current_abi;
411
3e170ce0
A
412 if (map->pmap == kernel_pmap) return FALSE;
413
2d21ac55
A
414 /*
415 * Determine if the app is running in 32 or 64 bit mode.
416 */
417
418 if (vm_map_is_64bit(map))
419 current_abi = VM_ABI_64;
420 else
421 current_abi = VM_ABI_32;
422
423 /*
5ba3f43e 424 * Determine if we should allow the execution based on whether it's a
2d21ac55
A
425 * stack or data area and the current architecture.
426 */
427
428 if (user_tag == VM_MEMORY_STACK)
429 return allow_stack_exec & current_abi;
430
6d2010ae 431 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
432}
433
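#if 0	/* Illustrative sketch only -- not part of the original file. */
/*
 * Hypothetical caller of override_nx(): how a fault path might consult
 * the policy when an instruction fetch hits a page without execute
 * permission.  The surrounding fault-handling details are assumptions,
 * not the kernel's actual code path.
 */
static kern_return_t
example_execute_fault_policy(
	vm_map_t	map,
	uint32_t	user_tag,	/* e.g. VM_MEMORY_STACK */
	vm_prot_t	fault_type,
	vm_prot_t	cur_protection)
{
	if ((fault_type & VM_PROT_EXECUTE) &&
	    !(cur_protection & VM_PROT_EXECUTE)) {
		if (override_nx(map, user_tag)) {
			/* data/stack execution allowed for this ABI */
			return KERN_SUCCESS;
		}
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif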
434
1c79356b
A
435/*
436 * Virtual memory maps provide for the mapping, protection,
437 * and sharing of virtual memory objects. In addition,
438 * this module provides for an efficient virtual copy of
439 * memory from one map to another.
440 *
441 * Synchronization is required prior to most operations.
442 *
443 * Maps consist of an ordered doubly-linked list of simple
444 * entries; a single hint is used to speed up lookups.
445 *
446 * Sharing maps have been deleted from this version of Mach.
447 * All shared objects are now mapped directly into the respective
448 * maps. This requires a change in the copy on write strategy;
449 * the asymmetric (delayed) strategy is used for shared temporary
450 * objects instead of the symmetric (shadow) strategy. All maps
451 * are now "top level" maps (either task map, kernel map or submap
5ba3f43e 452 * of the kernel map).
1c79356b
A
453 *
 454 * Since portions of maps are specified by start/end addresses,
455 * which may not align with existing map entries, all
456 * routines merely "clip" entries to these start/end values.
457 * [That is, an entry is split into two, bordering at a
458 * start or end value.] Note that these clippings may not
459 * always be necessary (as the two resulting entries are then
460 * not changed); however, the clipping is done for convenience.
461 * No attempt is currently made to "glue back together" two
462 * abutting entries.
463 *
464 * The symmetric (shadow) copy strategy implements virtual copy
465 * by copying VM object references from one map to
466 * another, and then marking both regions as copy-on-write.
467 * It is important to note that only one writeable reference
468 * to a VM object region exists in any map when this strategy
469 * is used -- this means that shadow object creation can be
 470 * delayed until a write operation occurs. The asymmetric (delayed)
471 * strategy allows multiple maps to have writeable references to
472 * the same region of a vm object, and hence cannot delay creating
473 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
474 * Copying of permanent objects is completely different; see
475 * vm_object_copy_strategically() in vm_object.c.
476 */
477
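#if 0	/* Illustrative sketch only -- not part of the original file. */
/*
 * The clipping convention described above, in terms of the public
 * vm_map_clip_start()/vm_map_clip_end() helpers: carving [start, end)
 * out of whatever entry currently contains "start".  Assumes the map
 * is locked for writing and the range lies within a single entry.
 */
static void
example_clip_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);	/* split below "start" */
		vm_map_clip_end(map, entry, end);	/* split above "end" */
		/* "entry" now covers exactly [start, end) */
	}
}
#endif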
5ba3f43e
A
478static zone_t vm_map_zone; /* zone for vm_map structures */
479zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
480static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
481static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
482zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
483
484
485/*
486 * Placeholder object for submap operations. This object is dropped
487 * into the range by a call to vm_map_find, and removed when
488 * vm_map_submap creates the submap.
489 */
490
491vm_object_t vm_submap_object;
492
91447636 493static void *map_data;
b0d623f7 494static vm_size_t map_data_size;
91447636 495static void *kentry_data;
b0d623f7 496static vm_size_t kentry_data_size;
3e170ce0
A
497static void *map_holes_data;
498static vm_size_t map_holes_data_size;
1c79356b 499
5ba3f43e
A
500#if CONFIG_EMBEDDED
501#define NO_COALESCE_LIMIT 0
502#else
b0d623f7 503#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
5ba3f43e 504#endif
1c79356b 505
55e303ae 506/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 507unsigned int not_in_kdp = 1;
55e303ae 508
6d2010ae
A
509unsigned int vm_map_set_cache_attr_count = 0;
510
511kern_return_t
512vm_map_set_cache_attr(
513 vm_map_t map,
514 vm_map_offset_t va)
515{
516 vm_map_entry_t map_entry;
517 vm_object_t object;
518 kern_return_t kr = KERN_SUCCESS;
519
520 vm_map_lock_read(map);
521
522 if (!vm_map_lookup_entry(map, va, &map_entry) ||
523 map_entry->is_sub_map) {
524 /*
525 * that memory is not properly mapped
526 */
527 kr = KERN_INVALID_ARGUMENT;
528 goto done;
529 }
3e170ce0 530 object = VME_OBJECT(map_entry);
6d2010ae
A
531
532 if (object == VM_OBJECT_NULL) {
533 /*
534 * there should be a VM object here at this point
535 */
536 kr = KERN_INVALID_ARGUMENT;
537 goto done;
538 }
539 vm_object_lock(object);
540 object->set_cache_attr = TRUE;
541 vm_object_unlock(object);
542
543 vm_map_set_cache_attr_count++;
544done:
545 vm_map_unlock_read(map);
546
547 return kr;
548}
549
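#if 0	/* Illustrative sketch only -- assumed caller, not from this file. */
/*
 * Typical use of vm_map_set_cache_attr(): flag the VM object backing a
 * mapped address so the cache attribute can be honoured when its pages
 * are later entered into the pmap.  The wrapper name is hypothetical.
 */
static kern_return_t
example_mark_cache_attr(vm_map_t map, vm_map_offset_t va)
{
	kern_return_t kr;

	kr = vm_map_set_cache_attr(map, va);
	if (kr != KERN_SUCCESS) {
		/* "va" was unmapped, backed by a submap, or had no VM object */
	}
	return kr;
}
#endif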
550
593a1d5f
A
551#if CONFIG_CODE_DECRYPTION
552/*
553 * vm_map_apple_protected:
5ba3f43e 554 * This remaps the requested part of the object with an object backed by
593a1d5f
A
555 * the decrypting pager.
556 * crypt_info contains entry points and session data for the crypt module.
557 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
558 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
559 */
0c530ab8
A
560kern_return_t
561vm_map_apple_protected(
3e170ce0
A
562 vm_map_t map,
563 vm_map_offset_t start,
564 vm_map_offset_t end,
565 vm_object_offset_t crypto_backing_offset,
593a1d5f 566 struct pager_crypt_info *crypt_info)
0c530ab8
A
567{
568 boolean_t map_locked;
569 kern_return_t kr;
570 vm_map_entry_t map_entry;
3e170ce0
A
571 struct vm_map_entry tmp_entry;
572 memory_object_t unprotected_mem_obj;
0c530ab8
A
573 vm_object_t protected_object;
574 vm_map_offset_t map_addr;
3e170ce0
A
575 vm_map_offset_t start_aligned, end_aligned;
576 vm_object_offset_t crypto_start, crypto_end;
577 int vm_flags;
5ba3f43e
A
578 vm_map_kernel_flags_t vmk_flags;
579
580 vm_flags = 0;
581 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
0c530ab8 582
3e170ce0
A
583 map_locked = FALSE;
584 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 585
3e170ce0
A
586 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
587 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
588 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
589 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 590
5ba3f43e
A
591#if __arm64__
592 /*
593 * "start" and "end" might be 4K-aligned but not 16K-aligned,
594 * so we might have to loop and establish up to 3 mappings:
595 *
596 * + the first 16K-page, which might overlap with the previous
597 * 4K-aligned mapping,
598 * + the center,
599 * + the last 16K-page, which might overlap with the next
600 * 4K-aligned mapping.
601 * Each of these mapping might be backed by a vnode pager (if
602 * properly page-aligned) or a "fourk_pager", itself backed by a
603 * vnode pager (if 4K-aligned but not page-aligned).
604 */
605#else /* __arm64__ */
3e170ce0
A
606 assert(start_aligned == start);
607 assert(end_aligned == end);
5ba3f43e 608#endif /* __arm64__ */
b0d623f7 609
3e170ce0
A
610 map_addr = start_aligned;
611 for (map_addr = start_aligned;
612 map_addr < end;
613 map_addr = tmp_entry.vme_end) {
614 vm_map_lock(map);
615 map_locked = TRUE;
b0d623f7 616
3e170ce0
A
617 /* lookup the protected VM object */
618 if (!vm_map_lookup_entry(map,
619 map_addr,
620 &map_entry) ||
621 map_entry->is_sub_map ||
622 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
623 !(map_entry->protection & VM_PROT_EXECUTE)) {
624 /* that memory is not properly mapped */
625 kr = KERN_INVALID_ARGUMENT;
626 goto done;
627 }
b0d623f7 628
3e170ce0
A
629 /* get the protected object to be decrypted */
630 protected_object = VME_OBJECT(map_entry);
631 if (protected_object == VM_OBJECT_NULL) {
632 /* there should be a VM object here at this point */
633 kr = KERN_INVALID_ARGUMENT;
634 goto done;
635 }
636 /* ensure protected object stays alive while map is unlocked */
637 vm_object_reference(protected_object);
638
639 /* limit the map entry to the area we want to cover */
640 vm_map_clip_start(map, map_entry, start_aligned);
641 vm_map_clip_end(map, map_entry, end_aligned);
642
643 tmp_entry = *map_entry;
644 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
645 vm_map_unlock(map);
646 map_locked = FALSE;
647
648 /*
649 * This map entry might be only partially encrypted
650 * (if not fully "page-aligned").
651 */
652 crypto_start = 0;
653 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
654 if (tmp_entry.vme_start < start) {
655 if (tmp_entry.vme_start != start_aligned) {
656 kr = KERN_INVALID_ADDRESS;
657 }
658 crypto_start += (start - tmp_entry.vme_start);
659 }
660 if (tmp_entry.vme_end > end) {
661 if (tmp_entry.vme_end != end_aligned) {
662 kr = KERN_INVALID_ADDRESS;
663 }
664 crypto_end -= (tmp_entry.vme_end - end);
665 }
666
667 /*
668 * This "extra backing offset" is needed to get the decryption
669 * routine to use the right key. It adjusts for the possibly
670 * relative offset of an interposed "4K" pager...
671 */
672 if (crypto_backing_offset == (vm_object_offset_t) -1) {
673 crypto_backing_offset = VME_OFFSET(&tmp_entry);
674 }
0c530ab8 675
3e170ce0
A
676 /*
677 * Lookup (and create if necessary) the protected memory object
678 * matching that VM object.
679 * If successful, this also grabs a reference on the memory object,
680 * to guarantee that it doesn't go away before we get a chance to map
681 * it.
682 */
683 unprotected_mem_obj = apple_protect_pager_setup(
684 protected_object,
685 VME_OFFSET(&tmp_entry),
686 crypto_backing_offset,
687 crypt_info,
688 crypto_start,
689 crypto_end);
690
691 /* release extra ref on protected object */
692 vm_object_deallocate(protected_object);
693
694 if (unprotected_mem_obj == NULL) {
695 kr = KERN_FAILURE;
696 goto done;
697 }
698
699 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
5ba3f43e
A
700 /* can overwrite an immutable mapping */
701 vmk_flags.vmkf_overwrite_immutable = TRUE;
702#if __arm64__
703 if (tmp_entry.used_for_jit &&
704 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
705 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
706 fourk_binary_compatibility_unsafe &&
707 fourk_binary_compatibility_allow_wx) {
708 printf("** FOURK_COMPAT [%d]: "
709 "allowing write+execute at 0x%llx\n",
710 proc_selfpid(), tmp_entry.vme_start);
711 vmk_flags.vmkf_map_jit = TRUE;
712 }
713#endif /* __arm64__ */
3e170ce0
A
714
715 /* map this memory object in place of the current one */
716 map_addr = tmp_entry.vme_start;
717 kr = vm_map_enter_mem_object(map,
718 &map_addr,
719 (tmp_entry.vme_end -
720 tmp_entry.vme_start),
721 (mach_vm_offset_t) 0,
722 vm_flags,
5ba3f43e
A
723 vmk_flags,
724 VM_KERN_MEMORY_NONE,
3e170ce0
A
725 (ipc_port_t) unprotected_mem_obj,
726 0,
727 TRUE,
728 tmp_entry.protection,
729 tmp_entry.max_protection,
730 tmp_entry.inheritance);
5ba3f43e
A
731 assertf(kr == KERN_SUCCESS,
732 "kr = 0x%x\n", kr);
733 assertf(map_addr == tmp_entry.vme_start,
734 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
735 (uint64_t)map_addr,
736 (uint64_t) tmp_entry.vme_start,
737 &tmp_entry);
3e170ce0
A
738
739#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
740 if (vm_map_debug_apple_protect) {
741 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
742 " backing:[object:%p,offset:0x%llx,"
743 "crypto_backing_offset:0x%llx,"
744 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
745 map,
746 (uint64_t) map_addr,
747 (uint64_t) (map_addr + (tmp_entry.vme_end -
748 tmp_entry.vme_start)),
749 unprotected_mem_obj,
750 protected_object,
751 VME_OFFSET(&tmp_entry),
752 crypto_backing_offset,
753 crypto_start,
754 crypto_end);
755 }
3e170ce0 756#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
5ba3f43e 757
3e170ce0
A
758 /*
759 * Release the reference obtained by
760 * apple_protect_pager_setup().
761 * The mapping (if it succeeded) is now holding a reference on
762 * the memory object.
763 */
764 memory_object_deallocate(unprotected_mem_obj);
765 unprotected_mem_obj = MEMORY_OBJECT_NULL;
766
767 /* continue with next map entry */
768 crypto_backing_offset += (tmp_entry.vme_end -
769 tmp_entry.vme_start);
770 crypto_backing_offset -= crypto_start;
771 }
772 kr = KERN_SUCCESS;
0c530ab8
A
773
774done:
775 if (map_locked) {
3e170ce0 776 vm_map_unlock(map);
0c530ab8
A
777 }
778 return kr;
779}
593a1d5f 780#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
781
782
b0d623f7
A
783lck_grp_t vm_map_lck_grp;
784lck_grp_attr_t vm_map_lck_grp_attr;
785lck_attr_t vm_map_lck_attr;
fe8ab488 786lck_attr_t vm_map_lck_rw_attr;
b0d623f7
A
787
788
593a1d5f
A
789/*
790 * vm_map_init:
791 *
792 * Initialize the vm_map module. Must be called before
793 * any other vm_map routines.
794 *
795 * Map and entry structures are allocated from zones -- we must
796 * initialize those zones.
797 *
798 * There are three zones of interest:
799 *
800 * vm_map_zone: used to allocate maps.
801 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 802 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
803 *
804 * The kernel allocates map entries from a special zone that is initially
805 * "crammed" with memory. It would be difficult (perhaps impossible) for
 806 * the kernel to allocate more memory to an entry zone when it became
807 * empty since the very act of allocating memory implies the creation
808 * of a new entry.
809 */
1c79356b
A
810void
811vm_map_init(
812 void)
813{
7ddcb079 814 vm_size_t entry_zone_alloc_size;
316670eb
A
815 const char *mez_name = "VM map entries";
816
2d21ac55
A
817 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
818 PAGE_SIZE, "maps");
0b4c1975 819 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
820#if defined(__LP64__)
821 entry_zone_alloc_size = PAGE_SIZE * 5;
822#else
823 entry_zone_alloc_size = PAGE_SIZE * 6;
824#endif
91447636 825 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 826 1024*1024, entry_zone_alloc_size,
316670eb 827 mez_name);
0b4c1975 828 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 829 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 830 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 831
7ddcb079
A
832 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
833 kentry_data_size * 64, kentry_data_size,
834 "Reserved VM map entries");
835 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
5ba3f43e
A
836 /* Don't quarantine because we always need elements available */
837 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
1c79356b 838
91447636 839 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 840 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 841 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 842
3e170ce0
A
843 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
844 16*1024, PAGE_SIZE, "VM map holes");
845 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
846
1c79356b
A
847 /*
848 * Cram the map and kentry zones with initial data.
7ddcb079 849 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
850 */
851 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 852 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
5ba3f43e 853 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
7ddcb079
A
854
855 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
856 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
857 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
858 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
859 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 860 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 861 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 862
3e170ce0
A
863 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
864 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
865 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
866 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
867 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
868 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
869
5ba3f43e 870 /*
3e170ce0 871 * Add the stolen memory to zones, adjust zone size and stolen counts.
5ba3f43e 872 * zcram only up to the maximum number of pages for each zone chunk.
3e170ce0 873 */
7ddcb079 874 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
5ba3f43e
A
875
876 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
877 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
878 zcram(vm_map_entry_reserved_zone,
879 (vm_offset_t)kentry_data + off,
880 MIN(kentry_data_size - off, stride));
881 }
882 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
883 zcram(vm_map_holes_zone,
884 (vm_offset_t)map_holes_data + off,
885 MIN(map_holes_data_size - off, stride));
886 }
887
3e170ce0
A
888 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
889
b0d623f7
A
890 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
891 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
5ba3f43e 892 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 893
fe8ab488
A
894 lck_attr_setdefault(&vm_map_lck_rw_attr);
895 lck_attr_cleardebug(&vm_map_lck_rw_attr);
896
39037602
A
897#if VM_MAP_DEBUG_APPLE_PROTECT
898 PE_parse_boot_argn("vm_map_debug_apple_protect",
899 &vm_map_debug_apple_protect,
900 sizeof(vm_map_debug_apple_protect));
901#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
902#if VM_MAP_DEBUG_APPLE_FOURK
903 PE_parse_boot_argn("vm_map_debug_fourk",
904 &vm_map_debug_fourk,
905 sizeof(vm_map_debug_fourk));
906#endif /* VM_MAP_DEBUG_FOURK */
5ba3f43e
A
907 PE_parse_boot_argn("vm_map_executable_immutable",
908 &vm_map_executable_immutable,
909 sizeof(vm_map_executable_immutable));
910 PE_parse_boot_argn("vm_map_executable_immutable_no_log",
911 &vm_map_executable_immutable_no_log,
912 sizeof(vm_map_executable_immutable_no_log));
1c79356b
A
913}
914
915void
916vm_map_steal_memory(
917 void)
918{
7ddcb079
A
919 uint32_t kentry_initial_pages;
920
b0d623f7 921 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
922 map_data = pmap_steal_memory(map_data_size);
923
1c79356b 924 /*
7ddcb079
A
925 * kentry_initial_pages corresponds to the number of kernel map entries
926 * required during bootstrap until the asynchronous replenishment
927 * scheme is activated and/or entries are available from the general
928 * map entry pool.
1c79356b 929 */
7ddcb079
A
930#if defined(__LP64__)
931 kentry_initial_pages = 10;
932#else
933 kentry_initial_pages = 6;
1c79356b 934#endif
316670eb
A
935
936#if CONFIG_GZALLOC
937 /* If using the guard allocator, reserve more memory for the kernel
938 * reserved map entry pool.
939 */
940 if (gzalloc_enabled())
941 kentry_initial_pages *= 1024;
942#endif
943
7ddcb079 944 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 945 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
946
947 map_holes_data_size = kentry_data_size;
948 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
949}
950
5ba3f43e
A
951boolean_t vm_map_supports_hole_optimization = FALSE;
952
3e170ce0
A
953void
954vm_kernel_reserved_entry_init(void) {
7ddcb079 955 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
5ba3f43e
A
956
957 /*
958 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
959 */
3e170ce0 960 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
5ba3f43e 961 vm_map_supports_hole_optimization = TRUE;
3e170ce0
A
962}
963
964void
965vm_map_disable_hole_optimization(vm_map_t map)
966{
967 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
968
969 if (map->holelistenabled) {
970
971 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
972
973 while (hole_entry != NULL) {
974
975 next_hole_entry = hole_entry->vme_next;
976
977 hole_entry->vme_next = NULL;
978 hole_entry->vme_prev = NULL;
979 zfree(vm_map_holes_zone, hole_entry);
980
981 if (next_hole_entry == head_entry) {
982 hole_entry = NULL;
983 } else {
984 hole_entry = next_hole_entry;
985 }
986 }
987
988 map->holes_list = NULL;
989 map->holelistenabled = FALSE;
990
991 map->first_free = vm_map_first_entry(map);
992 SAVE_HINT_HOLE_WRITE(map, NULL);
993 }
994}
995
996boolean_t
997vm_kernel_map_is_kernel(vm_map_t map) {
998 return (map->pmap == kernel_pmap);
7ddcb079
A
999}
1000
1c79356b
A
1001/*
1002 * vm_map_create:
1003 *
1004 * Creates and returns a new empty VM map with
1005 * the given physical map structure, and having
1006 * the given lower and upper address bounds.
1007 */
3e170ce0 1008
1c79356b
A
1009vm_map_t
1010vm_map_create(
91447636
A
1011 pmap_t pmap,
1012 vm_map_offset_t min,
1013 vm_map_offset_t max,
1014 boolean_t pageable)
1c79356b 1015{
2d21ac55 1016 static int color_seed = 0;
39037602 1017 vm_map_t result;
3e170ce0 1018 struct vm_map_links *hole_entry = NULL;
1c79356b
A
1019
1020 result = (vm_map_t) zalloc(vm_map_zone);
1021 if (result == VM_MAP_NULL)
1022 panic("vm_map_create");
1023
1024 vm_map_first_entry(result) = vm_map_to_entry(result);
1025 vm_map_last_entry(result) = vm_map_to_entry(result);
1026 result->hdr.nentries = 0;
1027 result->hdr.entries_pageable = pageable;
1028
6d2010ae 1029 vm_map_store_init( &(result->hdr) );
5ba3f43e 1030
39236c6e
A
1031 result->hdr.page_shift = PAGE_SHIFT;
1032
1c79356b 1033 result->size = 0;
2d21ac55
A
1034 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1035 result->user_wire_size = 0;
5ba3f43e
A
1036#if __x86_64__
1037 result->vmmap_high_start = 0;
1038#endif /* __x86_64__ */
1c79356b
A
1039 result->ref_count = 1;
1040#if TASK_SWAPPER
1041 result->res_count = 1;
1042 result->sw_state = MAP_SW_IN;
1043#endif /* TASK_SWAPPER */
1044 result->pmap = pmap;
1045 result->min_offset = min;
1046 result->max_offset = max;
1047 result->wiring_required = FALSE;
1048 result->no_zero_fill = FALSE;
316670eb 1049 result->mapped_in_other_pmaps = FALSE;
1c79356b 1050 result->wait_for_space = FALSE;
b0d623f7 1051 result->switch_protect = FALSE;
6d2010ae
A
1052 result->disable_vmentry_reuse = FALSE;
1053 result->map_disallow_data_exec = FALSE;
39037602 1054 result->is_nested_map = FALSE;
6d2010ae 1055 result->highest_entry_end = 0;
1c79356b
A
1056 result->first_free = vm_map_to_entry(result);
1057 result->hint = vm_map_to_entry(result);
2d21ac55 1058 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae 1059 result->jit_entry_exists = FALSE;
3e170ce0 1060
5ba3f43e 1061 if (vm_map_supports_hole_optimization) {
3e170ce0
A
1062 hole_entry = zalloc(vm_map_holes_zone);
1063
1064 hole_entry->start = min;
5ba3f43e
A
1065#if defined(__arm__) || defined(__arm64__)
1066 hole_entry->end = result->max_offset;
1067#else
3e170ce0 1068 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 1069#endif
3e170ce0
A
1070 result->holes_list = result->hole_hint = hole_entry;
1071 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
1072 result->holelistenabled = TRUE;
1073
1074 } else {
1075
1076 result->holelistenabled = FALSE;
1077 }
1078
1c79356b 1079 vm_map_lock_init(result);
b0d623f7 1080 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
5ba3f43e 1081
1c79356b
A
1082 return(result);
1083}
1084
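#if 0	/* Illustrative sketch only -- assumed caller, not from this file. */
/*
 * Minimal example of building a pageable user map over a fresh pmap,
 * in the spirit of the task-creation path.  The pmap_create() arguments
 * and the address bounds used here are assumptions for illustration.
 */
static vm_map_t
example_make_user_map(ledger_t ledger, boolean_t is_64bit)
{
	pmap_t pmap;

	pmap = pmap_create(ledger, 0, is_64bit);
	if (pmap == PMAP_NULL)
		return VM_MAP_NULL;

	/* TRUE: entries in user maps are pageable */
	return vm_map_create(pmap,
			     (vm_map_offset_t) MACH_VM_MIN_ADDRESS,
			     (vm_map_offset_t) MACH_VM_MAX_ADDRESS,
			     TRUE);
}
#endif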
1085/*
1086 * vm_map_entry_create: [ internal use only ]
1087 *
1088 * Allocates a VM map entry for insertion in the
1089 * given map (or map copy). No fields are filled.
1090 */
7ddcb079 1091#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 1092
7ddcb079
A
1093#define vm_map_copy_entry_create(copy, map_locked) \
1094 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1095unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1096
91447636 1097static vm_map_entry_t
1c79356b 1098_vm_map_entry_create(
7ddcb079 1099 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1100{
7ddcb079
A
1101 zone_t zone;
1102 vm_map_entry_t entry;
1c79356b 1103
7ddcb079
A
1104 zone = vm_map_entry_zone;
1105
1106 assert(map_header->entries_pageable ? !map_locked : TRUE);
1107
1108 if (map_header->entries_pageable) {
1109 entry = (vm_map_entry_t) zalloc(zone);
1110 }
1111 else {
1112 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1113
1114 if (entry == VM_MAP_ENTRY_NULL) {
1115 zone = vm_map_entry_reserved_zone;
1116 entry = (vm_map_entry_t) zalloc(zone);
1117 OSAddAtomic(1, &reserved_zalloc_count);
1118 } else
1119 OSAddAtomic(1, &nonreserved_zalloc_count);
1120 }
1c79356b 1121
1c79356b
A
1122 if (entry == VM_MAP_ENTRY_NULL)
1123 panic("vm_map_entry_create");
7ddcb079
A
1124 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1125
6d2010ae 1126 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 1127#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1128 entry->vme_creation_maphdr = map_header;
39037602
A
1129 backtrace(&entry->vme_creation_bt[0],
1130 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 1131#endif
1c79356b
A
1132 return(entry);
1133}
1134
1135/*
1136 * vm_map_entry_dispose: [ internal use only ]
1137 *
1138 * Inverse of vm_map_entry_create.
2d21ac55
A
1139 *
1140 * write map lock held so no need to
1141 * do anything special to insure correctness
1142 * of the stores
1c79356b
A
1143 */
1144#define vm_map_entry_dispose(map, entry) \
6d2010ae 1145 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b
A
1146
1147#define vm_map_copy_entry_dispose(map, entry) \
1148 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1149
91447636 1150static void
1c79356b 1151_vm_map_entry_dispose(
39037602
A
1152 struct vm_map_header *map_header,
1153 vm_map_entry_t entry)
1c79356b 1154{
39037602 1155 zone_t zone;
1c79356b 1156
7ddcb079 1157 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 1158 zone = vm_map_entry_zone;
1c79356b 1159 else
7ddcb079
A
1160 zone = vm_map_entry_reserved_zone;
1161
1162 if (!map_header->entries_pageable) {
1163 if (zone == vm_map_entry_zone)
1164 OSAddAtomic(-1, &nonreserved_zalloc_count);
1165 else
1166 OSAddAtomic(-1, &reserved_zalloc_count);
1167 }
1c79356b 1168
91447636 1169 zfree(zone, entry);
1c79356b
A
1170}
1171
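#if 0	/* Illustrative sketch only -- assumed caller, not from this file. */
/*
 * The usual allocate/release pairing through the vm_map_entry_create()
 * and vm_map_entry_dispose() macros defined above.  With a pageable
 * header (e.g. a user map) the entry always comes from vm_map_entry_zone;
 * otherwise a failed non-blocking allocation falls back to the reserved zone.
 */
static void
example_entry_alloc_release(vm_map_t map)
{
	vm_map_entry_t entry;

	entry = vm_map_entry_create(map, FALSE);	/* map not locked */
	/* ... fill in and link the entry, or abandon it ... */
	vm_map_entry_dispose(map, entry);
}
#endif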
91447636 1172#if MACH_ASSERT
91447636 1173static boolean_t first_free_check = FALSE;
6d2010ae 1174boolean_t
1c79356b
A
1175first_free_is_valid(
1176 vm_map_t map)
1177{
1c79356b
A
1178 if (!first_free_check)
1179 return TRUE;
5ba3f43e
A
1180
1181 return( first_free_is_valid_store( map ));
1c79356b 1182}
91447636 1183#endif /* MACH_ASSERT */
1c79356b 1184
1c79356b
A
1185
1186#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1187 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b
A
1188
1189#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1190 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1191
1c79356b 1192#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1193/*
1194 * vm_map_res_reference:
1195 *
1196 * Adds another valid residence count to the given map.
1197 *
1198 * Map is locked so this function can be called from
1199 * vm_map_swapin.
1200 *
1201 */
39037602 1202void vm_map_res_reference(vm_map_t map)
1c79356b
A
1203{
1204 /* assert map is locked */
1205 assert(map->res_count >= 0);
1206 assert(map->ref_count >= map->res_count);
1207 if (map->res_count == 0) {
b0d623f7 1208 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1209 vm_map_lock(map);
1210 vm_map_swapin(map);
b0d623f7 1211 lck_mtx_lock(&map->s_lock);
1c79356b
A
1212 ++map->res_count;
1213 vm_map_unlock(map);
1214 } else
1215 ++map->res_count;
1216}
1217
1218/*
1219 * vm_map_reference_swap:
1220 *
1221 * Adds valid reference and residence counts to the given map.
1222 *
1223 * The map may not be in memory (i.e. zero residence count).
1224 *
1225 */
39037602 1226void vm_map_reference_swap(vm_map_t map)
1c79356b
A
1227{
1228 assert(map != VM_MAP_NULL);
b0d623f7 1229 lck_mtx_lock(&map->s_lock);
1c79356b
A
1230 assert(map->res_count >= 0);
1231 assert(map->ref_count >= map->res_count);
1232 map->ref_count++;
1233 vm_map_res_reference(map);
b0d623f7 1234 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1235}
1236
1237/*
1238 * vm_map_res_deallocate:
1239 *
1240 * Decrement residence count on a map; possibly causing swapout.
1241 *
1242 * The map must be in memory (i.e. non-zero residence count).
1243 *
1244 * The map is locked, so this function is callable from vm_map_deallocate.
1245 *
1246 */
39037602 1247void vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1248{
1249 assert(map->res_count > 0);
1250 if (--map->res_count == 0) {
b0d623f7 1251 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1252 vm_map_lock(map);
1253 vm_map_swapout(map);
1254 vm_map_unlock(map);
b0d623f7 1255 lck_mtx_lock(&map->s_lock);
1c79356b
A
1256 }
1257 assert(map->ref_count >= map->res_count);
1258}
1259#endif /* MACH_ASSERT && TASK_SWAPPER */
1260
1c79356b
A
1261/*
1262 * vm_map_destroy:
1263 *
1264 * Actually destroy a map.
1265 */
1266void
1267vm_map_destroy(
2d21ac55
A
1268 vm_map_t map,
1269 int flags)
5ba3f43e 1270{
1c79356b 1271 vm_map_lock(map);
2d21ac55 1272
3e170ce0
A
1273 /* final cleanup: no need to unnest shared region */
1274 flags |= VM_MAP_REMOVE_NO_UNNESTING;
5ba3f43e
A
1275 /* final cleanup: ok to remove immutable mappings */
1276 flags |= VM_MAP_REMOVE_IMMUTABLE;
3e170ce0 1277
2d21ac55
A
1278 /* clean up regular map entries */
1279 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1280 flags, VM_MAP_NULL);
1281 /* clean up leftover special mappings (commpage, etc...) */
5ba3f43e 1282#if !defined(__arm__) && !defined(__arm64__)
2d21ac55
A
1283 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1284 flags, VM_MAP_NULL);
5ba3f43e 1285#endif /* !__arm__ && !__arm64__ */
6d2010ae 1286
3e170ce0 1287 vm_map_disable_hole_optimization(map);
1c79356b
A
1288 vm_map_unlock(map);
1289
2d21ac55 1290 assert(map->hdr.nentries == 0);
5ba3f43e 1291
55e303ae
A
1292 if(map->pmap)
1293 pmap_destroy(map->pmap);
1c79356b 1294
39037602
A
1295 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1296 /*
1297 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1298 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1299 * structure or kalloc'ed via lck_mtx_init.
1300 * An example is s_lock_ext within struct _vm_map.
1301 *
1302 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1303 * can add another tag to detect embedded vs alloc'ed indirect external
1304 * mutexes but that'll be additional checks in the lock path and require
1305 * updating dependencies for the old vs new tag.
1306 *
1307 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1308 * just when lock debugging is ON, we choose to forego explicitly destroying
1309 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1310 * count on vm_map_lck_grp, which has no serious side-effect.
1311 */
1312 } else {
1313 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1314 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1315 }
1316
91447636 1317 zfree(vm_map_zone, map);
1c79356b
A
1318}
1319
5ba3f43e
A
1320/*
1321 * Returns pid of the task with the largest number of VM map entries.
1322 * Used in the zone-map-exhaustion jetsam path.
1323 */
1324pid_t
1325find_largest_process_vm_map_entries(void)
1326{
1327 pid_t victim_pid = -1;
1328 int max_vm_map_entries = 0;
1329 task_t task = TASK_NULL;
1330 queue_head_t *task_list = &tasks;
1331
1332 lck_mtx_lock(&tasks_threads_lock);
1333 queue_iterate(task_list, task, task_t, tasks) {
1334 if (task == kernel_task || !task->active)
1335 continue;
1336
1337 vm_map_t task_map = task->map;
1338 if (task_map != VM_MAP_NULL) {
1339 int task_vm_map_entries = task_map->hdr.nentries;
1340 if (task_vm_map_entries > max_vm_map_entries) {
1341 max_vm_map_entries = task_vm_map_entries;
1342 victim_pid = pid_from_task(task);
1343 }
1344 }
1345 }
1346 lck_mtx_unlock(&tasks_threads_lock);
1347
1348 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1349 return victim_pid;
1350}
1351
1c79356b
A
1352#if TASK_SWAPPER
1353/*
1354 * vm_map_swapin/vm_map_swapout
1355 *
5ba3f43e 1356 * Swap a map in and out, either referencing or releasing its resources.
1c79356b
A
1357 * These functions are internal use only; however, they must be exported
1358 * because they may be called from macros, which are exported.
1359 *
5ba3f43e
A
1360 * In the case of swapout, there could be races on the residence count,
1361 * so if the residence count is up, we return, assuming that a
1c79356b
A
1362 * vm_map_deallocate() call in the near future will bring us back.
1363 *
1364 * Locking:
1365 * -- We use the map write lock for synchronization among races.
1366 * -- The map write lock, and not the simple s_lock, protects the
1367 * swap state of the map.
1368 * -- If a map entry is a share map, then we hold both locks, in
1369 * hierarchical order.
1370 *
1371 * Synchronization Notes:
 1372 * 1) If a vm_map_swapin() call happens while swapout is in progress, it
1373 * will block on the map lock and proceed when swapout is through.
1374 * 2) A vm_map_reference() call at this time is illegal, and will
1375 * cause a panic. vm_map_reference() is only allowed on resident
1376 * maps, since it refuses to block.
5ba3f43e 1377 * 3) A vm_map_swapin() call during a swapin will block, and
1c79356b
A
 1378 * proceed when the first swapin is done, turning into a nop.
1379 * This is the reason the res_count is not incremented until
1380 * after the swapin is complete.
1381 * 4) There is a timing hole after the checks of the res_count, before
1382 * the map lock is taken, during which a swapin may get the lock
1383 * before a swapout about to happen. If this happens, the swapin
1384 * will detect the state and increment the reference count, causing
5ba3f43e
A
1385 * the swapout to be a nop, thereby delaying it until a later
1386 * vm_map_deallocate. If the swapout gets the lock first, then
1387 * the swapin will simply block until the swapout is done, and
1c79356b
A
1388 * then proceed.
1389 *
1390 * Because vm_map_swapin() is potentially an expensive operation, it
1391 * should be used with caution.
1392 *
1393 * Invariants:
1394 * 1) A map with a residence count of zero is either swapped, or
1395 * being swapped.
1396 * 2) A map with a non-zero residence count is either resident,
1397 * or being swapped in.
1398 */
1399
1400int vm_map_swap_enable = 1;
1401
1402void vm_map_swapin (vm_map_t map)
1403{
39037602 1404 vm_map_entry_t entry;
2d21ac55 1405
1c79356b
A
1406 if (!vm_map_swap_enable) /* debug */
1407 return;
1408
1409 /*
1410 * Map is locked
1411 * First deal with various races.
1412 */
1413 if (map->sw_state == MAP_SW_IN)
5ba3f43e 1414 /*
1c79356b
A
1415 * we raced with swapout and won. Returning will incr.
1416 * the res_count, turning the swapout into a nop.
1417 */
1418 return;
1419
1420 /*
1421 * The residence count must be zero. If we raced with another
1422 * swapin, the state would have been IN; if we raced with a
1423 * swapout (after another competing swapin), we must have lost
1424 * the race to get here (see above comment), in which case
1425 * res_count is still 0.
1426 */
1427 assert(map->res_count == 0);
1428
1429 /*
1430 * There are no intermediate states of a map going out or
1431 * coming in, since the map is locked during the transition.
1432 */
1433 assert(map->sw_state == MAP_SW_OUT);
1434
1435 /*
5ba3f43e 1436 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1437 * or share-map, we call vm_map_res_reference upon it.
1438 * If the entry is an object, we call vm_object_res_reference
1439 * (this may iterate through the shadow chain).
1440 * Note that we hold the map locked the entire time,
1441 * even if we get back here via a recursive call in
1442 * vm_map_res_reference.
1443 */
1444 entry = vm_map_first_entry(map);
1445
1446 while (entry != vm_map_to_entry(map)) {
3e170ce0 1447 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1448 if (entry->is_sub_map) {
3e170ce0 1449 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1450 lck_mtx_lock(&lmap->s_lock);
1c79356b 1451 vm_map_res_reference(lmap);
b0d623f7 1452 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1453 } else {
3e170ce0 1454 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1455 vm_object_lock(object);
1456 /*
1457 * This call may iterate through the
1458 * shadow chain.
1459 */
1460 vm_object_res_reference(object);
1461 vm_object_unlock(object);
1462 }
1463 }
1464 entry = entry->vme_next;
1465 }
1466 assert(map->sw_state == MAP_SW_OUT);
1467 map->sw_state = MAP_SW_IN;
1468}
1469
1470void vm_map_swapout(vm_map_t map)
1471{
39037602 1472 vm_map_entry_t entry;
5ba3f43e 1473
1c79356b
A
1474 /*
1475 * Map is locked
1476 * First deal with various races.
1477 * If we raced with a swapin and lost, the residence count
1478 * will have been incremented to 1, and we simply return.
1479 */
b0d623f7 1480 lck_mtx_lock(&map->s_lock);
1c79356b 1481 if (map->res_count != 0) {
b0d623f7 1482 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1483 return;
1484 }
b0d623f7 1485 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1486
1487 /*
1488 * There are no intermediate states of a map going out or
1489 * coming in, since the map is locked during the transition.
1490 */
1491 assert(map->sw_state == MAP_SW_IN);
1492
1493 if (!vm_map_swap_enable)
1494 return;
1495
1496 /*
5ba3f43e 1497 * We now operate upon each map entry. If the entry is a sub-
1c79356b
A
1498 * or share-map, we call vm_map_res_deallocate upon it.
1499 * If the entry is an object, we call vm_object_res_deallocate
1500 * (this may iterate through the shadow chain).
1501 * Note that we hold the map locked the entire time,
1502 * even if we get back here via a recursive call in
1503 * vm_map_res_deallocate.
1504 */
1505 entry = vm_map_first_entry(map);
1506
1507 while (entry != vm_map_to_entry(map)) {
3e170ce0 1508 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1509 if (entry->is_sub_map) {
3e170ce0 1510 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1511 lck_mtx_lock(&lmap->s_lock);
1c79356b 1512 vm_map_res_deallocate(lmap);
b0d623f7 1513 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1514 } else {
3e170ce0 1515 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1516 vm_object_lock(object);
1517 /*
5ba3f43e
A
1518 * This call may take a long time,
1519 * since it could actively push
1520 * out pages (if we implement it
1c79356b
A
1521 * that way).
1522 */
1523 vm_object_res_deallocate(object);
1524 vm_object_unlock(object);
1525 }
1526 }
1527 entry = entry->vme_next;
1528 }
1529 assert(map->sw_state == MAP_SW_IN);
1530 map->sw_state = MAP_SW_OUT;
1531}
1532
1533#endif /* TASK_SWAPPER */
1534
1c79356b
A
1535/*
1536 * vm_map_lookup_entry: [ internal use only ]
1537 *
5ba3f43e
A
1538 * Calls into the vm map store layer to find the map
1539 * entry containing (or immediately preceding) the
6d2010ae 1540 * specified address in the given map; the entry is returned
1c79356b
A
1541 * in the "entry" parameter. The boolean
1542 * result indicates whether the address is
1543 * actually contained in the map.
1544 */
1545boolean_t
1546vm_map_lookup_entry(
39037602
A
1547 vm_map_t map,
1548 vm_map_offset_t address,
1c79356b
A
1549 vm_map_entry_t *entry) /* OUT */
1550{
6d2010ae 1551 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1552}
1553
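#if 0	/* Illustrative sketch only -- assumed caller, not from this file. */
/*
 * Canonical lookup pattern: take the map lock (read is enough for a
 * query), then ask the store layer whether "addr" falls inside an
 * existing entry.  The helper name is hypothetical.
 */
static boolean_t
example_address_is_mapped(vm_map_t map, vm_map_offset_t addr)
{
	vm_map_entry_t	entry;
	boolean_t	mapped;

	vm_map_lock_read(map);
	mapped = vm_map_lookup_entry(map, addr, &entry);
	/* on TRUE, "entry" contains addr; on FALSE, it precedes it */
	vm_map_unlock_read(map);

	return mapped;
}
#endif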
1554/*
1555 * Routine: vm_map_find_space
1556 * Purpose:
1557 * Allocate a range in the specified virtual address map,
1558 * returning the entry allocated for that range.
1559 * Used by kmem_alloc, etc.
1560 *
 1561 * The map must NOT be locked. It will be returned locked
1562 * on KERN_SUCCESS, unlocked on failure.
1563 *
1564 * If an entry is allocated, the object/offset fields
1565 * are initialized to zero.
1566 */
1567kern_return_t
1568vm_map_find_space(
39037602 1569 vm_map_t map,
91447636
A
1570 vm_map_offset_t *address, /* OUT */
1571 vm_map_size_t size,
1572 vm_map_offset_t mask,
5ba3f43e
A
1573 int flags __unused,
1574 vm_map_kernel_flags_t vmk_flags,
1575 vm_tag_t tag,
1c79356b
A
1576 vm_map_entry_t *o_entry) /* OUT */
1577{
3e170ce0 1578 vm_map_entry_t entry, new_entry;
39037602
A
1579 vm_map_offset_t start;
1580 vm_map_offset_t end;
3e170ce0 1581 vm_map_entry_t hole_entry;
91447636
A
1582
1583 if (size == 0) {
1584 *address = 0;
1585 return KERN_INVALID_ARGUMENT;
1586 }
1c79356b 1587
5ba3f43e 1588 if (vmk_flags.vmkf_guard_after) {
2d21ac55 1589 /* account for the back guard page in the size */
39236c6e 1590 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1591 }
1592
7ddcb079 1593 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1594
1595 /*
1596 * Look for the first possible address; if there's already
1597 * something at this address, we have to start after it.
1598 */
1599
1600 vm_map_lock(map);
1601
6d2010ae
A
1602 if( map->disable_vmentry_reuse == TRUE) {
1603 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1604 } else {
3e170ce0
A
1605 if (map->holelistenabled) {
1606 hole_entry = (vm_map_entry_t)map->holes_list;
1607
1608 if (hole_entry == NULL) {
1609 /*
1610 * No more space in the map?
1611 */
1612 vm_map_entry_dispose(map, new_entry);
1613 vm_map_unlock(map);
1614 return(KERN_NO_SPACE);
1615 }
1616
1617 entry = hole_entry;
1618 start = entry->vme_start;
1619 } else {
1620 assert(first_free_is_valid(map));
1621 if ((entry = map->first_free) == vm_map_to_entry(map))
1622 start = map->min_offset;
1623 else
1624 start = entry->vme_end;
1625 }
6d2010ae 1626 }
1c79356b
A
1627
1628 /*
1629 * In any case, the "entry" always precedes
1630 * the proposed new region throughout the loop:
1631 */
1632
1633 while (TRUE) {
39037602 1634 vm_map_entry_t next;
1c79356b
A
1635
1636 /*
1637 * Find the end of the proposed new region.
1638 * Be sure we didn't go beyond the end, or
1639 * wrap around the address.
1640 */
1641
5ba3f43e 1642 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1643 /* reserve space for the front guard page */
39236c6e 1644 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1645 }
1c79356b 1646 end = ((start + mask) & ~mask);
5ba3f43e 1647
1c79356b
A
1648 if (end < start) {
1649 vm_map_entry_dispose(map, new_entry);
1650 vm_map_unlock(map);
1651 return(KERN_NO_SPACE);
1652 }
1653 start = end;
1654 end += size;
1655
1656 if ((end > map->max_offset) || (end < start)) {
1657 vm_map_entry_dispose(map, new_entry);
1658 vm_map_unlock(map);
1659 return(KERN_NO_SPACE);
1660 }
1661
1c79356b 1662 next = entry->vme_next;
1c79356b 1663
3e170ce0
A
1664 if (map->holelistenabled) {
1665 if (entry->vme_end >= end)
1666 break;
1667 } else {
1668 /*
1669 * If there are no more entries, we must win.
1670 *
1671 * OR
1672 *
1673 * If there is another entry, it must be
1674 * after the end of the potential new region.
1675 */
1c79356b 1676
3e170ce0
A
1677 if (next == vm_map_to_entry(map))
1678 break;
1679
1680 if (next->vme_start >= end)
1681 break;
1682 }
1c79356b
A
1683
1684 /*
1685 * Didn't fit -- move to the next entry.
1686 */
1687
1688 entry = next;
3e170ce0
A
1689
1690 if (map->holelistenabled) {
1691 if (entry == (vm_map_entry_t) map->holes_list) {
1692 /*
1693 * Wrapped around
1694 */
1695 vm_map_entry_dispose(map, new_entry);
1696 vm_map_unlock(map);
1697 return(KERN_NO_SPACE);
1698 }
1699 start = entry->vme_start;
1700 } else {
1701 start = entry->vme_end;
1702 }
1703 }
1704
1705 if (map->holelistenabled) {
1706 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1707 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1708 }
1c79356b
A
1709 }
1710
1711 /*
1712 * At this point,
1713 * "start" and "end" should define the endpoints of the
1714 * available new range, and
1715 * "entry" should refer to the region before the new
1716 * range, and
1717 *
1718 * the map should be locked.
1719 */
1720
5ba3f43e 1721 if (vmk_flags.vmkf_guard_before) {
2d21ac55 1722 /* go back for the front guard page */
39236c6e 1723 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1724 }
1c79356b
A
1725 *address = start;
1726
e2d2fc5c 1727 assert(start < end);
1c79356b
A
1728 new_entry->vme_start = start;
1729 new_entry->vme_end = end;
1730 assert(page_aligned(new_entry->vme_start));
1731 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1732 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1733 VM_MAP_PAGE_MASK(map)));
1734 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1735 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1736
1737 new_entry->is_shared = FALSE;
1738 new_entry->is_sub_map = FALSE;
fe8ab488 1739 new_entry->use_pmap = TRUE;
3e170ce0
A
1740 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1741 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1742
1743 new_entry->needs_copy = FALSE;
1744
1745 new_entry->inheritance = VM_INHERIT_DEFAULT;
1746 new_entry->protection = VM_PROT_DEFAULT;
1747 new_entry->max_protection = VM_PROT_ALL;
1748 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1749 new_entry->wired_count = 0;
1750 new_entry->user_wired_count = 0;
1751
1752 new_entry->in_transition = FALSE;
1753 new_entry->needs_wakeup = FALSE;
2d21ac55 1754 new_entry->no_cache = FALSE;
b0d623f7 1755 new_entry->permanent = FALSE;
39236c6e
A
1756 new_entry->superpage_size = FALSE;
1757 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1758 new_entry->map_aligned = TRUE;
1759 } else {
1760 new_entry->map_aligned = FALSE;
1761 }
2d21ac55 1762
3e170ce0 1763 new_entry->used_for_jit = FALSE;
b0d623f7 1764 new_entry->zero_wired_pages = FALSE;
fe8ab488 1765 new_entry->iokit_acct = FALSE;
3e170ce0
A
1766 new_entry->vme_resilient_codesign = FALSE;
1767 new_entry->vme_resilient_media = FALSE;
5ba3f43e 1768 if (vmk_flags.vmkf_atomic_entry)
39037602
A
1769 new_entry->vme_atomic = TRUE;
1770 else
1771 new_entry->vme_atomic = FALSE;
1c79356b 1772
5ba3f43e 1773 VME_ALIAS_SET(new_entry, tag);
0c530ab8 1774
1c79356b
A
1775 /*
1776 * Insert the new entry into the list
1777 */
1778
6d2010ae 1779 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1780
1781 map->size += size;
1782
1783 /*
1784 * Update the lookup hint
1785 */
0c530ab8 1786 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1787
1788 *o_entry = new_entry;
1789 return(KERN_SUCCESS);
1790}
1791
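/*
 * Illustrative sketch (not from the original file): how a kmem_alloc()-
 * style caller might use vm_map_find_space().  On KERN_SUCCESS the map
 * comes back locked and the new entry has a zeroed object/offset, so
 * the caller installs its object and then unlocks.  "size" and
 * "new_object" are hypothetical, and VM_MAP_KERNEL_FLAGS_NONE is
 * assumed to be the empty vm_map_kernel_flags_t initializer.
 */
#if 0 /* example only */
	vm_map_offset_t	addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
			       (vm_map_offset_t) 0,	/* mask */
			       0,			/* flags (unused) */
			       VM_MAP_KERNEL_FLAGS_NONE,
			       VM_KERN_MEMORY_NONE,	/* tag */
			       &entry);
	if (kr == KERN_SUCCESS) {
		VME_OBJECT_SET(entry, new_object);	/* map still locked */
		VME_OFFSET_SET(entry, 0);
		vm_map_unlock(kernel_map);
	}
#endif /* example only */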
1792int vm_map_pmap_enter_print = FALSE;
1793int vm_map_pmap_enter_enable = FALSE;
1794
1795/*
91447636 1796 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1797 *
1798 * Description:
1799 * Force pages from the specified object to be entered into
1800 * the pmap at the specified address if they are present.
1801 * As soon as a page is not found in the object, the scan ends.
1802 *
1803 * Returns:
5ba3f43e 1804 * Nothing.
1c79356b
A
1805 *
1806 * In/out conditions:
1807 * The source map should not be locked on entry.
1808 */
fe8ab488 1809__unused static void
1c79356b
A
1810vm_map_pmap_enter(
1811 vm_map_t map,
39037602
A
1812 vm_map_offset_t addr,
1813 vm_map_offset_t end_addr,
1814 vm_object_t object,
1c79356b
A
1815 vm_object_offset_t offset,
1816 vm_prot_t protection)
1817{
2d21ac55
A
1818 int type_of_fault;
1819 kern_return_t kr;
0b4e3aa0 1820
55e303ae
A
1821 if(map->pmap == 0)
1822 return;
1823
1c79356b 1824 while (addr < end_addr) {
39037602 1825 vm_page_t m;
1c79356b 1826
fe8ab488
A
1827
1828 /*
1829 * TODO:
1830 * From vm_map_enter(), we come into this function without the map
1831 * lock held or the object lock held.
1832 * We haven't taken a reference on the object either.
1833 * We should do a proper lookup on the map to make sure
1834 * that things are sane before we go locking objects that
1835 * could have been deallocated from under us.
1836 */
1837
1c79356b 1838 vm_object_lock(object);
1c79356b
A
1839
1840 m = vm_page_lookup(object, offset);
5ba3f43e
A
1841
1842 if (m == VM_PAGE_NULL || m->busy || m->fictitious ||
2d21ac55 1843 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1844 vm_object_unlock(object);
1845 return;
1846 }
1847
1c79356b
A
1848 if (vm_map_pmap_enter_print) {
1849 printf("vm_map_pmap_enter:");
2d21ac55
A
1850 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1851 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1852 }
2d21ac55 1853 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1854 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
5ba3f43e
A
1855 VM_PAGE_WIRED(m),
1856 FALSE, /* change_wiring */
1857 VM_KERN_MEMORY_NONE, /* tag - not wiring */
1858 FALSE, /* no_cache */
1859 FALSE, /* cs_bypass */
1860 0, /* XXX need user tag / alias? */
1861 0, /* pmap_options */
1862 NULL, /* need_retry */
1863 &type_of_fault);
1c79356b 1864
1c79356b
A
1865 vm_object_unlock(object);
1866
1867 offset += PAGE_SIZE_64;
1868 addr += PAGE_SIZE;
1869 }
1870}
1871
91447636
A
1872boolean_t vm_map_pmap_is_empty(
1873 vm_map_t map,
1874 vm_map_offset_t start,
1875 vm_map_offset_t end);
1876boolean_t vm_map_pmap_is_empty(
1877 vm_map_t map,
1878 vm_map_offset_t start,
1879 vm_map_offset_t end)
1880{
2d21ac55
A
1881#ifdef MACHINE_PMAP_IS_EMPTY
1882 return pmap_is_empty(map->pmap, start, end);
1883#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1884 vm_map_offset_t offset;
1885 ppnum_t phys_page;
1886
1887 if (map->pmap == NULL) {
1888 return TRUE;
1889 }
2d21ac55 1890
91447636
A
1891 for (offset = start;
1892 offset < end;
1893 offset += PAGE_SIZE) {
1894 phys_page = pmap_find_phys(map->pmap, offset);
1895 if (phys_page) {
1896 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1897 "page %d at 0x%llx\n",
2d21ac55
A
1898 map, (long long)start, (long long)end,
1899 phys_page, (long long)offset);
91447636
A
1900 return FALSE;
1901 }
1902 }
1903 return TRUE;
2d21ac55 1904#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1905}
1906
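/*
 * Illustrative sketch (not from the original file): this helper is a
 * debug aid; vm_map_enter() uses it below under #if DEBUG to verify
 * that a freshly reserved range has nothing mapped in the pmap yet.
 * "map", "start" and "end" are hypothetical caller variables.
 */
#if 0 /* example only */
	assert(vm_map_pmap_is_empty(map, start, end));
#endif /* example only */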
316670eb
A
1907#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1908kern_return_t
1909vm_map_random_address_for_size(
1910 vm_map_t map,
1911 vm_map_offset_t *address,
1912 vm_map_size_t size)
1913{
1914 kern_return_t kr = KERN_SUCCESS;
1915 int tries = 0;
1916 vm_map_offset_t random_addr = 0;
1917 vm_map_offset_t hole_end;
1918
1919 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1920 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1921 vm_map_size_t vm_hole_size = 0;
1922 vm_map_size_t addr_space_size;
1923
1924 addr_space_size = vm_map_max(map) - vm_map_min(map);
1925
1926 assert(page_aligned(size));
1927
1928 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1929 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1930 random_addr = vm_map_trunc_page(
1931 vm_map_min(map) +(random_addr % addr_space_size),
1932 VM_MAP_PAGE_MASK(map));
316670eb
A
1933
1934 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1935 if (prev_entry == vm_map_to_entry(map)) {
1936 next_entry = vm_map_first_entry(map);
1937 } else {
1938 next_entry = prev_entry->vme_next;
1939 }
1940 if (next_entry == vm_map_to_entry(map)) {
1941 hole_end = vm_map_max(map);
1942 } else {
1943 hole_end = next_entry->vme_start;
1944 }
1945 vm_hole_size = hole_end - random_addr;
1946 if (vm_hole_size >= size) {
1947 *address = random_addr;
1948 break;
1949 }
1950 }
1951 tries++;
1952 }
1953
1954 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1955 kr = KERN_NO_SPACE;
1956 }
1957 return kr;
1958}
1959
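/*
 * Illustrative sketch (not from the original file): vm_map_enter()
 * below calls this routine with the map locked when VM_FLAGS_RANDOM_ADDR
 * is set (or for a JIT entry) to pick an ASLR-style start address.
 * "where" and "size" are hypothetical caller variables.
 */
#if 0 /* example only */
	vm_map_offset_t	where;

	if (vm_map_random_address_for_size(map, &where, size) == KERN_SUCCESS) {
		/* "where" begins a hole of at least "size" bytes */
	}
#endif /* example only */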
1c79356b
A
1960/*
1961 * Routine: vm_map_enter
1962 *
1963 * Description:
1964 * Allocate a range in the specified virtual address map.
1965 * The resulting range will refer to memory defined by
1966 * the given memory object and offset into that object.
1967 *
1968 * Arguments are as defined in the vm_map call.
1969 */
91447636
A
1970int _map_enter_debug = 0;
1971static unsigned int vm_map_enter_restore_successes = 0;
1972static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1973kern_return_t
1974vm_map_enter(
91447636 1975 vm_map_t map,
593a1d5f 1976 vm_map_offset_t *address, /* IN/OUT */
91447636 1977 vm_map_size_t size,
593a1d5f 1978 vm_map_offset_t mask,
1c79356b 1979 int flags,
5ba3f43e
A
1980 vm_map_kernel_flags_t vmk_flags,
1981 vm_tag_t alias,
1c79356b
A
1982 vm_object_t object,
1983 vm_object_offset_t offset,
1984 boolean_t needs_copy,
1985 vm_prot_t cur_protection,
1986 vm_prot_t max_protection,
1987 vm_inherit_t inheritance)
1988{
91447636 1989 vm_map_entry_t entry, new_entry;
2d21ac55 1990 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1991 vm_map_offset_t end, tmp_end;
b0d623f7
A
1992 vm_map_offset_t tmp2_start, tmp2_end;
1993 vm_map_offset_t step;
1c79356b 1994 kern_return_t result = KERN_SUCCESS;
91447636
A
1995 vm_map_t zap_old_map = VM_MAP_NULL;
1996 vm_map_t zap_new_map = VM_MAP_NULL;
1997 boolean_t map_locked = FALSE;
1998 boolean_t pmap_empty = TRUE;
1999 boolean_t new_mapping_established = FALSE;
5ba3f43e 2000 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
91447636
A
2001 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2002 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2003 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55 2004 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
5ba3f43e
A
2005 boolean_t is_submap = vmk_flags.vmkf_submap;
2006 boolean_t permanent = vmk_flags.vmkf_permanent;
2007 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2008 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3e170ce0
A
2009 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2010 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
39037602 2011 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
b0d623f7 2012 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
5ba3f43e 2013 vm_tag_t user_alias;
2d21ac55 2014 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 2015 kern_return_t kr;
39236c6e 2016 boolean_t clear_map_aligned = FALSE;
3e170ce0 2017 vm_map_entry_t hole_entry;
593a1d5f 2018
5ba3f43e
A
2019 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2020
b0d623f7
A
2021 if (superpage_size) {
2022 switch (superpage_size) {
2023 /*
2024 * Note that the current implementation only supports
2025 * a single size for superpages, SUPERPAGE_SIZE, per
2026 * architecture. Once more sizes are supported,
2027 * SUPERPAGE_SIZE has to be replaced
2028 * with a lookup of the size depending on superpage_size.
2029 */
2030#ifdef __x86_64__
6d2010ae
A
2031 case SUPERPAGE_SIZE_ANY:
2032 /* handle it like 2 MB and round up to page size */
2033 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
b0d623f7
A
2034 case SUPERPAGE_SIZE_2MB:
2035 break;
2036#endif
2037 default:
2038 return KERN_INVALID_ARGUMENT;
2039 }
2040 mask = SUPERPAGE_SIZE-1;
2041 if (size & (SUPERPAGE_SIZE-1))
2042 return KERN_INVALID_ARGUMENT;
2043 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2044 }
2045
6d2010ae 2046
5ba3f43e
A
2047#if CONFIG_EMBEDDED
2048 if (cur_protection & VM_PROT_WRITE){
2049 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
2050 printf("EMBEDDED: %s: curprot cannot be write+execute. "
2051 "turning off execute\n",
2052 __FUNCTION__);
2053 cur_protection &= ~VM_PROT_EXECUTE;
2054 }
2055 }
2056#endif /* CONFIG_EMBEDDED */
1c79356b 2057
3e170ce0
A
2058 if (resilient_codesign || resilient_media) {
2059 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2060 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2061 return KERN_PROTECTION_FAILURE;
2062 }
2063 }
2064
2d21ac55
A
2065 if (is_submap) {
2066 if (purgable) {
2067 /* submaps can not be purgeable */
2068 return KERN_INVALID_ARGUMENT;
2069 }
2070 if (object == VM_OBJECT_NULL) {
2071 /* submaps can not be created lazily */
2072 return KERN_INVALID_ARGUMENT;
2073 }
2074 }
5ba3f43e 2075 if (vmk_flags.vmkf_already) {
2d21ac55
A
2076 /*
2077 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2078 * is already present. For it to be meaningful, the requested
2079 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2080 * we shouldn't try to remove what was mapped there first
2081 * (!VM_FLAGS_OVERWRITE).
2082 */
2083 if ((flags & VM_FLAGS_ANYWHERE) ||
2084 (flags & VM_FLAGS_OVERWRITE)) {
2085 return KERN_INVALID_ARGUMENT;
2086 }
2087 }
2088
6d2010ae 2089 effective_min_offset = map->min_offset;
b0d623f7 2090
5ba3f43e 2091 if (vmk_flags.vmkf_beyond_max) {
2d21ac55 2092 /*
b0d623f7 2093 * Allow an insertion beyond the map's max offset.
2d21ac55 2094 */
5ba3f43e 2095#if !defined(__arm__) && !defined(__arm64__)
2d21ac55
A
2096 if (vm_map_is_64bit(map))
2097 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2098 else
5ba3f43e 2099#endif /* __arm__ */
2d21ac55
A
2100 effective_max_offset = 0x00000000FFFFF000ULL;
2101 } else {
2102 effective_max_offset = map->max_offset;
2103 }
2104
2105 if (size == 0 ||
2106 (offset & PAGE_MASK_64) != 0) {
91447636
A
2107 *address = 0;
2108 return KERN_INVALID_ARGUMENT;
2109 }
2110
3e170ce0
A
2111 if (map->pmap == kernel_pmap) {
2112 user_alias = VM_KERN_MEMORY_NONE;
2113 } else {
2114 user_alias = alias;
2115 }
2d21ac55 2116
1c79356b
A
2117#define RETURN(value) { result = value; goto BailOut; }
2118
2119 assert(page_aligned(*address));
2120 assert(page_aligned(size));
91447636 2121
39236c6e
A
2122 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2123 /*
2124 * In most cases, the caller rounds the size up to the
2125 * map's page size.
2126 * If we get a size that is explicitly not map-aligned here,
2127 * we'll have to respect the caller's wish and mark the
2128 * mapping as "not map-aligned" to avoid tripping the
2129 * map alignment checks later.
2130 */
2131 clear_map_aligned = TRUE;
2132 }
5ba3f43e 2133 if (!anywhere &&
fe8ab488
A
2134 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2135 /*
2136 * We've been asked to map at a fixed address and that
2137 * address is not aligned to the map's specific alignment.
2138 * The caller should know what it's doing (i.e. most likely
2139 * mapping some fragmented copy map, transferring memory from
2140 * a VM map with a different alignment), so clear map_aligned
2141 * for this new VM map entry and proceed.
2142 */
2143 clear_map_aligned = TRUE;
2144 }
39236c6e 2145
91447636
A
2146 /*
2147 * Only zero-fill objects are allowed to be purgable.
2148 * LP64todo - limit purgable objects to 32-bits for now
2149 */
2150 if (purgable &&
2151 (offset != 0 ||
2152 (object != VM_OBJECT_NULL &&
6d2010ae 2153 (object->vo_size != size ||
2d21ac55 2154 object->purgable == VM_PURGABLE_DENY))
b0d623f7 2155 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
2156 return KERN_INVALID_ARGUMENT;
2157
2158 if (!anywhere && overwrite) {
2159 /*
2160 * Create a temporary VM map to hold the old mappings in the
2161 * affected area while we create the new one.
2162 * This avoids releasing the VM map lock in
2163 * vm_map_entry_delete() and allows atomicity
2164 * when we want to replace some mappings with a new one.
2165 * It also allows us to restore the old VM mappings if the
2166 * new mapping fails.
2167 */
2168 zap_old_map = vm_map_create(PMAP_NULL,
2169 *address,
2170 *address + size,
b0d623f7 2171 map->hdr.entries_pageable);
39236c6e 2172 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2173 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2174 }
2175
2d21ac55 2176StartAgain: ;
1c79356b
A
2177
2178 start = *address;
2179
2180 if (anywhere) {
2181 vm_map_lock(map);
91447636 2182 map_locked = TRUE;
5ba3f43e 2183
316670eb
A
2184 if (entry_for_jit) {
2185 if (map->jit_entry_exists) {
2186 result = KERN_INVALID_ARGUMENT;
2187 goto BailOut;
2188 }
39037602
A
2189 random_address = TRUE;
2190 }
2191
2192 if (random_address) {
316670eb
A
2193 /*
2194 * Get a random start address.
2195 */
2196 result = vm_map_random_address_for_size(map, address, size);
2197 if (result != KERN_SUCCESS) {
2198 goto BailOut;
2199 }
2200 start = *address;
6d2010ae 2201 }
5ba3f43e
A
2202#if __x86_64__
2203 else if ((start == 0 || start == vm_map_min(map)) &&
2204 !map->disable_vmentry_reuse &&
2205 map->vmmap_high_start != 0) {
2206 start = map->vmmap_high_start;
2207 }
2208#endif /* __x86_64__ */
1c79356b 2209
316670eb 2210
1c79356b
A
2211 /*
2212 * Calculate the first possible address.
2213 */
2214
2d21ac55
A
2215 if (start < effective_min_offset)
2216 start = effective_min_offset;
2217 if (start > effective_max_offset)
1c79356b
A
2218 RETURN(KERN_NO_SPACE);
2219
2220 /*
2221 * Look for the first possible address;
2222 * if there's already something at this
2223 * address, we have to start after it.
2224 */
2225
6d2010ae
A
2226 if( map->disable_vmentry_reuse == TRUE) {
2227 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2228 } else {
6d2010ae 2229
3e170ce0
A
2230 if (map->holelistenabled) {
2231 hole_entry = (vm_map_entry_t)map->holes_list;
2232
2233 if (hole_entry == NULL) {
2234 /*
2235 * No more space in the map?
2236 */
2237 result = KERN_NO_SPACE;
2238 goto BailOut;
2239 } else {
2240
2241 boolean_t found_hole = FALSE;
2242
2243 do {
2244 if (hole_entry->vme_start >= start) {
2245 start = hole_entry->vme_start;
2246 found_hole = TRUE;
2247 break;
2248 }
2249
2250 if (hole_entry->vme_end > start) {
2251 found_hole = TRUE;
2252 break;
2253 }
2254 hole_entry = hole_entry->vme_next;
2255
2256 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2257
2258 if (found_hole == FALSE) {
2259 result = KERN_NO_SPACE;
2260 goto BailOut;
2261 }
2262
2263 entry = hole_entry;
6d2010ae 2264
3e170ce0
A
2265 if (start == 0)
2266 start += PAGE_SIZE_64;
2267 }
6d2010ae 2268 } else {
3e170ce0
A
2269 assert(first_free_is_valid(map));
2270
2271 entry = map->first_free;
2272
2273 if (entry == vm_map_to_entry(map)) {
6d2010ae 2274 entry = NULL;
3e170ce0
A
2275 } else {
2276 if (entry->vme_next == vm_map_to_entry(map)){
2277 /*
2278 * Hole at the end of the map.
2279 */
2280 entry = NULL;
2281 } else {
2282 if (start < (entry->vme_next)->vme_start ) {
2283 start = entry->vme_end;
2284 start = vm_map_round_page(start,
2285 VM_MAP_PAGE_MASK(map));
2286 } else {
2287 /*
2288 * Need to do a lookup.
2289 */
2290 entry = NULL;
2291 }
2292 }
2293 }
2294
2295 if (entry == NULL) {
2296 vm_map_entry_t tmp_entry;
2297 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2298 assert(!entry_for_jit);
2299 start = tmp_entry->vme_end;
39236c6e
A
2300 start = vm_map_round_page(start,
2301 VM_MAP_PAGE_MASK(map));
6d2010ae 2302 }
3e170ce0 2303 entry = tmp_entry;
316670eb 2304 }
6d2010ae 2305 }
1c79356b
A
2306 }
2307
2308 /*
2309 * In any case, the "entry" always precedes
2310 * the proposed new region throughout the
2311 * loop:
2312 */
2313
2314 while (TRUE) {
39037602 2315 vm_map_entry_t next;
1c79356b 2316
2d21ac55 2317 /*
1c79356b
A
2318 * Find the end of the proposed new region.
2319 * Be sure we didn't go beyond the end, or
2320 * wrap around the address.
2321 */
2322
2323 end = ((start + mask) & ~mask);
39236c6e
A
2324 end = vm_map_round_page(end,
2325 VM_MAP_PAGE_MASK(map));
1c79356b
A
2326 if (end < start)
2327 RETURN(KERN_NO_SPACE);
2328 start = end;
39236c6e
A
2329 assert(VM_MAP_PAGE_ALIGNED(start,
2330 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2331 end += size;
2332
2d21ac55 2333 if ((end > effective_max_offset) || (end < start)) {
1c79356b 2334 if (map->wait_for_space) {
fe8ab488 2335 assert(!keep_map_locked);
2d21ac55
A
2336 if (size <= (effective_max_offset -
2337 effective_min_offset)) {
1c79356b
A
2338 assert_wait((event_t)map,
2339 THREAD_ABORTSAFE);
2340 vm_map_unlock(map);
91447636
A
2341 map_locked = FALSE;
2342 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2343 goto StartAgain;
2344 }
2345 }
2346 RETURN(KERN_NO_SPACE);
2347 }
2348
1c79356b 2349 next = entry->vme_next;
1c79356b 2350
3e170ce0
A
2351 if (map->holelistenabled) {
2352 if (entry->vme_end >= end)
2353 break;
2354 } else {
2355 /*
2356 * If there are no more entries, we must win.
2357 *
2358 * OR
2359 *
2360 * If there is another entry, it must be
2361 * after the end of the potential new region.
2362 */
1c79356b 2363
3e170ce0
A
2364 if (next == vm_map_to_entry(map))
2365 break;
2366
2367 if (next->vme_start >= end)
2368 break;
2369 }
1c79356b
A
2370
2371 /*
2372 * Didn't fit -- move to the next entry.
2373 */
2374
2375 entry = next;
3e170ce0
A
2376
2377 if (map->holelistenabled) {
2378 if (entry == (vm_map_entry_t) map->holes_list) {
2379 /*
2380 * Wrapped around
2381 */
2382 result = KERN_NO_SPACE;
2383 goto BailOut;
2384 }
2385 start = entry->vme_start;
2386 } else {
2387 start = entry->vme_end;
2388 }
2389
39236c6e
A
2390 start = vm_map_round_page(start,
2391 VM_MAP_PAGE_MASK(map));
1c79356b 2392 }
3e170ce0
A
2393
2394 if (map->holelistenabled) {
2395 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2396 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2397 }
2398 }
2399
1c79356b 2400 *address = start;
39236c6e
A
2401 assert(VM_MAP_PAGE_ALIGNED(*address,
2402 VM_MAP_PAGE_MASK(map)));
1c79356b 2403 } else {
1c79356b
A
2404 /*
2405 * Verify that:
2406 * the address doesn't itself violate
2407 * the mask requirement.
2408 */
2409
2410 vm_map_lock(map);
91447636 2411 map_locked = TRUE;
1c79356b
A
2412 if ((start & mask) != 0)
2413 RETURN(KERN_NO_SPACE);
2414
2415 /*
2416 * ... the address is within bounds
2417 */
2418
2419 end = start + size;
2420
2d21ac55
A
2421 if ((start < effective_min_offset) ||
2422 (end > effective_max_offset) ||
1c79356b
A
2423 (start >= end)) {
2424 RETURN(KERN_INVALID_ADDRESS);
2425 }
2426
91447636 2427 if (overwrite && zap_old_map != VM_MAP_NULL) {
5ba3f43e 2428 int remove_flags;
91447636
A
2429 /*
2430 * Fixed mapping and "overwrite" flag: attempt to
2431 * remove all existing mappings in the specified
2432 * address range, saving them in our "zap_old_map".
2433 */
5ba3f43e
A
2434 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2435 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2436 if (vmk_flags.vmkf_overwrite_immutable) {
2437 /* we can overwrite immutable mappings */
2438 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2439 }
91447636 2440 (void) vm_map_delete(map, start, end,
5ba3f43e 2441 remove_flags,
91447636
A
2442 zap_old_map);
2443 }
2444
1c79356b
A
2445 /*
2446 * ... the starting address isn't allocated
2447 */
2448
2d21ac55 2449 if (vm_map_lookup_entry(map, start, &entry)) {
5ba3f43e 2450 if (! (vmk_flags.vmkf_already)) {
2d21ac55
A
2451 RETURN(KERN_NO_SPACE);
2452 }
2453 /*
2454 * Check if what's already there is what we want.
2455 */
2456 tmp_start = start;
2457 tmp_offset = offset;
2458 if (entry->vme_start < start) {
2459 tmp_start -= start - entry->vme_start;
2460 tmp_offset -= start - entry->vme_start;
5ba3f43e 2461
2d21ac55
A
2462 }
2463 for (; entry->vme_start < end;
2464 entry = entry->vme_next) {
4a3eedf9
A
2465 /*
2466 * Check if the mapping's attributes
2467 * match the existing map entry.
2468 */
2d21ac55
A
2469 if (entry == vm_map_to_entry(map) ||
2470 entry->vme_start != tmp_start ||
2471 entry->is_sub_map != is_submap ||
3e170ce0 2472 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2473 entry->needs_copy != needs_copy ||
2474 entry->protection != cur_protection ||
2475 entry->max_protection != max_protection ||
2476 entry->inheritance != inheritance ||
fe8ab488 2477 entry->iokit_acct != iokit_acct ||
3e170ce0 2478 VME_ALIAS(entry) != alias) {
2d21ac55
A
2479 /* not the same mapping ! */
2480 RETURN(KERN_NO_SPACE);
2481 }
4a3eedf9
A
2482 /*
2483 * Check if the same object is being mapped.
2484 */
2485 if (is_submap) {
3e170ce0 2486 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2487 (vm_map_t) object) {
2488 /* not the same submap */
2489 RETURN(KERN_NO_SPACE);
2490 }
2491 } else {
3e170ce0 2492 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2493 /* not the same VM object... */
2494 vm_object_t obj2;
2495
3e170ce0 2496 obj2 = VME_OBJECT(entry);
4a3eedf9
A
2497 if ((obj2 == VM_OBJECT_NULL ||
2498 obj2->internal) &&
2499 (object == VM_OBJECT_NULL ||
2500 object->internal)) {
2501 /*
2502 * ... but both are
2503 * anonymous memory,
2504 * so equivalent.
2505 */
2506 } else {
2507 RETURN(KERN_NO_SPACE);
2508 }
2509 }
2510 }
2511
2d21ac55
A
2512 tmp_offset += entry->vme_end - entry->vme_start;
2513 tmp_start += entry->vme_end - entry->vme_start;
2514 if (entry->vme_end >= end) {
2515 /* reached the end of our mapping */
2516 break;
2517 }
2518 }
2519 /* it all matches: let's use what's already there ! */
2520 RETURN(KERN_MEMORY_PRESENT);
2521 }
1c79356b
A
2522
2523 /*
2524 * ... the next region doesn't overlap the
2525 * end point.
2526 */
2527
2528 if ((entry->vme_next != vm_map_to_entry(map)) &&
2529 (entry->vme_next->vme_start < end))
2530 RETURN(KERN_NO_SPACE);
2531 }
2532
2533 /*
2534 * At this point,
2535 * "start" and "end" should define the endpoints of the
2536 * available new range, and
2537 * "entry" should refer to the region before the new
2538 * range, and
2539 *
2540 * the map should be locked.
2541 */
2542
2543 /*
2544 * See whether we can avoid creating a new entry (and object) by
2545 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2546 * extend from below.] Note that we can never extend/join
2547 * purgable objects because they need to remain distinct
2548 * entities in order to implement their "volatile object"
2549 * semantics.
1c79356b
A
2550 */
2551
316670eb 2552 if (purgable || entry_for_jit) {
91447636 2553 if (object == VM_OBJECT_NULL) {
3e170ce0 2554
91447636
A
2555 object = vm_object_allocate(size);
2556 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
fe8ab488 2557 object->true_share = TRUE;
316670eb 2558 if (purgable) {
fe8ab488 2559 task_t owner;
316670eb 2560 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2561 if (map->pmap == kernel_pmap) {
2562 /*
2563 * Purgeable mappings made in a kernel
2564 * map are "owned" by the kernel itself
2565 * rather than the current user task
2566 * because they're likely to be used by
2567 * more than this user task (see
2568 * execargs_purgeable_allocate(), for
2569 * example).
2570 */
2571 owner = kernel_task;
2572 } else {
2573 owner = current_task();
2574 }
2575 assert(object->vo_purgeable_owner == NULL);
2576 assert(object->resident_page_count == 0);
2577 assert(object->wired_page_count == 0);
2578 vm_object_lock(object);
2579 vm_purgeable_nonvolatile_enqueue(object, owner);
2580 vm_object_unlock(object);
316670eb 2581 }
91447636
A
2582 offset = (vm_object_offset_t)0;
2583 }
2d21ac55
A
2584 } else if ((is_submap == FALSE) &&
2585 (object == VM_OBJECT_NULL) &&
2586 (entry != vm_map_to_entry(map)) &&
2587 (entry->vme_end == start) &&
2588 (!entry->is_shared) &&
2589 (!entry->is_sub_map) &&
fe8ab488
A
2590 (!entry->in_transition) &&
2591 (!entry->needs_wakeup) &&
2592 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2d21ac55
A
2593 (entry->protection == cur_protection) &&
2594 (entry->max_protection == max_protection) &&
fe8ab488 2595 (entry->inheritance == inheritance) &&
3e170ce0
A
2596 ((user_alias == VM_MEMORY_REALLOC) ||
2597 (VME_ALIAS(entry) == alias)) &&
2d21ac55 2598 (entry->no_cache == no_cache) &&
fe8ab488 2599 (entry->permanent == permanent) &&
5ba3f43e
A
2600 /* no coalescing for immutable executable mappings */
2601 !((entry->protection & VM_PROT_EXECUTE) &&
2602 entry->permanent) &&
fe8ab488 2603 (!entry->superpage_size && !superpage_size) &&
39236c6e
A
2604 /*
2605 * No coalescing if not map-aligned, to avoid propagating
2606 * that condition any further than needed:
2607 */
2608 (!entry->map_aligned || !clear_map_aligned) &&
fe8ab488
A
2609 (!entry->zero_wired_pages) &&
2610 (!entry->used_for_jit && !entry_for_jit) &&
2611 (entry->iokit_acct == iokit_acct) &&
3e170ce0
A
2612 (!entry->vme_resilient_codesign) &&
2613 (!entry->vme_resilient_media) &&
39037602 2614 (!entry->vme_atomic) &&
fe8ab488 2615
b0d623f7 2616 ((entry->vme_end - entry->vme_start) + size <=
3e170ce0 2617 (user_alias == VM_MEMORY_REALLOC ?
b0d623f7
A
2618 ANON_CHUNK_SIZE :
2619 NO_COALESCE_LIMIT)) &&
fe8ab488 2620
2d21ac55 2621 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2622 if (vm_object_coalesce(VME_OBJECT(entry),
2d21ac55 2623 VM_OBJECT_NULL,
3e170ce0 2624 VME_OFFSET(entry),
2d21ac55
A
2625 (vm_object_offset_t) 0,
2626 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2627 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2628
2629 /*
2630 * Coalesced the two objects - can extend
2631 * the previous map entry to include the
2632 * new range.
2633 */
2634 map->size += (end - entry->vme_end);
e2d2fc5c 2635 assert(entry->vme_start < end);
39236c6e
A
2636 assert(VM_MAP_PAGE_ALIGNED(end,
2637 VM_MAP_PAGE_MASK(map)));
3e170ce0
A
2638 if (__improbable(vm_debug_events))
2639 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
1c79356b 2640 entry->vme_end = end;
3e170ce0
A
2641 if (map->holelistenabled) {
2642 vm_map_store_update_first_free(map, entry, TRUE);
2643 } else {
2644 vm_map_store_update_first_free(map, map->first_free, TRUE);
2645 }
fe8ab488 2646 new_mapping_established = TRUE;
1c79356b
A
2647 RETURN(KERN_SUCCESS);
2648 }
2649 }
2650
b0d623f7
A
2651 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2652 new_entry = NULL;
2653
2654 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2655 tmp2_end = tmp2_start + step;
2656 /*
2657 * Create a new entry
2658 * LP64todo - for now, we can only allocate 4GB internal objects
2659 * because the default pager can't page bigger ones. Remove this
2660 * when it can.
2661 *
2662 * XXX FBDP
2663 * The reserved "page zero" in each process's address space can
2664 * be arbitrarily large. Splitting it into separate 4GB objects and
2665 * therefore different VM map entries serves no purpose and just
2666 * slows down operations on the VM map, so let's not split the
2667 * allocation into 4GB chunks if the max protection is NONE. That
2668 * memory should never be accessible, so it will never get to the
2669 * default pager.
2670 */
2671 tmp_start = tmp2_start;
2672 if (object == VM_OBJECT_NULL &&
2673 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2674 max_protection != VM_PROT_NONE &&
5ba3f43e 2675 superpage_size == 0)
b0d623f7
A
2676 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2677 else
2678 tmp_end = tmp2_end;
2679 do {
5ba3f43e
A
2680 new_entry = vm_map_entry_insert(
2681 map, entry, tmp_start, tmp_end,
2682 object, offset, needs_copy,
2683 FALSE, FALSE,
2684 cur_protection, max_protection,
2685 VM_BEHAVIOR_DEFAULT,
2686 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2687 0,
2688 no_cache,
2689 permanent,
2690 superpage_size,
2691 clear_map_aligned,
2692 is_submap,
2693 entry_for_jit,
2694 alias);
3e170ce0
A
2695
2696 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
6d2010ae 2697
3e170ce0
A
2698 if (resilient_codesign &&
2699 ! ((cur_protection | max_protection) &
2700 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2701 new_entry->vme_resilient_codesign = TRUE;
2702 }
2703
2704 if (resilient_media &&
2705 ! ((cur_protection | max_protection) &
2706 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2707 new_entry->vme_resilient_media = TRUE;
2708 }
2709
fe8ab488
A
2710 assert(!new_entry->iokit_acct);
2711 if (!is_submap &&
2712 object != VM_OBJECT_NULL &&
2713 object->purgable != VM_PURGABLE_DENY) {
2714 assert(new_entry->use_pmap);
2715 assert(!new_entry->iokit_acct);
2716 /*
2717 * Turn off pmap accounting since
2718 * purgeable objects have their
2719 * own ledgers.
2720 */
2721 new_entry->use_pmap = FALSE;
2722 } else if (!is_submap &&
ecc0ceb4
A
2723 iokit_acct &&
2724 object != VM_OBJECT_NULL &&
2725 object->internal) {
fe8ab488
A
2726 /* alternate accounting */
2727 assert(!new_entry->iokit_acct);
2728 assert(new_entry->use_pmap);
2729 new_entry->iokit_acct = TRUE;
2730 new_entry->use_pmap = FALSE;
ecc0ceb4
A
2731 DTRACE_VM4(
2732 vm_map_iokit_mapped_region,
2733 vm_map_t, map,
2734 vm_map_offset_t, new_entry->vme_start,
2735 vm_map_offset_t, new_entry->vme_end,
2736 int, VME_ALIAS(new_entry));
fe8ab488
A
2737 vm_map_iokit_mapped_region(
2738 map,
2739 (new_entry->vme_end -
2740 new_entry->vme_start));
2741 } else if (!is_submap) {
2742 assert(!new_entry->iokit_acct);
2743 assert(new_entry->use_pmap);
2744 }
2745
b0d623f7
A
2746 if (is_submap) {
2747 vm_map_t submap;
2748 boolean_t submap_is_64bit;
2749 boolean_t use_pmap;
2750
fe8ab488
A
2751 assert(new_entry->is_sub_map);
2752 assert(!new_entry->use_pmap);
2753 assert(!new_entry->iokit_acct);
b0d623f7
A
2754 submap = (vm_map_t) object;
2755 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2756 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2757#ifndef NO_NESTED_PMAP
b0d623f7 2758 if (use_pmap && submap->pmap == NULL) {
316670eb 2759 ledger_t ledger = map->pmap->ledger;
b0d623f7 2760 /* we need a sub pmap to nest... */
316670eb
A
2761 submap->pmap = pmap_create(ledger, 0,
2762 submap_is_64bit);
b0d623f7
A
2763 if (submap->pmap == NULL) {
2764 /* let's proceed without nesting... */
2765 }
5ba3f43e
A
2766#if defined(__arm__) || defined(__arm64__)
2767 else {
2768 pmap_set_nested(submap->pmap);
2769 }
2770#endif
2d21ac55 2771 }
b0d623f7
A
2772 if (use_pmap && submap->pmap != NULL) {
2773 kr = pmap_nest(map->pmap,
2774 submap->pmap,
2775 tmp_start,
2776 tmp_start,
2777 tmp_end - tmp_start);
2778 if (kr != KERN_SUCCESS) {
2779 printf("vm_map_enter: "
2780 "pmap_nest(0x%llx,0x%llx) "
2781 "error 0x%x\n",
2782 (long long)tmp_start,
2783 (long long)tmp_end,
2784 kr);
2785 } else {
2786 /* we're now nested ! */
2787 new_entry->use_pmap = TRUE;
2788 pmap_empty = FALSE;
2789 }
2790 }
fe8ab488 2791#endif /* NO_NESTED_PMAP */
2d21ac55 2792 }
b0d623f7
A
2793 entry = new_entry;
2794
2795 if (superpage_size) {
2796 vm_page_t pages, m;
2797 vm_object_t sp_object;
5ba3f43e 2798 vm_object_offset_t sp_offset;
b0d623f7 2799
3e170ce0 2800 VME_OFFSET_SET(entry, 0);
b0d623f7
A
2801
2802 /* allocate one superpage */
2803 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2804 if (kr != KERN_SUCCESS) {
3e170ce0
A
2805 /* deallocate whole range... */
2806 new_mapping_established = TRUE;
2807 /* ... but only up to "tmp_end" */
2808 size -= end - tmp_end;
b0d623f7
A
2809 RETURN(kr);
2810 }
2811
2812 /* create one vm_object per superpage */
2813 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2814 sp_object->phys_contiguous = TRUE;
39037602 2815 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
3e170ce0 2816 VME_OBJECT_SET(entry, sp_object);
fe8ab488 2817 assert(entry->use_pmap);
b0d623f7
A
2818
2819 /* enter the base pages into the object */
2820 vm_object_lock(sp_object);
5ba3f43e
A
2821 for (sp_offset = 0;
2822 sp_offset < SUPERPAGE_SIZE;
2823 sp_offset += PAGE_SIZE) {
b0d623f7 2824 m = pages;
39037602 2825 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
2826 pages = NEXT_PAGE(m);
2827 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5ba3f43e 2828 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 2829 }
b0d623f7 2830 vm_object_unlock(sp_object);
2d21ac55 2831 }
5ba3f43e 2832 } while (tmp_end != tmp2_end &&
b0d623f7 2833 (tmp_start = tmp_end) &&
5ba3f43e 2834 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
b0d623f7
A
2835 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2836 }
91447636 2837
91447636 2838 new_mapping_established = TRUE;
1c79356b 2839
fe8ab488
A
2840BailOut:
2841 assert(map_locked == TRUE);
2d21ac55 2842
593a1d5f
A
2843 if (result == KERN_SUCCESS) {
2844 vm_prot_t pager_prot;
2845 memory_object_t pager;
91447636 2846
fe8ab488 2847#if DEBUG
593a1d5f 2848 if (pmap_empty &&
5ba3f43e 2849 !(vmk_flags.vmkf_no_pmap_check)) {
593a1d5f
A
2850 assert(vm_map_pmap_is_empty(map,
2851 *address,
2852 *address+size));
2853 }
fe8ab488 2854#endif /* DEBUG */
593a1d5f
A
2855
2856 /*
2857 * For "named" VM objects, let the pager know that the
2858 * memory object is being mapped. Some pagers need to keep
2859 * track of this, to know when they can reclaim the memory
2860 * object, for example.
2861 * VM calls memory_object_map() for each mapping (specifying
2862 * the protection of each mapping) and calls
2863 * memory_object_last_unmap() when all the mappings are gone.
2864 */
2865 pager_prot = max_protection;
2866 if (needs_copy) {
2867 /*
2868 * Copy-On-Write mapping: won't modify
2869 * the memory object.
2870 */
2871 pager_prot &= ~VM_PROT_WRITE;
2872 }
2873 if (!is_submap &&
2874 object != VM_OBJECT_NULL &&
2875 object->named &&
2876 object->pager != MEMORY_OBJECT_NULL) {
2877 vm_object_lock(object);
2878 pager = object->pager;
2879 if (object->named &&
2880 pager != MEMORY_OBJECT_NULL) {
2881 assert(object->pager_ready);
2882 vm_object_mapping_wait(object, THREAD_UNINT);
2883 vm_object_mapping_begin(object);
2884 vm_object_unlock(object);
2885
2886 kr = memory_object_map(pager, pager_prot);
2887 assert(kr == KERN_SUCCESS);
2888
2889 vm_object_lock(object);
2890 vm_object_mapping_end(object);
2891 }
2892 vm_object_unlock(object);
2893 }
fe8ab488
A
2894 }
2895
2896 assert(map_locked == TRUE);
2897
2898 if (!keep_map_locked) {
2899 vm_map_unlock(map);
2900 map_locked = FALSE;
2901 }
2902
2903 /*
2904 * We can't hold the map lock if we enter this block.
2905 */
2906
2907 if (result == KERN_SUCCESS) {
2908
2909 /* Wire down the new entry if the user
2910 * requested all new map entries be wired.
2911 */
2912 if ((map->wiring_required)||(superpage_size)) {
2913 assert(!keep_map_locked);
2914 pmap_empty = FALSE; /* pmap won't be empty */
5ba3f43e
A
2915 kr = vm_map_wire_kernel(map, start, end,
2916 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3e170ce0 2917 TRUE);
fe8ab488
A
2918 result = kr;
2919 }
2920
2921 }
2922
2923 if (result != KERN_SUCCESS) {
91447636
A
2924 if (new_mapping_established) {
2925 /*
2926 * We have to get rid of the new mappings since we
2927 * won't make them available to the user.
2928 * Try to do that atomically, to minimize the risk
2929 * that someone else creates new mappings in that range.
2930 */
2931 zap_new_map = vm_map_create(PMAP_NULL,
2932 *address,
2933 *address + size,
b0d623f7 2934 map->hdr.entries_pageable);
39236c6e
A
2935 vm_map_set_page_shift(zap_new_map,
2936 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
2937 vm_map_disable_hole_optimization(zap_new_map);
2938
91447636
A
2939 if (!map_locked) {
2940 vm_map_lock(map);
2941 map_locked = TRUE;
2942 }
2943 (void) vm_map_delete(map, *address, *address+size,
fe8ab488
A
2944 (VM_MAP_REMOVE_SAVE_ENTRIES |
2945 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2946 zap_new_map);
2947 }
2948 if (zap_old_map != VM_MAP_NULL &&
2949 zap_old_map->hdr.nentries != 0) {
2950 vm_map_entry_t entry1, entry2;
2951
2952 /*
2953 * The new mapping failed. Attempt to restore
2954 * the old mappings, saved in the "zap_old_map".
2955 */
2956 if (!map_locked) {
2957 vm_map_lock(map);
2958 map_locked = TRUE;
2959 }
2960
2961 /* first check if the coast is still clear */
2962 start = vm_map_first_entry(zap_old_map)->vme_start;
2963 end = vm_map_last_entry(zap_old_map)->vme_end;
2964 if (vm_map_lookup_entry(map, start, &entry1) ||
2965 vm_map_lookup_entry(map, end, &entry2) ||
2966 entry1 != entry2) {
2967 /*
2968 * Part of that range has already been
2969 * re-mapped: we can't restore the old
2970 * mappings...
2971 */
2972 vm_map_enter_restore_failures++;
2973 } else {
2974 /*
2975 * Transfer the saved map entries from
2976 * "zap_old_map" to the original "map",
2977 * inserting them all after "entry1".
2978 */
2979 for (entry2 = vm_map_first_entry(zap_old_map);
2980 entry2 != vm_map_to_entry(zap_old_map);
2981 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2982 vm_map_size_t entry_size;
2983
2984 entry_size = (entry2->vme_end -
2985 entry2->vme_start);
6d2010ae 2986 vm_map_store_entry_unlink(zap_old_map,
91447636 2987 entry2);
2d21ac55 2988 zap_old_map->size -= entry_size;
6d2010ae 2989 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2990 map->size += entry_size;
91447636
A
2991 entry1 = entry2;
2992 }
2993 if (map->wiring_required) {
2994 /*
2995 * XXX TODO: we should rewire the
2996 * old pages here...
2997 */
2998 }
2999 vm_map_enter_restore_successes++;
3000 }
3001 }
3002 }
3003
fe8ab488
A
3004 /*
3005 * The caller is responsible for releasing the lock if it requested to
3006 * keep the map locked.
3007 */
3008 if (map_locked && !keep_map_locked) {
91447636
A
3009 vm_map_unlock(map);
3010 }
3011
3012 /*
3013 * Get rid of the "zap_maps" and all the map entries that
3014 * they may still contain.
3015 */
3016 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 3017 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3018 zap_old_map = VM_MAP_NULL;
3019 }
3020 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 3021 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3022 zap_new_map = VM_MAP_NULL;
3023 }
3024
3025 return result;
1c79356b
A
3026
3027#undef RETURN
3028}
3029
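/*
 * Illustrative sketch (not from the original file): a minimal anonymous,
 * zero-fill, "anywhere" mapping made through vm_map_enter().  The chosen
 * address comes back in "addr".  VM_MAP_KERNEL_FLAGS_NONE and the
 * VM_KERN_MEMORY_NONE tag are assumptions about the caller, not
 * requirements of the routine; "map" and "size" are hypothetical.
 */
#if 0 /* example only */
	vm_map_offset_t	addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter(map, &addr, size,
			  (vm_map_offset_t) 0,		/* mask */
			  VM_FLAGS_ANYWHERE,
			  VM_MAP_KERNEL_FLAGS_NONE,
			  VM_KERN_MEMORY_NONE,		/* tag */
			  VM_OBJECT_NULL,		/* zero-fill */
			  (vm_object_offset_t) 0,
			  FALSE,			/* needs_copy */
			  VM_PROT_DEFAULT, VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
#endif /* example only */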
5ba3f43e
A
3030#if __arm64__
3031extern const struct memory_object_pager_ops fourk_pager_ops;
3032kern_return_t
3033vm_map_enter_fourk(
3034 vm_map_t map,
3035 vm_map_offset_t *address, /* IN/OUT */
3036 vm_map_size_t size,
2d21ac55
A
3037 vm_map_offset_t mask,
3038 int flags,
5ba3f43e
A
3039 vm_map_kernel_flags_t vmk_flags,
3040 vm_tag_t alias,
3041 vm_object_t object,
2d21ac55 3042 vm_object_offset_t offset,
5ba3f43e 3043 boolean_t needs_copy,
2d21ac55
A
3044 vm_prot_t cur_protection,
3045 vm_prot_t max_protection,
5ba3f43e 3046 vm_inherit_t inheritance)
91447636 3047{
5ba3f43e
A
3048 vm_map_entry_t entry, new_entry;
3049 vm_map_offset_t start, fourk_start;
3050 vm_map_offset_t end, fourk_end;
3051 vm_map_size_t fourk_size;
3052 kern_return_t result = KERN_SUCCESS;
3053 vm_map_t zap_old_map = VM_MAP_NULL;
3054 vm_map_t zap_new_map = VM_MAP_NULL;
3055 boolean_t map_locked = FALSE;
3056 boolean_t pmap_empty = TRUE;
3057 boolean_t new_mapping_established = FALSE;
3058 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3059 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3060 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3061 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3062 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3063 boolean_t is_submap = vmk_flags.vmkf_submap;
3064 boolean_t permanent = vmk_flags.vmkf_permanent;
3065 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3066// boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3067 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3068 vm_map_offset_t effective_min_offset, effective_max_offset;
3069 kern_return_t kr;
3070 boolean_t clear_map_aligned = FALSE;
3071 memory_object_t fourk_mem_obj;
3072 vm_object_t fourk_object;
3073 vm_map_offset_t fourk_pager_offset;
3074 int fourk_pager_index_start, fourk_pager_index_num;
3075 int cur_idx;
3076 boolean_t fourk_copy;
3077 vm_object_t copy_object;
3078 vm_object_offset_t copy_offset;
3079
3080 fourk_mem_obj = MEMORY_OBJECT_NULL;
3081 fourk_object = VM_OBJECT_NULL;
6d2010ae 3082
5ba3f43e
A
3083 if (superpage_size) {
3084 return KERN_NOT_SUPPORTED;
3085 }
91447636 3086
5ba3f43e
A
3087#if CONFIG_EMBEDDED
3088 if (cur_protection & VM_PROT_WRITE) {
3089 if ((cur_protection & VM_PROT_EXECUTE) &&
3090 !entry_for_jit) {
3091 printf("EMBEDDED: %s: curprot cannot be write+execute. "
3092 "turning off execute\n",
3093 __FUNCTION__);
3094 cur_protection &= ~VM_PROT_EXECUTE;
3095 }
3096 }
3097#endif /* CONFIG_EMBEDDED */
3098
3099 if (is_submap) {
3100 return KERN_NOT_SUPPORTED;
3101 }
3102 if (vmk_flags.vmkf_already) {
3103 return KERN_NOT_SUPPORTED;
3104 }
3105 if (purgable || entry_for_jit) {
3106 return KERN_NOT_SUPPORTED;
3107 }
3108
3109 effective_min_offset = map->min_offset;
3110
3111 if (vmk_flags.vmkf_beyond_max) {
3112 return KERN_NOT_SUPPORTED;
3113 } else {
3114 effective_max_offset = map->max_offset;
3115 }
3116
3117 if (size == 0 ||
3118 (offset & FOURK_PAGE_MASK) != 0) {
3119 *address = 0;
2d21ac55 3120 return KERN_INVALID_ARGUMENT;
3e170ce0 3121 }
5ba3f43e
A
3122
3123#define RETURN(value) { result = value; goto BailOut; }
3124
3125 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3126 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3127
3128 if (!anywhere && overwrite) {
3129 return KERN_NOT_SUPPORTED;
3130 }
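	/*
	 * Note: given the early return just above, the "overwrite"
	 * setup block below is unreachable in this 4K variant.
	 */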
3131 if (!anywhere && overwrite) {
3132 /*
3133 * Create a temporary VM map to hold the old mappings in the
3134 * affected area while we create the new one.
3135 * This avoids releasing the VM map lock in
3136 * vm_map_entry_delete() and allows atomicity
3137 * when we want to replace some mappings with a new one.
3138 * It also allows us to restore the old VM mappings if the
3139 * new mapping fails.
3140 */
3141 zap_old_map = vm_map_create(PMAP_NULL,
3142 *address,
3143 *address + size,
3144 map->hdr.entries_pageable);
3145 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3146 vm_map_disable_hole_optimization(zap_old_map);
3e170ce0 3147 }
593a1d5f 3148
5ba3f43e
A
3149 fourk_start = *address;
3150 fourk_size = size;
3151 fourk_end = fourk_start + fourk_size;
2d21ac55 3152
5ba3f43e
A
3153 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3154 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3155 size = end - start;
39236c6e 3156
5ba3f43e
A
3157 if (anywhere) {
3158 return KERN_NOT_SUPPORTED;
3159 } else {
3160 /*
3161 * Verify that:
3162 * the address doesn't itself violate
3163 * the mask requirement.
3164 */
3165
3166 vm_map_lock(map);
3167 map_locked = TRUE;
3168 if ((start & mask) != 0) {
3169 RETURN(KERN_NO_SPACE);
6d2010ae 3170 }
5ba3f43e
A
3171
3172 /*
3173 * ... the address is within bounds
3174 */
3175
3176 end = start + size;
3177
3178 if ((start < effective_min_offset) ||
3179 (end > effective_max_offset) ||
3180 (start >= end)) {
3181 RETURN(KERN_INVALID_ADDRESS);
22ba694c 3182 }
5ba3f43e
A
3183
3184 if (overwrite && zap_old_map != VM_MAP_NULL) {
3185 /*
3186 * Fixed mapping and "overwrite" flag: attempt to
3187 * remove all existing mappings in the specified
3188 * address range, saving them in our "zap_old_map".
3189 */
3190 (void) vm_map_delete(map, start, end,
3191 (VM_MAP_REMOVE_SAVE_ENTRIES |
3192 VM_MAP_REMOVE_NO_MAP_ALIGN),
3193 zap_old_map);
3e170ce0 3194 }
2d21ac55 3195
5ba3f43e
A
3196 /*
3197 * ... the starting address isn't allocated
3198 */
3199 if (vm_map_lookup_entry(map, start, &entry)) {
3200 vm_object_t cur_object, shadow_object;
3201
3202 /*
3203 * We might already have some 4K mappings
3204 * in a 16K page here.
3205 */
3206
3207 if (entry->vme_end - entry->vme_start
3208 != SIXTEENK_PAGE_SIZE) {
3209 RETURN(KERN_NO_SPACE);
3210 }
3211 if (entry->is_sub_map) {
3212 RETURN(KERN_NO_SPACE);
3213 }
3214 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3215 RETURN(KERN_NO_SPACE);
3216 }
3217
3218 /* go all the way down the shadow chain */
3219 cur_object = VME_OBJECT(entry);
3220 vm_object_lock(cur_object);
3221 while (cur_object->shadow != VM_OBJECT_NULL) {
3222 shadow_object = cur_object->shadow;
3223 vm_object_lock(shadow_object);
3224 vm_object_unlock(cur_object);
3225 cur_object = shadow_object;
3226 shadow_object = VM_OBJECT_NULL;
3227 }
3228 if (cur_object->internal ||
3229 cur_object->pager == NULL) {
3230 vm_object_unlock(cur_object);
3231 RETURN(KERN_NO_SPACE);
3232 }
3233 if (cur_object->pager->mo_pager_ops
3234 != &fourk_pager_ops) {
3235 vm_object_unlock(cur_object);
3236 RETURN(KERN_NO_SPACE);
3237 }
3238 fourk_object = cur_object;
3239 fourk_mem_obj = fourk_object->pager;
3240
3241 /* keep the "4K" object alive */
3242 vm_object_reference_locked(fourk_object);
3243 vm_object_unlock(fourk_object);
3244
3245 /* merge permissions */
3246 entry->protection |= cur_protection;
3247 entry->max_protection |= max_protection;
3248 if ((entry->protection & (VM_PROT_WRITE |
3249 VM_PROT_EXECUTE)) ==
3250 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3251 fourk_binary_compatibility_unsafe &&
3252 fourk_binary_compatibility_allow_wx) {
3253 /* write+execute: need to be "jit" */
3254 entry->used_for_jit = TRUE;
3255 }
3256
3257 goto map_in_fourk_pager;
3258 }
3259
3260 /*
3261 * ... the next region doesn't overlap the
3262 * end point.
3263 */
3264
3265 if ((entry->vme_next != vm_map_to_entry(map)) &&
3266 (entry->vme_next->vme_start < end)) {
3267 RETURN(KERN_NO_SPACE);
3268 }
3269 }
3270
3271 /*
3272 * At this point,
3273 * "start" and "end" should define the endpoints of the
3274 * available new range, and
3275 * "entry" should refer to the region before the new
3276 * range, and
3277 *
3278 * the map should be locked.
3279 */
3280
3281 /* create a new "4K" pager */
3282 fourk_mem_obj = fourk_pager_create();
3283 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3284 assert(fourk_object);
3285
3286 /* keep the "4" object alive */
3287 vm_object_reference(fourk_object);
3288
3289 /* create a "copy" object, to map the "4K" object copy-on-write */
3290 fourk_copy = TRUE;
3291 result = vm_object_copy_strategically(fourk_object,
3292 0,
3293 end - start,
3294 &copy_object,
3295 &copy_offset,
3296 &fourk_copy);
3297 assert(result == KERN_SUCCESS);
3298 assert(copy_object != VM_OBJECT_NULL);
3299 assert(copy_offset == 0);
3300
3301 /* take a reference on the copy object, for this mapping */
3302 vm_object_reference(copy_object);
3303
3304 /* map the "4K" pager's copy object */
3305 new_entry =
3306 vm_map_entry_insert(map, entry,
3307 vm_map_trunc_page(start,
3308 VM_MAP_PAGE_MASK(map)),
3309 vm_map_round_page(end,
3310 VM_MAP_PAGE_MASK(map)),
3311 copy_object,
3312 0, /* offset */
3313 FALSE, /* needs_copy */
3314 FALSE, FALSE,
3315 cur_protection, max_protection,
3316 VM_BEHAVIOR_DEFAULT,
3317 ((entry_for_jit)
3318 ? VM_INHERIT_NONE
3319 : inheritance),
3320 0,
3321 no_cache,
3322 permanent,
3323 superpage_size,
3324 clear_map_aligned,
3325 is_submap,
3326 FALSE, /* jit */
3327 alias);
3328 entry = new_entry;
3329
3330#if VM_MAP_DEBUG_FOURK
3331 if (vm_map_debug_fourk) {
3332 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3333 map,
3334 (uint64_t) entry->vme_start,
3335 (uint64_t) entry->vme_end,
3336 fourk_mem_obj);
3337 }
3338#endif /* VM_MAP_DEBUG_FOURK */
3339
3340 new_mapping_established = TRUE;
3341
3342map_in_fourk_pager:
3343 /* "map" the original "object" where it belongs in the "4K" pager */
3344 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3345 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3346 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3347 fourk_pager_index_num = 4;
3348 } else {
3349 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3350 }
3351 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3352 fourk_pager_index_num = 4 - fourk_pager_index_start;
3353 }
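	/*
	 * Worked example (illustrative, not from the original file),
	 * assuming FOURK_PAGE_SIZE == 0x1000 and SIXTEENK_PAGE_MASK ==
	 * 0x3fff: for fourk_start = 0x6000 and fourk_size = 0x3000,
	 * fourk_pager_offset = 0x2000, fourk_pager_index_start = 2 and
	 * fourk_pager_index_num = 3, which the clamp above reduces to 2
	 * so that only subpages 2 and 3 of this 16K page are populated.
	 */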
3354 for (cur_idx = 0;
3355 cur_idx < fourk_pager_index_num;
3356 cur_idx++) {
3357 vm_object_t old_object;
3358 vm_object_offset_t old_offset;
3359
3360 kr = fourk_pager_populate(fourk_mem_obj,
3361 TRUE, /* overwrite */
3362 fourk_pager_index_start + cur_idx,
3363 object,
3364 (object
3365 ? (offset +
3366 (cur_idx * FOURK_PAGE_SIZE))
3367 : 0),
3368 &old_object,
3369 &old_offset);
3370#if VM_MAP_DEBUG_FOURK
3371 if (vm_map_debug_fourk) {
3372 if (old_object == (vm_object_t) -1 &&
3373 old_offset == (vm_object_offset_t) -1) {
3374 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3375 "pager [%p:0x%llx] "
3376 "populate[%d] "
3377 "[object:%p,offset:0x%llx]\n",
3378 map,
3379 (uint64_t) entry->vme_start,
3380 (uint64_t) entry->vme_end,
3381 fourk_mem_obj,
3382 VME_OFFSET(entry),
3383 fourk_pager_index_start + cur_idx,
3384 object,
3385 (object
3386 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3387 : 0));
3388 } else {
3389 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3390 "pager [%p:0x%llx] "
3391 "populate[%d] [object:%p,offset:0x%llx] "
3392 "old [%p:0x%llx]\n",
3393 map,
3394 (uint64_t) entry->vme_start,
3395 (uint64_t) entry->vme_end,
3396 fourk_mem_obj,
3397 VME_OFFSET(entry),
3398 fourk_pager_index_start + cur_idx,
3399 object,
3400 (object
3401 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3402 : 0),
3403 old_object,
3404 old_offset);
3405 }
3406 }
3407#endif /* VM_MAP_DEBUG_FOURK */
3408
3409 assert(kr == KERN_SUCCESS);
3410 if (object != old_object &&
3411 object != VM_OBJECT_NULL &&
3412 object != (vm_object_t) -1) {
3413 vm_object_reference(object);
3414 }
3415 if (object != old_object &&
3416 old_object != VM_OBJECT_NULL &&
3417 old_object != (vm_object_t) -1) {
3418 vm_object_deallocate(old_object);
3419 }
3420 }
3421
3422BailOut:
3423 assert(map_locked == TRUE);
3424
3425 if (fourk_object != VM_OBJECT_NULL) {
3426 vm_object_deallocate(fourk_object);
3427 fourk_object = VM_OBJECT_NULL;
3428 fourk_mem_obj = MEMORY_OBJECT_NULL;
3429 }
3430
3431 if (result == KERN_SUCCESS) {
3432 vm_prot_t pager_prot;
3433 memory_object_t pager;
3434
3435#if DEBUG
3436 if (pmap_empty &&
3437 !(vmk_flags.vmkf_no_pmap_check)) {
3438 assert(vm_map_pmap_is_empty(map,
3439 *address,
3440 *address+size));
3441 }
3442#endif /* DEBUG */
3443
3444 /*
3445 * For "named" VM objects, let the pager know that the
3446 * memory object is being mapped. Some pagers need to keep
3447 * track of this, to know when they can reclaim the memory
3448 * object, for example.
3449 * VM calls memory_object_map() for each mapping (specifying
3450 * the protection of each mapping) and calls
3451 * memory_object_last_unmap() when all the mappings are gone.
3452 */
3453 pager_prot = max_protection;
3454 if (needs_copy) {
3455 /*
3456 * Copy-On-Write mapping: won't modify
3457 * the memory object.
3458 */
3459 pager_prot &= ~VM_PROT_WRITE;
3460 }
3461 if (!is_submap &&
3462 object != VM_OBJECT_NULL &&
3463 object->named &&
3464 object->pager != MEMORY_OBJECT_NULL) {
3465 vm_object_lock(object);
3466 pager = object->pager;
3467 if (object->named &&
3468 pager != MEMORY_OBJECT_NULL) {
3469 assert(object->pager_ready);
3470 vm_object_mapping_wait(object, THREAD_UNINT);
3471 vm_object_mapping_begin(object);
3472 vm_object_unlock(object);
3473
3474 kr = memory_object_map(pager, pager_prot);
3475 assert(kr == KERN_SUCCESS);
3476
3477 vm_object_lock(object);
3478 vm_object_mapping_end(object);
3479 }
3480 vm_object_unlock(object);
3481 }
3482 if (!is_submap &&
3483 fourk_object != VM_OBJECT_NULL &&
3484 fourk_object->named &&
3485 fourk_object->pager != MEMORY_OBJECT_NULL) {
3486 vm_object_lock(fourk_object);
3487 pager = fourk_object->pager;
3488 if (fourk_object->named &&
3489 pager != MEMORY_OBJECT_NULL) {
3490 assert(fourk_object->pager_ready);
3491 vm_object_mapping_wait(fourk_object,
3492 THREAD_UNINT);
3493 vm_object_mapping_begin(fourk_object);
3494 vm_object_unlock(fourk_object);
3495
3496 kr = memory_object_map(pager, VM_PROT_READ);
3497 assert(kr == KERN_SUCCESS);
3498
3499 vm_object_lock(fourk_object);
3500 vm_object_mapping_end(fourk_object);
3501 }
3502 vm_object_unlock(fourk_object);
3503 }
3504 }
3505
3506 assert(map_locked == TRUE);
3507
3508 if (!keep_map_locked) {
3509 vm_map_unlock(map);
3510 map_locked = FALSE;
3511 }
3512
3513 /*
3514 * We can't hold the map lock if we enter this block.
3515 */
3516
3517 if (result == KERN_SUCCESS) {
3518
3519 /* Wire down the new entry if the user
3520 * requested all new map entries be wired.
3521 */
3522 if ((map->wiring_required)||(superpage_size)) {
3523 assert(!keep_map_locked);
3524 pmap_empty = FALSE; /* pmap won't be empty */
3525 kr = vm_map_wire_kernel(map, start, end,
3526 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3527 TRUE);
3528 result = kr;
3529 }
3530
3531 }
3532
3533 if (result != KERN_SUCCESS) {
3534 if (new_mapping_established) {
3535 /*
3536 * We have to get rid of the new mappings since we
3537 * won't make them available to the user.
3538 * Try and do that atomically, to minimize the risk
 3539			 * that someone else creates new mappings in that range.
3540 */
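			/*
			 * The entries covering [*address, *address + size) are moved
			 * into the throw-away "zap_new_map" via vm_map_delete() with
			 * VM_MAP_REMOVE_SAVE_ENTRIES while the map lock is held; the
			 * zap map (and the entries it holds) is destroyed with
			 * vm_map_destroy() near the end of this function.
			 */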
3541 zap_new_map = vm_map_create(PMAP_NULL,
3542 *address,
3543 *address + size,
3544 map->hdr.entries_pageable);
3545 vm_map_set_page_shift(zap_new_map,
3546 VM_MAP_PAGE_SHIFT(map));
3547 vm_map_disable_hole_optimization(zap_new_map);
3548
3549 if (!map_locked) {
3550 vm_map_lock(map);
3551 map_locked = TRUE;
3552 }
3553 (void) vm_map_delete(map, *address, *address+size,
3554 (VM_MAP_REMOVE_SAVE_ENTRIES |
3555 VM_MAP_REMOVE_NO_MAP_ALIGN),
3556 zap_new_map);
3557 }
3558 if (zap_old_map != VM_MAP_NULL &&
3559 zap_old_map->hdr.nentries != 0) {
3560 vm_map_entry_t entry1, entry2;
3561
3562 /*
3563 * The new mapping failed. Attempt to restore
3564 * the old mappings, saved in the "zap_old_map".
3565 */
3566 if (!map_locked) {
3567 vm_map_lock(map);
3568 map_locked = TRUE;
3569 }
3570
3571 /* first check if the coast is still clear */
3572 start = vm_map_first_entry(zap_old_map)->vme_start;
3573 end = vm_map_last_entry(zap_old_map)->vme_end;
3574 if (vm_map_lookup_entry(map, start, &entry1) ||
3575 vm_map_lookup_entry(map, end, &entry2) ||
3576 entry1 != entry2) {
3577 /*
3578 * Part of that range has already been
3579 * re-mapped: we can't restore the old
3580 * mappings...
3581 */
3582 vm_map_enter_restore_failures++;
3583 } else {
3584 /*
3585 * Transfer the saved map entries from
3586 * "zap_old_map" to the original "map",
3587 * inserting them all after "entry1".
3588 */
3589 for (entry2 = vm_map_first_entry(zap_old_map);
3590 entry2 != vm_map_to_entry(zap_old_map);
3591 entry2 = vm_map_first_entry(zap_old_map)) {
3592 vm_map_size_t entry_size;
3593
3594 entry_size = (entry2->vme_end -
3595 entry2->vme_start);
3596 vm_map_store_entry_unlink(zap_old_map,
3597 entry2);
3598 zap_old_map->size -= entry_size;
3599 vm_map_store_entry_link(map, entry1, entry2);
3600 map->size += entry_size;
3601 entry1 = entry2;
3602 }
3603 if (map->wiring_required) {
3604 /*
3605 * XXX TODO: we should rewire the
3606 * old pages here...
3607 */
3608 }
3609 vm_map_enter_restore_successes++;
3610 }
3611 }
3612 }
3613
3614 /*
3615 * The caller is responsible for releasing the lock if it requested to
3616 * keep the map locked.
3617 */
3618 if (map_locked && !keep_map_locked) {
3619 vm_map_unlock(map);
3620 }
3621
3622 /*
3623 * Get rid of the "zap_maps" and all the map entries that
3624 * they may still contain.
3625 */
3626 if (zap_old_map != VM_MAP_NULL) {
3627 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3628 zap_old_map = VM_MAP_NULL;
3629 }
3630 if (zap_new_map != VM_MAP_NULL) {
3631 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3632 zap_new_map = VM_MAP_NULL;
3633 }
3634
3635 return result;
3636
3637#undef RETURN
3638}
3639#endif /* __arm64__ */
3640
3641/*
3642 * Counters for the prefault optimization.
3643 */
3644int64_t vm_prefault_nb_pages = 0;
3645int64_t vm_prefault_nb_bailout = 0;
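/*
 * vm_prefault_nb_pages counts pages successfully entered by the prefault
 * loop in vm_map_enter_mem_object_helper(); vm_prefault_nb_bailout counts
 * how many times that loop gave up early because pmap_enter_options()
 * did not succeed.
 */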
3646
3647static kern_return_t
3648vm_map_enter_mem_object_helper(
3649 vm_map_t target_map,
3650 vm_map_offset_t *address,
3651 vm_map_size_t initial_size,
3652 vm_map_offset_t mask,
3653 int flags,
3654 vm_map_kernel_flags_t vmk_flags,
3655 vm_tag_t tag,
3656 ipc_port_t port,
3657 vm_object_offset_t offset,
3658 boolean_t copy,
3659 vm_prot_t cur_protection,
3660 vm_prot_t max_protection,
3661 vm_inherit_t inheritance,
3662 upl_page_list_ptr_t page_list,
3663 unsigned int page_list_count)
3664{
3665 vm_map_address_t map_addr;
3666 vm_map_size_t map_size;
3667 vm_object_t object;
3668 vm_object_size_t size;
3669 kern_return_t result;
3670 boolean_t mask_cur_protection, mask_max_protection;
3671 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3672 vm_map_offset_t offset_in_mapping = 0;
3673#if __arm64__
3674 boolean_t fourk = vmk_flags.vmkf_fourk;
3675#endif /* __arm64__ */
3676
3677 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3678
3679 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3680 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3681 cur_protection &= ~VM_PROT_IS_MASK;
3682 max_protection &= ~VM_PROT_IS_MASK;
3683
3684 /*
3685 * Check arguments for validity
3686 */
3687 if ((target_map == VM_MAP_NULL) ||
3688 (cur_protection & ~VM_PROT_ALL) ||
3689 (max_protection & ~VM_PROT_ALL) ||
3690 (inheritance > VM_INHERIT_LAST_VALID) ||
3691 (try_prefault && (copy || !page_list)) ||
3692 initial_size == 0) {
3693 return KERN_INVALID_ARGUMENT;
3694 }
3695
3696#if __arm64__
3697 if (fourk) {
3698 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3699 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3700 } else
3701#endif /* __arm64__ */
3702 {
3703 map_addr = vm_map_trunc_page(*address,
3704 VM_MAP_PAGE_MASK(target_map));
3705 map_size = vm_map_round_page(initial_size,
3706 VM_MAP_PAGE_MASK(target_map));
3707 }
3708 size = vm_object_round_page(initial_size);
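	/*
	 * Example with hypothetical numbers: for a map with 16KB pages and
	 * initial_size = 0x4800, map_size rounds up to 0x8000 (the map's
	 * page granularity) while "size" is rounded to the VM object page
	 * size; with "fourk" set, the same request rounds map_size up to
	 * 0x5000, i.e. 4KB granularity.
	 */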
3709
3710 /*
3711 * Find the vm object (if any) corresponding to this port.
3712 */
3713 if (!IP_VALID(port)) {
3714 object = VM_OBJECT_NULL;
3715 offset = 0;
3716 copy = FALSE;
3717 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3718 vm_named_entry_t named_entry;
3719
3720 named_entry = (vm_named_entry_t) port->ip_kobject;
3721
3722 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3723 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3724 offset += named_entry->data_offset;
3725 }
3726
3727 /* a few checks to make sure user is obeying rules */
3728 if (size == 0) {
3729 if (offset >= named_entry->size)
3730 return KERN_INVALID_RIGHT;
3731 size = named_entry->size - offset;
3732 }
3733 if (mask_max_protection) {
3734 max_protection &= named_entry->protection;
3735 }
3736 if (mask_cur_protection) {
3737 cur_protection &= named_entry->protection;
3738 }
3739 if ((named_entry->protection & max_protection) !=
3740 max_protection)
3741 return KERN_INVALID_RIGHT;
3742 if ((named_entry->protection & cur_protection) !=
3743 cur_protection)
3744 return KERN_INVALID_RIGHT;
3745 if (offset + size < offset) {
3746 /* overflow */
3747 return KERN_INVALID_ARGUMENT;
3748 }
3749 if (named_entry->size < (offset + initial_size)) {
3750 return KERN_INVALID_ARGUMENT;
3751 }
3752
3753 if (named_entry->is_copy) {
3754 /* for a vm_map_copy, we can only map it whole */
3755 if ((size != named_entry->size) &&
3756 (vm_map_round_page(size,
3757 VM_MAP_PAGE_MASK(target_map)) ==
3758 named_entry->size)) {
3759 /* XXX FBDP use the rounded size... */
39236c6e
A
3760 size = vm_map_round_page(
3761 size,
3762 VM_MAP_PAGE_MASK(target_map));
3763 }
5ba3f43e 3764
fe8ab488
A
3765 if (!(flags & VM_FLAGS_ANYWHERE) &&
3766 (offset != 0 ||
3767 size != named_entry->size)) {
3768 /*
3769 * XXX for a mapping at a "fixed" address,
3770 * we can't trim after mapping the whole
3771 * memory entry, so reject a request for a
3772 * partial mapping.
3773 */
39236c6e
A
3774 return KERN_INVALID_ARGUMENT;
3775 }
3776 }
3777
2d21ac55
A
 3778		/* the caller's "offset" parameter is relative to the start of */
 3779		/* the named entry; convert it to an offset within the backing object */
3780 offset = offset + named_entry->offset;
5ba3f43e 3781
39236c6e
A
3782 if (! VM_MAP_PAGE_ALIGNED(size,
3783 VM_MAP_PAGE_MASK(target_map))) {
3784 /*
3785 * Let's not map more than requested;
3786 * vm_map_enter() will handle this "not map-aligned"
3787 * case.
3788 */
3789 map_size = size;
3790 }
3791
2d21ac55
A
3792 named_entry_lock(named_entry);
3793 if (named_entry->is_sub_map) {
3794 vm_map_t submap;
3795
3e170ce0
A
3796 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3797 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3798 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3799 }
3800
2d21ac55
A
3801 submap = named_entry->backing.map;
3802 vm_map_lock(submap);
3803 vm_map_reference(submap);
3804 vm_map_unlock(submap);
3805 named_entry_unlock(named_entry);
3806
5ba3f43e
A
3807 vmk_flags.vmkf_submap = TRUE;
3808
2d21ac55
A
3809 result = vm_map_enter(target_map,
3810 &map_addr,
3811 map_size,
3812 mask,
5ba3f43e
A
3813 flags,
3814 vmk_flags,
3815 tag,
2d21ac55
A
3816 (vm_object_t) submap,
3817 offset,
3818 copy,
3819 cur_protection,
3820 max_protection,
3821 inheritance);
3822 if (result != KERN_SUCCESS) {
3823 vm_map_deallocate(submap);
3824 } else {
3825 /*
3826 * No need to lock "submap" just to check its
3827 * "mapped" flag: that flag is never reset
3828 * once it's been set and if we race, we'll
3829 * just end up setting it twice, which is OK.
3830 */
316670eb
A
3831 if (submap->mapped_in_other_pmaps == FALSE &&
3832 vm_map_pmap(submap) != PMAP_NULL &&
3833 vm_map_pmap(submap) !=
3834 vm_map_pmap(target_map)) {
2d21ac55 3835 /*
316670eb
A
3836 * This submap is being mapped in a map
3837 * that uses a different pmap.
3838 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 3839 * to indicate that we now need to
316670eb
A
3840 * remove mappings from all pmaps rather
3841 * than just the submap's pmap.
2d21ac55
A
3842 */
3843 vm_map_lock(submap);
316670eb 3844 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
3845 vm_map_unlock(submap);
3846 }
3847 *address = map_addr;
3848 }
3849 return result;
3850
39236c6e
A
3851 } else if (named_entry->is_copy) {
3852 kern_return_t kr;
3853 vm_map_copy_t copy_map;
3854 vm_map_entry_t copy_entry;
3855 vm_map_offset_t copy_addr;
3856
3857 if (flags & ~(VM_FLAGS_FIXED |
3858 VM_FLAGS_ANYWHERE |
3859 VM_FLAGS_OVERWRITE |
3e170ce0 3860 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3861 VM_FLAGS_RETURN_DATA_ADDR |
3862 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
3863 named_entry_unlock(named_entry);
3864 return KERN_INVALID_ARGUMENT;
3865 }
3866
3e170ce0
A
3867 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3868 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3869 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3870 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3871 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3872 offset = vm_object_trunc_page(offset);
3873 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3874 }
3875
3876 copy_map = named_entry->backing.copy;
3877 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3878 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3879 /* unsupported type; should not happen */
3880 printf("vm_map_enter_mem_object: "
3881 "memory_entry->backing.copy "
3882 "unsupported type 0x%x\n",
3883 copy_map->type);
3884 named_entry_unlock(named_entry);
3885 return KERN_INVALID_ARGUMENT;
3886 }
3887
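			/*
			 * Overall approach for a vm_map_copy-backed memory entry:
			 * 1. reserve the memory entry's full range with a
			 *    VM_OBJECT_NULL placeholder mapping,
			 * 2. over-map each entry of the vm_map_copy on top of that
			 *    reservation with VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
			 * 3. trim off the portions before "offset" and beyond
			 *    "offset + map_size" that the caller did not ask for.
			 */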
3888 /* reserve a contiguous range */
3889 kr = vm_map_enter(target_map,
3890 &map_addr,
fe8ab488
A
3891 /* map whole mem entry, trim later: */
3892 named_entry->size,
39236c6e
A
3893 mask,
3894 flags & (VM_FLAGS_ANYWHERE |
3895 VM_FLAGS_OVERWRITE |
3e170ce0 3896 VM_FLAGS_RETURN_4K_DATA_ADDR |
5ba3f43e
A
3897 VM_FLAGS_RETURN_DATA_ADDR),
3898 vmk_flags,
3899 tag,
39236c6e
A
3900 VM_OBJECT_NULL,
3901 0,
3902 FALSE, /* copy */
3903 cur_protection,
3904 max_protection,
3905 inheritance);
3906 if (kr != KERN_SUCCESS) {
3907 named_entry_unlock(named_entry);
3908 return kr;
3909 }
3910
3911 copy_addr = map_addr;
3912
3913 for (copy_entry = vm_map_copy_first_entry(copy_map);
3914 copy_entry != vm_map_copy_to_entry(copy_map);
3915 copy_entry = copy_entry->vme_next) {
5ba3f43e
A
3916 int remap_flags;
3917 vm_map_kernel_flags_t vmk_remap_flags;
39236c6e
A
3918 vm_map_t copy_submap;
3919 vm_object_t copy_object;
3920 vm_map_size_t copy_size;
3921 vm_object_offset_t copy_offset;
39037602 3922 int copy_vm_alias;
39236c6e 3923
5ba3f43e
A
3924 remap_flags = 0;
3925 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
3926
813fb2f6 3927 copy_object = VME_OBJECT(copy_entry);
3e170ce0 3928 copy_offset = VME_OFFSET(copy_entry);
39236c6e
A
3929 copy_size = (copy_entry->vme_end -
3930 copy_entry->vme_start);
39037602
A
3931 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3932 if (copy_vm_alias == 0) {
3933 /*
3934 * Caller does not want a specific
3935 * alias for this new mapping: use
3936 * the alias of the original mapping.
3937 */
3938 copy_vm_alias = VME_ALIAS(copy_entry);
3939 }
39236c6e
A
3940
3941 /* sanity check */
fe8ab488
A
3942 if ((copy_addr + copy_size) >
3943 (map_addr +
3944 named_entry->size /* XXX full size */ )) {
39236c6e
A
3945 /* over-mapping too much !? */
3946 kr = KERN_INVALID_ARGUMENT;
3947 /* abort */
3948 break;
3949 }
3950
3951 /* take a reference on the object */
3952 if (copy_entry->is_sub_map) {
5ba3f43e 3953 vmk_remap_flags.vmkf_submap = TRUE;
3e170ce0 3954 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
3955 vm_map_lock(copy_submap);
3956 vm_map_reference(copy_submap);
3957 vm_map_unlock(copy_submap);
3958 copy_object = (vm_object_t) copy_submap;
813fb2f6
A
3959 } else if (!copy &&
3960 copy_object != VM_OBJECT_NULL &&
3961 (copy_entry->needs_copy ||
3962 copy_object->shadowed ||
3963 (!copy_object->true_share &&
3964 !copy_entry->is_shared &&
3965 copy_object->vo_size > copy_size))) {
3966 /*
3967 * We need to resolve our side of this
3968 * "symmetric" copy-on-write now; we
3969 * need a new object to map and share,
3970 * instead of the current one which
3971 * might still be shared with the
3972 * original mapping.
3973 *
3974 * Note: A "vm_map_copy_t" does not
3975 * have a lock but we're protected by
3976 * the named entry's lock here.
3977 */
3978 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3979 VME_OBJECT_SHADOW(copy_entry, copy_size);
3980 if (!copy_entry->needs_copy &&
3981 copy_entry->protection & VM_PROT_WRITE) {
3982 vm_prot_t prot;
3983
3984 prot = copy_entry->protection & ~VM_PROT_WRITE;
3985 vm_object_pmap_protect(copy_object,
3986 copy_offset,
3987 copy_size,
3988 PMAP_NULL,
3989 0,
3990 prot);
3991 }
3992
3993 copy_entry->needs_copy = FALSE;
3994 copy_entry->is_shared = TRUE;
3995 copy_object = VME_OBJECT(copy_entry);
3996 copy_offset = VME_OFFSET(copy_entry);
3997 vm_object_lock(copy_object);
3998 vm_object_reference_locked(copy_object);
3999 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4000 /* we're about to make a shared mapping of this object */
4001 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4002 copy_object->true_share = TRUE;
4003 }
4004 vm_object_unlock(copy_object);
39236c6e 4005 } else {
813fb2f6
A
4006 /*
4007 * We already have the right object
4008 * to map.
4009 */
3e170ce0 4010 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
4011 vm_object_reference(copy_object);
4012 }
4013
4014 /* over-map the object into destination */
4015 remap_flags |= flags;
4016 remap_flags |= VM_FLAGS_FIXED;
4017 remap_flags |= VM_FLAGS_OVERWRITE;
4018 remap_flags &= ~VM_FLAGS_ANYWHERE;
813fb2f6
A
4019 if (!copy && !copy_entry->is_sub_map) {
4020 /*
4021 * copy-on-write should have been
4022 * resolved at this point, or we would
4023 * end up sharing instead of copying.
4024 */
4025 assert(!copy_entry->needs_copy);
4026 }
39236c6e
A
4027 kr = vm_map_enter(target_map,
4028 &copy_addr,
4029 copy_size,
4030 (vm_map_offset_t) 0,
4031 remap_flags,
5ba3f43e
A
4032 vmk_remap_flags,
4033 copy_vm_alias,
39236c6e
A
4034 copy_object,
4035 copy_offset,
4036 copy,
4037 cur_protection,
4038 max_protection,
4039 inheritance);
4040 if (kr != KERN_SUCCESS) {
4041 if (copy_entry->is_sub_map) {
4042 vm_map_deallocate(copy_submap);
4043 } else {
4044 vm_object_deallocate(copy_object);
4045 }
4046 /* abort */
4047 break;
4048 }
4049
4050 /* next mapping */
4051 copy_addr += copy_size;
4052 }
5ba3f43e 4053
39236c6e 4054 if (kr == KERN_SUCCESS) {
3e170ce0
A
4055 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4056 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4057 *address = map_addr + offset_in_mapping;
4058 } else {
4059 *address = map_addr;
4060 }
fe8ab488
A
4061
4062 if (offset) {
4063 /*
4064 * Trim in front, from 0 to "offset".
4065 */
4066 vm_map_remove(target_map,
4067 map_addr,
4068 map_addr + offset,
4069 0);
4070 *address += offset;
4071 }
4072 if (offset + map_size < named_entry->size) {
4073 /*
4074 * Trim in back, from
4075 * "offset + map_size" to
4076 * "named_entry->size".
4077 */
4078 vm_map_remove(target_map,
4079 (map_addr +
4080 offset + map_size),
4081 (map_addr +
4082 named_entry->size),
4083 0);
4084 }
39236c6e
A
4085 }
4086 named_entry_unlock(named_entry);
4087
4088 if (kr != KERN_SUCCESS) {
4089 if (! (flags & VM_FLAGS_OVERWRITE)) {
4090 /* deallocate the contiguous range */
4091 (void) vm_deallocate(target_map,
4092 map_addr,
4093 map_size);
4094 }
4095 }
4096
4097 return kr;
5ba3f43e 4098
2d21ac55 4099 } else {
5ba3f43e
A
4100 unsigned int access;
4101 vm_prot_t protections;
4102 unsigned int wimg_mode;
4103
4104 /* we are mapping a VM object */
4105
4106 protections = named_entry->protection & VM_PROT_ALL;
4107 access = GET_MAP_MEM(named_entry->protection);
4108
3e170ce0
A
4109 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4110 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 4111 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
4112 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4113 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
4114 offset = vm_object_trunc_page(offset);
4115 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
5ba3f43e 4116 }
39236c6e 4117
2d21ac55
A
4118 object = named_entry->backing.object;
4119 assert(object != VM_OBJECT_NULL);
5ba3f43e 4120 vm_object_lock(object);
2d21ac55 4121 named_entry_unlock(named_entry);
5ba3f43e
A
4122
4123 vm_object_reference_locked(object);
4124
4125 wimg_mode = object->wimg_bits;
4126 vm_prot_to_wimg(access, &wimg_mode);
4127 if (object->wimg_bits != wimg_mode)
4128 vm_object_change_wimg_mode(object, wimg_mode);
4129
4130 vm_object_unlock(object);
2d21ac55
A
4131 }
4132 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4133 /*
4134 * JMM - This is temporary until we unify named entries
4135 * and raw memory objects.
4136 *
4137 * Detected fake ip_kotype for a memory object. In
4138 * this case, the port isn't really a port at all, but
4139 * instead is just a raw memory object.
4140 */
3e170ce0
A
4141 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4142 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4143 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4144 }
4145
5ba3f43e 4146 object = memory_object_to_vm_object((memory_object_t)port);
2d21ac55
A
4147 if (object == VM_OBJECT_NULL)
4148 return KERN_INVALID_OBJECT;
5ba3f43e 4149 vm_object_reference(object);
2d21ac55
A
4150
4151 /* wait for object (if any) to be ready */
4152 if (object != VM_OBJECT_NULL) {
4153 if (object == kernel_object) {
4154 printf("Warning: Attempt to map kernel object"
4155 " by a non-private kernel entity\n");
4156 return KERN_INVALID_OBJECT;
4157 }
b0d623f7 4158 if (!object->pager_ready) {
2d21ac55 4159 vm_object_lock(object);
b0d623f7
A
4160
4161 while (!object->pager_ready) {
4162 vm_object_wait(object,
4163 VM_OBJECT_EVENT_PAGER_READY,
4164 THREAD_UNINT);
4165 vm_object_lock(object);
4166 }
4167 vm_object_unlock(object);
2d21ac55 4168 }
2d21ac55
A
4169 }
4170 } else {
4171 return KERN_INVALID_OBJECT;
4172 }
4173
593a1d5f
A
4174 if (object != VM_OBJECT_NULL &&
4175 object->named &&
4176 object->pager != MEMORY_OBJECT_NULL &&
4177 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4178 memory_object_t pager;
4179 vm_prot_t pager_prot;
4180 kern_return_t kr;
4181
4182 /*
4183 * For "named" VM objects, let the pager know that the
4184 * memory object is being mapped. Some pagers need to keep
4185 * track of this, to know when they can reclaim the memory
4186 * object, for example.
4187 * VM calls memory_object_map() for each mapping (specifying
4188 * the protection of each mapping) and calls
4189 * memory_object_last_unmap() when all the mappings are gone.
4190 */
4191 pager_prot = max_protection;
4192 if (copy) {
4193 /*
4194 * Copy-On-Write mapping: won't modify the
4195 * memory object.
4196 */
4197 pager_prot &= ~VM_PROT_WRITE;
4198 }
4199 vm_object_lock(object);
4200 pager = object->pager;
4201 if (object->named &&
4202 pager != MEMORY_OBJECT_NULL &&
4203 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4204 assert(object->pager_ready);
4205 vm_object_mapping_wait(object, THREAD_UNINT);
4206 vm_object_mapping_begin(object);
4207 vm_object_unlock(object);
4208
4209 kr = memory_object_map(pager, pager_prot);
4210 assert(kr == KERN_SUCCESS);
4211
4212 vm_object_lock(object);
4213 vm_object_mapping_end(object);
4214 }
4215 vm_object_unlock(object);
4216 }
4217
2d21ac55
A
4218 /*
4219 * Perform the copy if requested
4220 */
4221
4222 if (copy) {
4223 vm_object_t new_object;
4224 vm_object_offset_t new_offset;
4225
3e170ce0
A
4226 result = vm_object_copy_strategically(object, offset,
4227 map_size,
2d21ac55
A
4228 &new_object, &new_offset,
4229 &copy);
4230
4231
4232 if (result == KERN_MEMORY_RESTART_COPY) {
4233 boolean_t success;
4234 boolean_t src_needs_copy;
4235
4236 /*
4237 * XXX
4238 * We currently ignore src_needs_copy.
4239 * This really is the issue of how to make
4240 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4241 * non-kernel users to use. Solution forthcoming.
4242 * In the meantime, since we don't allow non-kernel
4243 * memory managers to specify symmetric copy,
4244 * we won't run into problems here.
4245 */
4246 new_object = object;
4247 new_offset = offset;
4248 success = vm_object_copy_quickly(&new_object,
3e170ce0
A
4249 new_offset,
4250 map_size,
2d21ac55
A
4251 &src_needs_copy,
4252 &copy);
4253 assert(success);
4254 result = KERN_SUCCESS;
4255 }
4256 /*
4257 * Throw away the reference to the
4258 * original object, as it won't be mapped.
4259 */
4260
4261 vm_object_deallocate(object);
4262
3e170ce0 4263 if (result != KERN_SUCCESS) {
2d21ac55 4264 return result;
3e170ce0 4265 }
2d21ac55
A
4266
4267 object = new_object;
4268 offset = new_offset;
4269 }
4270
fe8ab488 4271 /*
5ba3f43e 4272 * If non-kernel users want to try to prefault pages, the mapping and prefault
fe8ab488
A
 4273	 * need to be atomic.
4274 */
5ba3f43e
A
4275 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4276 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4277
4278#if __arm64__
4279 if (fourk) {
4280 /* map this object in a "4K" pager */
4281 result = vm_map_enter_fourk(target_map,
4282 &map_addr,
4283 map_size,
4284 (vm_map_offset_t) mask,
4285 flags,
4286 vmk_flags,
4287 tag,
4288 object,
4289 offset,
4290 copy,
4291 cur_protection,
4292 max_protection,
4293 inheritance);
4294 } else
4295#endif /* __arm64__ */
3e170ce0
A
4296 {
4297 result = vm_map_enter(target_map,
4298 &map_addr, map_size,
4299 (vm_map_offset_t)mask,
4300 flags,
5ba3f43e
A
4301 vmk_flags,
4302 tag,
3e170ce0
A
4303 object, offset,
4304 copy,
4305 cur_protection, max_protection,
4306 inheritance);
4307 }
2d21ac55
A
4308 if (result != KERN_SUCCESS)
4309 vm_object_deallocate(object);
39236c6e 4310
fe8ab488
A
4311 /*
4312 * Try to prefault, and do not forget to release the vm map lock.
4313 */
4314 if (result == KERN_SUCCESS && try_prefault) {
4315 mach_vm_address_t va = map_addr;
4316 kern_return_t kr = KERN_SUCCESS;
4317 unsigned int i = 0;
39037602
A
4318 int pmap_options;
4319
5ba3f43e 4320 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
39037602
A
4321 if (object->internal) {
4322 pmap_options |= PMAP_OPTIONS_INTERNAL;
4323 }
fe8ab488
A
4324
4325 for (i = 0; i < page_list_count; ++i) {
5ba3f43e
A
4326 if (!UPL_VALID_PAGE(page_list, i)) {
4327 if (kernel_prefault) {
4328 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4329 result = KERN_MEMORY_ERROR;
4330 break;
4331 }
4332 } else {
fe8ab488
A
4333 /*
 4334					 * If this call fails, we should stop
 4335					 * trying to optimize, since other calls are
 4336					 * likely to fail too.
 4337					 *
 4338					 * We are not going to report an error for such
 4339					 * a failure though. That's an optimization, not
 4340					 * something critical.
4341 */
4342 kr = pmap_enter_options(target_map->pmap,
4343 va, UPL_PHYS_PAGE(page_list, i),
4344 cur_protection, VM_PROT_NONE,
39037602 4345 0, TRUE, pmap_options, NULL);
fe8ab488
A
4346 if (kr != KERN_SUCCESS) {
4347 OSIncrementAtomic64(&vm_prefault_nb_bailout);
5ba3f43e
A
4348 if (kernel_prefault) {
4349 result = kr;
4350 }
3e170ce0 4351 break;
fe8ab488
A
4352 }
4353 OSIncrementAtomic64(&vm_prefault_nb_pages);
4354 }
4355
4356 /* Next virtual address */
4357 va += PAGE_SIZE;
4358 }
5ba3f43e
A
4359 if (vmk_flags.vmkf_keep_map_locked) {
4360 vm_map_unlock(target_map);
4361 }
fe8ab488
A
4362 }
4363
3e170ce0
A
4364 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4365 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4366 *address = map_addr + offset_in_mapping;
4367 } else {
4368 *address = map_addr;
4369 }
2d21ac55
A
4370 return result;
4371}
4372
fe8ab488
A
4373kern_return_t
4374vm_map_enter_mem_object(
4375 vm_map_t target_map,
4376 vm_map_offset_t *address,
4377 vm_map_size_t initial_size,
4378 vm_map_offset_t mask,
4379 int flags,
5ba3f43e
A
4380 vm_map_kernel_flags_t vmk_flags,
4381 vm_tag_t tag,
fe8ab488
A
4382 ipc_port_t port,
4383 vm_object_offset_t offset,
4384 boolean_t copy,
4385 vm_prot_t cur_protection,
4386 vm_prot_t max_protection,
4387 vm_inherit_t inheritance)
4388{
5ba3f43e
A
4389 kern_return_t ret;
4390
4391 ret = vm_map_enter_mem_object_helper(target_map,
4392 address,
4393 initial_size,
4394 mask,
4395 flags,
4396 vmk_flags,
4397 tag,
4398 port,
4399 offset,
4400 copy,
4401 cur_protection,
4402 max_protection,
4403 inheritance,
4404 NULL,
4405 0);
4406
4407#if KASAN
4408 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4409 kasan_notify_address(*address, initial_size);
4410 }
4411#endif
4412
4413 return ret;
fe8ab488 4414}
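/*
 * Illustrative use only; the caller, port and size below are hypothetical
 * and not taken from this file.  A kernel client holding a send right to a
 * named memory entry could map it with something like:
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_mem_object(current_map(), &addr, size, 0,
 *		VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *		VM_KERN_MEMORY_NONE, mem_entry_port, 0, FALSE,
 *		VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 *
 * The port is resolved to its backing VM object, submap or vm_map_copy and
 * the rest of the work is done by vm_map_enter_mem_object_helper() above.
 */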
b0d623f7 4415
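/*
 * Variant of vm_map_enter_mem_object() that also takes a UPL page list and
 * asks the helper to pre-populate pmap entries for those pages (the
 * "prefault" path above); the mapping is always made without copy semantics
 * and with VM_INHERIT_DEFAULT.
 */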
fe8ab488
A
4416kern_return_t
4417vm_map_enter_mem_object_prefault(
4418 vm_map_t target_map,
4419 vm_map_offset_t *address,
4420 vm_map_size_t initial_size,
4421 vm_map_offset_t mask,
4422 int flags,
5ba3f43e
A
4423 vm_map_kernel_flags_t vmk_flags,
4424 vm_tag_t tag,
fe8ab488
A
4425 ipc_port_t port,
4426 vm_object_offset_t offset,
4427 vm_prot_t cur_protection,
4428 vm_prot_t max_protection,
4429 upl_page_list_ptr_t page_list,
4430 unsigned int page_list_count)
4431{
5ba3f43e
A
4432 kern_return_t ret;
4433
4434 ret = vm_map_enter_mem_object_helper(target_map,
4435 address,
4436 initial_size,
4437 mask,
4438 flags,
4439 vmk_flags,
4440 tag,
4441 port,
4442 offset,
4443 FALSE,
4444 cur_protection,
4445 max_protection,
4446 VM_INHERIT_DEFAULT,
4447 page_list,
4448 page_list_count);
4449
4450#if KASAN
4451 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4452 kasan_notify_address(*address, initial_size);
4453 }
4454#endif
4455
4456 return ret;
fe8ab488 4457}
b0d623f7
A
4458
4459
4460kern_return_t
4461vm_map_enter_mem_object_control(
4462 vm_map_t target_map,
4463 vm_map_offset_t *address,
4464 vm_map_size_t initial_size,
4465 vm_map_offset_t mask,
4466 int flags,
5ba3f43e
A
4467 vm_map_kernel_flags_t vmk_flags,
4468 vm_tag_t tag,
b0d623f7
A
4469 memory_object_control_t control,
4470 vm_object_offset_t offset,
4471 boolean_t copy,
4472 vm_prot_t cur_protection,
4473 vm_prot_t max_protection,
4474 vm_inherit_t inheritance)
4475{
4476 vm_map_address_t map_addr;
4477 vm_map_size_t map_size;
4478 vm_object_t object;
4479 vm_object_size_t size;
4480 kern_return_t result;
4481 memory_object_t pager;
4482 vm_prot_t pager_prot;
4483 kern_return_t kr;
5ba3f43e
A
4484#if __arm64__
4485 boolean_t fourk = vmk_flags.vmkf_fourk;
4486#endif /* __arm64__ */
b0d623f7
A
4487
4488 /*
4489 * Check arguments for validity
4490 */
4491 if ((target_map == VM_MAP_NULL) ||
4492 (cur_protection & ~VM_PROT_ALL) ||
4493 (max_protection & ~VM_PROT_ALL) ||
4494 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 4495 initial_size == 0) {
b0d623f7 4496 return KERN_INVALID_ARGUMENT;
3e170ce0 4497 }
b0d623f7 4498
5ba3f43e
A
4499#if __arm64__
4500 if (fourk) {
4501 map_addr = vm_map_trunc_page(*address,
4502 FOURK_PAGE_MASK);
4503 map_size = vm_map_round_page(initial_size,
4504 FOURK_PAGE_MASK);
4505 } else
4506#endif /* __arm64__ */
3e170ce0
A
4507 {
4508 map_addr = vm_map_trunc_page(*address,
4509 VM_MAP_PAGE_MASK(target_map));
4510 map_size = vm_map_round_page(initial_size,
4511 VM_MAP_PAGE_MASK(target_map));
4512 }
4513 size = vm_object_round_page(initial_size);
b0d623f7
A
4514
4515 object = memory_object_control_to_vm_object(control);
4516
4517 if (object == VM_OBJECT_NULL)
4518 return KERN_INVALID_OBJECT;
4519
4520 if (object == kernel_object) {
4521 printf("Warning: Attempt to map kernel object"
4522 " by a non-private kernel entity\n");
4523 return KERN_INVALID_OBJECT;
4524 }
4525
4526 vm_object_lock(object);
4527 object->ref_count++;
4528 vm_object_res_reference(object);
4529
4530 /*
4531 * For "named" VM objects, let the pager know that the
4532 * memory object is being mapped. Some pagers need to keep
4533 * track of this, to know when they can reclaim the memory
4534 * object, for example.
4535 * VM calls memory_object_map() for each mapping (specifying
4536 * the protection of each mapping) and calls
4537 * memory_object_last_unmap() when all the mappings are gone.
4538 */
4539 pager_prot = max_protection;
4540 if (copy) {
4541 pager_prot &= ~VM_PROT_WRITE;
4542 }
4543 pager = object->pager;
4544 if (object->named &&
4545 pager != MEMORY_OBJECT_NULL &&
4546 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4547 assert(object->pager_ready);
4548 vm_object_mapping_wait(object, THREAD_UNINT);
4549 vm_object_mapping_begin(object);
4550 vm_object_unlock(object);
4551
4552 kr = memory_object_map(pager, pager_prot);
4553 assert(kr == KERN_SUCCESS);
4554
4555 vm_object_lock(object);
4556 vm_object_mapping_end(object);
4557 }
4558 vm_object_unlock(object);
4559
4560 /*
4561 * Perform the copy if requested
4562 */
4563
4564 if (copy) {
4565 vm_object_t new_object;
4566 vm_object_offset_t new_offset;
4567
4568 result = vm_object_copy_strategically(object, offset, size,
4569 &new_object, &new_offset,
4570 &copy);
4571
4572
4573 if (result == KERN_MEMORY_RESTART_COPY) {
4574 boolean_t success;
4575 boolean_t src_needs_copy;
4576
4577 /*
4578 * XXX
4579 * We currently ignore src_needs_copy.
4580 * This really is the issue of how to make
4581 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4582 * non-kernel users to use. Solution forthcoming.
4583 * In the meantime, since we don't allow non-kernel
4584 * memory managers to specify symmetric copy,
4585 * we won't run into problems here.
4586 */
4587 new_object = object;
4588 new_offset = offset;
4589 success = vm_object_copy_quickly(&new_object,
4590 new_offset, size,
4591 &src_needs_copy,
4592 &copy);
4593 assert(success);
4594 result = KERN_SUCCESS;
4595 }
4596 /*
4597 * Throw away the reference to the
4598 * original object, as it won't be mapped.
4599 */
4600
4601 vm_object_deallocate(object);
4602
3e170ce0 4603 if (result != KERN_SUCCESS) {
b0d623f7 4604 return result;
3e170ce0 4605 }
b0d623f7
A
4606
4607 object = new_object;
4608 offset = new_offset;
4609 }
4610
5ba3f43e
A
4611#if __arm64__
4612 if (fourk) {
4613 result = vm_map_enter_fourk(target_map,
4614 &map_addr,
4615 map_size,
4616 (vm_map_offset_t)mask,
4617 flags,
4618 vmk_flags,
4619 tag,
4620 object, offset,
4621 copy,
4622 cur_protection, max_protection,
4623 inheritance);
4624 } else
4625#endif /* __arm64__ */
3e170ce0
A
4626 {
4627 result = vm_map_enter(target_map,
4628 &map_addr, map_size,
4629 (vm_map_offset_t)mask,
4630 flags,
5ba3f43e
A
4631 vmk_flags,
4632 tag,
3e170ce0
A
4633 object, offset,
4634 copy,
4635 cur_protection, max_protection,
4636 inheritance);
4637 }
b0d623f7
A
4638 if (result != KERN_SUCCESS)
4639 vm_object_deallocate(object);
4640 *address = map_addr;
4641
4642 return result;
4643}
4644
4645
2d21ac55
A
4646#if VM_CPM
4647
4648#ifdef MACH_ASSERT
4649extern pmap_paddr_t avail_start, avail_end;
4650#endif
4651
4652/*
4653 * Allocate memory in the specified map, with the caveat that
4654 * the memory is physically contiguous. This call may fail
4655 * if the system can't find sufficient contiguous memory.
4656 * This call may cause or lead to heart-stopping amounts of
4657 * paging activity.
4658 *
4659 * Memory obtained from this call should be freed in the
4660 * normal way, viz., via vm_deallocate.
4661 */
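/*
 * In outline: grab physically contiguous pages with cpm_allocate(), insert
 * them into a freshly allocated VM object, enter that object into the map,
 * then pre-enter every page into the pmap and mark the range unpageable so
 * that the region never faults.
 */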
4662kern_return_t
4663vm_map_enter_cpm(
4664 vm_map_t map,
4665 vm_map_offset_t *addr,
4666 vm_map_size_t size,
4667 int flags)
4668{
4669 vm_object_t cpm_obj;
4670 pmap_t pmap;
4671 vm_page_t m, pages;
4672 kern_return_t kr;
4673 vm_map_offset_t va, start, end, offset;
4674#if MACH_ASSERT
316670eb 4675 vm_map_offset_t prev_addr = 0;
2d21ac55
A
4676#endif /* MACH_ASSERT */
4677
4678 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
4679 vm_tag_t tag;
4680
4681 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 4682
2d21ac55
A
4683 if (size == 0) {
4684 *addr = 0;
4685 return KERN_SUCCESS;
4686 }
4687 if (anywhere)
4688 *addr = vm_map_min(map);
4689 else
39236c6e
A
4690 *addr = vm_map_trunc_page(*addr,
4691 VM_MAP_PAGE_MASK(map));
4692 size = vm_map_round_page(size,
4693 VM_MAP_PAGE_MASK(map));
2d21ac55
A
4694
4695 /*
4696 * LP64todo - cpm_allocate should probably allow
4697 * allocations of >4GB, but not with the current
4698 * algorithm, so just cast down the size for now.
4699 */
4700 if (size > VM_MAX_ADDRESS)
4701 return KERN_RESOURCE_SHORTAGE;
4702 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 4703 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
4704 return kr;
4705
4706 cpm_obj = vm_object_allocate((vm_object_size_t)size);
4707 assert(cpm_obj != VM_OBJECT_NULL);
4708 assert(cpm_obj->internal);
316670eb 4709 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
4710 assert(cpm_obj->can_persist == FALSE);
4711 assert(cpm_obj->pager_created == FALSE);
4712 assert(cpm_obj->pageout == FALSE);
4713 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
4714
4715 /*
4716 * Insert pages into object.
4717 */
4718
4719 vm_object_lock(cpm_obj);
4720 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4721 m = pages;
4722 pages = NEXT_PAGE(m);
0c530ab8 4723 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
4724
4725 assert(!m->gobbled);
4726 assert(!m->wanted);
4727 assert(!m->pageout);
4728 assert(!m->tabled);
b0d623f7 4729 assert(VM_PAGE_WIRED(m));
91447636 4730 assert(m->busy);
39037602 4731 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
91447636
A
4732
4733 m->busy = FALSE;
4734 vm_page_insert(m, cpm_obj, offset);
4735 }
4736 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4737 vm_object_unlock(cpm_obj);
4738
4739 /*
4740 * Hang onto a reference on the object in case a
4741 * multi-threaded application for some reason decides
4742 * to deallocate the portion of the address space into
4743 * which we will insert this object.
4744 *
4745 * Unfortunately, we must insert the object now before
4746 * we can talk to the pmap module about which addresses
4747 * must be wired down. Hence, the race with a multi-
4748 * threaded app.
4749 */
4750 vm_object_reference(cpm_obj);
4751
4752 /*
4753 * Insert object into map.
4754 */
4755
4756 kr = vm_map_enter(
2d21ac55
A
4757 map,
4758 addr,
4759 size,
4760 (vm_map_offset_t)0,
4761 flags,
5ba3f43e 4762 VM_MAP_KERNEL_FLAGS_NONE,
2d21ac55
A
4763 cpm_obj,
4764 (vm_object_offset_t)0,
4765 FALSE,
4766 VM_PROT_ALL,
4767 VM_PROT_ALL,
4768 VM_INHERIT_DEFAULT);
91447636
A
4769
4770 if (kr != KERN_SUCCESS) {
4771 /*
4772 * A CPM object doesn't have can_persist set,
4773 * so all we have to do is deallocate it to
4774 * free up these pages.
4775 */
4776 assert(cpm_obj->pager_created == FALSE);
4777 assert(cpm_obj->can_persist == FALSE);
4778 assert(cpm_obj->pageout == FALSE);
4779 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4780 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4781 vm_object_deallocate(cpm_obj); /* kill creation ref */
4782 }
4783
4784 /*
4785 * Inform the physical mapping system that the
4786 * range of addresses may not fault, so that
4787 * page tables and such can be locked down as well.
4788 */
4789 start = *addr;
4790 end = start + size;
4791 pmap = vm_map_pmap(map);
4792 pmap_pageable(pmap, start, end, FALSE);
4793
4794 /*
4795 * Enter each page into the pmap, to avoid faults.
4796 * Note that this loop could be coded more efficiently,
4797 * if the need arose, rather than looking up each page
4798 * again.
4799 */
4800 for (offset = 0, va = start; offset < size;
4801 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
4802 int type_of_fault;
4803
91447636
A
4804 vm_object_lock(cpm_obj);
4805 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 4806 assert(m != VM_PAGE_NULL);
2d21ac55
A
4807
4808 vm_page_zero_fill(m);
4809
4810 type_of_fault = DBG_ZERO_FILL_FAULT;
4811
6d2010ae 4812 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5ba3f43e
A
4813 VM_PAGE_WIRED(m),
4814 FALSE, /* change_wiring */
4815 VM_KERN_MEMORY_NONE, /* tag - not wiring */
4816 FALSE, /* no_cache */
4817 FALSE, /* cs_bypass */
4818 0, /* user_tag */
4819 0, /* pmap_options */
4820 NULL, /* need_retry */
4821 &type_of_fault);
2d21ac55
A
4822
4823 vm_object_unlock(cpm_obj);
91447636
A
4824 }
4825
4826#if MACH_ASSERT
4827 /*
4828 * Verify ordering in address space.
4829 */
4830 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4831 vm_object_lock(cpm_obj);
4832 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4833 vm_object_unlock(cpm_obj);
4834 if (m == VM_PAGE_NULL)
316670eb
A
4835 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
4836 cpm_obj, (uint64_t)offset);
91447636
A
4837 assert(m->tabled);
4838 assert(!m->busy);
4839 assert(!m->wanted);
4840 assert(!m->fictitious);
4841 assert(!m->private);
4842 assert(!m->absent);
4843 assert(!m->error);
4844 assert(!m->cleaning);
316670eb 4845 assert(!m->laundry);
91447636
A
4846 assert(!m->precious);
4847 assert(!m->clustered);
4848 if (offset != 0) {
39037602 4849 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb
A
4850 printf("start 0x%llx end 0x%llx va 0x%llx\n",
4851 (uint64_t)start, (uint64_t)end, (uint64_t)va);
4852 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
4853 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
4854 panic("vm_allocate_cpm: pages not contig!");
4855 }
4856 }
39037602 4857 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636
A
4858 }
4859#endif /* MACH_ASSERT */
4860
4861 vm_object_deallocate(cpm_obj); /* kill extra ref */
4862
4863 return kr;
4864}
4865
4866
4867#else /* VM_CPM */
4868
4869/*
4870 * Interface is defined in all cases, but unless the kernel
4871 * is built explicitly for this option, the interface does
4872 * nothing.
4873 */
4874
4875kern_return_t
4876vm_map_enter_cpm(
4877 __unused vm_map_t map,
4878 __unused vm_map_offset_t *addr,
4879 __unused vm_map_size_t size,
4880 __unused int flags)
4881{
4882 return KERN_FAILURE;
4883}
4884#endif /* VM_CPM */
4885
b0d623f7
A
4886/* Not used without nested pmaps */
4887#ifndef NO_NESTED_PMAP
2d21ac55
A
4888/*
4889 * Clip and unnest a portion of a nested submap mapping.
4890 */
b0d623f7
A
4891
4892
2d21ac55
A
4893static void
4894vm_map_clip_unnest(
4895 vm_map_t map,
4896 vm_map_entry_t entry,
4897 vm_map_offset_t start_unnest,
4898 vm_map_offset_t end_unnest)
4899{
b0d623f7
A
4900 vm_map_offset_t old_start_unnest = start_unnest;
4901 vm_map_offset_t old_end_unnest = end_unnest;
4902
2d21ac55 4903 assert(entry->is_sub_map);
3e170ce0 4904 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 4905 assert(entry->use_pmap);
2d21ac55 4906
b0d623f7
A
4907 /*
4908 * Query the platform for the optimal unnest range.
4909 * DRK: There's some duplication of effort here, since
4910 * callers may have adjusted the range to some extent. This
4911 * routine was introduced to support 1GiB subtree nesting
4912 * for x86 platforms, which can also nest on 2MiB boundaries
4913 * depending on size/alignment.
4914 */
4915 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
4916 assert(VME_SUBMAP(entry)->is_nested_map);
4917 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4918 log_unnest_badness(map,
4919 old_start_unnest,
4920 old_end_unnest,
4921 VME_SUBMAP(entry)->is_nested_map,
4922 (entry->vme_start +
4923 VME_SUBMAP(entry)->lowest_unnestable_start -
4924 VME_OFFSET(entry)));
b0d623f7
A
4925 }
4926
2d21ac55
A
4927 if (entry->vme_start > start_unnest ||
4928 entry->vme_end < end_unnest) {
4929 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4930 "bad nested entry: start=0x%llx end=0x%llx\n",
4931 (long long)start_unnest, (long long)end_unnest,
4932 (long long)entry->vme_start, (long long)entry->vme_end);
4933 }
b0d623f7 4934
2d21ac55
A
4935 if (start_unnest > entry->vme_start) {
4936 _vm_map_clip_start(&map->hdr,
4937 entry,
4938 start_unnest);
3e170ce0
A
4939 if (map->holelistenabled) {
4940 vm_map_store_update_first_free(map, NULL, FALSE);
4941 } else {
4942 vm_map_store_update_first_free(map, map->first_free, FALSE);
4943 }
2d21ac55
A
4944 }
4945 if (entry->vme_end > end_unnest) {
4946 _vm_map_clip_end(&map->hdr,
4947 entry,
4948 end_unnest);
3e170ce0
A
4949 if (map->holelistenabled) {
4950 vm_map_store_update_first_free(map, NULL, FALSE);
4951 } else {
4952 vm_map_store_update_first_free(map, map->first_free, FALSE);
4953 }
2d21ac55
A
4954 }
4955
4956 pmap_unnest(map->pmap,
4957 entry->vme_start,
4958 entry->vme_end - entry->vme_start);
316670eb 4959 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
4960 /* clean up parent map/maps */
4961 vm_map_submap_pmap_clean(
4962 map, entry->vme_start,
4963 entry->vme_end,
3e170ce0
A
4964 VME_SUBMAP(entry),
4965 VME_OFFSET(entry));
2d21ac55
A
4966 }
4967 entry->use_pmap = FALSE;
3e170ce0
A
4968 if ((map->pmap != kernel_pmap) &&
4969 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4970 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 4971 }
2d21ac55 4972}
b0d623f7 4973#endif /* NO_NESTED_PMAP */
2d21ac55 4974
1c79356b
A
4975/*
4976 * vm_map_clip_start: [ internal use only ]
4977 *
4978 * Asserts that the given entry begins at or after
4979 * the specified address; if necessary,
4980 * it splits the entry into two.
4981 */
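/*
 * For example (hypothetical addresses): clipping an entry covering
 * [0x1000, 0x5000) at startaddr 0x3000 inserts a new entry for the front
 * portion [0x1000, 0x3000) and leaves this entry as [0x3000, 0x5000),
 * with its VME_OFFSET advanced by 0x2000.
 */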
e2d2fc5c 4982void
2d21ac55
A
4983vm_map_clip_start(
4984 vm_map_t map,
4985 vm_map_entry_t entry,
4986 vm_map_offset_t startaddr)
4987{
0c530ab8 4988#ifndef NO_NESTED_PMAP
fe8ab488
A
4989 if (entry->is_sub_map &&
4990 entry->use_pmap &&
2d21ac55
A
4991 startaddr >= entry->vme_start) {
4992 vm_map_offset_t start_unnest, end_unnest;
4993
4994 /*
4995 * Make sure "startaddr" is no longer in a nested range
4996 * before we clip. Unnest only the minimum range the platform
4997 * can handle.
b0d623f7
A
4998 * vm_map_clip_unnest may perform additional adjustments to
4999 * the unnest range.
2d21ac55
A
5000 */
5001 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5002 end_unnest = start_unnest + pmap_nesting_size_min;
5003 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5004 }
5005#endif /* NO_NESTED_PMAP */
5006 if (startaddr > entry->vme_start) {
3e170ce0 5007 if (VME_OBJECT(entry) &&
2d21ac55 5008 !entry->is_sub_map &&
3e170ce0 5009 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
5010 pmap_remove(map->pmap,
5011 (addr64_t)(entry->vme_start),
5012 (addr64_t)(entry->vme_end));
5013 }
39037602
A
5014 if (entry->vme_atomic) {
5015 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5ba3f43e 5016 }
2d21ac55 5017 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
5018 if (map->holelistenabled) {
5019 vm_map_store_update_first_free(map, NULL, FALSE);
5020 } else {
5021 vm_map_store_update_first_free(map, map->first_free, FALSE);
5022 }
2d21ac55
A
5023 }
5024}
5025
1c79356b
A
5026
5027#define vm_map_copy_clip_start(copy, entry, startaddr) \
5028 MACRO_BEGIN \
5029 if ((startaddr) > (entry)->vme_start) \
5030 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5031 MACRO_END
5032
5033/*
5034 * This routine is called only when it is known that
5035 * the entry must be split.
5036 */
91447636 5037static void
1c79356b 5038_vm_map_clip_start(
39037602
A
5039 struct vm_map_header *map_header,
5040 vm_map_entry_t entry,
5041 vm_map_offset_t start)
1c79356b 5042{
39037602 5043 vm_map_entry_t new_entry;
1c79356b
A
5044
5045 /*
5046 * Split off the front portion --
5047 * note that we must insert the new
5048 * entry BEFORE this one, so that
5049 * this entry has the specified starting
5050 * address.
5051 */
5052
fe8ab488
A
5053 if (entry->map_aligned) {
5054 assert(VM_MAP_PAGE_ALIGNED(start,
5055 VM_MAP_HDR_PAGE_MASK(map_header)));
5056 }
5057
7ddcb079 5058 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5059 vm_map_entry_copy_full(new_entry, entry);
5060
5061 new_entry->vme_end = start;
e2d2fc5c 5062 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 5063 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 5064 assert(start < entry->vme_end);
1c79356b
A
5065 entry->vme_start = start;
5066
6d2010ae 5067 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
5068
5069 if (entry->is_sub_map)
3e170ce0 5070 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 5071 else
3e170ce0 5072 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
5073}
5074
5075
5076/*
5077 * vm_map_clip_end: [ internal use only ]
5078 *
5079 * Asserts that the given entry ends at or before
5080 * the specified address; if necessary,
5081 * it splits the entry into two.
5082 */
e2d2fc5c 5083void
2d21ac55
A
5084vm_map_clip_end(
5085 vm_map_t map,
5086 vm_map_entry_t entry,
5087 vm_map_offset_t endaddr)
5088{
5089 if (endaddr > entry->vme_end) {
5090 /*
5091 * Within the scope of this clipping, limit "endaddr" to
5092 * the end of this map entry...
5093 */
5094 endaddr = entry->vme_end;
5095 }
5096#ifndef NO_NESTED_PMAP
fe8ab488 5097 if (entry->is_sub_map && entry->use_pmap) {
2d21ac55
A
5098 vm_map_offset_t start_unnest, end_unnest;
5099
5100 /*
5101 * Make sure the range between the start of this entry and
5102 * the new "endaddr" is no longer nested before we clip.
5103 * Unnest only the minimum range the platform can handle.
b0d623f7
A
5104 * vm_map_clip_unnest may perform additional adjustments to
5105 * the unnest range.
2d21ac55
A
5106 */
5107 start_unnest = entry->vme_start;
5108 end_unnest =
5109 (endaddr + pmap_nesting_size_min - 1) &
5110 ~(pmap_nesting_size_min - 1);
5111 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5112 }
5113#endif /* NO_NESTED_PMAP */
5114 if (endaddr < entry->vme_end) {
3e170ce0 5115 if (VME_OBJECT(entry) &&
2d21ac55 5116 !entry->is_sub_map &&
3e170ce0 5117 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
5118 pmap_remove(map->pmap,
5119 (addr64_t)(entry->vme_start),
5120 (addr64_t)(entry->vme_end));
5121 }
39037602
A
5122 if (entry->vme_atomic) {
5123 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5124 }
2d21ac55 5125 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
5126 if (map->holelistenabled) {
5127 vm_map_store_update_first_free(map, NULL, FALSE);
5128 } else {
5129 vm_map_store_update_first_free(map, map->first_free, FALSE);
5130 }
2d21ac55
A
5131 }
5132}
0c530ab8 5133
1c79356b
A
5134
5135#define vm_map_copy_clip_end(copy, entry, endaddr) \
5136 MACRO_BEGIN \
5137 if ((endaddr) < (entry)->vme_end) \
5138 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5139 MACRO_END
5140
5141/*
5142 * This routine is called only when it is known that
5143 * the entry must be split.
5144 */
91447636 5145static void
1c79356b 5146_vm_map_clip_end(
39037602
A
5147 struct vm_map_header *map_header,
5148 vm_map_entry_t entry,
5149 vm_map_offset_t end)
1c79356b 5150{
39037602 5151 vm_map_entry_t new_entry;
1c79356b
A
5152
5153 /*
5154 * Create a new entry and insert it
5155 * AFTER the specified entry
5156 */
5157
fe8ab488
A
5158 if (entry->map_aligned) {
5159 assert(VM_MAP_PAGE_ALIGNED(end,
5160 VM_MAP_HDR_PAGE_MASK(map_header)));
5161 }
5162
7ddcb079 5163 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5164 vm_map_entry_copy_full(new_entry, entry);
5165
e2d2fc5c 5166 assert(entry->vme_start < end);
1c79356b 5167 new_entry->vme_start = entry->vme_end = end;
3e170ce0
A
5168 VME_OFFSET_SET(new_entry,
5169 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 5170 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 5171
6d2010ae 5172 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
5173
5174 if (entry->is_sub_map)
3e170ce0 5175 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 5176 else
3e170ce0 5177 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
5178}
5179
5180
5181/*
5182 * VM_MAP_RANGE_CHECK: [ internal use only ]
5183 *
5184 * Asserts that the starting and ending region
5185 * addresses fall within the valid range of the map.
5186 */
2d21ac55
A
5187#define VM_MAP_RANGE_CHECK(map, start, end) \
5188 MACRO_BEGIN \
5189 if (start < vm_map_min(map)) \
5190 start = vm_map_min(map); \
5191 if (end > vm_map_max(map)) \
5192 end = vm_map_max(map); \
5193 if (start > end) \
5194 start = end; \
5195 MACRO_END
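/*
 * Note that VM_MAP_RANGE_CHECK silently clamps the range to the map's
 * bounds (and collapses an inverted range to an empty one) rather than
 * reporting an error; vm_map_range_check() below is the variant that
 * returns FALSE for unmapped or out-of-range space.
 */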
1c79356b
A
5196
5197/*
5198 * vm_map_range_check: [ internal use only ]
5ba3f43e 5199 *
1c79356b
A
5200 * Check that the region defined by the specified start and
 5201 *	end addresses is wholly contained within a single map
 5202 *	entry or set of adjacent map entries of the specified map,
5203 * i.e. the specified region contains no unmapped space.
5204 * If any or all of the region is unmapped, FALSE is returned.
5205 * Otherwise, TRUE is returned and if the output argument 'entry'
5206 * is not NULL it points to the map entry containing the start
5207 * of the region.
5208 *
5209 * The map is locked for reading on entry and is left locked.
5210 */
91447636 5211static boolean_t
1c79356b 5212vm_map_range_check(
39037602
A
5213 vm_map_t map,
5214 vm_map_offset_t start,
5215 vm_map_offset_t end,
1c79356b
A
5216 vm_map_entry_t *entry)
5217{
5218 vm_map_entry_t cur;
39037602 5219 vm_map_offset_t prev;
1c79356b
A
5220
5221 /*
5222 * Basic sanity checks first
5223 */
5224 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5225 return (FALSE);
5226
5227 /*
5228 * Check first if the region starts within a valid
5229 * mapping for the map.
5230 */
5231 if (!vm_map_lookup_entry(map, start, &cur))
5232 return (FALSE);
5233
5234 /*
5ba3f43e 5235 * Optimize for the case that the region is contained
1c79356b
A
5236 * in a single map entry.
5237 */
5238 if (entry != (vm_map_entry_t *) NULL)
5239 *entry = cur;
5240 if (end <= cur->vme_end)
5241 return (TRUE);
5242
5243 /*
5244 * If the region is not wholly contained within a
5245 * single entry, walk the entries looking for holes.
5246 */
5247 prev = cur->vme_end;
5248 cur = cur->vme_next;
5249 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5250 if (end <= cur->vme_end)
5251 return (TRUE);
5252 prev = cur->vme_end;
5253 cur = cur->vme_next;
5254 }
5255 return (FALSE);
5256}
5257
5258/*
5259 * vm_map_submap: [ kernel use only ]
5260 *
5261 * Mark the given range as handled by a subordinate map.
5262 *
5263 * This range must have been created with vm_map_find using
5264 * the vm_submap_object, and no other operations may have been
5265 * performed on this range prior to calling vm_map_submap.
5266 *
5267 * Only a limited number of operations can be performed
 5268 *	within this range after calling vm_map_submap:
5269 * vm_fault
5270 * [Don't try vm_map_copyin!]
5271 *
5272 * To remove a submapping, one must first remove the
5273 * range from the superior map, and then destroy the
5274 * submap (if desired). [Better yet, don't try it.]
5275 */
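/*
 * When "use_pmap" is TRUE and nested pmaps are configured, the submap's
 * pmap is nested into the parent map's pmap with pmap_nest(), so the
 * translations for the shared range are built once and shared; on
 * configurations without nested pmaps, the parent's mappings for the
 * range are simply removed instead.
 */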
5276kern_return_t
5277vm_map_submap(
fe8ab488 5278 vm_map_t map,
91447636
A
5279 vm_map_offset_t start,
5280 vm_map_offset_t end,
fe8ab488 5281 vm_map_t submap,
91447636 5282 vm_map_offset_t offset,
0c530ab8 5283#ifdef NO_NESTED_PMAP
91447636 5284 __unused
0c530ab8 5285#endif /* NO_NESTED_PMAP */
fe8ab488 5286 boolean_t use_pmap)
1c79356b
A
5287{
5288 vm_map_entry_t entry;
39037602
A
5289 kern_return_t result = KERN_INVALID_ARGUMENT;
5290 vm_object_t object;
1c79356b
A
5291
5292 vm_map_lock(map);
5293
2d21ac55 5294 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 5295 entry = entry->vme_next;
2d21ac55 5296 }
1c79356b 5297
2d21ac55
A
5298 if (entry == vm_map_to_entry(map) ||
5299 entry->is_sub_map) {
1c79356b
A
5300 vm_map_unlock(map);
5301 return KERN_INVALID_ARGUMENT;
5302 }
5303
2d21ac55 5304 vm_map_clip_start(map, entry, start);
1c79356b
A
5305 vm_map_clip_end(map, entry, end);
5306
5307 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5308 (!entry->is_sub_map) &&
3e170ce0 5309 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
5310 (object->resident_page_count == 0) &&
5311 (object->copy == VM_OBJECT_NULL) &&
5312 (object->shadow == VM_OBJECT_NULL) &&
5313 (!object->pager_created)) {
3e170ce0
A
5314 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5315 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
5316 vm_object_deallocate(object);
5317 entry->is_sub_map = TRUE;
fe8ab488 5318 entry->use_pmap = FALSE;
3e170ce0 5319 VME_SUBMAP_SET(entry, submap);
2d21ac55 5320 vm_map_reference(submap);
316670eb
A
5321 if (submap->mapped_in_other_pmaps == FALSE &&
5322 vm_map_pmap(submap) != PMAP_NULL &&
5323 vm_map_pmap(submap) != vm_map_pmap(map)) {
5324 /*
5325 * This submap is being mapped in a map
5326 * that uses a different pmap.
5327 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 5328 * to indicate that we now need to
316670eb
A
5329 * remove mappings from all pmaps rather
5330 * than just the submap's pmap.
5331 */
5332 submap->mapped_in_other_pmaps = TRUE;
5333 }
2d21ac55 5334
0c530ab8 5335#ifndef NO_NESTED_PMAP
2d21ac55
A
5336 if (use_pmap) {
5337 /* nest if platform code will allow */
5338 if(submap->pmap == NULL) {
316670eb
A
5339 ledger_t ledger = map->pmap->ledger;
5340 submap->pmap = pmap_create(ledger,
5341 (vm_map_size_t) 0, FALSE);
2d21ac55
A
5342 if(submap->pmap == PMAP_NULL) {
5343 vm_map_unlock(map);
5344 return(KERN_NO_SPACE);
55e303ae 5345 }
5ba3f43e
A
5346#if defined(__arm__) || defined(__arm64__)
5347 pmap_set_nested(submap->pmap);
5348#endif
55e303ae 5349 }
2d21ac55 5350 result = pmap_nest(map->pmap,
5ba3f43e 5351 (VME_SUBMAP(entry))->pmap,
2d21ac55
A
5352 (addr64_t)start,
5353 (addr64_t)start,
5354 (uint64_t)(end - start));
5355 if(result)
5356 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5357 entry->use_pmap = TRUE;
5358 }
0c530ab8 5359#else /* NO_NESTED_PMAP */
2d21ac55 5360 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 5361#endif /* NO_NESTED_PMAP */
2d21ac55 5362 result = KERN_SUCCESS;
1c79356b
A
5363 }
5364 vm_map_unlock(map);
5365
5366 return(result);
5367}
5368
5ba3f43e
A
5369#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5370#include <sys/codesign.h>
5371extern int proc_selfcsflags(void);
5372extern int panic_on_unsigned_execute;
5373#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
490019cf 5374
1c79356b
A
5375/*
5376 * vm_map_protect:
5377 *
5378 * Sets the protection of the specified address
5379 * region in the target map. If "set_max" is
5380 * specified, the maximum protection is to be set;
5381 * otherwise, only the current protection is affected.
5382 */
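/*
 * If "new_prot" includes VM_PROT_COPY, the range is first remapped onto
 * itself copy-on-write (vm_map_remap() with vmkf_remap_prot_copy) and
 * VM_PROT_COPY is then stripped before the remaining protection bits
 * are applied.
 */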
5383kern_return_t
5384vm_map_protect(
39037602
A
5385 vm_map_t map,
5386 vm_map_offset_t start,
5387 vm_map_offset_t end,
5388 vm_prot_t new_prot,
5389 boolean_t set_max)
5390{
5391 vm_map_entry_t current;
5392 vm_map_offset_t prev;
1c79356b
A
5393 vm_map_entry_t entry;
5394 vm_prot_t new_max;
5ba3f43e 5395 int pmap_options = 0;
5c9f4661 5396 kern_return_t kr;
1c79356b
A
5397
5398 XPR(XPR_VM_MAP,
2d21ac55 5399 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 5400 map, start, end, new_prot, set_max);
1c79356b 5401
5c9f4661
A
5402 if (new_prot & VM_PROT_COPY) {
5403 vm_map_offset_t new_start;
5404 vm_prot_t cur_prot, max_prot;
5405 vm_map_kernel_flags_t kflags;
5406
5407 /* LP64todo - see below */
5408 if (start >= map->max_offset) {
5409 return KERN_INVALID_ADDRESS;
5410 }
5411
5412 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5413 kflags.vmkf_remap_prot_copy = TRUE;
5414 new_start = start;
5415 kr = vm_map_remap(map,
5416 &new_start,
5417 end - start,
5418 0, /* mask */
5419 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5420 kflags,
5421 0,
5422 map,
5423 start,
5424 TRUE, /* copy-on-write remapping! */
5425 &cur_prot,
5426 &max_prot,
5427 VM_INHERIT_DEFAULT);
5428 if (kr != KERN_SUCCESS) {
5429 return kr;
5430 }
5431 new_prot &= ~VM_PROT_COPY;
5432 }
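	/*
	 * Note (summary of the block above): the range has now been
	 * remapped copy-on-write in place via vm_map_remap(), so the rest
	 * of this routine only has to apply an ordinary protection change,
	 * with VM_PROT_COPY stripped from new_prot.
	 */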
5433
1c79356b
A
5434 vm_map_lock(map);
5435
91447636
A
5436 /* LP64todo - remove this check when vm_map_commpage64()
5437 * no longer has to stuff in a map_entry for the commpage
5438 * above the map's max_offset.
5439 */
5440 if (start >= map->max_offset) {
5441 vm_map_unlock(map);
5442 return(KERN_INVALID_ADDRESS);
5443 }
5444
b0d623f7
A
5445 while(1) {
5446 /*
5447 * Lookup the entry. If it doesn't start in a valid
5448 * entry, return an error.
5449 */
5450 if (! vm_map_lookup_entry(map, start, &entry)) {
5451 vm_map_unlock(map);
5452 return(KERN_INVALID_ADDRESS);
5453 }
5454
5455 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5456 start = SUPERPAGE_ROUND_DOWN(start);
5457 continue;
5458 }
5459 break;
5460 }
5461 if (entry->superpage_size)
5462 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
5463
5464 /*
5465 * Make a first pass to check for protection and address
5466 * violations.
5467 */
5468
5469 current = entry;
5470 prev = current->vme_start;
5471 while ((current != vm_map_to_entry(map)) &&
5472 (current->vme_start < end)) {
5473
5474 /*
5475 * If there is a hole, return an error.
5476 */
5477 if (current->vme_start != prev) {
5478 vm_map_unlock(map);
5479 return(KERN_INVALID_ADDRESS);
5480 }
5481
5482 new_max = current->max_protection;
5c9f4661
A
5483 if ((new_prot & new_max) != new_prot) {
5484 vm_map_unlock(map);
5485 return(KERN_PROTECTION_FAILURE);
1c79356b 5486 }
5ba3f43e
A
5487
5488#if CONFIG_EMBEDDED
5489 if (new_prot & VM_PROT_WRITE) {
5490 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
5491 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
5492 new_prot &= ~VM_PROT_EXECUTE;
5493 }
5494 }
5495#endif
593a1d5f 5496
1c79356b
A
5497 prev = current->vme_end;
5498 current = current->vme_next;
5499 }
39037602 5500
5ba3f43e
A
5501#if __arm64__
5502 if (end > prev &&
5503 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5504 vm_map_entry_t prev_entry;
5505
5506 prev_entry = current->vme_prev;
5507 if (prev_entry != vm_map_to_entry(map) &&
5508 !prev_entry->map_aligned &&
5509 (vm_map_round_page(prev_entry->vme_end,
5510 VM_MAP_PAGE_MASK(map))
5511 == end)) {
5512 /*
5513 * The last entry in our range is not "map-aligned"
5514 * but it would have reached all the way to "end"
5515 * if it had been map-aligned, so this is not really
5516 * a hole in the range and we can proceed.
5517 */
5518 prev = end;
5519 }
5520 }
5521#endif /* __arm64__ */
39037602 5522
1c79356b
A
5523 if (end > prev) {
5524 vm_map_unlock(map);
5525 return(KERN_INVALID_ADDRESS);
5526 }
5527
5528 /*
5529 * Go back and fix up protections.
5530 * Clip to start here if the range starts within
5531 * the entry.
5532 */
5533
5534 current = entry;
2d21ac55
A
5535 if (current != vm_map_to_entry(map)) {
5536 /* clip and unnest if necessary */
5537 vm_map_clip_start(map, current, start);
1c79356b 5538 }
2d21ac55 5539
1c79356b
A
5540 while ((current != vm_map_to_entry(map)) &&
5541 (current->vme_start < end)) {
5542
5543 vm_prot_t old_prot;
5544
5545 vm_map_clip_end(map, current, end);
5546
fe8ab488
A
5547 if (current->is_sub_map) {
5548 /* clipping did unnest if needed */
5549 assert(!current->use_pmap);
5550 }
2d21ac55 5551
1c79356b
A
5552 old_prot = current->protection;
5553
5c9f4661
A
5554 if (set_max) {
5555 current->max_protection = new_prot;
5556 current->protection = new_prot & old_prot;
5557 } else {
5558 current->protection = new_prot;
5559 }
1c79356b
A
5560
5561 /*
5562 * Update physical map if necessary.
5ba3f43e
A
5563 * If the request is to turn off write protection,
5564 * we won't do it for real (in pmap). This is because
5565 * it would cause copy-on-write to fail. We've already
 5566	 * set the new protection in the map, so if a
5567 * write-protect fault occurred, it will be fixed up
1c79356b
A
5568 * properly, COW or not.
5569 */
1c79356b 5570 if (current->protection != old_prot) {
1c79356b
A
 5571			/* Look one level in; we support nested pmaps */
5572 /* from mapped submaps which are direct entries */
5573 /* in our map */
0c530ab8 5574
2d21ac55 5575 vm_prot_t prot;
0c530ab8 5576
39037602
A
5577 prot = current->protection;
5578 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5579 prot &= ~VM_PROT_WRITE;
5580 } else {
5581 assert(!VME_OBJECT(current)->code_signed);
5582 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5583 }
2d21ac55 5584
3e170ce0 5585 if (override_nx(map, VME_ALIAS(current)) && prot)
0c530ab8 5586 prot |= VM_PROT_EXECUTE;
2d21ac55 5587
5ba3f43e
A
5588#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5589 if (!(old_prot & VM_PROT_EXECUTE) &&
5590 (prot & VM_PROT_EXECUTE) &&
5591 (proc_selfcsflags() & CS_KILL) &&
5592 panic_on_unsigned_execute) {
5593 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5594 }
5595#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5596
5597 if (pmap_has_prot_policy(prot)) {
5598 if (current->wired_count) {
5599 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5600 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5601 }
5602
5603 /* If the pmap layer cares about this
5604 * protection type, force a fault for
5605 * each page so that vm_fault will
5606 * repopulate the page with the full
5607 * set of protections.
5608 */
5609 /*
5610 * TODO: We don't seem to need this,
5611 * but this is due to an internal
5612 * implementation detail of
5613 * pmap_protect. Do we want to rely
5614 * on this?
5615 */
5616 prot = VM_PROT_NONE;
5617 }
490019cf 5618
0c530ab8 5619 if (current->is_sub_map && current->use_pmap) {
5ba3f43e 5620 pmap_protect(VME_SUBMAP(current)->pmap,
2d21ac55
A
5621 current->vme_start,
5622 current->vme_end,
5623 prot);
1c79356b 5624 } else {
5ba3f43e
A
5625 if (prot & VM_PROT_WRITE) {
5626 if (VME_OBJECT(current) == compressor_object) {
5627 /*
5628 * For write requests on the
5629 * compressor, we wil ask the
5630 * pmap layer to prevent us from
5631 * taking a write fault when we
5632 * attempt to access the mapping
5633 * next.
5634 */
5635 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5636 }
5637 }
5638
5639 pmap_protect_options(map->pmap,
5640 current->vme_start,
5641 current->vme_end,
5642 prot,
5643 pmap_options,
5644 NULL);
1c79356b 5645 }
1c79356b
A
5646 }
5647 current = current->vme_next;
5648 }
5649
5353443c 5650 current = entry;
91447636
A
5651 while ((current != vm_map_to_entry(map)) &&
5652 (current->vme_start <= end)) {
5353443c
A
5653 vm_map_simplify_entry(map, current);
5654 current = current->vme_next;
5655 }
5656
1c79356b
A
5657 vm_map_unlock(map);
5658 return(KERN_SUCCESS);
5659}
5660
5661/*
5662 * vm_map_inherit:
5663 *
5664 * Sets the inheritance of the specified address
5665 * range in the target map. Inheritance
5666 * affects how the map will be shared with
5667 * child maps at the time of vm_map_fork.
5668 */
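/*
 * Illustrative sketch (hypothetical caller, not from this file): marking a
 * range so that a child created at vm_map_fork() time shares it with the
 * parent:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * VM_INHERIT_COPY is rejected below for ranges that contain submaps.
 */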
5669kern_return_t
5670vm_map_inherit(
39037602
A
5671 vm_map_t map,
5672 vm_map_offset_t start,
5673 vm_map_offset_t end,
5674 vm_inherit_t new_inheritance)
1c79356b 5675{
39037602 5676 vm_map_entry_t entry;
1c79356b
A
5677 vm_map_entry_t temp_entry;
5678
5679 vm_map_lock(map);
5680
5681 VM_MAP_RANGE_CHECK(map, start, end);
5682
5683 if (vm_map_lookup_entry(map, start, &temp_entry)) {
5684 entry = temp_entry;
1c79356b
A
5685 }
5686 else {
5687 temp_entry = temp_entry->vme_next;
5688 entry = temp_entry;
5689 }
5690
5691 /* first check entire range for submaps which can't support the */
5692 /* given inheritance. */
5693 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5694 if(entry->is_sub_map) {
91447636
A
5695 if(new_inheritance == VM_INHERIT_COPY) {
5696 vm_map_unlock(map);
1c79356b 5697 return(KERN_INVALID_ARGUMENT);
91447636 5698 }
1c79356b
A
5699 }
5700
5701 entry = entry->vme_next;
5702 }
5703
5704 entry = temp_entry;
2d21ac55
A
5705 if (entry != vm_map_to_entry(map)) {
5706 /* clip and unnest if necessary */
5707 vm_map_clip_start(map, entry, start);
5708 }
1c79356b
A
5709
5710 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5711 vm_map_clip_end(map, entry, end);
fe8ab488
A
5712 if (entry->is_sub_map) {
5713 /* clip did unnest if needed */
5714 assert(!entry->use_pmap);
5715 }
1c79356b
A
5716
5717 entry->inheritance = new_inheritance;
5718
5719 entry = entry->vme_next;
5720 }
5721
5722 vm_map_unlock(map);
5723 return(KERN_SUCCESS);
5724}
5725
2d21ac55
A
5726/*
5727 * Update the accounting for the amount of wired memory in this map. If the user has
5728 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
5729 */
5730
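/*
 * Sketch of the user-wire admission check performed below (all names as
 * used in this file): a request of "size" bytes is admitted only if
 *
 *	size + map->user_wire_size       <= MIN(map->user_wire_limit, vm_user_wire_limit)
 *	size + ptoa_64(total_wire_count) <= vm_global_user_wire_limit
 *	size + ptoa_64(total_wire_count) <= max_mem - vm_global_no_user_wire_amount
 *
 * where total_wire_count is vm_page_wire_count + vm_lopage_free_count;
 * otherwise KERN_RESOURCE_SHORTAGE is returned.
 */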
5731static kern_return_t
5732add_wire_counts(
5733 vm_map_t map,
5ba3f43e 5734 vm_map_entry_t entry,
2d21ac55 5735 boolean_t user_wire)
5ba3f43e 5736{
2d21ac55
A
5737 vm_map_size_t size;
5738
5739 if (user_wire) {
6d2010ae 5740 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
5741
5742 /*
5743 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
5744 * this map entry.
5745 */
5746
5747 if (entry->user_wired_count == 0) {
5748 size = entry->vme_end - entry->vme_start;
5ba3f43e 5749
2d21ac55
A
5750 /*
5751 * Since this is the first time the user is wiring this map entry, check to see if we're
5752 * exceeding the user wire limits. There is a per map limit which is the smaller of either
5753 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
5754 * a system-wide limit on the amount of memory all users can wire. If the user is over either
5755 * limit, then we fail.
5756 */
5757
5758 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
5759 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
5760 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
5761 return KERN_RESOURCE_SHORTAGE;
5762
5763 /*
5764 * The first time the user wires an entry, we also increment the wired_count and add this to
5765 * the total that has been wired in the map.
5766 */
5767
5768 if (entry->wired_count >= MAX_WIRE_COUNT)
5769 return KERN_FAILURE;
5770
5771 entry->wired_count++;
5772 map->user_wire_size += size;
5773 }
5774
5775 if (entry->user_wired_count >= MAX_WIRE_COUNT)
5776 return KERN_FAILURE;
5777
5778 entry->user_wired_count++;
5779
5780 } else {
5781
5782 /*
5783 * The kernel's wiring the memory. Just bump the count and continue.
5784 */
5785
5786 if (entry->wired_count >= MAX_WIRE_COUNT)
5787 panic("vm_map_wire: too many wirings");
5788
5789 entry->wired_count++;
5790 }
5791
5792 return KERN_SUCCESS;
5793}
5794
5795/*
5796 * Update the memory wiring accounting now that the given map entry is being unwired.
5797 */
5798
5799static void
5800subtract_wire_counts(
5801 vm_map_t map,
5ba3f43e 5802 vm_map_entry_t entry,
2d21ac55 5803 boolean_t user_wire)
5ba3f43e 5804{
2d21ac55
A
5805
5806 if (user_wire) {
5807
5808 /*
5809 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
5810 */
5811
5812 if (entry->user_wired_count == 1) {
5813
5814 /*
5815 * We're removing the last user wire reference. Decrement the wired_count and the total
5816 * user wired memory for this map.
5817 */
5818
5819 assert(entry->wired_count >= 1);
5820 entry->wired_count--;
5821 map->user_wire_size -= entry->vme_end - entry->vme_start;
5822 }
5823
5824 assert(entry->user_wired_count >= 1);
5825 entry->user_wired_count--;
5826
5827 } else {
5828
5829 /*
5830 * The kernel is unwiring the memory. Just update the count.
5831 */
5832
5833 assert(entry->wired_count >= 1);
5834 entry->wired_count--;
5835 }
5836}
5837
5ba3f43e
A
5838#if CONFIG_EMBEDDED
5839int cs_executable_wire = 0;
5840#endif /* CONFIG_EMBEDDED */
39037602 5841
1c79356b
A
5842/*
5843 * vm_map_wire:
5844 *
5845 * Sets the pageability of the specified address range in the
5846 * target map as wired. Regions specified as not pageable require
5847 * locked-down physical memory and physical page maps. The
5848 * access_type variable indicates types of accesses that must not
5849 * generate page faults. This is checked against protection of
5850 * memory being locked-down.
5851 *
5852 * The map must not be locked, but a reference must remain to the
5853 * map throughout the call.
5854 */
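/*
 * Illustrative sketch (hypothetical caller, not from this file):
 * vm_map_wire_nested() below is static; callers reach it through the
 * wrappers defined after it, e.g. to user-wire a range for read access:
 *
 *	kr = vm_map_wire_external(map, start, end, VM_PROT_READ, TRUE);
 */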
91447636 5855static kern_return_t
1c79356b 5856vm_map_wire_nested(
39037602
A
5857 vm_map_t map,
5858 vm_map_offset_t start,
5859 vm_map_offset_t end,
5860 vm_prot_t caller_prot,
5ba3f43e 5861 vm_tag_t tag,
1c79356b 5862 boolean_t user_wire,
5ba3f43e 5863 pmap_t map_pmap,
fe8ab488
A
5864 vm_map_offset_t pmap_addr,
5865 ppnum_t *physpage_p)
1c79356b 5866{
39037602
A
5867 vm_map_entry_t entry;
5868 vm_prot_t access_type;
1c79356b 5869 struct vm_map_entry *first_entry, tmp_entry;
91447636 5870 vm_map_t real_map;
39037602 5871 vm_map_offset_t s,e;
1c79356b
A
5872 kern_return_t rc;
5873 boolean_t need_wakeup;
5874 boolean_t main_map = FALSE;
9bccf70c 5875 wait_interrupt_t interruptible_state;
0b4e3aa0 5876 thread_t cur_thread;
1c79356b 5877 unsigned int last_timestamp;
91447636 5878 vm_map_size_t size;
fe8ab488
A
5879 boolean_t wire_and_extract;
5880
3e170ce0
A
5881 access_type = (caller_prot & VM_PROT_ALL);
5882
fe8ab488
A
5883 wire_and_extract = FALSE;
5884 if (physpage_p != NULL) {
5885 /*
5886 * The caller wants the physical page number of the
5887 * wired page. We return only one physical page number
5888 * so this works for only one page at a time.
5889 */
5890 if ((end - start) != PAGE_SIZE) {
5891 return KERN_INVALID_ARGUMENT;
5892 }
5893 wire_and_extract = TRUE;
5894 *physpage_p = 0;
5895 }
1c79356b
A
5896
5897 vm_map_lock(map);
5898 if(map_pmap == NULL)
5899 main_map = TRUE;
5900 last_timestamp = map->timestamp;
5901
5902 VM_MAP_RANGE_CHECK(map, start, end);
5903 assert(page_aligned(start));
5904 assert(page_aligned(end));
39236c6e
A
5905 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5906 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
5907 if (start == end) {
5908 /* We wired what the caller asked for, zero pages */
5909 vm_map_unlock(map);
5910 return KERN_SUCCESS;
5911 }
1c79356b 5912
2d21ac55
A
5913 need_wakeup = FALSE;
5914 cur_thread = current_thread();
5915
5916 s = start;
5917 rc = KERN_SUCCESS;
5918
5919 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 5920 entry = first_entry;
2d21ac55
A
5921 /*
5922 * vm_map_clip_start will be done later.
5923 * We don't want to unnest any nested submaps here !
5924 */
1c79356b
A
5925 } else {
5926 /* Start address is not in map */
2d21ac55
A
5927 rc = KERN_INVALID_ADDRESS;
5928 goto done;
1c79356b
A
5929 }
5930
2d21ac55
A
5931 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5932 /*
5933 * At this point, we have wired from "start" to "s".
5934 * We still need to wire from "s" to "end".
5935 *
5936 * "entry" hasn't been clipped, so it could start before "s"
5937 * and/or end after "end".
5938 */
5939
5940 /* "e" is how far we want to wire in this entry */
5941 e = entry->vme_end;
5942 if (e > end)
5943 e = end;
5944
1c79356b
A
5945 /*
5946 * If another thread is wiring/unwiring this entry then
 5947		 * block after informing the other thread to wake us up.
5948 */
5949 if (entry->in_transition) {
9bccf70c
A
5950 wait_result_t wait_result;
5951
1c79356b
A
5952 /*
5953 * We have not clipped the entry. Make sure that
5954 * the start address is in range so that the lookup
5955 * below will succeed.
2d21ac55
A
5956 * "s" is the current starting point: we've already
5957 * wired from "start" to "s" and we still have
5958 * to wire from "s" to "end".
1c79356b 5959 */
1c79356b
A
5960
5961 entry->needs_wakeup = TRUE;
5962
5963 /*
5964 * wake up anybody waiting on entries that we have
5965 * already wired.
5966 */
5967 if (need_wakeup) {
5968 vm_map_entry_wakeup(map);
5969 need_wakeup = FALSE;
5970 }
5971 /*
5972 * User wiring is interruptible
5973 */
5ba3f43e 5974 wait_result = vm_map_entry_wait(map,
2d21ac55
A
5975 (user_wire) ? THREAD_ABORTSAFE :
5976 THREAD_UNINT);
9bccf70c 5977 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
5978 /*
5979 * undo the wirings we have done so far
5980 * We do not clear the needs_wakeup flag,
5981 * because we cannot tell if we were the
5982 * only one waiting.
5983 */
2d21ac55
A
5984 rc = KERN_FAILURE;
5985 goto done;
1c79356b
A
5986 }
5987
1c79356b
A
5988 /*
5989 * Cannot avoid a lookup here. reset timestamp.
5990 */
5991 last_timestamp = map->timestamp;
5992
5993 /*
5994 * The entry could have been clipped, look it up again.
 5996			 * Worst that can happen is that it may not exist anymore.
5996 */
5997 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
5998 /*
 5999				 * User: undo everything up to the previous
6000 * entry. let vm_map_unwire worry about
6001 * checking the validity of the range.
6002 */
2d21ac55
A
6003 rc = KERN_FAILURE;
6004 goto done;
1c79356b
A
6005 }
6006 entry = first_entry;
6007 continue;
6008 }
5ba3f43e 6009
2d21ac55 6010 if (entry->is_sub_map) {
91447636
A
6011 vm_map_offset_t sub_start;
6012 vm_map_offset_t sub_end;
6013 vm_map_offset_t local_start;
6014 vm_map_offset_t local_end;
1c79356b 6015 pmap_t pmap;
2d21ac55 6016
fe8ab488
A
6017 if (wire_and_extract) {
6018 /*
6019 * Wiring would result in copy-on-write
6020 * which would not be compatible with
6021 * the sharing we have with the original
6022 * provider of this memory.
6023 */
6024 rc = KERN_INVALID_ARGUMENT;
6025 goto done;
6026 }
6027
2d21ac55 6028 vm_map_clip_start(map, entry, s);
1c79356b
A
6029 vm_map_clip_end(map, entry, end);
6030
3e170ce0 6031 sub_start = VME_OFFSET(entry);
2d21ac55 6032 sub_end = entry->vme_end;
3e170ce0 6033 sub_end += VME_OFFSET(entry) - entry->vme_start;
5ba3f43e 6034
1c79356b
A
6035 local_end = entry->vme_end;
6036 if(map_pmap == NULL) {
2d21ac55
A
6037 vm_object_t object;
6038 vm_object_offset_t offset;
6039 vm_prot_t prot;
6040 boolean_t wired;
6041 vm_map_entry_t local_entry;
6042 vm_map_version_t version;
6043 vm_map_t lookup_map;
6044
1c79356b 6045 if(entry->use_pmap) {
3e170ce0 6046 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
6047 /* ppc implementation requires that */
 6048					/* submaps' pmap address ranges line */
6049 /* up with parent map */
6050#ifdef notdef
6051 pmap_addr = sub_start;
6052#endif
2d21ac55 6053 pmap_addr = s;
1c79356b
A
6054 } else {
6055 pmap = map->pmap;
2d21ac55 6056 pmap_addr = s;
1c79356b 6057 }
2d21ac55 6058
1c79356b 6059 if (entry->wired_count) {
2d21ac55
A
6060 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6061 goto done;
6062
6063 /*
6064 * The map was not unlocked:
6065 * no need to goto re-lookup.
6066 * Just go directly to next entry.
6067 */
1c79356b 6068 entry = entry->vme_next;
2d21ac55 6069 s = entry->vme_start;
1c79356b
A
6070 continue;
6071
2d21ac55 6072 }
9bccf70c 6073
2d21ac55
A
6074 /* call vm_map_lookup_locked to */
 6075				/* cause any needs_copy to be */
6076 /* evaluated */
6077 local_start = entry->vme_start;
6078 lookup_map = map;
6079 vm_map_lock_write_to_read(map);
6080 if(vm_map_lookup_locked(
5ba3f43e 6081 &lookup_map, local_start,
39037602 6082 access_type | VM_PROT_COPY,
2d21ac55
A
6083 OBJECT_LOCK_EXCLUSIVE,
6084 &version, &object,
6085 &offset, &prot, &wired,
6086 NULL,
6087 &real_map)) {
1c79356b 6088
2d21ac55 6089 vm_map_unlock_read(lookup_map);
4bd07ac2 6090 assert(map_pmap == NULL);
2d21ac55
A
6091 vm_map_unwire(map, start,
6092 s, user_wire);
6093 return(KERN_FAILURE);
6094 }
316670eb 6095 vm_object_unlock(object);
2d21ac55
A
6096 if(real_map != lookup_map)
6097 vm_map_unlock(real_map);
6098 vm_map_unlock_read(lookup_map);
6099 vm_map_lock(map);
1c79356b 6100
2d21ac55 6101 /* we unlocked, so must re-lookup */
5ba3f43e 6102 if (!vm_map_lookup_entry(map,
2d21ac55
A
6103 local_start,
6104 &local_entry)) {
6105 rc = KERN_FAILURE;
6106 goto done;
6107 }
6108
6109 /*
6110 * entry could have been "simplified",
6111 * so re-clip
6112 */
6113 entry = local_entry;
6114 assert(s == local_start);
6115 vm_map_clip_start(map, entry, s);
6116 vm_map_clip_end(map, entry, end);
6117 /* re-compute "e" */
6118 e = entry->vme_end;
6119 if (e > end)
6120 e = end;
6121
6122 /* did we have a change of type? */
6123 if (!entry->is_sub_map) {
6124 last_timestamp = map->timestamp;
6125 continue;
1c79356b
A
6126 }
6127 } else {
9bccf70c 6128 local_start = entry->vme_start;
2d21ac55
A
6129 pmap = map_pmap;
6130 }
6131
6132 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6133 goto done;
6134
6135 entry->in_transition = TRUE;
6136
6137 vm_map_unlock(map);
5ba3f43e 6138 rc = vm_map_wire_nested(VME_SUBMAP(entry),
1c79356b 6139 sub_start, sub_end,
5ba3f43e 6140 caller_prot, tag,
fe8ab488
A
6141 user_wire, pmap, pmap_addr,
6142 NULL);
2d21ac55 6143 vm_map_lock(map);
9bccf70c 6144
1c79356b
A
6145 /*
6146 * Find the entry again. It could have been clipped
6147 * after we unlocked the map.
6148 */
9bccf70c
A
6149 if (!vm_map_lookup_entry(map, local_start,
6150 &first_entry))
6151 panic("vm_map_wire: re-lookup failed");
6152 entry = first_entry;
1c79356b 6153
2d21ac55
A
6154 assert(local_start == s);
6155 /* re-compute "e" */
6156 e = entry->vme_end;
6157 if (e > end)
6158 e = end;
6159
1c79356b
A
6160 last_timestamp = map->timestamp;
6161 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 6162 (entry->vme_start < e)) {
1c79356b
A
6163 assert(entry->in_transition);
6164 entry->in_transition = FALSE;
6165 if (entry->needs_wakeup) {
6166 entry->needs_wakeup = FALSE;
6167 need_wakeup = TRUE;
6168 }
6169 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 6170 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6171 }
6172 entry = entry->vme_next;
6173 }
6174 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6175 goto done;
1c79356b 6176 }
2d21ac55
A
6177
6178 /* no need to relookup again */
6179 s = entry->vme_start;
1c79356b
A
6180 continue;
6181 }
6182
6183 /*
6184 * If this entry is already wired then increment
6185 * the appropriate wire reference count.
6186 */
9bccf70c 6187 if (entry->wired_count) {
fe8ab488
A
6188
6189 if ((entry->protection & access_type) != access_type) {
6190 /* found a protection problem */
6191
6192 /*
6193 * XXX FBDP
6194 * We should always return an error
6195 * in this case but since we didn't
6196 * enforce it before, let's do
6197 * it only for the new "wire_and_extract"
6198 * code path for now...
6199 */
6200 if (wire_and_extract) {
6201 rc = KERN_PROTECTION_FAILURE;
6202 goto done;
6203 }
6204 }
6205
1c79356b
A
6206 /*
6207 * entry is already wired down, get our reference
6208 * after clipping to our range.
6209 */
2d21ac55 6210 vm_map_clip_start(map, entry, s);
1c79356b 6211 vm_map_clip_end(map, entry, end);
1c79356b 6212
2d21ac55
A
6213 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6214 goto done;
6215
fe8ab488
A
6216 if (wire_and_extract) {
6217 vm_object_t object;
6218 vm_object_offset_t offset;
6219 vm_page_t m;
6220
6221 /*
6222 * We don't have to "wire" the page again
 6223				 * but we still have to "extract" its
6224 * physical page number, after some sanity
6225 * checks.
6226 */
6227 assert((entry->vme_end - entry->vme_start)
6228 == PAGE_SIZE);
6229 assert(!entry->needs_copy);
6230 assert(!entry->is_sub_map);
3e170ce0 6231 assert(VME_OBJECT(entry));
fe8ab488
A
6232 if (((entry->vme_end - entry->vme_start)
6233 != PAGE_SIZE) ||
6234 entry->needs_copy ||
6235 entry->is_sub_map ||
3e170ce0 6236 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6237 rc = KERN_INVALID_ARGUMENT;
6238 goto done;
6239 }
6240
3e170ce0
A
6241 object = VME_OBJECT(entry);
6242 offset = VME_OFFSET(entry);
fe8ab488
A
6243 /* need exclusive lock to update m->dirty */
6244 if (entry->protection & VM_PROT_WRITE) {
6245 vm_object_lock(object);
6246 } else {
6247 vm_object_lock_shared(object);
6248 }
6249 m = vm_page_lookup(object, offset);
6250 assert(m != VM_PAGE_NULL);
39037602
A
6251 assert(VM_PAGE_WIRED(m));
6252 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6253 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
6254 if (entry->protection & VM_PROT_WRITE) {
6255 vm_object_lock_assert_exclusive(
39037602 6256 object);
fe8ab488
A
6257 m->dirty = TRUE;
6258 }
6259 } else {
6260 /* not already wired !? */
6261 *physpage_p = 0;
6262 }
6263 vm_object_unlock(object);
6264 }
6265
2d21ac55 6266 /* map was not unlocked: no need to relookup */
1c79356b 6267 entry = entry->vme_next;
2d21ac55 6268 s = entry->vme_start;
1c79356b
A
6269 continue;
6270 }
6271
6272 /*
6273 * Unwired entry or wire request transmitted via submap
6274 */
6275
5ba3f43e
A
6276#if CONFIG_EMBEDDED
6277 /*
6278 * Wiring would copy the pages to the shadow object.
6279 * The shadow object would not be code-signed so
6280 * attempting to execute code from these copied pages
6281 * would trigger a code-signing violation.
6282 */
6283 if (entry->protection & VM_PROT_EXECUTE) {
6284#if MACH_ASSERT
6285 printf("pid %d[%s] wiring executable range from "
6286 "0x%llx to 0x%llx: rejected to preserve "
6287 "code-signing\n",
6288 proc_selfpid(),
6289 (current_task()->bsd_info
6290 ? proc_name_address(current_task()->bsd_info)
6291 : "?"),
6292 (uint64_t) entry->vme_start,
6293 (uint64_t) entry->vme_end);
6294#endif /* MACH_ASSERT */
6295 DTRACE_VM2(cs_executable_wire,
6296 uint64_t, (uint64_t)entry->vme_start,
6297 uint64_t, (uint64_t)entry->vme_end);
6298 cs_executable_wire++;
6299 rc = KERN_PROTECTION_FAILURE;
6300 goto done;
6301 }
6302#endif /* CONFIG_EMBEDDED */
1c79356b 6303
39037602 6304
1c79356b
A
6305 /*
6306 * Perform actions of vm_map_lookup that need the write
6307 * lock on the map: create a shadow object for a
6308 * copy-on-write region, or an object for a zero-fill
6309 * region.
6310 */
6311 size = entry->vme_end - entry->vme_start;
6312 /*
6313 * If wiring a copy-on-write page, we need to copy it now
6314 * even if we're only (currently) requesting read access.
6315 * This is aggressive, but once it's wired we can't move it.
6316 */
6317 if (entry->needs_copy) {
fe8ab488
A
6318 if (wire_and_extract) {
6319 /*
6320 * We're supposed to share with the original
 6321				 * provider, so it should not be "needs_copy"
6322 */
6323 rc = KERN_INVALID_ARGUMENT;
6324 goto done;
6325 }
3e170ce0
A
6326
6327 VME_OBJECT_SHADOW(entry, size);
1c79356b 6328 entry->needs_copy = FALSE;
3e170ce0 6329 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6330 if (wire_and_extract) {
6331 /*
6332 * We're supposed to share with the original
6333 * provider so should already have an object.
6334 */
6335 rc = KERN_INVALID_ARGUMENT;
6336 goto done;
6337 }
3e170ce0
A
6338 VME_OBJECT_SET(entry, vm_object_allocate(size));
6339 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 6340 assert(entry->use_pmap);
1c79356b
A
6341 }
6342
2d21ac55 6343 vm_map_clip_start(map, entry, s);
1c79356b
A
6344 vm_map_clip_end(map, entry, end);
6345
2d21ac55 6346 /* re-compute "e" */
1c79356b 6347 e = entry->vme_end;
2d21ac55
A
6348 if (e > end)
6349 e = end;
1c79356b
A
6350
6351 /*
6352 * Check for holes and protection mismatch.
6353 * Holes: Next entry should be contiguous unless this
6354 * is the end of the region.
6355 * Protection: Access requested must be allowed, unless
6356 * wiring is by protection class
6357 */
2d21ac55
A
6358 if ((entry->vme_end < end) &&
6359 ((entry->vme_next == vm_map_to_entry(map)) ||
6360 (entry->vme_next->vme_start > entry->vme_end))) {
6361 /* found a hole */
6362 rc = KERN_INVALID_ADDRESS;
6363 goto done;
6364 }
6365 if ((entry->protection & access_type) != access_type) {
6366 /* found a protection problem */
6367 rc = KERN_PROTECTION_FAILURE;
6368 goto done;
1c79356b
A
6369 }
6370
6371 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6372
2d21ac55
A
6373 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6374 goto done;
1c79356b
A
6375
6376 entry->in_transition = TRUE;
6377
6378 /*
6379 * This entry might get split once we unlock the map.
6380 * In vm_fault_wire(), we need the current range as
6381 * defined by this entry. In order for this to work
6382 * along with a simultaneous clip operation, we make a
6383 * temporary copy of this entry and use that for the
6384 * wiring. Note that the underlying objects do not
6385 * change during a clip.
6386 */
6387 tmp_entry = *entry;
6388
6389 /*
 6390		 * The in_transition state guarantees that the entry
 6391		 * (or entries for this range, if a split occurred) will be
6392 * there when the map lock is acquired for the second time.
6393 */
6394 vm_map_unlock(map);
0b4e3aa0 6395
9bccf70c
A
6396 if (!user_wire && cur_thread != THREAD_NULL)
6397 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
6398 else
6399 interruptible_state = THREAD_UNINT;
9bccf70c 6400
1c79356b 6401 if(map_pmap)
5ba3f43e
A
6402 rc = vm_fault_wire(map,
6403 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
fe8ab488 6404 physpage_p);
1c79356b 6405 else
5ba3f43e
A
6406 rc = vm_fault_wire(map,
6407 &tmp_entry, caller_prot, tag, map->pmap,
fe8ab488
A
6408 tmp_entry.vme_start,
6409 physpage_p);
0b4e3aa0
A
6410
6411 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 6412 thread_interrupt_level(interruptible_state);
0b4e3aa0 6413
1c79356b
A
6414 vm_map_lock(map);
6415
6416 if (last_timestamp+1 != map->timestamp) {
6417 /*
6418 * Find the entry again. It could have been clipped
6419 * after we unlocked the map.
6420 */
6421 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 6422 &first_entry))
1c79356b
A
6423 panic("vm_map_wire: re-lookup failed");
6424
6425 entry = first_entry;
6426 }
6427
6428 last_timestamp = map->timestamp;
6429
6430 while ((entry != vm_map_to_entry(map)) &&
6431 (entry->vme_start < tmp_entry.vme_end)) {
6432 assert(entry->in_transition);
6433 entry->in_transition = FALSE;
6434 if (entry->needs_wakeup) {
6435 entry->needs_wakeup = FALSE;
6436 need_wakeup = TRUE;
6437 }
6438 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6439 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6440 }
6441 entry = entry->vme_next;
6442 }
6443
6444 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6445 goto done;
1c79356b 6446 }
2d21ac55 6447
d190cdc3
A
6448 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6449 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6450 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6451 /* found a "new" hole */
6452 s = tmp_entry.vme_end;
6453 rc = KERN_INVALID_ADDRESS;
6454 goto done;
6455 }
6456
2d21ac55 6457 s = entry->vme_start;
d190cdc3 6458
1c79356b 6459 } /* end while loop through map entries */
2d21ac55
A
6460
6461done:
6462 if (rc == KERN_SUCCESS) {
6463 /* repair any damage we may have made to the VM map */
6464 vm_map_simplify_range(map, start, end);
6465 }
6466
1c79356b
A
6467 vm_map_unlock(map);
6468
6469 /*
6470 * wake up anybody waiting on entries we wired.
6471 */
6472 if (need_wakeup)
6473 vm_map_entry_wakeup(map);
6474
2d21ac55
A
6475 if (rc != KERN_SUCCESS) {
6476 /* undo what has been wired so far */
4bd07ac2
A
6477 vm_map_unwire_nested(map, start, s, user_wire,
6478 map_pmap, pmap_addr);
fe8ab488
A
6479 if (physpage_p) {
6480 *physpage_p = 0;
6481 }
2d21ac55
A
6482 }
6483
6484 return rc;
1c79356b
A
6485
6486}
6487
6488kern_return_t
3e170ce0 6489vm_map_wire_external(
39037602
A
6490 vm_map_t map,
6491 vm_map_offset_t start,
6492 vm_map_offset_t end,
6493 vm_prot_t caller_prot,
1c79356b
A
6494 boolean_t user_wire)
6495{
3e170ce0
A
6496 kern_return_t kret;
6497
5ba3f43e 6498 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
3e170ce0
A
6499 user_wire, (pmap_t)NULL, 0, NULL);
6500 return kret;
6501}
1c79356b 6502
3e170ce0 6503kern_return_t
5ba3f43e 6504vm_map_wire_kernel(
39037602
A
6505 vm_map_t map,
6506 vm_map_offset_t start,
6507 vm_map_offset_t end,
6508 vm_prot_t caller_prot,
5ba3f43e 6509 vm_tag_t tag,
3e170ce0
A
6510 boolean_t user_wire)
6511{
1c79356b
A
6512 kern_return_t kret;
6513
5ba3f43e 6514 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
fe8ab488
A
6515 user_wire, (pmap_t)NULL, 0, NULL);
6516 return kret;
6517}
6518
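/*
 * The *_and_extract variants below wire exactly one VM page (the wrappers
 * pass start .. start + VM_MAP_PAGE_SIZE(map)) and report its physical
 * page number through *physpage_p; on failure *physpage_p is set to 0.
 */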
6519kern_return_t
3e170ce0 6520vm_map_wire_and_extract_external(
fe8ab488
A
6521 vm_map_t map,
6522 vm_map_offset_t start,
3e170ce0 6523 vm_prot_t caller_prot,
fe8ab488
A
6524 boolean_t user_wire,
6525 ppnum_t *physpage_p)
6526{
3e170ce0
A
6527 kern_return_t kret;
6528
3e170ce0
A
6529 kret = vm_map_wire_nested(map,
6530 start,
6531 start+VM_MAP_PAGE_SIZE(map),
5ba3f43e
A
6532 caller_prot,
6533 vm_tag_bt(),
3e170ce0
A
6534 user_wire,
6535 (pmap_t)NULL,
6536 0,
6537 physpage_p);
6538 if (kret != KERN_SUCCESS &&
6539 physpage_p != NULL) {
6540 *physpage_p = 0;
6541 }
6542 return kret;
6543}
fe8ab488 6544
3e170ce0 6545kern_return_t
5ba3f43e 6546vm_map_wire_and_extract_kernel(
3e170ce0
A
6547 vm_map_t map,
6548 vm_map_offset_t start,
6549 vm_prot_t caller_prot,
5ba3f43e 6550 vm_tag_t tag,
3e170ce0
A
6551 boolean_t user_wire,
6552 ppnum_t *physpage_p)
6553{
fe8ab488
A
6554 kern_return_t kret;
6555
6556 kret = vm_map_wire_nested(map,
6557 start,
6558 start+VM_MAP_PAGE_SIZE(map),
5ba3f43e
A
6559 caller_prot,
6560 tag,
fe8ab488
A
6561 user_wire,
6562 (pmap_t)NULL,
6563 0,
6564 physpage_p);
6565 if (kret != KERN_SUCCESS &&
6566 physpage_p != NULL) {
6567 *physpage_p = 0;
6568 }
1c79356b
A
6569 return kret;
6570}
6571
6572/*
6573 * vm_map_unwire:
6574 *
6575 * Sets the pageability of the specified address range in the target
6576 * as pageable. Regions specified must have been wired previously.
6577 *
6578 * The map must not be locked, but a reference must remain to the map
6579 * throughout the call.
6580 *
6581 * Kernel will panic on failures. User unwire ignores holes and
 6582 * unwired and in-transition entries to avoid losing memory by leaving
6583 * it unwired.
6584 */
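/*
 * Illustrative sketch (hypothetical caller, not from this file): undoing a
 * user wiring established earlier on the same range, via the public
 * vm_map_unwire() wrapper defined after the static worker below:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);
 */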
91447636 6585static kern_return_t
1c79356b 6586vm_map_unwire_nested(
39037602
A
6587 vm_map_t map,
6588 vm_map_offset_t start,
6589 vm_map_offset_t end,
1c79356b 6590 boolean_t user_wire,
9bccf70c 6591 pmap_t map_pmap,
91447636 6592 vm_map_offset_t pmap_addr)
1c79356b 6593{
39037602 6594 vm_map_entry_t entry;
1c79356b
A
6595 struct vm_map_entry *first_entry, tmp_entry;
6596 boolean_t need_wakeup;
6597 boolean_t main_map = FALSE;
6598 unsigned int last_timestamp;
6599
6600 vm_map_lock(map);
6601 if(map_pmap == NULL)
6602 main_map = TRUE;
6603 last_timestamp = map->timestamp;
6604
6605 VM_MAP_RANGE_CHECK(map, start, end);
6606 assert(page_aligned(start));
6607 assert(page_aligned(end));
39236c6e
A
6608 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6609 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 6610
2d21ac55
A
6611 if (start == end) {
6612 /* We unwired what the caller asked for: zero pages */
6613 vm_map_unlock(map);
6614 return KERN_SUCCESS;
6615 }
6616
1c79356b
A
6617 if (vm_map_lookup_entry(map, start, &first_entry)) {
6618 entry = first_entry;
2d21ac55
A
6619 /*
6620 * vm_map_clip_start will be done later.
6621 * We don't want to unnest any nested sub maps here !
6622 */
1c79356b
A
6623 }
6624 else {
2d21ac55
A
6625 if (!user_wire) {
6626 panic("vm_map_unwire: start not found");
6627 }
1c79356b
A
6628 /* Start address is not in map. */
6629 vm_map_unlock(map);
6630 return(KERN_INVALID_ADDRESS);
6631 }
6632
b0d623f7
A
6633 if (entry->superpage_size) {
6634 /* superpages are always wired */
6635 vm_map_unlock(map);
6636 return KERN_INVALID_ADDRESS;
6637 }
6638
1c79356b
A
6639 need_wakeup = FALSE;
6640 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6641 if (entry->in_transition) {
6642 /*
6643 * 1)
6644 * Another thread is wiring down this entry. Note
 6645			 * that if it were not for the other thread, we would
6646 * be unwiring an unwired entry. This is not
6647 * permitted. If we wait, we will be unwiring memory
6648 * we did not wire.
6649 *
6650 * 2)
6651 * Another thread is unwiring this entry. We did not
6652 * have a reference to it, because if we did, this
 6653			 * entry would not be getting unwired now.
6654 */
2d21ac55
A
6655 if (!user_wire) {
6656 /*
6657 * XXX FBDP
6658 * This could happen: there could be some
6659 * overlapping vslock/vsunlock operations
6660 * going on.
6661 * We should probably just wait and retry,
6662 * but then we have to be careful that this
5ba3f43e 6663 * entry could get "simplified" after
2d21ac55
A
6664 * "in_transition" gets unset and before
6665 * we re-lookup the entry, so we would
6666 * have to re-clip the entry to avoid
6667 * re-unwiring what we have already unwired...
6668 * See vm_map_wire_nested().
6669 *
6670 * Or we could just ignore "in_transition"
 6671				 * here and proceed to decrement the wired
6672 * count(s) on this entry. That should be fine
6673 * as long as "wired_count" doesn't drop all
6674 * the way to 0 (and we should panic if THAT
6675 * happens).
6676 */
1c79356b 6677 panic("vm_map_unwire: in_transition entry");
2d21ac55 6678 }
1c79356b
A
6679
6680 entry = entry->vme_next;
6681 continue;
6682 }
6683
2d21ac55 6684 if (entry->is_sub_map) {
91447636
A
6685 vm_map_offset_t sub_start;
6686 vm_map_offset_t sub_end;
6687 vm_map_offset_t local_end;
1c79356b 6688 pmap_t pmap;
5ba3f43e 6689
1c79356b
A
6690 vm_map_clip_start(map, entry, start);
6691 vm_map_clip_end(map, entry, end);
6692
3e170ce0 6693 sub_start = VME_OFFSET(entry);
1c79356b 6694 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 6695 sub_end += VME_OFFSET(entry);
1c79356b
A
6696 local_end = entry->vme_end;
6697 if(map_pmap == NULL) {
2d21ac55 6698 if(entry->use_pmap) {
3e170ce0 6699 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 6700 pmap_addr = sub_start;
2d21ac55 6701 } else {
1c79356b 6702 pmap = map->pmap;
9bccf70c 6703 pmap_addr = start;
2d21ac55
A
6704 }
6705 if (entry->wired_count == 0 ||
6706 (user_wire && entry->user_wired_count == 0)) {
6707 if (!user_wire)
6708 panic("vm_map_unwire: entry is unwired");
6709 entry = entry->vme_next;
6710 continue;
6711 }
6712
6713 /*
6714 * Check for holes
6715 * Holes: Next entry should be contiguous unless
6716 * this is the end of the region.
6717 */
5ba3f43e 6718 if (((entry->vme_end < end) &&
2d21ac55 6719 ((entry->vme_next == vm_map_to_entry(map)) ||
5ba3f43e 6720 (entry->vme_next->vme_start
2d21ac55
A
6721 > entry->vme_end)))) {
6722 if (!user_wire)
6723 panic("vm_map_unwire: non-contiguous region");
1c79356b 6724/*
2d21ac55
A
6725 entry = entry->vme_next;
6726 continue;
1c79356b 6727*/
2d21ac55 6728 }
1c79356b 6729
2d21ac55 6730 subtract_wire_counts(map, entry, user_wire);
1c79356b 6731
2d21ac55
A
6732 if (entry->wired_count != 0) {
6733 entry = entry->vme_next;
6734 continue;
6735 }
1c79356b 6736
2d21ac55
A
6737 entry->in_transition = TRUE;
6738 tmp_entry = *entry;/* see comment in vm_map_wire() */
6739
6740 /*
6741 * We can unlock the map now. The in_transition state
 6742				 * guarantees existence of the entry.
6743 */
6744 vm_map_unlock(map);
5ba3f43e 6745 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
6746 sub_start, sub_end, user_wire, pmap, pmap_addr);
6747 vm_map_lock(map);
1c79356b 6748
2d21ac55
A
6749 if (last_timestamp+1 != map->timestamp) {
6750 /*
5ba3f43e 6751 * Find the entry again. It could have been
2d21ac55
A
6752 * clipped or deleted after we unlocked the map.
6753 */
5ba3f43e 6754 if (!vm_map_lookup_entry(map,
2d21ac55
A
6755 tmp_entry.vme_start,
6756 &first_entry)) {
6757 if (!user_wire)
6758 panic("vm_map_unwire: re-lookup failed");
6759 entry = first_entry->vme_next;
6760 } else
6761 entry = first_entry;
6762 }
6763 last_timestamp = map->timestamp;
1c79356b 6764
1c79356b 6765 /*
2d21ac55 6766 * clear transition bit for all constituent entries
5ba3f43e 6767 * that were in the original entry (saved in
2d21ac55
A
6768 * tmp_entry). Also check for waiters.
6769 */
6770 while ((entry != vm_map_to_entry(map)) &&
6771 (entry->vme_start < tmp_entry.vme_end)) {
6772 assert(entry->in_transition);
6773 entry->in_transition = FALSE;
6774 if (entry->needs_wakeup) {
6775 entry->needs_wakeup = FALSE;
6776 need_wakeup = TRUE;
6777 }
6778 entry = entry->vme_next;
1c79356b 6779 }
2d21ac55 6780 continue;
1c79356b 6781 } else {
2d21ac55 6782 vm_map_unlock(map);
3e170ce0 6783 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
6784 sub_start, sub_end, user_wire, map_pmap,
6785 pmap_addr);
6786 vm_map_lock(map);
1c79356b 6787
2d21ac55
A
6788 if (last_timestamp+1 != map->timestamp) {
6789 /*
5ba3f43e 6790 * Find the entry again. It could have been
2d21ac55
A
6791 * clipped or deleted after we unlocked the map.
6792 */
5ba3f43e 6793 if (!vm_map_lookup_entry(map,
2d21ac55
A
6794 tmp_entry.vme_start,
6795 &first_entry)) {
6796 if (!user_wire)
6797 panic("vm_map_unwire: re-lookup failed");
6798 entry = first_entry->vme_next;
6799 } else
6800 entry = first_entry;
6801 }
6802 last_timestamp = map->timestamp;
1c79356b
A
6803 }
6804 }
6805
6806
9bccf70c 6807 if ((entry->wired_count == 0) ||
2d21ac55 6808 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
6809 if (!user_wire)
6810 panic("vm_map_unwire: entry is unwired");
6811
6812 entry = entry->vme_next;
6813 continue;
6814 }
5ba3f43e 6815
1c79356b 6816 assert(entry->wired_count > 0 &&
2d21ac55 6817 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
6818
6819 vm_map_clip_start(map, entry, start);
6820 vm_map_clip_end(map, entry, end);
6821
6822 /*
6823 * Check for holes
6824 * Holes: Next entry should be contiguous unless
6825 * this is the end of the region.
6826 */
5ba3f43e 6827 if (((entry->vme_end < end) &&
2d21ac55
A
6828 ((entry->vme_next == vm_map_to_entry(map)) ||
6829 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
6830
6831 if (!user_wire)
6832 panic("vm_map_unwire: non-contiguous region");
6833 entry = entry->vme_next;
6834 continue;
6835 }
6836
2d21ac55 6837 subtract_wire_counts(map, entry, user_wire);
1c79356b 6838
9bccf70c 6839 if (entry->wired_count != 0) {
1c79356b
A
6840 entry = entry->vme_next;
6841 continue;
1c79356b
A
6842 }
6843
b0d623f7
A
6844 if(entry->zero_wired_pages) {
6845 entry->zero_wired_pages = FALSE;
6846 }
6847
1c79356b
A
6848 entry->in_transition = TRUE;
6849 tmp_entry = *entry; /* see comment in vm_map_wire() */
6850
6851 /*
6852 * We can unlock the map now. The in_transition state
 6853		 * guarantees existence of the entry.
6854 */
6855 vm_map_unlock(map);
6856 if(map_pmap) {
5ba3f43e 6857 vm_fault_unwire(map,
2d21ac55 6858 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 6859 } else {
5ba3f43e
A
6860 vm_fault_unwire(map,
6861 &tmp_entry, FALSE, map->pmap,
2d21ac55 6862 tmp_entry.vme_start);
1c79356b
A
6863 }
6864 vm_map_lock(map);
6865
6866 if (last_timestamp+1 != map->timestamp) {
6867 /*
6868 * Find the entry again. It could have been clipped
6869 * or deleted after we unlocked the map.
6870 */
6871 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 6872 &first_entry)) {
1c79356b 6873 if (!user_wire)
2d21ac55 6874 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
6875 entry = first_entry->vme_next;
6876 } else
6877 entry = first_entry;
6878 }
6879 last_timestamp = map->timestamp;
6880
6881 /*
6882 * clear transition bit for all constituent entries that
6883 * were in the original entry (saved in tmp_entry). Also
6884 * check for waiters.
6885 */
6886 while ((entry != vm_map_to_entry(map)) &&
6887 (entry->vme_start < tmp_entry.vme_end)) {
6888 assert(entry->in_transition);
6889 entry->in_transition = FALSE;
6890 if (entry->needs_wakeup) {
6891 entry->needs_wakeup = FALSE;
6892 need_wakeup = TRUE;
6893 }
6894 entry = entry->vme_next;
6895 }
6896 }
91447636
A
6897
6898 /*
6899 * We might have fragmented the address space when we wired this
6900 * range of addresses. Attempt to re-coalesce these VM map entries
6901 * with their neighbors now that they're no longer wired.
6902 * Under some circumstances, address space fragmentation can
6903 * prevent VM object shadow chain collapsing, which can cause
6904 * swap space leaks.
6905 */
6906 vm_map_simplify_range(map, start, end);
6907
1c79356b
A
6908 vm_map_unlock(map);
6909 /*
6910 * wake up anybody waiting on entries that we have unwired.
6911 */
6912 if (need_wakeup)
6913 vm_map_entry_wakeup(map);
6914 return(KERN_SUCCESS);
6915
6916}
6917
6918kern_return_t
6919vm_map_unwire(
39037602
A
6920 vm_map_t map,
6921 vm_map_offset_t start,
6922 vm_map_offset_t end,
1c79356b
A
6923 boolean_t user_wire)
6924{
5ba3f43e 6925 return vm_map_unwire_nested(map, start, end,
2d21ac55 6926 user_wire, (pmap_t)NULL, 0);
1c79356b
A
6927}
6928
6929
6930/*
6931 * vm_map_entry_delete: [ internal use only ]
6932 *
6933 * Deallocate the given entry from the target map.
5ba3f43e 6934 */
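/*
 * Note (inferred from the body below): the entry must already be unwired
 * and must not be "permanent"; the map is unlocked before the backing
 * object or submap is finally deallocated.
 */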
91447636 6935static void
1c79356b 6936vm_map_entry_delete(
39037602
A
6937 vm_map_t map,
6938 vm_map_entry_t entry)
1c79356b 6939{
39037602
A
6940 vm_map_offset_t s, e;
6941 vm_object_t object;
6942 vm_map_t submap;
1c79356b
A
6943
6944 s = entry->vme_start;
6945 e = entry->vme_end;
6946 assert(page_aligned(s));
6947 assert(page_aligned(e));
39236c6e
A
6948 if (entry->map_aligned == TRUE) {
6949 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
6950 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
6951 }
1c79356b
A
6952 assert(entry->wired_count == 0);
6953 assert(entry->user_wired_count == 0);
b0d623f7 6954 assert(!entry->permanent);
1c79356b
A
6955
6956 if (entry->is_sub_map) {
6957 object = NULL;
3e170ce0 6958 submap = VME_SUBMAP(entry);
1c79356b
A
6959 } else {
6960 submap = NULL;
3e170ce0 6961 object = VME_OBJECT(entry);
1c79356b
A
6962 }
6963
6d2010ae 6964 vm_map_store_entry_unlink(map, entry);
1c79356b
A
6965 map->size -= e - s;
6966
6967 vm_map_entry_dispose(map, entry);
6968
6969 vm_map_unlock(map);
6970 /*
6971 * Deallocate the object only after removing all
6972 * pmap entries pointing to its pages.
6973 */
6974 if (submap)
6975 vm_map_deallocate(submap);
6976 else
2d21ac55 6977 vm_object_deallocate(object);
1c79356b
A
6978
6979}
6980
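/*
 * Descriptive summary (added for clarity, inferred from the body below):
 * vm_map_submap_pmap_clean() walks the entries of "sub_map" covering
 * [offset, offset + (end - start)) and removes the corresponding physical
 * mappings for the region mapped at [start, end) in "map", either with
 * pmap_remove() on map->pmap or, when the map is mapped in other pmaps,
 * with vm_object_pmap_protect_options(..., VM_PROT_NONE, PMAP_OPTIONS_REMOVE).
 */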
6981void
6982vm_map_submap_pmap_clean(
6983 vm_map_t map,
91447636
A
6984 vm_map_offset_t start,
6985 vm_map_offset_t end,
1c79356b 6986 vm_map_t sub_map,
91447636 6987 vm_map_offset_t offset)
1c79356b 6988{
91447636
A
6989 vm_map_offset_t submap_start;
6990 vm_map_offset_t submap_end;
6991 vm_map_size_t remove_size;
1c79356b
A
6992 vm_map_entry_t entry;
6993
6994 submap_end = offset + (end - start);
6995 submap_start = offset;
b7266188
A
6996
6997 vm_map_lock_read(sub_map);
1c79356b 6998 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5ba3f43e 6999
1c79356b
A
7000 remove_size = (entry->vme_end - entry->vme_start);
7001 if(offset > entry->vme_start)
7002 remove_size -= offset - entry->vme_start;
5ba3f43e 7003
1c79356b
A
7004
7005 if(submap_end < entry->vme_end) {
7006 remove_size -=
7007 entry->vme_end - submap_end;
7008 }
7009 if(entry->is_sub_map) {
7010 vm_map_submap_pmap_clean(
7011 sub_map,
7012 start,
7013 start + remove_size,
3e170ce0
A
7014 VME_SUBMAP(entry),
7015 VME_OFFSET(entry));
1c79356b 7016 } else {
9bccf70c 7017
316670eb 7018 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
7019 && (VME_OBJECT(entry) != NULL)) {
7020 vm_object_pmap_protect_options(
7021 VME_OBJECT(entry),
7022 (VME_OFFSET(entry) +
7023 offset -
7024 entry->vme_start),
9bccf70c
A
7025 remove_size,
7026 PMAP_NULL,
7027 entry->vme_start,
3e170ce0
A
7028 VM_PROT_NONE,
7029 PMAP_OPTIONS_REMOVE);
9bccf70c 7030 } else {
5ba3f43e
A
7031 pmap_remove(map->pmap,
7032 (addr64_t)start,
2d21ac55 7033 (addr64_t)(start + remove_size));
9bccf70c 7034 }
1c79356b
A
7035 }
7036 }
7037
7038 entry = entry->vme_next;
2d21ac55 7039
5ba3f43e 7040 while((entry != vm_map_to_entry(sub_map))
2d21ac55 7041 && (entry->vme_start < submap_end)) {
5ba3f43e 7042 remove_size = (entry->vme_end - entry->vme_start);
1c79356b
A
7043 if(submap_end < entry->vme_end) {
7044 remove_size -= entry->vme_end - submap_end;
7045 }
7046 if(entry->is_sub_map) {
7047 vm_map_submap_pmap_clean(
7048 sub_map,
7049 (start + entry->vme_start) - offset,
7050 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
7051 VME_SUBMAP(entry),
7052 VME_OFFSET(entry));
1c79356b 7053 } else {
316670eb 7054 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
7055 && (VME_OBJECT(entry) != NULL)) {
7056 vm_object_pmap_protect_options(
7057 VME_OBJECT(entry),
7058 VME_OFFSET(entry),
9bccf70c
A
7059 remove_size,
7060 PMAP_NULL,
7061 entry->vme_start,
3e170ce0
A
7062 VM_PROT_NONE,
7063 PMAP_OPTIONS_REMOVE);
9bccf70c 7064 } else {
5ba3f43e
A
7065 pmap_remove(map->pmap,
7066 (addr64_t)((start + entry->vme_start)
2d21ac55 7067 - offset),
5ba3f43e 7068 (addr64_t)(((start + entry->vme_start)
2d21ac55 7069 - offset) + remove_size));
9bccf70c 7070 }
1c79356b
A
7071 }
7072 entry = entry->vme_next;
b7266188
A
7073 }
7074 vm_map_unlock_read(sub_map);
1c79356b
A
7075 return;
7076}
7077
7078/*
7079 * vm_map_delete: [ internal use only ]
7080 *
7081 * Deallocates the given address range from the target map.
7082 * Removes all user wirings. Unwires one kernel wiring if
7083 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7084 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7085 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7086 *
7087 * This routine is called with map locked and leaves map locked.
7088 */
91447636 7089static kern_return_t
1c79356b 7090vm_map_delete(
91447636
A
7091 vm_map_t map,
7092 vm_map_offset_t start,
7093 vm_map_offset_t end,
7094 int flags,
7095 vm_map_t zap_map)
1c79356b
A
7096{
7097 vm_map_entry_t entry, next;
7098 struct vm_map_entry *first_entry, tmp_entry;
39037602
A
7099 vm_map_offset_t s;
7100 vm_object_t object;
1c79356b
A
7101 boolean_t need_wakeup;
7102 unsigned int last_timestamp = ~0; /* unlikely value */
7103 int interruptible;
1c79356b 7104
5ba3f43e 7105 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 7106 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
7107
7108 /*
7109 * All our DMA I/O operations in IOKit are currently done by
7110 * wiring through the map entries of the task requesting the I/O.
7111 * Because of this, we must always wait for kernel wirings
7112 * to go away on the entries before deleting them.
7113 *
7114 * Any caller who wants to actually remove a kernel wiring
7115 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7116 * properly remove one wiring instead of blasting through
7117 * them all.
7118 */
7119 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7120
b0d623f7
A
7121 while(1) {
7122 /*
7123 * Find the start of the region, and clip it
7124 */
7125 if (vm_map_lookup_entry(map, start, &first_entry)) {
7126 entry = first_entry;
fe8ab488
A
7127 if (map == kalloc_map &&
7128 (entry->vme_start != start ||
7129 entry->vme_end != end)) {
7130 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7131 "mismatched entry %p [0x%llx:0x%llx]\n",
7132 map,
7133 (uint64_t)start,
7134 (uint64_t)end,
7135 entry,
7136 (uint64_t)entry->vme_start,
7137 (uint64_t)entry->vme_end);
7138 }
b0d623f7
A
 7139			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
7140 start = SUPERPAGE_ROUND_DOWN(start);
7141 continue;
7142 }
7143 if (start == entry->vme_start) {
7144 /*
7145 * No need to clip. We don't want to cause
7146 * any unnecessary unnesting in this case...
7147 */
7148 } else {
fe8ab488
A
7149 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7150 entry->map_aligned &&
7151 !VM_MAP_PAGE_ALIGNED(
7152 start,
7153 VM_MAP_PAGE_MASK(map))) {
7154 /*
7155 * The entry will no longer be
7156 * map-aligned after clipping
7157 * and the caller said it's OK.
7158 */
7159 entry->map_aligned = FALSE;
7160 }
7161 if (map == kalloc_map) {
7162 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7163 " clipping %p at 0x%llx\n",
7164 map,
7165 (uint64_t)start,
7166 (uint64_t)end,
7167 entry,
7168 (uint64_t)start);
7169 }
b0d623f7
A
7170 vm_map_clip_start(map, entry, start);
7171 }
7172
2d21ac55 7173 /*
b0d623f7
A
7174 * Fix the lookup hint now, rather than each
7175 * time through the loop.
2d21ac55 7176 */
b0d623f7 7177 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 7178 } else {
fe8ab488
A
7179 if (map->pmap == kernel_pmap &&
7180 map->ref_count != 0) {
7181 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7182 "no map entry at 0x%llx\n",
7183 map,
7184 (uint64_t)start,
7185 (uint64_t)end,
7186 (uint64_t)start);
7187 }
b0d623f7 7188 entry = first_entry->vme_next;
2d21ac55 7189 }
b0d623f7 7190 break;
1c79356b 7191 }
b0d623f7
A
7192 if (entry->superpage_size)
7193 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
7194
7195 need_wakeup = FALSE;
7196 /*
7197 * Step through all entries in this region
7198 */
2d21ac55
A
7199 s = entry->vme_start;
7200 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7201 /*
7202 * At this point, we have deleted all the memory entries
7203 * between "start" and "s". We still need to delete
7204 * all memory entries between "s" and "end".
7205 * While we were blocked and the map was unlocked, some
7206 * new memory entries could have been re-allocated between
7207 * "start" and "s" and we don't want to mess with those.
7208 * Some of those entries could even have been re-assembled
7209 * with an entry after "s" (in vm_map_simplify_entry()), so
7210 * we may have to vm_map_clip_start() again.
7211 */
1c79356b 7212
2d21ac55
A
7213 if (entry->vme_start >= s) {
7214 /*
7215 * This entry starts on or after "s"
7216 * so no need to clip its start.
7217 */
7218 } else {
7219 /*
7220 * This entry has been re-assembled by a
7221 * vm_map_simplify_entry(). We need to
7222 * re-clip its start.
7223 */
fe8ab488
A
7224 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7225 entry->map_aligned &&
7226 !VM_MAP_PAGE_ALIGNED(s,
7227 VM_MAP_PAGE_MASK(map))) {
7228 /*
7229 * The entry will no longer be map-aligned
7230 * after clipping and the caller said it's OK.
7231 */
7232 entry->map_aligned = FALSE;
7233 }
7234 if (map == kalloc_map) {
7235 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7236 "clipping %p at 0x%llx\n",
7237 map,
7238 (uint64_t)start,
7239 (uint64_t)end,
7240 entry,
7241 (uint64_t)s);
7242 }
2d21ac55
A
7243 vm_map_clip_start(map, entry, s);
7244 }
7245 if (entry->vme_end <= end) {
7246 /*
7247 * This entry is going away completely, so no need
7248 * to clip and possibly cause an unnecessary unnesting.
7249 */
7250 } else {
fe8ab488
A
7251 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7252 entry->map_aligned &&
7253 !VM_MAP_PAGE_ALIGNED(end,
7254 VM_MAP_PAGE_MASK(map))) {
7255 /*
7256 * The entry will no longer be map-aligned
7257 * after clipping and the caller said it's OK.
7258 */
7259 entry->map_aligned = FALSE;
7260 }
7261 if (map == kalloc_map) {
7262 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7263 "clipping %p at 0x%llx\n",
7264 map,
7265 (uint64_t)start,
7266 (uint64_t)end,
7267 entry,
7268 (uint64_t)end);
7269 }
2d21ac55
A
7270 vm_map_clip_end(map, entry, end);
7271 }
b0d623f7
A
7272
7273 if (entry->permanent) {
5ba3f43e
A
7274 if (map->pmap == kernel_pmap) {
7275 panic("%s(%p,0x%llx,0x%llx): "
7276 "attempt to remove permanent "
7277 "VM map entry "
7278 "%p [0x%llx:0x%llx]\n",
7279 __FUNCTION__,
7280 map,
7281 (uint64_t) start,
7282 (uint64_t) end,
7283 entry,
7284 (uint64_t) entry->vme_start,
7285 (uint64_t) entry->vme_end);
7286 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7287// printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7288 entry->permanent = FALSE;
7289 } else {
7290 if (!vm_map_executable_immutable_no_log) {
7291 printf("%d[%s] %s(0x%llx,0x%llx): "
7292 "permanent entry [0x%llx:0x%llx] "
7293 "prot 0x%x/0x%x\n",
7294 proc_selfpid(),
7295 (current_task()->bsd_info
7296 ? proc_name_address(current_task()->bsd_info)
7297 : "?"),
7298 __FUNCTION__,
7299 (uint64_t) start,
7300 (uint64_t) end,
7301 (uint64_t)entry->vme_start,
7302 (uint64_t)entry->vme_end,
7303 entry->protection,
7304 entry->max_protection);
7305 }
7306 /*
7307 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7308 */
7309 DTRACE_VM5(vm_map_delete_permanent,
7310 vm_map_offset_t, entry->vme_start,
7311 vm_map_offset_t, entry->vme_end,
7312 vm_prot_t, entry->protection,
7313 vm_prot_t, entry->max_protection,
7314 int, VME_ALIAS(entry));
7315 }
b0d623f7
A
7316 }
7317
7318
1c79356b 7319 if (entry->in_transition) {
9bccf70c
A
7320 wait_result_t wait_result;
7321
1c79356b
A
7322 /*
7323 * Another thread is wiring/unwiring this entry.
7324 * Let the other thread know we are waiting.
7325 */
2d21ac55 7326 assert(s == entry->vme_start);
1c79356b
A
7327 entry->needs_wakeup = TRUE;
7328
7329 /*
7330 * wake up anybody waiting on entries that we have
7331 * already unwired/deleted.
7332 */
7333 if (need_wakeup) {
7334 vm_map_entry_wakeup(map);
7335 need_wakeup = FALSE;
7336 }
7337
9bccf70c 7338 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
7339
7340 if (interruptible &&
9bccf70c 7341 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
7342 /*
7343 * We do not clear the needs_wakeup flag,
7344 * since we cannot tell if we were the only one.
7345 */
7346 return KERN_ABORTED;
9bccf70c 7347 }
1c79356b
A
7348
7349 /*
7350 * The entry could have been clipped or it
7351 * may not exist anymore. Look it up again.
7352 */
7353 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
7354 /*
7355 * User: use the next entry
7356 */
7357 entry = first_entry->vme_next;
2d21ac55 7358 s = entry->vme_start;
1c79356b
A
7359 } else {
7360 entry = first_entry;
0c530ab8 7361 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7362 }
9bccf70c 7363 last_timestamp = map->timestamp;
1c79356b
A
7364 continue;
7365 } /* end in_transition */
7366
7367 if (entry->wired_count) {
2d21ac55
A
7368 boolean_t user_wire;
7369
7370 user_wire = entry->user_wired_count > 0;
7371
1c79356b 7372 /*
b0d623f7 7373 * Remove a kernel wiring if requested
1c79356b 7374 */
b0d623f7 7375 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 7376 entry->wired_count--;
b0d623f7 7377 }
5ba3f43e 7378
b0d623f7
A
7379 /*
7380 * Remove all user wirings for proper accounting
7381 */
7382 if (entry->user_wired_count > 0) {
7383 while (entry->user_wired_count)
7384 subtract_wire_counts(map, entry, user_wire);
7385 }
1c79356b
A
7386
7387 if (entry->wired_count != 0) {
2d21ac55 7388 assert(map != kernel_map);
1c79356b
A
7389 /*
7390 * Cannot continue. Typical case is when
7391 * a user thread has physical I/O pending
7392 * on this page. Either wait for the
7393 * kernel wiring to go away or return an
7394 * error.
7395 */
7396 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 7397 wait_result_t wait_result;
1c79356b 7398
2d21ac55 7399 assert(s == entry->vme_start);
1c79356b 7400 entry->needs_wakeup = TRUE;
9bccf70c 7401 wait_result = vm_map_entry_wait(map,
2d21ac55 7402 interruptible);
1c79356b
A
7403
7404 if (interruptible &&
2d21ac55 7405 wait_result == THREAD_INTERRUPTED) {
1c79356b 7406 /*
5ba3f43e
A
7407 * We do not clear the
7408 * needs_wakeup flag, since we
7409 * cannot tell if we were the
1c79356b 7410 * only one.
2d21ac55 7411 */
1c79356b 7412 return KERN_ABORTED;
9bccf70c 7413 }
1c79356b
A
7414
7415 /*
2d21ac55 7416 * The entry could have been clipped or
1c79356b
A
7417 * it may not exist anymore. Look it
7418 * up again.
2d21ac55 7419 */
5ba3f43e 7420 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
7421 &first_entry)) {
7422 assert(map != kernel_map);
1c79356b 7423 /*
2d21ac55
A
7424 * User: use the next entry
7425 */
1c79356b 7426 entry = first_entry->vme_next;
2d21ac55 7427 s = entry->vme_start;
1c79356b
A
7428 } else {
7429 entry = first_entry;
0c530ab8 7430 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7431 }
9bccf70c 7432 last_timestamp = map->timestamp;
1c79356b
A
7433 continue;
7434 }
7435 else {
7436 return KERN_FAILURE;
7437 }
7438 }
7439
7440 entry->in_transition = TRUE;
7441 /*
7442 * copy current entry. see comment in vm_map_wire()
7443 */
7444 tmp_entry = *entry;
2d21ac55 7445 assert(s == entry->vme_start);
1c79356b
A
7446
7447 /*
7448 * We can unlock the map now. The in_transition
7449 * state guarantees existence of the entry.
7450 */
7451 vm_map_unlock(map);
2d21ac55
A
7452
7453 if (tmp_entry.is_sub_map) {
7454 vm_map_t sub_map;
7455 vm_map_offset_t sub_start, sub_end;
7456 pmap_t pmap;
7457 vm_map_offset_t pmap_addr;
5ba3f43e 7458
2d21ac55 7459
3e170ce0
A
7460 sub_map = VME_SUBMAP(&tmp_entry);
7461 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55
A
7462 sub_end = sub_start + (tmp_entry.vme_end -
7463 tmp_entry.vme_start);
7464 if (tmp_entry.use_pmap) {
7465 pmap = sub_map->pmap;
7466 pmap_addr = tmp_entry.vme_start;
7467 } else {
7468 pmap = map->pmap;
7469 pmap_addr = tmp_entry.vme_start;
7470 }
7471 (void) vm_map_unwire_nested(sub_map,
7472 sub_start, sub_end,
7473 user_wire,
7474 pmap, pmap_addr);
7475 } else {
7476
3e170ce0 7477 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
7478 pmap_protect_options(
7479 map->pmap,
7480 tmp_entry.vme_start,
7481 tmp_entry.vme_end,
7482 VM_PROT_NONE,
7483 PMAP_OPTIONS_REMOVE,
7484 NULL);
7485 }
2d21ac55 7486 vm_fault_unwire(map, &tmp_entry,
3e170ce0 7487 VME_OBJECT(&tmp_entry) == kernel_object,
2d21ac55
A
7488 map->pmap, tmp_entry.vme_start);
7489 }
7490
1c79356b
A
7491 vm_map_lock(map);
7492
7493 if (last_timestamp+1 != map->timestamp) {
7494 /*
7495 * Find the entry again. It could have
7496 * been clipped after we unlocked the map.
7497 */
7498 if (!vm_map_lookup_entry(map, s, &first_entry)){
5ba3f43e 7499 assert((map != kernel_map) &&
2d21ac55 7500 (!entry->is_sub_map));
1c79356b 7501 first_entry = first_entry->vme_next;
2d21ac55 7502 s = first_entry->vme_start;
1c79356b 7503 } else {
0c530ab8 7504 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
7505 }
7506 } else {
0c530ab8 7507 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
7508 first_entry = entry;
7509 }
7510
7511 last_timestamp = map->timestamp;
7512
7513 entry = first_entry;
7514 while ((entry != vm_map_to_entry(map)) &&
7515 (entry->vme_start < tmp_entry.vme_end)) {
7516 assert(entry->in_transition);
7517 entry->in_transition = FALSE;
7518 if (entry->needs_wakeup) {
7519 entry->needs_wakeup = FALSE;
7520 need_wakeup = TRUE;
7521 }
7522 entry = entry->vme_next;
7523 }
7524 /*
7525 * We have unwired the entry(s). Go back and
7526 * delete them.
7527 */
7528 entry = first_entry;
7529 continue;
7530 }
7531
7532 /* entry is unwired */
7533 assert(entry->wired_count == 0);
7534 assert(entry->user_wired_count == 0);
7535
2d21ac55
A
7536 assert(s == entry->vme_start);
7537
7538 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7539 /*
7540 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7541 * vm_map_delete(), some map entries might have been
7542 * transferred to a "zap_map", which doesn't have a
7543 * pmap. The original pmap has already been flushed
7544 * in the vm_map_delete() call targeting the original
7545 * map, but when we get to destroying the "zap_map",
7546 * we don't have any pmap to flush, so let's just skip
7547 * all this.
7548 */
7549 } else if (entry->is_sub_map) {
7550 if (entry->use_pmap) {
0c530ab8 7551#ifndef NO_NESTED_PMAP
3e170ce0
A
7552 int pmap_flags;
7553
7554 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7555 /*
7556 * This is the final cleanup of the
7557 * address space being terminated.
7558 * No new mappings are expected and
7559 * we don't really need to unnest the
7560 * shared region (and lose the "global"
7561 * pmap mappings, if applicable).
7562 *
7563 * Tell the pmap layer that we're
7564 * "clean" wrt nesting.
7565 */
7566 pmap_flags = PMAP_UNNEST_CLEAN;
7567 } else {
7568 /*
7569 * We're unmapping part of the nested
7570 * shared region, so we can't keep the
7571 * nested pmap.
7572 */
7573 pmap_flags = 0;
7574 }
7575 pmap_unnest_options(
7576 map->pmap,
7577 (addr64_t)entry->vme_start,
7578 entry->vme_end - entry->vme_start,
7579 pmap_flags);
0c530ab8 7580#endif /* NO_NESTED_PMAP */
316670eb 7581 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
7582 /* clean up parent map/maps */
7583 vm_map_submap_pmap_clean(
7584 map, entry->vme_start,
7585 entry->vme_end,
3e170ce0
A
7586 VME_SUBMAP(entry),
7587 VME_OFFSET(entry));
9bccf70c 7588 }
2d21ac55 7589 } else {
1c79356b
A
7590 vm_map_submap_pmap_clean(
7591 map, entry->vme_start, entry->vme_end,
3e170ce0
A
7592 VME_SUBMAP(entry),
7593 VME_OFFSET(entry));
2d21ac55 7594 }
3e170ce0
A
7595 } else if (VME_OBJECT(entry) != kernel_object &&
7596 VME_OBJECT(entry) != compressor_object) {
7597 object = VME_OBJECT(entry);
39236c6e
A
7598 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7599 vm_object_pmap_protect_options(
3e170ce0 7600 object, VME_OFFSET(entry),
55e303ae
A
7601 entry->vme_end - entry->vme_start,
7602 PMAP_NULL,
7603 entry->vme_start,
39236c6e
A
7604 VM_PROT_NONE,
7605 PMAP_OPTIONS_REMOVE);
3e170ce0 7606 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
39236c6e
A
7607 (map->pmap == kernel_pmap)) {
7608 /* Remove translations associated
7609 * with this range unless the entry
7610 * does not have an object, or
7611 * it's the kernel map or a descendant
7612 * since the platform could potentially
7613 * create "backdoor" mappings invisible
7614 * to the VM. It is expected that
7615 * objectless, non-kernel ranges
7616 * do not have such VM invisible
7617 * translations.
7618 */
7619 pmap_remove_options(map->pmap,
7620 (addr64_t)entry->vme_start,
7621 (addr64_t)entry->vme_end,
7622 PMAP_OPTIONS_REMOVE);
1c79356b
A
7623 }
7624 }
7625
fe8ab488
A
7626 if (entry->iokit_acct) {
7627 /* alternate accounting */
ecc0ceb4
A
7628 DTRACE_VM4(vm_map_iokit_unmapped_region,
7629 vm_map_t, map,
7630 vm_map_offset_t, entry->vme_start,
7631 vm_map_offset_t, entry->vme_end,
7632 int, VME_ALIAS(entry));
fe8ab488
A
7633 vm_map_iokit_unmapped_region(map,
7634 (entry->vme_end -
7635 entry->vme_start));
7636 entry->iokit_acct = FALSE;
7637 }
7638
91447636
A
7639 /*
7640 * All pmap mappings for this map entry must have been
7641 * cleared by now.
7642 */
fe8ab488 7643#if DEBUG
91447636
A
7644 assert(vm_map_pmap_is_empty(map,
7645 entry->vme_start,
7646 entry->vme_end));
fe8ab488 7647#endif /* DEBUG */
91447636 7648
1c79356b 7649 next = entry->vme_next;
fe8ab488
A
7650
7651 if (map->pmap == kernel_pmap &&
7652 map->ref_count != 0 &&
7653 entry->vme_end < end &&
7654 (next == vm_map_to_entry(map) ||
7655 next->vme_start != entry->vme_end)) {
7656 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7657 "hole after %p at 0x%llx\n",
7658 map,
7659 (uint64_t)start,
7660 (uint64_t)end,
7661 entry,
7662 (uint64_t)entry->vme_end);
7663 }
7664
1c79356b
A
7665 s = next->vme_start;
7666 last_timestamp = map->timestamp;
91447636 7667
5ba3f43e
A
7668 if (entry->permanent) {
7669 /*
7670 * A permanent entry can not be removed, so leave it
7671 * in place but remove all access permissions.
7672 */
7673 entry->protection = VM_PROT_NONE;
7674 entry->max_protection = VM_PROT_NONE;
7675 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
7676 zap_map != VM_MAP_NULL) {
2d21ac55 7677 vm_map_size_t entry_size;
91447636
A
7678 /*
7679 * The caller wants to save the affected VM map entries
7680 * into the "zap_map". The caller will take care of
7681 * these entries.
7682 */
7683 /* unlink the entry from "map" ... */
6d2010ae 7684 vm_map_store_entry_unlink(map, entry);
91447636 7685 /* ... and add it to the end of the "zap_map" */
6d2010ae 7686 vm_map_store_entry_link(zap_map,
91447636
A
7687 vm_map_last_entry(zap_map),
7688 entry);
2d21ac55
A
7689 entry_size = entry->vme_end - entry->vme_start;
7690 map->size -= entry_size;
7691 zap_map->size += entry_size;
7692 /* we didn't unlock the map, so no timestamp increase */
7693 last_timestamp--;
91447636
A
7694 } else {
7695 vm_map_entry_delete(map, entry);
7696 /* vm_map_entry_delete unlocks the map */
7697 vm_map_lock(map);
7698 }
7699
1c79356b
A
7700 entry = next;
7701
7702 if(entry == vm_map_to_entry(map)) {
7703 break;
7704 }
7705 if (last_timestamp+1 != map->timestamp) {
7706 /*
7707 * we are responsible for deleting everything
7708 * from the give space, if someone has interfered
7709 * we pick up where we left off, back fills should
7710 * be all right for anyone except map_delete and
7711 * we have to assume that the task has been fully
7712 * disabled before we get here
7713 */
7714 if (!vm_map_lookup_entry(map, s, &entry)){
7715 entry = entry->vme_next;
2d21ac55 7716 s = entry->vme_start;
1c79356b 7717 } else {
2d21ac55 7718 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7719 }
5ba3f43e
A
7720 /*
7721 * others can not only allocate behind us, we can
7722 * also see entries coalesce while we don't hold the map lock
1c79356b
A
7723 */
7724 if(entry == vm_map_to_entry(map)) {
7725 break;
7726 }
1c79356b
A
7727 }
7728 last_timestamp = map->timestamp;
7729 }
7730
7731 if (map->wait_for_space)
7732 thread_wakeup((event_t) map);
7733 /*
7734 * wake up anybody waiting on entries that we have already deleted.
7735 */
7736 if (need_wakeup)
7737 vm_map_entry_wakeup(map);
7738
7739 return KERN_SUCCESS;
7740}
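/*
 * Editor's illustrative sketch -- not part of the original source.
 * The VM_MAP_REMOVE_SAVE_ENTRIES branch above reduces to the following
 * bookkeeping: instead of freeing an entry, move it (and its size) from
 * "map" to the caller-supplied "zap_map".  The helper name is
 * hypothetical; it only restates the logic for clarity.
 */
static void
example_move_entry_to_zap_map(
	vm_map_t	map,		/* source map, locked by the caller */
	vm_map_t	zap_map,	/* caller-supplied "zap" map */
	vm_map_entry_t	entry)
{
	vm_map_size_t	entry_size;

	/* unlink the entry from "map" ... */
	vm_map_store_entry_unlink(map, entry);
	/* ... and append it to the end of "zap_map" */
	vm_map_store_entry_link(zap_map,
				vm_map_last_entry(zap_map),
				entry);
	/* keep the size accounting of both maps consistent */
	entry_size = entry->vme_end - entry->vme_start;
	map->size -= entry_size;
	zap_map->size += entry_size;
}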
7741
7742/*
7743 * vm_map_remove:
7744 *
7745 * Remove the given address range from the target map.
7746 * This is the exported form of vm_map_delete.
7747 */
7748kern_return_t
7749vm_map_remove(
39037602
A
7750 vm_map_t map,
7751 vm_map_offset_t start,
7752 vm_map_offset_t end,
7753 boolean_t flags)
1c79356b 7754{
39037602 7755 kern_return_t result;
9bccf70c 7756
1c79356b
A
7757 vm_map_lock(map);
7758 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
7759 /*
7760 * For the zone_map, the kernel controls the allocation/freeing of memory.
7761 * Any free to the zone_map should be within the bounds of the map and
7762 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
7763 * free to the zone_map into a no-op, there is a problem and we should
7764 * panic.
7765 */
7766 if ((map == zone_map) && (start == end))
7767 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 7768 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 7769 vm_map_unlock(map);
91447636 7770
1c79356b
A
7771 return(result);
7772}
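/*
 * Editor's illustrative sketch -- not part of the original source.
 * A typical caller of vm_map_remove() hands back the same page-rounded
 * bounds it originally mapped.  "example_addr"/"example_size" are
 * hypothetical, and VM_MAP_REMOVE_KUNWIRE is only needed when the
 * caller still holds a kernel wiring on the range (see the KUNWIRE
 * handling in vm_map_delete() above).
 */
static kern_return_t
example_remove_range(
	vm_map_t	map,
	vm_map_offset_t	example_addr,
	vm_map_size_t	example_size)
{
	return vm_map_remove(map,
			     vm_map_trunc_page(example_addr,
					       VM_MAP_PAGE_MASK(map)),
			     vm_map_round_page(example_addr + example_size,
					       VM_MAP_PAGE_MASK(map)),
			     VM_MAP_REMOVE_KUNWIRE);
}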
7773
39037602
A
7774/*
7775 * vm_map_remove_locked:
7776 *
7777 * Remove the given address range from the target map, which the caller holds locked.
7778 * This is the exported form of vm_map_delete.
7779 */
7780kern_return_t
7781vm_map_remove_locked(
7782 vm_map_t map,
7783 vm_map_offset_t start,
7784 vm_map_offset_t end,
7785 boolean_t flags)
7786{
7787 kern_return_t result;
7788
7789 VM_MAP_RANGE_CHECK(map, start, end);
7790 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7791 return(result);
7792}
7793
1c79356b 7794
1c79356b
A
7795/*
7796 * Routine: vm_map_copy_discard
7797 *
7798 * Description:
7799 * Dispose of a map copy object (returned by
7800 * vm_map_copyin).
7801 */
7802void
7803vm_map_copy_discard(
7804 vm_map_copy_t copy)
7805{
1c79356b
A
7806 if (copy == VM_MAP_COPY_NULL)
7807 return;
7808
7809 switch (copy->type) {
7810 case VM_MAP_COPY_ENTRY_LIST:
7811 while (vm_map_copy_first_entry(copy) !=
2d21ac55 7812 vm_map_copy_to_entry(copy)) {
1c79356b
A
7813 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
7814
7815 vm_map_copy_entry_unlink(copy, entry);
39236c6e 7816 if (entry->is_sub_map) {
3e170ce0 7817 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 7818 } else {
3e170ce0 7819 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 7820 }
1c79356b
A
7821 vm_map_copy_entry_dispose(copy, entry);
7822 }
7823 break;
7824 case VM_MAP_COPY_OBJECT:
7825 vm_object_deallocate(copy->cpy_object);
7826 break;
1c79356b
A
7827 case VM_MAP_COPY_KERNEL_BUFFER:
7828
7829 /*
7830 * The vm_map_copy_t and possibly the data buffer were
7831 * allocated by a single call to kalloc(), i.e. the
7832 * vm_map_copy_t was not allocated out of the zone.
7833 */
3e170ce0
A
7834 if (copy->size > msg_ool_size_small || copy->offset)
7835 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
7836 (long long)copy->size, (long long)copy->offset);
7837 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
7838 return;
7839 }
91447636 7840 zfree(vm_map_copy_zone, copy);
1c79356b
A
7841}
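/*
 * Editor's illustrative sketch -- not part of the original source.
 * It shows the ownership rule spelled out in the comments below for
 * vm_map_copy_overwrite(): on success the copy object is consumed; on
 * failure the caller still owns it and must discard it.  The wrapper
 * name is hypothetical.
 */
static kern_return_t
example_overwrite_or_discard(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy)
{
	kern_return_t	kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS) {
		/* the copy was not consumed; dispose of it here */
		vm_map_copy_discard(copy);
	}
	return kr;
}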
7842
7843/*
7844 * Routine: vm_map_copy_copy
7845 *
7846 * Description:
7847 * Move the information in a map copy object to
7848 * a new map copy object, leaving the old one
7849 * empty.
7850 *
7851 * This is used by kernel routines that need
7852 * to look at out-of-line data (in copyin form)
7853 * before deciding whether to return SUCCESS.
7854 * If the routine returns FAILURE, the original
7855 * copy object will be deallocated; therefore,
7856 * these routines must make a copy of the copy
7857 * object and leave the original empty so that
7858 * deallocation will not fail.
7859 */
7860vm_map_copy_t
7861vm_map_copy_copy(
7862 vm_map_copy_t copy)
7863{
7864 vm_map_copy_t new_copy;
7865
7866 if (copy == VM_MAP_COPY_NULL)
7867 return VM_MAP_COPY_NULL;
7868
7869 /*
7870 * Allocate a new copy object, and copy the information
7871 * from the old one into it.
7872 */
7873
7874 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7875 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
7876 *new_copy = *copy;
7877
7878 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
7879 /*
7880 * The links in the entry chain must be
7881 * changed to point to the new copy object.
7882 */
7883 vm_map_copy_first_entry(copy)->vme_prev
7884 = vm_map_copy_to_entry(new_copy);
7885 vm_map_copy_last_entry(copy)->vme_next
7886 = vm_map_copy_to_entry(new_copy);
7887 }
7888
7889 /*
7890 * Change the old copy object into one that contains
7891 * nothing to be deallocated.
7892 */
7893 copy->type = VM_MAP_COPY_OBJECT;
7894 copy->cpy_object = VM_OBJECT_NULL;
7895
7896 /*
7897 * Return the new object.
7898 */
7899 return new_copy;
7900}
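/*
 * Editor's illustrative sketch -- not part of the original source.
 * The pattern described in the block comment above: a routine that
 * wants to inspect out-of-line data before committing first moves the
 * contents into a private copy object, so that the failure-path
 * deallocation of the original cannot take the data with it.  The
 * function and its "looks_valid" check are hypothetical.
 */
static kern_return_t
example_take_ool_data(
	vm_map_copy_t	copy,		/* original, will be left empty */
	boolean_t	looks_valid,	/* hypothetical validation result */
	vm_map_copy_t	*saved_copy)	/* out: the preserved contents */
{
	/* move the contents into a new copy object, emptying "copy" */
	*saved_copy = vm_map_copy_copy(copy);

	if (!looks_valid) {
		/* we own the saved contents; clean them up ourselves */
		vm_map_copy_discard(*saved_copy);
		*saved_copy = VM_MAP_COPY_NULL;
		return KERN_FAILURE;
	}
	/* on success the caller consumes "*saved_copy" later */
	return KERN_SUCCESS;
}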
7901
91447636 7902static kern_return_t
1c79356b
A
7903vm_map_overwrite_submap_recurse(
7904 vm_map_t dst_map,
91447636
A
7905 vm_map_offset_t dst_addr,
7906 vm_map_size_t dst_size)
1c79356b 7907{
91447636 7908 vm_map_offset_t dst_end;
1c79356b
A
7909 vm_map_entry_t tmp_entry;
7910 vm_map_entry_t entry;
7911 kern_return_t result;
7912 boolean_t encountered_sub_map = FALSE;
7913
7914
7915
7916 /*
7917 * Verify that the destination is all writeable
7918 * initially. We have to trunc the destination
7919 * address and round the copy size or we'll end up
7920 * splitting entries in strange ways.
7921 */
7922
39236c6e
A
7923 dst_end = vm_map_round_page(dst_addr + dst_size,
7924 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 7925 vm_map_lock(dst_map);
1c79356b
A
7926
7927start_pass_1:
1c79356b
A
7928 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7929 vm_map_unlock(dst_map);
7930 return(KERN_INVALID_ADDRESS);
7931 }
7932
39236c6e
A
7933 vm_map_clip_start(dst_map,
7934 tmp_entry,
7935 vm_map_trunc_page(dst_addr,
7936 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
7937 if (tmp_entry->is_sub_map) {
7938 /* clipping did unnest if needed */
7939 assert(!tmp_entry->use_pmap);
7940 }
1c79356b
A
7941
7942 for (entry = tmp_entry;;) {
7943 vm_map_entry_t next;
7944
7945 next = entry->vme_next;
7946 while(entry->is_sub_map) {
91447636
A
7947 vm_map_offset_t sub_start;
7948 vm_map_offset_t sub_end;
7949 vm_map_offset_t local_end;
1c79356b
A
7950
7951 if (entry->in_transition) {
2d21ac55
A
7952 /*
7953 * Say that we are waiting, and wait for entry.
7954 */
1c79356b
A
7955 entry->needs_wakeup = TRUE;
7956 vm_map_entry_wait(dst_map, THREAD_UNINT);
7957
7958 goto start_pass_1;
7959 }
7960
7961 encountered_sub_map = TRUE;
3e170ce0 7962 sub_start = VME_OFFSET(entry);
1c79356b
A
7963
7964 if(entry->vme_end < dst_end)
7965 sub_end = entry->vme_end;
5ba3f43e 7966 else
1c79356b
A
7967 sub_end = dst_end;
7968 sub_end -= entry->vme_start;
3e170ce0 7969 sub_end += VME_OFFSET(entry);
1c79356b
A
7970 local_end = entry->vme_end;
7971 vm_map_unlock(dst_map);
5ba3f43e 7972
1c79356b 7973 result = vm_map_overwrite_submap_recurse(
3e170ce0 7974 VME_SUBMAP(entry),
2d21ac55
A
7975 sub_start,
7976 sub_end - sub_start);
1c79356b
A
7977
7978 if(result != KERN_SUCCESS)
7979 return result;
7980 if (dst_end <= entry->vme_end)
7981 return KERN_SUCCESS;
7982 vm_map_lock(dst_map);
5ba3f43e 7983 if(!vm_map_lookup_entry(dst_map, local_end,
1c79356b
A
7984 &tmp_entry)) {
7985 vm_map_unlock(dst_map);
7986 return(KERN_INVALID_ADDRESS);
7987 }
7988 entry = tmp_entry;
7989 next = entry->vme_next;
7990 }
7991
7992 if ( ! (entry->protection & VM_PROT_WRITE)) {
7993 vm_map_unlock(dst_map);
7994 return(KERN_PROTECTION_FAILURE);
7995 }
7996
7997 /*
7998 * If the entry is in transition, we must wait
7999 * for it to exit that state. Anything could happen
8000 * when we unlock the map, so start over.
8001 */
8002 if (entry->in_transition) {
8003
8004 /*
8005 * Say that we are waiting, and wait for entry.
8006 */
8007 entry->needs_wakeup = TRUE;
8008 vm_map_entry_wait(dst_map, THREAD_UNINT);
8009
8010 goto start_pass_1;
8011 }
8012
8013/*
8014 * our range is contained completely within this map entry
8015 */
8016 if (dst_end <= entry->vme_end) {
8017 vm_map_unlock(dst_map);
8018 return KERN_SUCCESS;
8019 }
8020/*
8021 * check that range specified is contiguous region
8022 */
8023 if ((next == vm_map_to_entry(dst_map)) ||
8024 (next->vme_start != entry->vme_end)) {
8025 vm_map_unlock(dst_map);
8026 return(KERN_INVALID_ADDRESS);
8027 }
8028
8029 /*
8030 * Check for permanent objects in the destination.
8031 */
3e170ce0
A
8032 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8033 ((!VME_OBJECT(entry)->internal) ||
8034 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
8035 if(encountered_sub_map) {
8036 vm_map_unlock(dst_map);
8037 return(KERN_FAILURE);
8038 }
8039 }
8040
8041
8042 entry = next;
8043 }/* for */
8044 vm_map_unlock(dst_map);
8045 return(KERN_SUCCESS);
8046}
8047
8048/*
8049 * Routine: vm_map_copy_overwrite
8050 *
8051 * Description:
8052 * Copy the memory described by the map copy
8053 * object (copy; returned by vm_map_copyin) onto
8054 * the specified destination region (dst_map, dst_addr).
8055 * The destination must be writeable.
8056 *
8057 * Unlike vm_map_copyout, this routine actually
8058 * writes over previously-mapped memory. If the
8059 * previous mapping was to a permanent (user-supplied)
8060 * memory object, it is preserved.
8061 *
8062 * The attributes (protection and inheritance) of the
8063 * destination region are preserved.
8064 *
8065 * If successful, consumes the copy object.
8066 * Otherwise, the caller is responsible for it.
8067 *
8068 * Implementation notes:
8069 * To overwrite aligned temporary virtual memory, it is
8070 * sufficient to remove the previous mapping and insert
8071 * the new copy. This replacement is done either on
8072 * the whole region (if no permanent virtual memory
8073 * objects are embedded in the destination region) or
8074 * in individual map entries.
8075 *
8076 * To overwrite permanent virtual memory, it is necessary
8077 * to copy each page, as the external memory management
8078 * interface currently does not provide any optimizations.
8079 *
8080 * Unaligned memory also has to be copied. It is possible
8081 * to use 'vm_trickery' to copy the aligned data. This is
8082 * not done but not hard to implement.
8083 *
8084 * Once a page of permanent memory has been overwritten,
8085 * it is impossible to interrupt this function; otherwise,
8086 * the call would be neither atomic nor location-independent.
8087 * The kernel-state portion of a user thread must be
8088 * interruptible.
8089 *
8090 * It may be expensive to forward all requests that might
8091 * overwrite permanent memory (vm_write, vm_copy) to
8092 * uninterruptible kernel threads. This routine may be
8093 * called by interruptible threads; however, success is
8094 * not guaranteed -- if the request cannot be performed
8095 * atomically and interruptibly, an error indication is
8096 * returned.
8097 */
8098
91447636 8099static kern_return_t
1c79356b 8100vm_map_copy_overwrite_nested(
91447636
A
8101 vm_map_t dst_map,
8102 vm_map_address_t dst_addr,
8103 vm_map_copy_t copy,
8104 boolean_t interruptible,
6d2010ae
A
8105 pmap_t pmap,
8106 boolean_t discard_on_success)
1c79356b 8107{
91447636
A
8108 vm_map_offset_t dst_end;
8109 vm_map_entry_t tmp_entry;
8110 vm_map_entry_t entry;
8111 kern_return_t kr;
8112 boolean_t aligned = TRUE;
8113 boolean_t contains_permanent_objects = FALSE;
8114 boolean_t encountered_sub_map = FALSE;
8115 vm_map_offset_t base_addr;
8116 vm_map_size_t copy_size;
8117 vm_map_size_t total_size;
1c79356b
A
8118
8119
8120 /*
8121 * Check for null copy object.
8122 */
8123
8124 if (copy == VM_MAP_COPY_NULL)
8125 return(KERN_SUCCESS);
8126
8127 /*
8128 * Check for special kernel buffer allocated
8129 * by new_ipc_kmsg_copyin.
8130 */
8131
8132 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 8133 return(vm_map_copyout_kernel_buffer(
5ba3f43e 8134 dst_map, &dst_addr,
39037602 8135 copy, copy->size, TRUE, discard_on_success));
1c79356b
A
8136 }
8137
8138 /*
8139 * Only works for entry lists at the moment. Will
8140 * support page lists later.
8141 */
8142
8143 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8144
8145 if (copy->size == 0) {
6d2010ae
A
8146 if (discard_on_success)
8147 vm_map_copy_discard(copy);
1c79356b
A
8148 return(KERN_SUCCESS);
8149 }
8150
8151 /*
8152 * Verify that the destination is all writeable
8153 * initially. We have to trunc the destination
8154 * address and round the copy size or we'll end up
8155 * splitting entries in strange ways.
8156 */
8157
39236c6e
A
8158 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8159 VM_MAP_PAGE_MASK(dst_map)) ||
8160 !VM_MAP_PAGE_ALIGNED(copy->offset,
8161 VM_MAP_PAGE_MASK(dst_map)) ||
8162 !VM_MAP_PAGE_ALIGNED(dst_addr,
fe8ab488 8163 VM_MAP_PAGE_MASK(dst_map)))
1c79356b
A
8164 {
8165 aligned = FALSE;
39236c6e
A
8166 dst_end = vm_map_round_page(dst_addr + copy->size,
8167 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8168 } else {
8169 dst_end = dst_addr + copy->size;
8170 }
8171
1c79356b 8172 vm_map_lock(dst_map);
9bccf70c 8173
91447636
A
8174 /* LP64todo - remove this check when vm_map_commpage64()
8175 * no longer has to stuff in a map_entry for the commpage
8176 * above the map's max_offset.
8177 */
8178 if (dst_addr >= dst_map->max_offset) {
8179 vm_map_unlock(dst_map);
8180 return(KERN_INVALID_ADDRESS);
8181 }
5ba3f43e 8182
9bccf70c 8183start_pass_1:
1c79356b
A
8184 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8185 vm_map_unlock(dst_map);
8186 return(KERN_INVALID_ADDRESS);
8187 }
39236c6e
A
8188 vm_map_clip_start(dst_map,
8189 tmp_entry,
8190 vm_map_trunc_page(dst_addr,
8191 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
8192 for (entry = tmp_entry;;) {
8193 vm_map_entry_t next = entry->vme_next;
8194
8195 while(entry->is_sub_map) {
91447636
A
8196 vm_map_offset_t sub_start;
8197 vm_map_offset_t sub_end;
8198 vm_map_offset_t local_end;
1c79356b
A
8199
8200 if (entry->in_transition) {
8201
2d21ac55
A
8202 /*
8203 * Say that we are waiting, and wait for entry.
8204 */
1c79356b
A
8205 entry->needs_wakeup = TRUE;
8206 vm_map_entry_wait(dst_map, THREAD_UNINT);
8207
8208 goto start_pass_1;
8209 }
8210
8211 local_end = entry->vme_end;
8212 if (!(entry->needs_copy)) {
8213 /* if needs_copy we are a COW submap */
8214 /* in such a case we just replace so */
8215 /* there is no need for the follow- */
8216 /* ing check. */
8217 encountered_sub_map = TRUE;
3e170ce0 8218 sub_start = VME_OFFSET(entry);
1c79356b
A
8219
8220 if(entry->vme_end < dst_end)
8221 sub_end = entry->vme_end;
5ba3f43e 8222 else
1c79356b
A
8223 sub_end = dst_end;
8224 sub_end -= entry->vme_start;
3e170ce0 8225 sub_end += VME_OFFSET(entry);
1c79356b 8226 vm_map_unlock(dst_map);
5ba3f43e 8227
1c79356b 8228 kr = vm_map_overwrite_submap_recurse(
3e170ce0 8229 VME_SUBMAP(entry),
1c79356b
A
8230 sub_start,
8231 sub_end - sub_start);
8232 if(kr != KERN_SUCCESS)
8233 return kr;
8234 vm_map_lock(dst_map);
8235 }
8236
8237 if (dst_end <= entry->vme_end)
8238 goto start_overwrite;
5ba3f43e 8239 if(!vm_map_lookup_entry(dst_map, local_end,
1c79356b
A
8240 &entry)) {
8241 vm_map_unlock(dst_map);
8242 return(KERN_INVALID_ADDRESS);
8243 }
8244 next = entry->vme_next;
8245 }
8246
8247 if ( ! (entry->protection & VM_PROT_WRITE)) {
8248 vm_map_unlock(dst_map);
8249 return(KERN_PROTECTION_FAILURE);
8250 }
8251
8252 /*
8253 * If the entry is in transition, we must wait
8254 * for it to exit that state. Anything could happen
8255 * when we unlock the map, so start over.
8256 */
8257 if (entry->in_transition) {
8258
8259 /*
8260 * Say that we are waiting, and wait for entry.
8261 */
8262 entry->needs_wakeup = TRUE;
8263 vm_map_entry_wait(dst_map, THREAD_UNINT);
8264
8265 goto start_pass_1;
8266 }
8267
8268/*
8269 * our range is contained completely within this map entry
8270 */
8271 if (dst_end <= entry->vme_end)
8272 break;
8273/*
8274 * check that range specified is contiguous region
8275 */
8276 if ((next == vm_map_to_entry(dst_map)) ||
8277 (next->vme_start != entry->vme_end)) {
8278 vm_map_unlock(dst_map);
8279 return(KERN_INVALID_ADDRESS);
8280 }
8281
8282
8283 /*
8284 * Check for permanent objects in the destination.
8285 */
3e170ce0
A
8286 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8287 ((!VME_OBJECT(entry)->internal) ||
8288 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
8289 contains_permanent_objects = TRUE;
8290 }
8291
8292 entry = next;
8293 }/* for */
8294
8295start_overwrite:
8296 /*
8297 * If there are permanent objects in the destination, then
8298 * the copy cannot be interrupted.
8299 */
8300
8301 if (interruptible && contains_permanent_objects) {
8302 vm_map_unlock(dst_map);
8303 return(KERN_FAILURE); /* XXX */
8304 }
8305
8306 /*
8307 *
8308 * Make a second pass, overwriting the data
8309 * At the beginning of each loop iteration,
8310 * the next entry to be overwritten is "tmp_entry"
8311 * (initially, the value returned from the lookup above),
8312 * and the starting address expected in that entry
8313 * is "start".
8314 */
8315
8316 total_size = copy->size;
8317 if(encountered_sub_map) {
8318 copy_size = 0;
8319 /* re-calculate tmp_entry since we've had the map */
8320 /* unlocked */
8321 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8322 vm_map_unlock(dst_map);
8323 return(KERN_INVALID_ADDRESS);
8324 }
8325 } else {
8326 copy_size = copy->size;
8327 }
5ba3f43e 8328
1c79356b
A
8329 base_addr = dst_addr;
8330 while(TRUE) {
8331 /* deconstruct the copy object and do in parts */
8332 /* only in sub_map, interruptible case */
8333 vm_map_entry_t copy_entry;
91447636
A
8334 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8335 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 8336 int nentries;
91447636 8337 int remaining_entries = 0;
b0d623f7 8338 vm_map_offset_t new_offset = 0;
5ba3f43e 8339
1c79356b
A
8340 for (entry = tmp_entry; copy_size == 0;) {
8341 vm_map_entry_t next;
8342
8343 next = entry->vme_next;
8344
8345 /* tmp_entry and base address are moved along */
8346 /* each time we encounter a sub-map. Otherwise */
8347 /* entry can outpace tmp_entry, and the copy_size */
8348 /* may reflect the distance between them */
8349 /* if the current entry is found to be in transition */
8350 /* we will start over at the beginning or the last */
8351 /* encounter of a submap as dictated by base_addr */
8352 /* we will zero copy_size accordingly. */
8353 if (entry->in_transition) {
8354 /*
8355 * Say that we are waiting, and wait for entry.
8356 */
8357 entry->needs_wakeup = TRUE;
8358 vm_map_entry_wait(dst_map, THREAD_UNINT);
8359
5ba3f43e 8360 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 8361 &tmp_entry)) {
1c79356b
A
8362 vm_map_unlock(dst_map);
8363 return(KERN_INVALID_ADDRESS);
8364 }
8365 copy_size = 0;
8366 entry = tmp_entry;
8367 continue;
8368 }
5ba3f43e 8369 if (entry->is_sub_map) {
91447636
A
8370 vm_map_offset_t sub_start;
8371 vm_map_offset_t sub_end;
8372 vm_map_offset_t local_end;
1c79356b
A
8373
8374 if (entry->needs_copy) {
8375 /* if this is a COW submap */
8376 /* just back the range with an */
8377 /* anonymous entry */
8378 if(entry->vme_end < dst_end)
8379 sub_end = entry->vme_end;
5ba3f43e 8380 else
1c79356b
A
8381 sub_end = dst_end;
8382 if(entry->vme_start < base_addr)
8383 sub_start = base_addr;
5ba3f43e 8384 else
1c79356b
A
8385 sub_start = entry->vme_start;
8386 vm_map_clip_end(
8387 dst_map, entry, sub_end);
8388 vm_map_clip_start(
8389 dst_map, entry, sub_start);
2d21ac55 8390 assert(!entry->use_pmap);
1c79356b
A
8391 entry->is_sub_map = FALSE;
8392 vm_map_deallocate(
3e170ce0 8393 VME_SUBMAP(entry));
5ba3f43e
A
8394 VME_OBJECT_SET(entry, NULL);
8395 VME_OFFSET_SET(entry, 0);
1c79356b
A
8396 entry->is_shared = FALSE;
8397 entry->needs_copy = FALSE;
5ba3f43e 8398 entry->protection = VM_PROT_DEFAULT;
1c79356b
A
8399 entry->max_protection = VM_PROT_ALL;
8400 entry->wired_count = 0;
8401 entry->user_wired_count = 0;
5ba3f43e
A
8402 if(entry->inheritance
8403 == VM_INHERIT_SHARE)
2d21ac55 8404 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
8405 continue;
8406 }
8407 /* first take care of any non-sub_map */
8408 /* entries to send */
8409 if(base_addr < entry->vme_start) {
8410 /* stuff to send */
5ba3f43e 8411 copy_size =
1c79356b
A
8412 entry->vme_start - base_addr;
8413 break;
8414 }
3e170ce0 8415 sub_start = VME_OFFSET(entry);
1c79356b
A
8416
8417 if(entry->vme_end < dst_end)
8418 sub_end = entry->vme_end;
5ba3f43e 8419 else
1c79356b
A
8420 sub_end = dst_end;
8421 sub_end -= entry->vme_start;
3e170ce0 8422 sub_end += VME_OFFSET(entry);
1c79356b
A
8423 local_end = entry->vme_end;
8424 vm_map_unlock(dst_map);
8425 copy_size = sub_end - sub_start;
8426
8427 /* adjust the copy object */
8428 if (total_size > copy_size) {
91447636
A
8429 vm_map_size_t local_size = 0;
8430 vm_map_size_t entry_size;
1c79356b 8431
2d21ac55
A
8432 nentries = 1;
8433 new_offset = copy->offset;
8434 copy_entry = vm_map_copy_first_entry(copy);
5ba3f43e 8435 while(copy_entry !=
2d21ac55 8436 vm_map_copy_to_entry(copy)){
5ba3f43e 8437 entry_size = copy_entry->vme_end -
2d21ac55
A
8438 copy_entry->vme_start;
8439 if((local_size < copy_size) &&
5ba3f43e 8440 ((local_size + entry_size)
2d21ac55 8441 >= copy_size)) {
5ba3f43e
A
8442 vm_map_copy_clip_end(copy,
8443 copy_entry,
2d21ac55
A
8444 copy_entry->vme_start +
8445 (copy_size - local_size));
5ba3f43e 8446 entry_size = copy_entry->vme_end -
2d21ac55
A
8447 copy_entry->vme_start;
8448 local_size += entry_size;
8449 new_offset += entry_size;
8450 }
8451 if(local_size >= copy_size) {
8452 next_copy = copy_entry->vme_next;
5ba3f43e 8453 copy_entry->vme_next =
2d21ac55 8454 vm_map_copy_to_entry(copy);
5ba3f43e 8455 previous_prev =
2d21ac55
A
8456 copy->cpy_hdr.links.prev;
8457 copy->cpy_hdr.links.prev = copy_entry;
8458 copy->size = copy_size;
5ba3f43e 8459 remaining_entries =
2d21ac55
A
8460 copy->cpy_hdr.nentries;
8461 remaining_entries -= nentries;
8462 copy->cpy_hdr.nentries = nentries;
8463 break;
8464 } else {
8465 local_size += entry_size;
8466 new_offset += entry_size;
8467 nentries++;
8468 }
8469 copy_entry = copy_entry->vme_next;
8470 }
1c79356b 8471 }
5ba3f43e 8472
1c79356b
A
8473 if((entry->use_pmap) && (pmap == NULL)) {
8474 kr = vm_map_copy_overwrite_nested(
3e170ce0 8475 VME_SUBMAP(entry),
1c79356b
A
8476 sub_start,
8477 copy,
5ba3f43e 8478 interruptible,
3e170ce0 8479 VME_SUBMAP(entry)->pmap,
6d2010ae 8480 TRUE);
1c79356b
A
8481 } else if (pmap != NULL) {
8482 kr = vm_map_copy_overwrite_nested(
3e170ce0 8483 VME_SUBMAP(entry),
1c79356b
A
8484 sub_start,
8485 copy,
6d2010ae
A
8486 interruptible, pmap,
8487 TRUE);
1c79356b
A
8488 } else {
8489 kr = vm_map_copy_overwrite_nested(
3e170ce0 8490 VME_SUBMAP(entry),
1c79356b
A
8491 sub_start,
8492 copy,
8493 interruptible,
6d2010ae
A
8494 dst_map->pmap,
8495 TRUE);
1c79356b
A
8496 }
8497 if(kr != KERN_SUCCESS) {
8498 if(next_copy != NULL) {
5ba3f43e 8499 copy->cpy_hdr.nentries +=
2d21ac55 8500 remaining_entries;
5ba3f43e 8501 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 8502 next_copy;
5ba3f43e 8503 copy->cpy_hdr.links.prev
2d21ac55
A
8504 = previous_prev;
8505 copy->size = total_size;
1c79356b
A
8506 }
8507 return kr;
8508 }
8509 if (dst_end <= local_end) {
8510 return(KERN_SUCCESS);
8511 }
8512 /* otherwise copy no longer exists, it was */
8513 /* destroyed after successful copy_overwrite */
5ba3f43e 8514 copy = (vm_map_copy_t)
2d21ac55 8515 zalloc(vm_map_copy_zone);
04b8595b 8516 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 8517 vm_map_copy_first_entry(copy) =
2d21ac55
A
8518 vm_map_copy_last_entry(copy) =
8519 vm_map_copy_to_entry(copy);
1c79356b
A
8520 copy->type = VM_MAP_COPY_ENTRY_LIST;
8521 copy->offset = new_offset;
8522
e2d2fc5c
A
8523 /*
8524 * XXX FBDP
8525 * this does not seem to deal with
8526 * the VM map store (R&B tree)
8527 */
8528
1c79356b
A
8529 total_size -= copy_size;
8530 copy_size = 0;
8531 /* put back remainder of copy in container */
8532 if(next_copy != NULL) {
2d21ac55
A
8533 copy->cpy_hdr.nentries = remaining_entries;
8534 copy->cpy_hdr.links.next = next_copy;
8535 copy->cpy_hdr.links.prev = previous_prev;
8536 copy->size = total_size;
5ba3f43e 8537 next_copy->vme_prev =
2d21ac55
A
8538 vm_map_copy_to_entry(copy);
8539 next_copy = NULL;
1c79356b
A
8540 }
8541 base_addr = local_end;
8542 vm_map_lock(dst_map);
5ba3f43e 8543 if(!vm_map_lookup_entry(dst_map,
2d21ac55 8544 local_end, &tmp_entry)) {
1c79356b
A
8545 vm_map_unlock(dst_map);
8546 return(KERN_INVALID_ADDRESS);
8547 }
8548 entry = tmp_entry;
8549 continue;
5ba3f43e 8550 }
1c79356b
A
8551 if (dst_end <= entry->vme_end) {
8552 copy_size = dst_end - base_addr;
8553 break;
8554 }
8555
8556 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 8557 (next->vme_start != entry->vme_end)) {
1c79356b
A
8558 vm_map_unlock(dst_map);
8559 return(KERN_INVALID_ADDRESS);
8560 }
8561
8562 entry = next;
8563 }/* for */
8564
8565 next_copy = NULL;
8566 nentries = 1;
8567
8568 /* adjust the copy object */
8569 if (total_size > copy_size) {
91447636
A
8570 vm_map_size_t local_size = 0;
8571 vm_map_size_t entry_size;
1c79356b
A
8572
8573 new_offset = copy->offset;
8574 copy_entry = vm_map_copy_first_entry(copy);
8575 while(copy_entry != vm_map_copy_to_entry(copy)) {
5ba3f43e 8576 entry_size = copy_entry->vme_end -
2d21ac55 8577 copy_entry->vme_start;
1c79356b 8578 if((local_size < copy_size) &&
5ba3f43e 8579 ((local_size + entry_size)
2d21ac55 8580 >= copy_size)) {
5ba3f43e 8581 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
8582 copy_entry->vme_start +
8583 (copy_size - local_size));
5ba3f43e 8584 entry_size = copy_entry->vme_end -
2d21ac55 8585 copy_entry->vme_start;
1c79356b
A
8586 local_size += entry_size;
8587 new_offset += entry_size;
8588 }
8589 if(local_size >= copy_size) {
8590 next_copy = copy_entry->vme_next;
5ba3f43e 8591 copy_entry->vme_next =
1c79356b 8592 vm_map_copy_to_entry(copy);
5ba3f43e 8593 previous_prev =
1c79356b
A
8594 copy->cpy_hdr.links.prev;
8595 copy->cpy_hdr.links.prev = copy_entry;
8596 copy->size = copy_size;
5ba3f43e 8597 remaining_entries =
1c79356b
A
8598 copy->cpy_hdr.nentries;
8599 remaining_entries -= nentries;
8600 copy->cpy_hdr.nentries = nentries;
8601 break;
8602 } else {
8603 local_size += entry_size;
8604 new_offset += entry_size;
8605 nentries++;
8606 }
8607 copy_entry = copy_entry->vme_next;
8608 }
8609 }
8610
8611 if (aligned) {
8612 pmap_t local_pmap;
8613
8614 if(pmap)
8615 local_pmap = pmap;
8616 else
8617 local_pmap = dst_map->pmap;
8618
5ba3f43e 8619 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
8620 dst_map, tmp_entry, copy,
8621 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b 8622 if(next_copy != NULL) {
5ba3f43e 8623 copy->cpy_hdr.nentries +=
2d21ac55 8624 remaining_entries;
5ba3f43e 8625 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 8626 next_copy;
5ba3f43e 8627 copy->cpy_hdr.links.prev =
2d21ac55 8628 previous_prev;
1c79356b
A
8629 copy->size += copy_size;
8630 }
8631 return kr;
8632 }
8633 vm_map_unlock(dst_map);
8634 } else {
2d21ac55
A
8635 /*
8636 * Performance gain:
8637 *
8638 * if the copy and dst address are misaligned but the same
8639 * offset within the page we can copy_not_aligned the
8640 * misaligned parts and copy aligned the rest. If they are
8641 * aligned but len is unaligned we simply need to copy
8642 * the end bit unaligned. We'll need to split the misaligned
8643 * bits of the region in this case !
8644 */
8645 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
8646 kr = vm_map_copy_overwrite_unaligned(
8647 dst_map,
8648 tmp_entry,
8649 copy,
8650 base_addr,
8651 discard_on_success);
8652 if (kr != KERN_SUCCESS) {
1c79356b
A
8653 if(next_copy != NULL) {
8654 copy->cpy_hdr.nentries +=
2d21ac55 8655 remaining_entries;
5ba3f43e 8656 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 8657 next_copy;
5ba3f43e 8658 copy->cpy_hdr.links.prev =
1c79356b
A
8659 previous_prev;
8660 copy->size += copy_size;
8661 }
8662 return kr;
8663 }
8664 }
8665 total_size -= copy_size;
8666 if(total_size == 0)
8667 break;
8668 base_addr += copy_size;
8669 copy_size = 0;
8670 copy->offset = new_offset;
8671 if(next_copy != NULL) {
8672 copy->cpy_hdr.nentries = remaining_entries;
8673 copy->cpy_hdr.links.next = next_copy;
8674 copy->cpy_hdr.links.prev = previous_prev;
8675 next_copy->vme_prev = vm_map_copy_to_entry(copy);
8676 copy->size = total_size;
8677 }
8678 vm_map_lock(dst_map);
8679 while(TRUE) {
5ba3f43e 8680 if (!vm_map_lookup_entry(dst_map,
2d21ac55 8681 base_addr, &tmp_entry)) {
1c79356b
A
8682 vm_map_unlock(dst_map);
8683 return(KERN_INVALID_ADDRESS);
8684 }
8685 if (tmp_entry->in_transition) {
8686 entry->needs_wakeup = TRUE;
8687 vm_map_entry_wait(dst_map, THREAD_UNINT);
8688 } else {
8689 break;
8690 }
8691 }
39236c6e
A
8692 vm_map_clip_start(dst_map,
8693 tmp_entry,
8694 vm_map_trunc_page(base_addr,
8695 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
8696
8697 entry = tmp_entry;
8698 } /* while */
8699
8700 /*
8701 * Throw away the vm_map_copy object
8702 */
6d2010ae
A
8703 if (discard_on_success)
8704 vm_map_copy_discard(copy);
1c79356b
A
8705
8706 return(KERN_SUCCESS);
8707}/* vm_map_copy_overwrite */
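/*
 * Editor's note (illustrative restatement, not part of the original
 * source): when the destination contains submaps, the loop above
 * processes the copy object in slices.  For each slice it clips the
 * copy entry list after "copy_size" bytes, remembers the detached tail
 * in "next_copy"/"previous_prev"/"remaining_entries", recurses (or
 * copies) with the shortened list, and then splices the tail back by
 * restoring cpy_hdr.nentries, cpy_hdr.links and copy->size before
 * moving "base_addr" forward.  On error the same splice is performed
 * so the caller gets the whole copy object back intact.
 */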
8708
8709kern_return_t
8710vm_map_copy_overwrite(
8711 vm_map_t dst_map,
91447636 8712 vm_map_offset_t dst_addr,
1c79356b
A
8713 vm_map_copy_t copy,
8714 boolean_t interruptible)
8715{
6d2010ae
A
8716 vm_map_size_t head_size, tail_size;
8717 vm_map_copy_t head_copy, tail_copy;
8718 vm_map_offset_t head_addr, tail_addr;
8719 vm_map_entry_t entry;
8720 kern_return_t kr;
5ba3f43e 8721 vm_map_offset_t effective_page_mask, effective_page_size;
6d2010ae
A
8722
8723 head_size = 0;
8724 tail_size = 0;
8725 head_copy = NULL;
8726 tail_copy = NULL;
8727 head_addr = 0;
8728 tail_addr = 0;
8729
8730 if (interruptible ||
8731 copy == VM_MAP_COPY_NULL ||
8732 copy->type != VM_MAP_COPY_ENTRY_LIST) {
8733 /*
8734 * We can't split the "copy" map if we're interruptible
8735 * or if we don't have a "copy" map...
8736 */
8737 blunt_copy:
8738 return vm_map_copy_overwrite_nested(dst_map,
8739 dst_addr,
8740 copy,
8741 interruptible,
8742 (pmap_t) NULL,
8743 TRUE);
8744 }
8745
5ba3f43e
A
8746 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
8747 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
8748 effective_page_mask);
8749 effective_page_size = effective_page_mask + 1;
8750
8751 if (copy->size < 3 * effective_page_size) {
6d2010ae
A
8752 /*
8753 * Too small to bother with optimizing...
8754 */
8755 goto blunt_copy;
8756 }
8757
5ba3f43e
A
8758 if ((dst_addr & effective_page_mask) !=
8759 (copy->offset & effective_page_mask)) {
6d2010ae
A
8760 /*
8761 * Incompatible mis-alignment of source and destination...
8762 */
8763 goto blunt_copy;
8764 }
8765
8766 /*
8767 * Proper alignment or identical mis-alignment at the beginning.
8768 * Let's try and do a small unaligned copy first (if needed)
8769 * and then an aligned copy for the rest.
8770 */
5ba3f43e 8771 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
6d2010ae 8772 head_addr = dst_addr;
5ba3f43e
A
8773 head_size = (effective_page_size -
8774 (copy->offset & effective_page_mask));
8775 head_size = MIN(head_size, copy->size);
6d2010ae 8776 }
5ba3f43e
A
8777 if (!vm_map_page_aligned(copy->offset + copy->size,
8778 effective_page_mask)) {
6d2010ae
A
8779 /*
8780 * Mis-alignment at the end.
8781 * Do an aligned copy up to the last page and
8782 * then an unaligned copy for the remaining bytes.
8783 */
39236c6e 8784 tail_size = ((copy->offset + copy->size) &
5ba3f43e
A
8785 effective_page_mask);
8786 tail_size = MIN(tail_size, copy->size);
6d2010ae 8787 tail_addr = dst_addr + copy->size - tail_size;
5ba3f43e 8788 assert(tail_addr >= head_addr + head_size);
6d2010ae 8789 }
5ba3f43e 8790 assert(head_size + tail_size <= copy->size);
6d2010ae
A
8791
8792 if (head_size + tail_size == copy->size) {
8793 /*
8794 * It's all unaligned, no optimization possible...
8795 */
8796 goto blunt_copy;
8797 }
8798
8799 /*
8800 * Can't optimize if there are any submaps in the
8801 * destination due to the way we free the "copy" map
8802 * progressively in vm_map_copy_overwrite_nested()
8803 * in that case.
8804 */
8805 vm_map_lock_read(dst_map);
8806 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
8807 vm_map_unlock_read(dst_map);
8808 goto blunt_copy;
8809 }
8810 for (;
8811 (entry != vm_map_copy_to_entry(copy) &&
8812 entry->vme_start < dst_addr + copy->size);
8813 entry = entry->vme_next) {
8814 if (entry->is_sub_map) {
8815 vm_map_unlock_read(dst_map);
8816 goto blunt_copy;
8817 }
8818 }
8819 vm_map_unlock_read(dst_map);
8820
8821 if (head_size) {
8822 /*
8823 * Unaligned copy of the first "head_size" bytes, to reach
8824 * a page boundary.
8825 */
5ba3f43e 8826
6d2010ae
A
8827 /*
8828 * Extract "head_copy" out of "copy".
8829 */
8830 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 8831 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
8832 vm_map_copy_first_entry(head_copy) =
8833 vm_map_copy_to_entry(head_copy);
8834 vm_map_copy_last_entry(head_copy) =
8835 vm_map_copy_to_entry(head_copy);
8836 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
8837 head_copy->cpy_hdr.nentries = 0;
8838 head_copy->cpy_hdr.entries_pageable =
8839 copy->cpy_hdr.entries_pageable;
8840 vm_map_store_init(&head_copy->cpy_hdr);
8841
5ba3f43e
A
8842 entry = vm_map_copy_first_entry(copy);
8843 if (entry->vme_end < copy->offset + head_size) {
8844 head_size = entry->vme_end - copy->offset;
8845 }
8846
6d2010ae
A
8847 head_copy->offset = copy->offset;
8848 head_copy->size = head_size;
6d2010ae
A
8849 copy->offset += head_size;
8850 copy->size -= head_size;
8851
6d2010ae
A
8852 vm_map_copy_clip_end(copy, entry, copy->offset);
8853 vm_map_copy_entry_unlink(copy, entry);
8854 vm_map_copy_entry_link(head_copy,
8855 vm_map_copy_to_entry(head_copy),
8856 entry);
8857
8858 /*
8859 * Do the unaligned copy.
8860 */
8861 kr = vm_map_copy_overwrite_nested(dst_map,
8862 head_addr,
8863 head_copy,
8864 interruptible,
8865 (pmap_t) NULL,
8866 FALSE);
8867 if (kr != KERN_SUCCESS)
8868 goto done;
8869 }
8870
8871 if (tail_size) {
8872 /*
8873 * Extract "tail_copy" out of "copy".
8874 */
8875 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 8876 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
8877 vm_map_copy_first_entry(tail_copy) =
8878 vm_map_copy_to_entry(tail_copy);
8879 vm_map_copy_last_entry(tail_copy) =
8880 vm_map_copy_to_entry(tail_copy);
8881 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
8882 tail_copy->cpy_hdr.nentries = 0;
8883 tail_copy->cpy_hdr.entries_pageable =
8884 copy->cpy_hdr.entries_pageable;
8885 vm_map_store_init(&tail_copy->cpy_hdr);
8886
8887 tail_copy->offset = copy->offset + copy->size - tail_size;
8888 tail_copy->size = tail_size;
8889
8890 copy->size -= tail_size;
8891
8892 entry = vm_map_copy_last_entry(copy);
8893 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
8894 entry = vm_map_copy_last_entry(copy);
8895 vm_map_copy_entry_unlink(copy, entry);
8896 vm_map_copy_entry_link(tail_copy,
8897 vm_map_copy_last_entry(tail_copy),
8898 entry);
8899 }
8900
8901 /*
8902 * Copy most (or possibly all) of the data.
8903 */
8904 kr = vm_map_copy_overwrite_nested(dst_map,
8905 dst_addr + head_size,
8906 copy,
8907 interruptible,
8908 (pmap_t) NULL,
8909 FALSE);
8910 if (kr != KERN_SUCCESS) {
8911 goto done;
8912 }
8913
8914 if (tail_size) {
8915 kr = vm_map_copy_overwrite_nested(dst_map,
8916 tail_addr,
8917 tail_copy,
8918 interruptible,
8919 (pmap_t) NULL,
8920 FALSE);
8921 }
8922
8923done:
8924 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8925 if (kr == KERN_SUCCESS) {
8926 /*
8927 * Discard all the copy maps.
8928 */
8929 if (head_copy) {
8930 vm_map_copy_discard(head_copy);
8931 head_copy = NULL;
8932 }
8933 vm_map_copy_discard(copy);
8934 if (tail_copy) {
8935 vm_map_copy_discard(tail_copy);
8936 tail_copy = NULL;
8937 }
8938 } else {
8939 /*
8940 * Re-assemble the original copy map.
8941 */
8942 if (head_copy) {
8943 entry = vm_map_copy_first_entry(head_copy);
8944 vm_map_copy_entry_unlink(head_copy, entry);
8945 vm_map_copy_entry_link(copy,
8946 vm_map_copy_to_entry(copy),
8947 entry);
8948 copy->offset -= head_size;
8949 copy->size += head_size;
8950 vm_map_copy_discard(head_copy);
8951 head_copy = NULL;
8952 }
8953 if (tail_copy) {
8954 entry = vm_map_copy_last_entry(tail_copy);
8955 vm_map_copy_entry_unlink(tail_copy, entry);
8956 vm_map_copy_entry_link(copy,
8957 vm_map_copy_last_entry(copy),
8958 entry);
8959 copy->size += tail_size;
8960 vm_map_copy_discard(tail_copy);
8961 tail_copy = NULL;
8962 }
8963 }
8964 return kr;
1c79356b
A
8965}
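/*
 * Editor's worked example (illustrative, not part of the original
 * source): how the head/tail split above falls out of the effective
 * page mask.  Assume a hypothetical 16K effective page (mask 0x3fff),
 * copy->offset = 0x1800, copy->size = 0x10000, and a dst_addr that is
 * mis-aligned by the same 0x1800:
 *
 *	head_size = 0x4000 - (0x1800 & 0x3fff)       = 0x2800
 *	tail_size = (0x1800 + 0x10000) & 0x3fff      = 0x1800
 *	middle    = 0x10000 - 0x2800 - 0x1800        = 0xc000
 *
 * so only the first 0x2800 and last 0x1800 bytes take the unaligned
 * path; the 0xc000 bytes in between go through the aligned fast path.
 */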
8966
8967
8968/*
91447636 8969 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
8970 *
8971 * Description:
8972 * Physically copy unaligned data
8973 *
8974 * Implementation:
8975 * Unaligned parts of pages have to be physically copied. We use
8976 * a modified form of vm_fault_copy (which understands non-aligned
8977 * page offsets and sizes) to do the copy. We attempt to copy as
8978 * much memory in one go as possible; however, vm_fault_copy copies
8979 * within one memory object, so we have to find the smallest of "amount left",
8980 * "source object data size" and "target object data size". With
8981 * unaligned data we don't need to split regions, therefore the source
8982 * (copy) object should be one map entry; the target range may, however,
8983 * be split over multiple map entries. In any event we are pessimistic
8984 * about these assumptions.
8985 *
8986 * Assumptions:
8987 * dst_map is locked on entry and is return locked on success,
8988 * unlocked on error.
8989 */
8990
91447636 8991static kern_return_t
1c79356b
A
8992vm_map_copy_overwrite_unaligned(
8993 vm_map_t dst_map,
8994 vm_map_entry_t entry,
8995 vm_map_copy_t copy,
39236c6e
A
8996 vm_map_offset_t start,
8997 boolean_t discard_on_success)
1c79356b 8998{
39236c6e
A
8999 vm_map_entry_t copy_entry;
9000 vm_map_entry_t copy_entry_next;
1c79356b
A
9001 vm_map_version_t version;
9002 vm_object_t dst_object;
9003 vm_object_offset_t dst_offset;
9004 vm_object_offset_t src_offset;
9005 vm_object_offset_t entry_offset;
91447636
A
9006 vm_map_offset_t entry_end;
9007 vm_map_size_t src_size,
1c79356b
A
9008 dst_size,
9009 copy_size,
9010 amount_left;
9011 kern_return_t kr = KERN_SUCCESS;
9012
5ba3f43e 9013
39236c6e
A
9014 copy_entry = vm_map_copy_first_entry(copy);
9015
1c79356b
A
9016 vm_map_lock_write_to_read(dst_map);
9017
91447636 9018 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
9019 amount_left = copy->size;
9020/*
9021 * unaligned, so we never clipped this entry; we need the offset into
9022 * the vm_object, not just the data.
5ba3f43e 9023 */
1c79356b
A
9024 while (amount_left > 0) {
9025
9026 if (entry == vm_map_to_entry(dst_map)) {
9027 vm_map_unlock_read(dst_map);
9028 return KERN_INVALID_ADDRESS;
9029 }
9030
9031 /* "start" must be within the current map entry */
9032 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9033
9034 dst_offset = start - entry->vme_start;
9035
9036 dst_size = entry->vme_end - start;
9037
9038 src_size = copy_entry->vme_end -
9039 (copy_entry->vme_start + src_offset);
9040
9041 if (dst_size < src_size) {
9042/*
9043 * we can only copy dst_size bytes before
9044 * we have to get the next destination entry
9045 */
9046 copy_size = dst_size;
9047 } else {
9048/*
9049 * we can only copy src_size bytes before
9050 * we have to get the next source copy entry
9051 */
9052 copy_size = src_size;
9053 }
9054
9055 if (copy_size > amount_left) {
9056 copy_size = amount_left;
9057 }
9058/*
9059 * Entry needs copy; create a shadow object for the
9060 * copy-on-write region.
9061 */
9062 if (entry->needs_copy &&
2d21ac55 9063 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
9064 {
9065 if (vm_map_lock_read_to_write(dst_map)) {
9066 vm_map_lock_read(dst_map);
9067 goto RetryLookup;
9068 }
3e170ce0
A
9069 VME_OBJECT_SHADOW(entry,
9070 (vm_map_size_t)(entry->vme_end
9071 - entry->vme_start));
1c79356b
A
9072 entry->needs_copy = FALSE;
9073 vm_map_lock_write_to_read(dst_map);
9074 }
3e170ce0 9075 dst_object = VME_OBJECT(entry);
1c79356b
A
9076/*
9077 * unlike with the virtual (aligned) copy, we're going
9078 * to fault on it; therefore we need a target object.
9079 */
9080 if (dst_object == VM_OBJECT_NULL) {
9081 if (vm_map_lock_read_to_write(dst_map)) {
9082 vm_map_lock_read(dst_map);
9083 goto RetryLookup;
9084 }
91447636 9085 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 9086 entry->vme_end - entry->vme_start);
3e170ce0
A
9087 VME_OBJECT(entry) = dst_object;
9088 VME_OFFSET_SET(entry, 0);
fe8ab488 9089 assert(entry->use_pmap);
1c79356b
A
9090 vm_map_lock_write_to_read(dst_map);
9091 }
9092/*
9093 * Take an object reference and unlock map. The "entry" may
9094 * disappear or change when the map is unlocked.
9095 */
9096 vm_object_reference(dst_object);
9097 version.main_timestamp = dst_map->timestamp;
3e170ce0 9098 entry_offset = VME_OFFSET(entry);
1c79356b
A
9099 entry_end = entry->vme_end;
9100 vm_map_unlock_read(dst_map);
9101/*
9102 * Copy as much as possible in one pass
9103 */
9104 kr = vm_fault_copy(
3e170ce0
A
9105 VME_OBJECT(copy_entry),
9106 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
9107 &copy_size,
9108 dst_object,
9109 entry_offset + dst_offset,
9110 dst_map,
9111 &version,
9112 THREAD_UNINT );
9113
9114 start += copy_size;
9115 src_offset += copy_size;
9116 amount_left -= copy_size;
9117/*
9118 * Release the object reference
9119 */
9120 vm_object_deallocate(dst_object);
9121/*
9122 * If a hard error occurred, return it now
9123 */
9124 if (kr != KERN_SUCCESS)
9125 return kr;
9126
9127 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 9128 || amount_left == 0)
1c79356b
A
9129 {
9130/*
9131 * all done with this copy entry, dispose.
9132 */
39236c6e
A
9133 copy_entry_next = copy_entry->vme_next;
9134
9135 if (discard_on_success) {
9136 vm_map_copy_entry_unlink(copy, copy_entry);
9137 assert(!copy_entry->is_sub_map);
3e170ce0 9138 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
9139 vm_map_copy_entry_dispose(copy, copy_entry);
9140 }
1c79356b 9141
39236c6e
A
9142 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9143 amount_left) {
1c79356b
A
9144/*
9145 * not finished copying but ran out of source
9146 */
9147 return KERN_INVALID_ADDRESS;
9148 }
39236c6e
A
9149
9150 copy_entry = copy_entry_next;
9151
1c79356b
A
9152 src_offset = 0;
9153 }
9154
9155 if (amount_left == 0)
9156 return KERN_SUCCESS;
9157
9158 vm_map_lock_read(dst_map);
9159 if (version.main_timestamp == dst_map->timestamp) {
9160 if (start == entry_end) {
9161/*
9162 * destination region is split. Use the version
9163 * information to avoid a lookup in the normal
9164 * case.
9165 */
9166 entry = entry->vme_next;
9167/*
9168 * should be contiguous. Fail if we encounter
9169 * a hole in the destination.
9170 */
9171 if (start != entry->vme_start) {
9172 vm_map_unlock_read(dst_map);
9173 return KERN_INVALID_ADDRESS ;
9174 }
9175 }
9176 } else {
9177/*
9178 * Map version check failed.
9179 * we must lookup the entry because somebody
9180 * might have changed the map behind our backs.
9181 */
2d21ac55 9182 RetryLookup:
1c79356b
A
9183 if (!vm_map_lookup_entry(dst_map, start, &entry))
9184 {
9185 vm_map_unlock_read(dst_map);
9186 return KERN_INVALID_ADDRESS ;
9187 }
9188 }
9189 }/* while */
9190
1c79356b
A
9191 return KERN_SUCCESS;
9192}/* vm_map_copy_overwrite_unaligned */
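/*
 * Editor's note (illustrative restatement, not part of the original
 * source): each pass of the loop above copies
 *
 *	copy_size = MIN(amount_left,
 *			MIN(entry->vme_end - start,		      -- dst room
 *			    copy_entry->vme_end -
 *			    (copy_entry->vme_start + src_offset)))    -- src room
 *
 * so a single vm_fault_copy() call never crosses a destination map
 * entry or a source copy entry; the loop then advances whichever side
 * was exhausted and re-validates the destination via the map version.
 */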
9193
9194/*
91447636 9195 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
9196 *
9197 * Description:
9198 * Does all the vm_trickery possible for whole pages.
9199 *
9200 * Implementation:
9201 *
9202 * If there are no permanent objects in the destination,
9203 * and the source and destination map entry zones match,
9204 * and the destination map entry is not shared,
9205 * then the map entries can be deleted and replaced
9206 * with those from the copy. The following code is the
9207 * basic idea of what to do, but there are lots of annoying
9208 * little details about getting protection and inheritance
9209 * right. Should add protection, inheritance, and sharing checks
9210 * to the above pass and make sure that no wiring is involved.
9211 */
9212
e2d2fc5c
A
9213int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9214int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9215int vm_map_copy_overwrite_aligned_src_large = 0;
9216
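/*
 * Note (added annotation): the three counters above record how often the
 * optimized aligned-overwrite path below has to fall back to the slow
 * vm_fault_copy() path ("slow_copy"), and why: only a few pages being
 * copied out of a very large source object, a source backed by a
 * non-internal object, or a source object that does not use the default
 * symmetric copy strategy.
 */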
91447636 9217static kern_return_t
1c79356b
A
9218vm_map_copy_overwrite_aligned(
9219 vm_map_t dst_map,
9220 vm_map_entry_t tmp_entry,
9221 vm_map_copy_t copy,
91447636 9222 vm_map_offset_t start,
2d21ac55 9223 __unused pmap_t pmap)
1c79356b
A
9224{
9225 vm_object_t object;
9226 vm_map_entry_t copy_entry;
91447636
A
9227 vm_map_size_t copy_size;
9228 vm_map_size_t size;
1c79356b 9229 vm_map_entry_t entry;
5ba3f43e 9230
1c79356b 9231 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 9232 != vm_map_copy_to_entry(copy))
1c79356b
A
9233 {
9234 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5ba3f43e 9235
1c79356b 9236 entry = tmp_entry;
fe8ab488
A
9237 if (entry->is_sub_map) {
9238 /* unnested when clipped earlier */
9239 assert(!entry->use_pmap);
9240 }
1c79356b
A
9241 if (entry == vm_map_to_entry(dst_map)) {
9242 vm_map_unlock(dst_map);
9243 return KERN_INVALID_ADDRESS;
9244 }
9245 size = (entry->vme_end - entry->vme_start);
9246 /*
9247 * Make sure that no holes popped up in the
9248 * address map, and that the protection is
9249 * still valid, in case the map was unlocked
9250 * earlier.
9251 */
9252
9253 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 9254 && !entry->needs_copy)) {
1c79356b
A
9255 vm_map_unlock(dst_map);
9256 return(KERN_INVALID_ADDRESS);
9257 }
9258 assert(entry != vm_map_to_entry(dst_map));
9259
9260 /*
9261 * Check protection again
9262 */
9263
9264 if ( ! (entry->protection & VM_PROT_WRITE)) {
9265 vm_map_unlock(dst_map);
9266 return(KERN_PROTECTION_FAILURE);
9267 }
9268
9269 /*
9270 * Adjust to source size first
9271 */
9272
9273 if (copy_size < size) {
fe8ab488
A
9274 if (entry->map_aligned &&
9275 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9276 VM_MAP_PAGE_MASK(dst_map))) {
9277 /* no longer map-aligned */
9278 entry->map_aligned = FALSE;
9279 }
1c79356b
A
9280 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9281 size = copy_size;
9282 }
9283
9284 /*
9285 * Adjust to destination size
9286 */
9287
9288 if (size < copy_size) {
9289 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 9290 copy_entry->vme_start + size);
1c79356b
A
9291 copy_size = size;
9292 }
9293
9294 assert((entry->vme_end - entry->vme_start) == size);
9295 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9296 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9297
9298 /*
9299 * If the destination contains temporary unshared memory,
9300 * we can perform the copy by throwing it away and
9301 * installing the source data.
9302 */
9303
3e170ce0 9304 object = VME_OBJECT(entry);
5ba3f43e
A
9305 if ((!entry->is_shared &&
9306 ((object == VM_OBJECT_NULL) ||
2d21ac55 9307 (object->internal && !object->true_share))) ||
1c79356b 9308 entry->needs_copy) {
3e170ce0
A
9309 vm_object_t old_object = VME_OBJECT(entry);
9310 vm_object_offset_t old_offset = VME_OFFSET(entry);
1c79356b
A
9311 vm_object_offset_t offset;
9312
9313 /*
9314 * Ensure that the source and destination aren't
9315 * identical
9316 */
3e170ce0
A
9317 if (old_object == VME_OBJECT(copy_entry) &&
9318 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
9319 vm_map_copy_entry_unlink(copy, copy_entry);
9320 vm_map_copy_entry_dispose(copy, copy_entry);
9321
9322 if (old_object != VM_OBJECT_NULL)
9323 vm_object_deallocate(old_object);
9324
9325 start = tmp_entry->vme_end;
9326 tmp_entry = tmp_entry->vme_next;
9327 continue;
9328 }
9329
5ba3f43e 9330#if !CONFIG_EMBEDDED
e2d2fc5c
A
9331#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9332#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
9333 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9334 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
9335 copy_size <= __TRADEOFF1_COPY_SIZE) {
9336 /*
9337 * Virtual vs. Physical copy tradeoff #1.
9338 *
9339 * Copying only a few pages out of a large
9340 * object: do a physical copy instead of
9341 * a virtual copy, to avoid possibly keeping
9342 * the entire large object alive because of
9343 * those few copy-on-write pages.
9344 */
9345 vm_map_copy_overwrite_aligned_src_large++;
9346 goto slow_copy;
9347 }
5ba3f43e 9348#endif /* !CONFIG_EMBEDDED */
e2d2fc5c 9349
3e170ce0
A
9350 if ((dst_map->pmap != kernel_pmap) &&
9351 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9352 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
ebb1b9f4
A
9353 vm_object_t new_object, new_shadow;
9354
9355 /*
9356 * We're about to map something over a mapping
9357 * established by malloc()...
9358 */
3e170ce0 9359 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
9360 if (new_object != VM_OBJECT_NULL) {
9361 vm_object_lock_shared(new_object);
9362 }
9363 while (new_object != VM_OBJECT_NULL &&
5ba3f43e 9364#if !CONFIG_EMBEDDED
e2d2fc5c
A
9365 !new_object->true_share &&
9366 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
5ba3f43e 9367#endif /* !CONFIG_EMBEDDED */
ebb1b9f4
A
9368 new_object->internal) {
9369 new_shadow = new_object->shadow;
9370 if (new_shadow == VM_OBJECT_NULL) {
9371 break;
9372 }
9373 vm_object_lock_shared(new_shadow);
9374 vm_object_unlock(new_object);
9375 new_object = new_shadow;
9376 }
9377 if (new_object != VM_OBJECT_NULL) {
9378 if (!new_object->internal) {
9379 /*
9380 * The new mapping is backed
9381 * by an external object. We
9382 * don't want malloc'ed memory
9383 * to be replaced with such a
9384 * non-anonymous mapping, so
9385 * let's go off the optimized
9386 * path...
9387 */
e2d2fc5c 9388 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
9389 vm_object_unlock(new_object);
9390 goto slow_copy;
9391 }
5ba3f43e 9392#if !CONFIG_EMBEDDED
e2d2fc5c
A
9393 if (new_object->true_share ||
9394 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9395 /*
9396 * Same if there's a "true_share"
9397 * object in the shadow chain, or
9398 * an object with a non-default
9399 * (SYMMETRIC) copy strategy.
9400 */
9401 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9402 vm_object_unlock(new_object);
9403 goto slow_copy;
9404 }
5ba3f43e 9405#endif /* !CONFIG_EMBEDDED */
ebb1b9f4
A
9406 vm_object_unlock(new_object);
9407 }
9408 /*
9409 * The new mapping is still backed by
9410 * anonymous (internal) memory, so it's
9411 * OK to substitute it for the original
9412 * malloc() mapping.
9413 */
9414 }
9415
1c79356b
A
9416 if (old_object != VM_OBJECT_NULL) {
9417 if(entry->is_sub_map) {
9bccf70c 9418 if(entry->use_pmap) {
0c530ab8 9419#ifndef NO_NESTED_PMAP
5ba3f43e 9420 pmap_unnest(dst_map->pmap,
2d21ac55
A
9421 (addr64_t)entry->vme_start,
9422 entry->vme_end - entry->vme_start);
0c530ab8 9423#endif /* NO_NESTED_PMAP */
316670eb 9424 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
9425 /* clean up parent */
9426 /* map/maps */
2d21ac55
A
9427 vm_map_submap_pmap_clean(
9428 dst_map, entry->vme_start,
9429 entry->vme_end,
3e170ce0
A
9430 VME_SUBMAP(entry),
9431 VME_OFFSET(entry));
9bccf70c
A
9432 }
9433 } else {
9434 vm_map_submap_pmap_clean(
5ba3f43e 9435 dst_map, entry->vme_start,
9bccf70c 9436 entry->vme_end,
3e170ce0
A
9437 VME_SUBMAP(entry),
9438 VME_OFFSET(entry));
9bccf70c 9439 }
3e170ce0 9440 vm_map_deallocate(VME_SUBMAP(entry));
9bccf70c 9441 } else {
316670eb 9442 if(dst_map->mapped_in_other_pmaps) {
39236c6e 9443 vm_object_pmap_protect_options(
3e170ce0
A
9444 VME_OBJECT(entry),
9445 VME_OFFSET(entry),
5ba3f43e 9446 entry->vme_end
2d21ac55 9447 - entry->vme_start,
9bccf70c
A
9448 PMAP_NULL,
9449 entry->vme_start,
39236c6e
A
9450 VM_PROT_NONE,
9451 PMAP_OPTIONS_REMOVE);
9bccf70c 9452 } else {
39236c6e 9453 pmap_remove_options(
5ba3f43e
A
9454 dst_map->pmap,
9455 (addr64_t)(entry->vme_start),
39236c6e
A
9456 (addr64_t)(entry->vme_end),
9457 PMAP_OPTIONS_REMOVE);
9bccf70c 9458 }
1c79356b 9459 vm_object_deallocate(old_object);
9bccf70c 9460 }
1c79356b
A
9461 }
9462
9463 entry->is_sub_map = FALSE;
3e170ce0
A
9464 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9465 object = VME_OBJECT(entry);
1c79356b
A
9466 entry->needs_copy = copy_entry->needs_copy;
9467 entry->wired_count = 0;
9468 entry->user_wired_count = 0;
3e170ce0 9469 offset = VME_OFFSET(copy_entry);
5ba3f43e 9470 VME_OFFSET_SET(entry, offset);
1c79356b
A
9471
9472 vm_map_copy_entry_unlink(copy, copy_entry);
9473 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 9474
1c79356b 9475 /*
2d21ac55 9476 * We could try to push pages into the pmap at this point, BUT
1c79356b
A
 9477 * this optimization only saved on average 2 us per page if ALL
 9478 * the pages in the source were currently mapped
 9479 * and ALL the pages in the dest were touched. If fewer
 9480 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
2d21ac55 9481 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
1c79356b
A
9482 */
9483
1c79356b
A
9484 /*
9485 * Set up for the next iteration. The map
9486 * has not been unlocked, so the next
9487 * address should be at the end of this
9488 * entry, and the next map entry should be
9489 * the one following it.
9490 */
9491
9492 start = tmp_entry->vme_end;
9493 tmp_entry = tmp_entry->vme_next;
9494 } else {
9495 vm_map_version_t version;
ebb1b9f4
A
9496 vm_object_t dst_object;
9497 vm_object_offset_t dst_offset;
1c79356b
A
9498 kern_return_t r;
9499
ebb1b9f4 9500 slow_copy:
e2d2fc5c 9501 if (entry->needs_copy) {
3e170ce0
A
9502 VME_OBJECT_SHADOW(entry,
9503 (entry->vme_end -
9504 entry->vme_start));
e2d2fc5c
A
9505 entry->needs_copy = FALSE;
9506 }
9507
3e170ce0
A
9508 dst_object = VME_OBJECT(entry);
9509 dst_offset = VME_OFFSET(entry);
ebb1b9f4 9510
1c79356b
A
9511 /*
9512 * Take an object reference, and record
9513 * the map version information so that the
9514 * map can be safely unlocked.
9515 */
9516
ebb1b9f4
A
9517 if (dst_object == VM_OBJECT_NULL) {
9518 /*
9519 * We would usually have just taken the
9520 * optimized path above if the destination
9521 * object has not been allocated yet. But we
9522 * now disable that optimization if the copy
9523 * entry's object is not backed by anonymous
9524 * memory to avoid replacing malloc'ed
9525 * (i.e. re-usable) anonymous memory with a
9526 * not-so-anonymous mapping.
9527 * So we have to handle this case here and
9528 * allocate a new VM object for this map entry.
9529 */
9530 dst_object = vm_object_allocate(
9531 entry->vme_end - entry->vme_start);
9532 dst_offset = 0;
3e170ce0
A
9533 VME_OBJECT_SET(entry, dst_object);
9534 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 9535 assert(entry->use_pmap);
5ba3f43e 9536
ebb1b9f4
A
9537 }
9538
1c79356b
A
9539 vm_object_reference(dst_object);
9540
9bccf70c
A
9541 /* account for unlock bumping up timestamp */
9542 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
9543
9544 vm_map_unlock(dst_map);
9545
9546 /*
9547 * Copy as much as possible in one pass
9548 */
9549
9550 copy_size = size;
9551 r = vm_fault_copy(
3e170ce0
A
9552 VME_OBJECT(copy_entry),
9553 VME_OFFSET(copy_entry),
2d21ac55
A
9554 &copy_size,
9555 dst_object,
9556 dst_offset,
9557 dst_map,
9558 &version,
9559 THREAD_UNINT );
1c79356b
A
9560
9561 /*
9562 * Release the object reference
9563 */
9564
9565 vm_object_deallocate(dst_object);
9566
9567 /*
9568 * If a hard error occurred, return it now
9569 */
9570
9571 if (r != KERN_SUCCESS)
9572 return(r);
9573
9574 if (copy_size != 0) {
9575 /*
9576 * Dispose of the copied region
9577 */
9578
9579 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 9580 copy_entry->vme_start + copy_size);
1c79356b 9581 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 9582 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
9583 vm_map_copy_entry_dispose(copy, copy_entry);
9584 }
9585
9586 /*
9587 * Pick up in the destination map where we left off.
9588 *
9589 * Use the version information to avoid a lookup
9590 * in the normal case.
9591 */
9592
9593 start += copy_size;
9594 vm_map_lock(dst_map);
e2d2fc5c
A
9595 if (version.main_timestamp == dst_map->timestamp &&
9596 copy_size != 0) {
1c79356b
A
9597 /* We can safely use saved tmp_entry value */
9598
fe8ab488
A
9599 if (tmp_entry->map_aligned &&
9600 !VM_MAP_PAGE_ALIGNED(
9601 start,
9602 VM_MAP_PAGE_MASK(dst_map))) {
9603 /* no longer map-aligned */
9604 tmp_entry->map_aligned = FALSE;
9605 }
1c79356b
A
9606 vm_map_clip_end(dst_map, tmp_entry, start);
9607 tmp_entry = tmp_entry->vme_next;
9608 } else {
9609 /* Must do lookup of tmp_entry */
9610
9611 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
9612 vm_map_unlock(dst_map);
9613 return(KERN_INVALID_ADDRESS);
9614 }
fe8ab488
A
9615 if (tmp_entry->map_aligned &&
9616 !VM_MAP_PAGE_ALIGNED(
9617 start,
9618 VM_MAP_PAGE_MASK(dst_map))) {
9619 /* no longer map-aligned */
9620 tmp_entry->map_aligned = FALSE;
9621 }
1c79356b
A
9622 vm_map_clip_start(dst_map, tmp_entry, start);
9623 }
9624 }
9625 }/* while */
9626
9627 return(KERN_SUCCESS);
9628}/* vm_map_copy_overwrite_aligned */
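/*
 * Summary (added annotation): for each copy entry, the routine above either
 * transplants the copy entry's VM object into the destination entry (the
 * fast path, taken when the destination is temporary, unshared, anonymous
 * memory) or falls back to a physical copy through vm_fault_copy() (the
 * "slow_copy" path).
 */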
9629
9630/*
91447636 9631 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
9632 *
9633 * Description:
9634 * Copy in data to a kernel buffer from space in the
91447636 9635 * source map. The original space may be optionally
1c79356b
A
9636 * deallocated.
9637 *
9638 * If successful, returns a new copy object.
9639 */
91447636 9640static kern_return_t
1c79356b
A
9641vm_map_copyin_kernel_buffer(
9642 vm_map_t src_map,
91447636
A
9643 vm_map_offset_t src_addr,
9644 vm_map_size_t len,
1c79356b
A
9645 boolean_t src_destroy,
9646 vm_map_copy_t *copy_result)
9647{
91447636 9648 kern_return_t kr;
1c79356b 9649 vm_map_copy_t copy;
b0d623f7
A
9650 vm_size_t kalloc_size;
9651
3e170ce0
A
9652 if (len > msg_ool_size_small)
9653 return KERN_INVALID_ARGUMENT;
1c79356b 9654
3e170ce0
A
9655 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
9656
9657 copy = (vm_map_copy_t)kalloc(kalloc_size);
9658 if (copy == VM_MAP_COPY_NULL)
1c79356b 9659 return KERN_RESOURCE_SHORTAGE;
1c79356b
A
9660 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
9661 copy->size = len;
9662 copy->offset = 0;
1c79356b 9663
3e170ce0 9664 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
9665 if (kr != KERN_SUCCESS) {
9666 kfree(copy, kalloc_size);
9667 return kr;
1c79356b
A
9668 }
9669 if (src_destroy) {
39236c6e
A
9670 (void) vm_map_remove(
9671 src_map,
9672 vm_map_trunc_page(src_addr,
5ba3f43e 9673 VM_MAP_PAGE_MASK(src_map)),
39236c6e
A
9674 vm_map_round_page(src_addr + len,
9675 VM_MAP_PAGE_MASK(src_map)),
9676 (VM_MAP_REMOVE_INTERRUPTIBLE |
9677 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
39037602 9678 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
1c79356b
A
9679 }
9680 *copy_result = copy;
9681 return KERN_SUCCESS;
9682}
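/*
 * Note (added annotation): this kernel-buffer variant is chosen by
 * vm_map_copyin_common()/vm_map_copyin_internal() below for small copies
 * (len < msg_ool_size_small), where copying the data into a kalloc'ed
 * buffer is cheaper than setting up a virtual copy and taking
 * copy-on-write faults.
 */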
9683
9684/*
91447636 9685 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
9686 *
9687 * Description:
9688 * Copy out data from a kernel buffer into space in the
 9689 * destination map. The space may be optionally dynamically
9690 * allocated.
9691 *
9692 * If successful, consumes the copy object.
9693 * Otherwise, the caller is responsible for it.
9694 */
91447636
A
9695static int vm_map_copyout_kernel_buffer_failures = 0;
9696static kern_return_t
1c79356b 9697vm_map_copyout_kernel_buffer(
91447636
A
9698 vm_map_t map,
9699 vm_map_address_t *addr, /* IN/OUT */
9700 vm_map_copy_t copy,
39037602 9701 vm_map_size_t copy_size,
39236c6e
A
9702 boolean_t overwrite,
9703 boolean_t consume_on_success)
1c79356b
A
9704{
9705 kern_return_t kr = KERN_SUCCESS;
91447636 9706 thread_t thread = current_thread();
1c79356b 9707
39037602
A
9708 assert(copy->size == copy_size);
9709
3e170ce0
A
9710 /*
9711 * check for corrupted vm_map_copy structure
9712 */
39037602 9713 if (copy_size > msg_ool_size_small || copy->offset)
3e170ce0
A
9714 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
9715 (long long)copy->size, (long long)copy->offset);
9716
1c79356b
A
9717 if (!overwrite) {
9718
9719 /*
9720 * Allocate space in the target map for the data
9721 */
9722 *addr = 0;
5ba3f43e
A
9723 kr = vm_map_enter(map,
9724 addr,
39037602 9725 vm_map_round_page(copy_size,
39236c6e 9726 VM_MAP_PAGE_MASK(map)),
5ba3f43e 9727 (vm_map_offset_t) 0,
91447636 9728 VM_FLAGS_ANYWHERE,
5ba3f43e
A
9729 VM_MAP_KERNEL_FLAGS_NONE,
9730 VM_KERN_MEMORY_NONE,
9731 VM_OBJECT_NULL,
9732 (vm_object_offset_t) 0,
1c79356b 9733 FALSE,
5ba3f43e 9734 VM_PROT_DEFAULT,
1c79356b
A
9735 VM_PROT_ALL,
9736 VM_INHERIT_DEFAULT);
9737 if (kr != KERN_SUCCESS)
91447636 9738 return kr;
5ba3f43e
A
9739#if KASAN
9740 if (map->pmap == kernel_pmap) {
9741 kasan_notify_address(*addr, copy->size);
9742 }
9743#endif
1c79356b
A
9744 }
9745
9746 /*
9747 * Copyout the data from the kernel buffer to the target map.
5ba3f43e 9748 */
91447636 9749 if (thread->map == map) {
5ba3f43e 9750
1c79356b
A
9751 /*
9752 * If the target map is the current map, just do
9753 * the copy.
9754 */
39037602
A
9755 assert((vm_size_t)copy_size == copy_size);
9756 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 9757 kr = KERN_INVALID_ADDRESS;
1c79356b
A
9758 }
9759 }
9760 else {
9761 vm_map_t oldmap;
9762
9763 /*
9764 * If the target map is another map, assume the
9765 * target's address space identity for the duration
9766 * of the copy.
9767 */
9768 vm_map_reference(map);
9769 oldmap = vm_map_switch(map);
9770
39037602
A
9771 assert((vm_size_t)copy_size == copy_size);
9772 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
9773 vm_map_copyout_kernel_buffer_failures++;
9774 kr = KERN_INVALID_ADDRESS;
1c79356b 9775 }
5ba3f43e 9776
1c79356b
A
9777 (void) vm_map_switch(oldmap);
9778 vm_map_deallocate(map);
9779 }
9780
91447636
A
9781 if (kr != KERN_SUCCESS) {
9782 /* the copy failed, clean up */
9783 if (!overwrite) {
9784 /*
9785 * Deallocate the space we allocated in the target map.
9786 */
39236c6e
A
9787 (void) vm_map_remove(
9788 map,
9789 vm_map_trunc_page(*addr,
9790 VM_MAP_PAGE_MASK(map)),
9791 vm_map_round_page((*addr +
39037602 9792 vm_map_round_page(copy_size,
39236c6e
A
9793 VM_MAP_PAGE_MASK(map))),
9794 VM_MAP_PAGE_MASK(map)),
9795 VM_MAP_NO_FLAGS);
91447636
A
9796 *addr = 0;
9797 }
9798 } else {
 9799 /* copy was successful, discard the copy structure */
39236c6e 9800 if (consume_on_success) {
39037602 9801 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 9802 }
91447636 9803 }
1c79356b 9804
91447636 9805 return kr;
1c79356b 9806}
5ba3f43e 9807
1c79356b
A
9808/*
9809 * Macro: vm_map_copy_insert
5ba3f43e 9810 *
1c79356b
A
9811 * Description:
9812 * Link a copy chain ("copy") into a map at the
9813 * specified location (after "where").
9814 * Side effects:
9815 * The copy chain is destroyed.
9816 * Warning:
9817 * The arguments are evaluated multiple times.
9818 */
9819#define vm_map_copy_insert(map, where, copy) \
9820MACRO_BEGIN \
6d2010ae
A
9821 vm_map_store_copy_insert(map, where, copy); \
9822 zfree(vm_map_copy_zone, copy); \
1c79356b
A
9823MACRO_END
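/*
 * Note (added annotation): the macro above transplants all of the copy's
 * entries into the destination map (after "where") and then frees the
 * now entry-less copy header back to vm_map_copy_zone.
 */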
9824
39236c6e
A
9825void
9826vm_map_copy_remap(
9827 vm_map_t map,
9828 vm_map_entry_t where,
9829 vm_map_copy_t copy,
9830 vm_map_offset_t adjustment,
9831 vm_prot_t cur_prot,
9832 vm_prot_t max_prot,
9833 vm_inherit_t inheritance)
9834{
9835 vm_map_entry_t copy_entry, new_entry;
9836
9837 for (copy_entry = vm_map_copy_first_entry(copy);
9838 copy_entry != vm_map_copy_to_entry(copy);
9839 copy_entry = copy_entry->vme_next) {
9840 /* get a new VM map entry for the map */
9841 new_entry = vm_map_entry_create(map,
9842 !map->hdr.entries_pageable);
9843 /* copy the "copy entry" to the new entry */
9844 vm_map_entry_copy(new_entry, copy_entry);
9845 /* adjust "start" and "end" */
9846 new_entry->vme_start += adjustment;
9847 new_entry->vme_end += adjustment;
9848 /* clear some attributes */
9849 new_entry->inheritance = inheritance;
9850 new_entry->protection = cur_prot;
9851 new_entry->max_protection = max_prot;
9852 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
9853 /* take an extra reference on the entry's "object" */
9854 if (new_entry->is_sub_map) {
fe8ab488 9855 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
9856 vm_map_lock(VME_SUBMAP(new_entry));
9857 vm_map_reference(VME_SUBMAP(new_entry));
9858 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 9859 } else {
3e170ce0 9860 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
9861 }
9862 /* insert the new entry in the map */
9863 vm_map_store_entry_link(map, where, new_entry);
9864 /* continue inserting the "copy entries" after the new entry */
9865 where = new_entry;
9866 }
9867}
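/*
 * Note (added annotation): vm_map_copy_remap() above is used by
 * vm_map_copyout_internal() below when the copy object must not be
 * consumed (consume_on_success == FALSE): instead of transplanting the
 * copy's entries into the destination map, it duplicates them and takes
 * extra references on their objects/submaps, leaving "copy" intact.
 */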
9868
2dced7af 9869
39037602
A
9870/*
9871 * Returns true if *size matches (or is in the range of) copy->size.
9872 * Upon returning true, the *size field is updated with the actual size of the
9873 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
9874 */
2dced7af
A
9875boolean_t
9876vm_map_copy_validate_size(
9877 vm_map_t dst_map,
9878 vm_map_copy_t copy,
39037602 9879 vm_map_size_t *size)
2dced7af
A
9880{
9881 if (copy == VM_MAP_COPY_NULL)
9882 return FALSE;
39037602
A
9883 vm_map_size_t copy_sz = copy->size;
9884 vm_map_size_t sz = *size;
2dced7af
A
9885 switch (copy->type) {
9886 case VM_MAP_COPY_OBJECT:
9887 case VM_MAP_COPY_KERNEL_BUFFER:
39037602 9888 if (sz == copy_sz)
2dced7af
A
9889 return TRUE;
9890 break;
9891 case VM_MAP_COPY_ENTRY_LIST:
9892 /*
9893 * potential page-size rounding prevents us from exactly
9894 * validating this flavor of vm_map_copy, but we can at least
9895 * assert that it's within a range.
9896 */
39037602
A
9897 if (copy_sz >= sz &&
9898 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
9899 *size = copy_sz;
2dced7af 9900 return TRUE;
39037602 9901 }
2dced7af
A
9902 break;
9903 default:
9904 break;
9905 }
9906 return FALSE;
9907}
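/*
 * Illustrative caller pattern (sketch only, not part of the original file;
 * "expected_size", "addr" and "kr" are hypothetical caller variables):
 *
 *	vm_map_size_t size = expected_size;
 *	if (vm_map_copy_validate_size(dst_map, copy, &size))
 *		kr = vm_map_copyout_size(dst_map, &addr, copy, size);
 */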
9908
39037602
A
9909/*
9910 * Routine: vm_map_copyout_size
9911 *
9912 * Description:
9913 * Copy out a copy chain ("copy") into newly-allocated
9914 * space in the destination map. Uses a prevalidated
9915 * size for the copy object (vm_map_copy_validate_size).
9916 *
9917 * If successful, consumes the copy object.
9918 * Otherwise, the caller is responsible for it.
9919 */
9920kern_return_t
9921vm_map_copyout_size(
9922 vm_map_t dst_map,
9923 vm_map_address_t *dst_addr, /* OUT */
9924 vm_map_copy_t copy,
9925 vm_map_size_t copy_size)
9926{
9927 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
9928 TRUE, /* consume_on_success */
9929 VM_PROT_DEFAULT,
9930 VM_PROT_ALL,
9931 VM_INHERIT_DEFAULT);
9932}
2dced7af 9933
1c79356b
A
9934/*
9935 * Routine: vm_map_copyout
9936 *
9937 * Description:
9938 * Copy out a copy chain ("copy") into newly-allocated
9939 * space in the destination map.
9940 *
9941 * If successful, consumes the copy object.
9942 * Otherwise, the caller is responsible for it.
9943 */
9944kern_return_t
9945vm_map_copyout(
91447636
A
9946 vm_map_t dst_map,
9947 vm_map_address_t *dst_addr, /* OUT */
9948 vm_map_copy_t copy)
39236c6e 9949{
39037602
A
9950 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
9951 TRUE, /* consume_on_success */
9952 VM_PROT_DEFAULT,
9953 VM_PROT_ALL,
9954 VM_INHERIT_DEFAULT);
39236c6e
A
9955}
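/*
 * Typical usage (illustrative sketch only, not part of the original file):
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *
 * On success, "copy" is consumed by vm_map_copyout(); on failure the
 * caller still owns it and should destroy it with vm_map_copy_discard().
 */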
9956
9957kern_return_t
9958vm_map_copyout_internal(
9959 vm_map_t dst_map,
9960 vm_map_address_t *dst_addr, /* OUT */
9961 vm_map_copy_t copy,
39037602 9962 vm_map_size_t copy_size,
39236c6e
A
9963 boolean_t consume_on_success,
9964 vm_prot_t cur_protection,
9965 vm_prot_t max_protection,
9966 vm_inherit_t inheritance)
1c79356b 9967{
91447636
A
9968 vm_map_size_t size;
9969 vm_map_size_t adjustment;
9970 vm_map_offset_t start;
1c79356b
A
9971 vm_object_offset_t vm_copy_start;
9972 vm_map_entry_t last;
1c79356b 9973 vm_map_entry_t entry;
3e170ce0 9974 vm_map_entry_t hole_entry;
1c79356b
A
9975
9976 /*
9977 * Check for null copy object.
9978 */
9979
9980 if (copy == VM_MAP_COPY_NULL) {
9981 *dst_addr = 0;
9982 return(KERN_SUCCESS);
9983 }
9984
39037602
A
9985 if (copy->size != copy_size) {
9986 *dst_addr = 0;
9987 return KERN_FAILURE;
9988 }
9989
1c79356b
A
9990 /*
9991 * Check for special copy object, created
9992 * by vm_map_copyin_object.
9993 */
9994
9995 if (copy->type == VM_MAP_COPY_OBJECT) {
9996 vm_object_t object = copy->cpy_object;
9997 kern_return_t kr;
9998 vm_object_offset_t offset;
9999
91447636 10000 offset = vm_object_trunc_page(copy->offset);
39037602 10001 size = vm_map_round_page((copy_size +
39236c6e
A
10002 (vm_map_size_t)(copy->offset -
10003 offset)),
10004 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
10005 *dst_addr = 0;
10006 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 10007 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
5ba3f43e
A
10008 VM_MAP_KERNEL_FLAGS_NONE,
10009 VM_KERN_MEMORY_NONE,
1c79356b
A
10010 object, offset, FALSE,
10011 VM_PROT_DEFAULT, VM_PROT_ALL,
10012 VM_INHERIT_DEFAULT);
10013 if (kr != KERN_SUCCESS)
10014 return(kr);
 10015 /* Account for non-page-aligned copy object */
91447636 10016 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
10017 if (consume_on_success)
10018 zfree(vm_map_copy_zone, copy);
1c79356b
A
10019 return(KERN_SUCCESS);
10020 }
10021
10022 /*
10023 * Check for special kernel buffer allocated
10024 * by new_ipc_kmsg_copyin.
10025 */
10026
10027 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602
A
10028 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10029 copy, copy_size, FALSE,
39236c6e 10030 consume_on_success);
1c79356b
A
10031 }
10032
39236c6e 10033
1c79356b
A
10034 /*
10035 * Find space for the data
10036 */
10037
39236c6e
A
10038 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10039 VM_MAP_COPY_PAGE_MASK(copy));
39037602 10040 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
39236c6e 10041 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 10042 - vm_copy_start;
1c79356b 10043
39236c6e 10044
2d21ac55 10045StartAgain: ;
1c79356b
A
10046
10047 vm_map_lock(dst_map);
6d2010ae
A
10048 if( dst_map->disable_vmentry_reuse == TRUE) {
10049 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10050 last = entry;
10051 } else {
3e170ce0
A
10052 if (dst_map->holelistenabled) {
10053 hole_entry = (vm_map_entry_t)dst_map->holes_list;
10054
10055 if (hole_entry == NULL) {
10056 /*
10057 * No more space in the map?
10058 */
10059 vm_map_unlock(dst_map);
10060 return(KERN_NO_SPACE);
10061 }
10062
10063 last = hole_entry;
10064 start = last->vme_start;
10065 } else {
10066 assert(first_free_is_valid(dst_map));
10067 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10068 vm_map_min(dst_map) : last->vme_end;
10069 }
39236c6e
A
10070 start = vm_map_round_page(start,
10071 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 10072 }
1c79356b
A
10073
10074 while (TRUE) {
10075 vm_map_entry_t next = last->vme_next;
91447636 10076 vm_map_offset_t end = start + size;
1c79356b
A
10077
10078 if ((end > dst_map->max_offset) || (end < start)) {
10079 if (dst_map->wait_for_space) {
10080 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10081 assert_wait((event_t) dst_map,
10082 THREAD_INTERRUPTIBLE);
10083 vm_map_unlock(dst_map);
91447636 10084 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
10085 goto StartAgain;
10086 }
10087 }
10088 vm_map_unlock(dst_map);
10089 return(KERN_NO_SPACE);
10090 }
10091
3e170ce0
A
10092 if (dst_map->holelistenabled) {
10093 if (last->vme_end >= end)
10094 break;
10095 } else {
10096 /*
10097 * If there are no more entries, we must win.
10098 *
10099 * OR
10100 *
10101 * If there is another entry, it must be
10102 * after the end of the potential new region.
10103 */
10104
10105 if (next == vm_map_to_entry(dst_map))
10106 break;
10107
10108 if (next->vme_start >= end)
10109 break;
10110 }
1c79356b
A
10111
10112 last = next;
3e170ce0
A
10113
10114 if (dst_map->holelistenabled) {
10115 if (last == (vm_map_entry_t) dst_map->holes_list) {
10116 /*
10117 * Wrapped around
10118 */
10119 vm_map_unlock(dst_map);
10120 return(KERN_NO_SPACE);
10121 }
10122 start = last->vme_start;
10123 } else {
10124 start = last->vme_end;
10125 }
39236c6e
A
10126 start = vm_map_round_page(start,
10127 VM_MAP_PAGE_MASK(dst_map));
10128 }
10129
3e170ce0
A
10130 if (dst_map->holelistenabled) {
10131 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10132 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10133 }
10134 }
10135
10136
39236c6e
A
10137 adjustment = start - vm_copy_start;
10138 if (! consume_on_success) {
10139 /*
10140 * We're not allowed to consume "copy", so we'll have to
10141 * copy its map entries into the destination map below.
10142 * No need to re-allocate map entries from the correct
10143 * (pageable or not) zone, since we'll get new map entries
10144 * during the transfer.
 10145 * We'll also adjust the map entries' "start" and "end"
10146 * during the transfer, to keep "copy"'s entries consistent
10147 * with its "offset".
10148 */
10149 goto after_adjustments;
1c79356b
A
10150 }
10151
10152 /*
10153 * Since we're going to just drop the map
10154 * entries from the copy into the destination
10155 * map, they must come from the same pool.
10156 */
10157
10158 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
10159 /*
10160 * Mismatches occur when dealing with the default
10161 * pager.
10162 */
10163 zone_t old_zone;
10164 vm_map_entry_t next, new;
10165
10166 /*
10167 * Find the zone that the copies were allocated from
10168 */
7ddcb079 10169
2d21ac55
A
10170 entry = vm_map_copy_first_entry(copy);
10171
10172 /*
10173 * Reinitialize the copy so that vm_map_copy_entry_link
10174 * will work.
10175 */
6d2010ae 10176 vm_map_store_copy_reset(copy, entry);
2d21ac55 10177 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
10178
10179 /*
10180 * Copy each entry.
10181 */
10182 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 10183 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 10184 vm_map_entry_copy_full(new, entry);
fe8ab488
A
10185 assert(!new->iokit_acct);
10186 if (new->is_sub_map) {
10187 /* clr address space specifics */
10188 new->use_pmap = FALSE;
10189 }
2d21ac55
A
10190 vm_map_copy_entry_link(copy,
10191 vm_map_copy_last_entry(copy),
10192 new);
10193 next = entry->vme_next;
7ddcb079 10194 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
10195 zfree(old_zone, entry);
10196 entry = next;
10197 }
1c79356b
A
10198 }
10199
10200 /*
10201 * Adjust the addresses in the copy chain, and
10202 * reset the region attributes.
10203 */
10204
1c79356b
A
10205 for (entry = vm_map_copy_first_entry(copy);
10206 entry != vm_map_copy_to_entry(copy);
10207 entry = entry->vme_next) {
39236c6e
A
10208 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10209 /*
10210 * We're injecting this copy entry into a map that
10211 * has the standard page alignment, so clear
10212 * "map_aligned" (which might have been inherited
10213 * from the original map entry).
10214 */
10215 entry->map_aligned = FALSE;
10216 }
10217
1c79356b
A
10218 entry->vme_start += adjustment;
10219 entry->vme_end += adjustment;
10220
39236c6e
A
10221 if (entry->map_aligned) {
10222 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10223 VM_MAP_PAGE_MASK(dst_map)));
10224 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10225 VM_MAP_PAGE_MASK(dst_map)));
10226 }
10227
1c79356b
A
10228 entry->inheritance = VM_INHERIT_DEFAULT;
10229 entry->protection = VM_PROT_DEFAULT;
10230 entry->max_protection = VM_PROT_ALL;
10231 entry->behavior = VM_BEHAVIOR_DEFAULT;
10232
10233 /*
10234 * If the entry is now wired,
10235 * map the pages into the destination map.
10236 */
10237 if (entry->wired_count != 0) {
39037602 10238 vm_map_offset_t va;
2d21ac55 10239 vm_object_offset_t offset;
39037602 10240 vm_object_t object;
2d21ac55
A
10241 vm_prot_t prot;
10242 int type_of_fault;
1c79356b 10243
3e170ce0
A
10244 object = VME_OBJECT(entry);
10245 offset = VME_OFFSET(entry);
2d21ac55 10246 va = entry->vme_start;
1c79356b 10247
2d21ac55
A
10248 pmap_pageable(dst_map->pmap,
10249 entry->vme_start,
10250 entry->vme_end,
10251 TRUE);
1c79356b 10252
2d21ac55 10253 while (va < entry->vme_end) {
39037602 10254 vm_page_t m;
1c79356b 10255
2d21ac55
A
10256 /*
10257 * Look up the page in the object.
10258 * Assert that the page will be found in the
10259 * top object:
10260 * either
10261 * the object was newly created by
10262 * vm_object_copy_slowly, and has
10263 * copies of all of the pages from
10264 * the source object
10265 * or
10266 * the object was moved from the old
10267 * map entry; because the old map
10268 * entry was wired, all of the pages
10269 * were in the top-level object.
10270 * (XXX not true if we wire pages for
10271 * reading)
10272 */
10273 vm_object_lock(object);
91447636 10274
2d21ac55 10275 m = vm_page_lookup(object, offset);
b0d623f7 10276 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
10277 m->absent)
10278 panic("vm_map_copyout: wiring %p", m);
1c79356b 10279
2d21ac55 10280 prot = entry->protection;
1c79356b 10281
3e170ce0
A
10282 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10283 prot)
2d21ac55 10284 prot |= VM_PROT_EXECUTE;
1c79356b 10285
2d21ac55 10286 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 10287
6d2010ae 10288 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
5ba3f43e
A
10289 VM_PAGE_WIRED(m),
10290 FALSE, /* change_wiring */
10291 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10292 FALSE, /* no_cache */
10293 FALSE, /* cs_bypass */
10294 VME_ALIAS(entry),
10295 ((entry->iokit_acct ||
10296 (!entry->is_sub_map &&
10297 !entry->use_pmap))
10298 ? PMAP_OPTIONS_ALT_ACCT
10299 : 0), /* pmap_options */
10300 NULL, /* need_retry */
10301 &type_of_fault);
1c79356b 10302
2d21ac55 10303 vm_object_unlock(object);
1c79356b 10304
2d21ac55
A
10305 offset += PAGE_SIZE_64;
10306 va += PAGE_SIZE;
1c79356b
A
10307 }
10308 }
10309 }
10310
39236c6e
A
10311after_adjustments:
10312
1c79356b
A
10313 /*
10314 * Correct the page alignment for the result
10315 */
10316
10317 *dst_addr = start + (copy->offset - vm_copy_start);
10318
5ba3f43e
A
10319#if KASAN
10320 kasan_notify_address(*dst_addr, size);
10321#endif
10322
1c79356b
A
10323 /*
10324 * Update the hints and the map size
10325 */
10326
39236c6e
A
10327 if (consume_on_success) {
10328 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10329 } else {
10330 SAVE_HINT_MAP_WRITE(dst_map, last);
10331 }
1c79356b
A
10332
10333 dst_map->size += size;
10334
10335 /*
10336 * Link in the copy
10337 */
10338
39236c6e
A
10339 if (consume_on_success) {
10340 vm_map_copy_insert(dst_map, last, copy);
10341 } else {
10342 vm_map_copy_remap(dst_map, last, copy, adjustment,
10343 cur_protection, max_protection,
10344 inheritance);
10345 }
1c79356b
A
10346
10347 vm_map_unlock(dst_map);
10348
10349 /*
10350 * XXX If wiring_required, call vm_map_pageable
10351 */
10352
10353 return(KERN_SUCCESS);
10354}
10355
1c79356b
A
10356/*
10357 * Routine: vm_map_copyin
10358 *
10359 * Description:
2d21ac55
A
10360 * see vm_map_copyin_common. Exported via Unsupported.exports.
10361 *
10362 */
10363
10364#undef vm_map_copyin
10365
10366kern_return_t
10367vm_map_copyin(
10368 vm_map_t src_map,
10369 vm_map_address_t src_addr,
10370 vm_map_size_t len,
10371 boolean_t src_destroy,
10372 vm_map_copy_t *copy_result) /* OUT */
10373{
10374 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10375 FALSE, copy_result, FALSE));
10376}
10377
10378/*
10379 * Routine: vm_map_copyin_common
10380 *
10381 * Description:
1c79356b
A
10382 * Copy the specified region (src_addr, len) from the
10383 * source address space (src_map), possibly removing
10384 * the region from the source address space (src_destroy).
10385 *
10386 * Returns:
10387 * A vm_map_copy_t object (copy_result), suitable for
10388 * insertion into another address space (using vm_map_copyout),
10389 * copying over another address space region (using
10390 * vm_map_copy_overwrite). If the copy is unused, it
10391 * should be destroyed (using vm_map_copy_discard).
10392 *
10393 * In/out conditions:
10394 * The source map should not be locked on entry.
10395 */
10396
10397typedef struct submap_map {
10398 vm_map_t parent_map;
91447636
A
10399 vm_map_offset_t base_start;
10400 vm_map_offset_t base_end;
2d21ac55 10401 vm_map_size_t base_len;
1c79356b
A
10402 struct submap_map *next;
10403} submap_map_t;
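/*
 * Note (added annotation): vm_map_copyin_common()/vm_map_copyin_internal()
 * below use a chain of submap_map_t records as a stack of parent maps:
 * each time the copy descends into a submap, the parent map and the base
 * range being copied are pushed onto "parent_maps", and they are popped
 * again once copying reaches the end of that submap's range.
 */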
10404
10405kern_return_t
10406vm_map_copyin_common(
10407 vm_map_t src_map,
91447636
A
10408 vm_map_address_t src_addr,
10409 vm_map_size_t len,
1c79356b 10410 boolean_t src_destroy,
91447636 10411 __unused boolean_t src_volatile,
1c79356b
A
10412 vm_map_copy_t *copy_result, /* OUT */
10413 boolean_t use_maxprot)
4bd07ac2
A
10414{
10415 int flags;
10416
10417 flags = 0;
10418 if (src_destroy) {
10419 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10420 }
10421 if (use_maxprot) {
10422 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10423 }
10424 return vm_map_copyin_internal(src_map,
10425 src_addr,
10426 len,
10427 flags,
10428 copy_result);
10429}
10430kern_return_t
10431vm_map_copyin_internal(
10432 vm_map_t src_map,
10433 vm_map_address_t src_addr,
10434 vm_map_size_t len,
10435 int flags,
10436 vm_map_copy_t *copy_result) /* OUT */
1c79356b 10437{
1c79356b
A
10438 vm_map_entry_t tmp_entry; /* Result of last map lookup --
10439 * in multi-level lookup, this
10440 * entry contains the actual
10441 * vm_object/offset.
10442 */
1c79356b
A
10443 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
10444
91447636 10445 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
10446 * where copy is taking place now
10447 */
91447636 10448 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 10449 * copied */
2d21ac55 10450 vm_map_offset_t src_base;
91447636 10451 vm_map_t base_map = src_map;
1c79356b
A
10452 boolean_t map_share=FALSE;
10453 submap_map_t *parent_maps = NULL;
10454
1c79356b 10455 vm_map_copy_t copy; /* Resulting copy */
fe8ab488
A
10456 vm_map_address_t copy_addr;
10457 vm_map_size_t copy_size;
4bd07ac2
A
10458 boolean_t src_destroy;
10459 boolean_t use_maxprot;
39037602 10460 boolean_t preserve_purgeable;
5ba3f43e
A
10461 boolean_t entry_was_shared;
10462 vm_map_entry_t saved_src_entry;
4bd07ac2
A
10463
10464 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10465 return KERN_INVALID_ARGUMENT;
10466 }
5ba3f43e 10467
4bd07ac2
A
10468 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10469 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602
A
10470 preserve_purgeable =
10471 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
10472
10473 /*
10474 * Check for copies of zero bytes.
10475 */
10476
10477 if (len == 0) {
10478 *copy_result = VM_MAP_COPY_NULL;
10479 return(KERN_SUCCESS);
10480 }
10481
4a249263
A
10482 /*
10483 * Check that the end address doesn't overflow
10484 */
10485 src_end = src_addr + len;
10486 if (src_end < src_addr)
10487 return KERN_INVALID_ADDRESS;
10488
39037602
A
10489 /*
10490 * Compute (page aligned) start and end of region
10491 */
10492 src_start = vm_map_trunc_page(src_addr,
10493 VM_MAP_PAGE_MASK(src_map));
10494 src_end = vm_map_round_page(src_end,
10495 VM_MAP_PAGE_MASK(src_map));
10496
1c79356b
A
10497 /*
10498 * If the copy is sufficiently small, use a kernel buffer instead
10499 * of making a virtual copy. The theory being that the cost of
10500 * setting up VM (and taking C-O-W faults) dominates the copy costs
10501 * for small regions.
10502 */
4bd07ac2
A
10503 if ((len < msg_ool_size_small) &&
10504 !use_maxprot &&
39037602
A
10505 !preserve_purgeable &&
10506 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10507 /*
10508 * Since the "msg_ool_size_small" threshold was increased and
10509 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10510 * address space limits, we revert to doing a virtual copy if the
10511 * copied range goes beyond those limits. Otherwise, mach_vm_read()
10512 * of the commpage would now fail when it used to work.
10513 */
10514 (src_start >= vm_map_min(src_map) &&
10515 src_start < vm_map_max(src_map) &&
10516 src_end >= vm_map_min(src_map) &&
10517 src_end < vm_map_max(src_map)))
2d21ac55
A
10518 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10519 src_destroy, copy_result);
1c79356b 10520
b0d623f7 10521 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 10522
1c79356b
A
10523 /*
10524 * Allocate a header element for the list.
10525 *
5ba3f43e 10526 * Use the start and end in the header to
1c79356b
A
10527 * remember the endpoints prior to rounding.
10528 */
10529
10530 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10531 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 10532 vm_map_copy_first_entry(copy) =
2d21ac55 10533 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
10534 copy->type = VM_MAP_COPY_ENTRY_LIST;
10535 copy->cpy_hdr.nentries = 0;
10536 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
10537#if 00
10538 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10539#else
10540 /*
10541 * The copy entries can be broken down for a variety of reasons,
10542 * so we can't guarantee that they will remain map-aligned...
10543 * Will need to adjust the first copy_entry's "vme_start" and
10544 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10545 * rather than the original map's alignment.
10546 */
10547 copy->cpy_hdr.page_shift = PAGE_SHIFT;
10548#endif
1c79356b 10549
6d2010ae
A
10550 vm_map_store_init( &(copy->cpy_hdr) );
10551
1c79356b
A
10552 copy->offset = src_addr;
10553 copy->size = len;
5ba3f43e 10554
7ddcb079 10555 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
10556
10557#define RETURN(x) \
10558 MACRO_BEGIN \
10559 vm_map_unlock(src_map); \
9bccf70c
A
10560 if(src_map != base_map) \
10561 vm_map_deallocate(src_map); \
1c79356b
A
10562 if (new_entry != VM_MAP_ENTRY_NULL) \
10563 vm_map_copy_entry_dispose(copy,new_entry); \
10564 vm_map_copy_discard(copy); \
10565 { \
91447636 10566 submap_map_t *_ptr; \
1c79356b 10567 \
91447636 10568 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 10569 parent_maps=parent_maps->next; \
91447636
A
10570 if (_ptr->parent_map != base_map) \
10571 vm_map_deallocate(_ptr->parent_map); \
10572 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
10573 } \
10574 } \
10575 MACRO_RETURN(x); \
10576 MACRO_END
10577
10578 /*
10579 * Find the beginning of the region.
10580 */
10581
10582 vm_map_lock(src_map);
10583
fe8ab488
A
10584 /*
10585 * Lookup the original "src_addr" rather than the truncated
10586 * "src_start", in case "src_start" falls in a non-map-aligned
10587 * map entry *before* the map entry that contains "src_addr"...
10588 */
10589 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
1c79356b
A
10590 RETURN(KERN_INVALID_ADDRESS);
10591 if(!tmp_entry->is_sub_map) {
fe8ab488
A
10592 /*
10593 * ... but clip to the map-rounded "src_start" rather than
10594 * "src_addr" to preserve map-alignment. We'll adjust the
10595 * first copy entry at the end, if needed.
10596 */
1c79356b
A
10597 vm_map_clip_start(src_map, tmp_entry, src_start);
10598 }
fe8ab488
A
10599 if (src_start < tmp_entry->vme_start) {
10600 /*
10601 * Move "src_start" up to the start of the
10602 * first map entry to copy.
10603 */
10604 src_start = tmp_entry->vme_start;
10605 }
1c79356b
A
10606 /* set for later submap fix-up */
10607 copy_addr = src_start;
10608
10609 /*
10610 * Go through entries until we get to the end.
10611 */
10612
10613 while (TRUE) {
1c79356b 10614 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 10615 vm_map_size_t src_size; /* Size of source
1c79356b
A
10616 * map entry (in both
10617 * maps)
10618 */
10619
1c79356b
A
10620 vm_object_t src_object; /* Object to copy */
10621 vm_object_offset_t src_offset;
10622
10623 boolean_t src_needs_copy; /* Should source map
10624 * be made read-only
10625 * for copy-on-write?
10626 */
10627
10628 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
10629
10630 boolean_t was_wired; /* Was source wired? */
10631 vm_map_version_t version; /* Version before locks
10632 * dropped to make copy
10633 */
10634 kern_return_t result; /* Return value from
10635 * copy_strategically.
10636 */
10637 while(tmp_entry->is_sub_map) {
91447636 10638 vm_map_size_t submap_len;
1c79356b
A
10639 submap_map_t *ptr;
10640
10641 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
10642 ptr->next = parent_maps;
10643 parent_maps = ptr;
10644 ptr->parent_map = src_map;
10645 ptr->base_start = src_start;
10646 ptr->base_end = src_end;
10647 submap_len = tmp_entry->vme_end - src_start;
10648 if(submap_len > (src_end-src_start))
10649 submap_len = src_end-src_start;
2d21ac55 10650 ptr->base_len = submap_len;
5ba3f43e 10651
1c79356b 10652 src_start -= tmp_entry->vme_start;
3e170ce0 10653 src_start += VME_OFFSET(tmp_entry);
1c79356b 10654 src_end = src_start + submap_len;
3e170ce0 10655 src_map = VME_SUBMAP(tmp_entry);
1c79356b 10656 vm_map_lock(src_map);
9bccf70c
A
10657 /* keep an outstanding reference for all maps in */
 10658 /* the parents' tree except the base map */
10659 vm_map_reference(src_map);
1c79356b
A
10660 vm_map_unlock(ptr->parent_map);
10661 if (!vm_map_lookup_entry(
2d21ac55 10662 src_map, src_start, &tmp_entry))
1c79356b
A
10663 RETURN(KERN_INVALID_ADDRESS);
10664 map_share = TRUE;
10665 if(!tmp_entry->is_sub_map)
2d21ac55 10666 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
10667 src_entry = tmp_entry;
10668 }
2d21ac55
A
10669 /* we are now in the lowest level submap... */
10670
5ba3f43e 10671 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
3e170ce0 10672 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
 10673 /* This is not supported for now. In future */
10674 /* we will need to detect the phys_contig */
10675 /* condition and then upgrade copy_slowly */
10676 /* to do physical copy from the device mem */
10677 /* based object. We can piggy-back off of */
 10678 /* the was_wired boolean to set up the */
10679 /* proper handling */
0b4e3aa0
A
10680 RETURN(KERN_PROTECTION_FAILURE);
10681 }
1c79356b 10682 /*
5ba3f43e 10683 * Create a new address map entry to hold the result.
1c79356b
A
10684 * Fill in the fields from the appropriate source entries.
10685 * We must unlock the source map to do this if we need
10686 * to allocate a map entry.
10687 */
10688 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
10689 version.main_timestamp = src_map->timestamp;
10690 vm_map_unlock(src_map);
1c79356b 10691
7ddcb079 10692 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 10693
2d21ac55
A
10694 vm_map_lock(src_map);
10695 if ((version.main_timestamp + 1) != src_map->timestamp) {
10696 if (!vm_map_lookup_entry(src_map, src_start,
10697 &tmp_entry)) {
10698 RETURN(KERN_INVALID_ADDRESS);
10699 }
10700 if (!tmp_entry->is_sub_map)
10701 vm_map_clip_start(src_map, tmp_entry, src_start);
10702 continue; /* restart w/ new tmp_entry */
1c79356b 10703 }
1c79356b
A
10704 }
10705
10706 /*
10707 * Verify that the region can be read.
10708 */
10709 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 10710 !use_maxprot) ||
1c79356b
A
10711 (src_entry->max_protection & VM_PROT_READ) == 0)
10712 RETURN(KERN_PROTECTION_FAILURE);
10713
10714 /*
10715 * Clip against the endpoints of the entire region.
10716 */
10717
10718 vm_map_clip_end(src_map, src_entry, src_end);
10719
10720 src_size = src_entry->vme_end - src_start;
3e170ce0
A
10721 src_object = VME_OBJECT(src_entry);
10722 src_offset = VME_OFFSET(src_entry);
1c79356b
A
10723 was_wired = (src_entry->wired_count != 0);
10724
10725 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
10726 if (new_entry->is_sub_map) {
10727 /* clr address space specifics */
10728 new_entry->use_pmap = FALSE;
10729 }
1c79356b
A
10730
10731 /*
10732 * Attempt non-blocking copy-on-write optimizations.
10733 */
10734
813fb2f6
A
10735 if (src_destroy &&
10736 (src_object == VM_OBJECT_NULL ||
10737 (src_object->internal &&
10738 src_object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
10739 !src_object->true_share &&
10740 !map_share))) {
2d21ac55
A
10741 /*
10742 * If we are destroying the source, and the object
10743 * is internal, we can move the object reference
10744 * from the source to the copy. The copy is
10745 * copy-on-write only if the source is.
10746 * We make another reference to the object, because
10747 * destroying the source entry will deallocate it.
10748 */
10749 vm_object_reference(src_object);
1c79356b 10750
2d21ac55
A
10751 /*
10752 * Copy is always unwired. vm_map_copy_entry
10753 * set its wired count to zero.
10754 */
1c79356b 10755
2d21ac55 10756 goto CopySuccessful;
1c79356b
A
10757 }
10758
10759
2d21ac55 10760 RestartCopy:
1c79356b 10761 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 10762 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 10763 was_wired, 0);
55e303ae 10764 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
10765 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
10766 vm_object_copy_quickly(
3e170ce0 10767 &VME_OBJECT(new_entry),
2d21ac55
A
10768 src_offset,
10769 src_size,
10770 &src_needs_copy,
10771 &new_entry_needs_copy)) {
1c79356b
A
10772
10773 new_entry->needs_copy = new_entry_needs_copy;
10774
10775 /*
10776 * Handle copy-on-write obligations
10777 */
10778
10779 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
10780 vm_prot_t prot;
10781
10782 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10783
3e170ce0
A
10784 if (override_nx(src_map, VME_ALIAS(src_entry))
10785 && prot)
0c530ab8 10786 prot |= VM_PROT_EXECUTE;
2d21ac55 10787
55e303ae
A
10788 vm_object_pmap_protect(
10789 src_object,
10790 src_offset,
10791 src_size,
5ba3f43e 10792 (src_entry->is_shared ?
2d21ac55
A
10793 PMAP_NULL
10794 : src_map->pmap),
55e303ae 10795 src_entry->vme_start,
0c530ab8
A
10796 prot);
10797
3e170ce0 10798 assert(tmp_entry->wired_count == 0);
55e303ae 10799 tmp_entry->needs_copy = TRUE;
1c79356b
A
10800 }
10801
10802 /*
10803 * The map has never been unlocked, so it's safe
10804 * to move to the next entry rather than doing
10805 * another lookup.
10806 */
10807
10808 goto CopySuccessful;
10809 }
10810
5ba3f43e
A
10811 entry_was_shared = tmp_entry->is_shared;
10812
1c79356b
A
10813 /*
10814 * Take an object reference, so that we may
10815 * release the map lock(s).
10816 */
10817
10818 assert(src_object != VM_OBJECT_NULL);
10819 vm_object_reference(src_object);
10820
10821 /*
10822 * Record the timestamp for later verification.
10823 * Unlock the map.
10824 */
10825
10826 version.main_timestamp = src_map->timestamp;
9bccf70c 10827 vm_map_unlock(src_map); /* Increments timestamp once! */
5ba3f43e
A
10828 saved_src_entry = src_entry;
10829 tmp_entry = VM_MAP_ENTRY_NULL;
10830 src_entry = VM_MAP_ENTRY_NULL;
1c79356b
A
10831
10832 /*
10833 * Perform the copy
10834 */
10835
10836 if (was_wired) {
55e303ae 10837 CopySlowly:
1c79356b
A
10838 vm_object_lock(src_object);
10839 result = vm_object_copy_slowly(
2d21ac55
A
10840 src_object,
10841 src_offset,
10842 src_size,
10843 THREAD_UNINT,
3e170ce0
A
10844 &VME_OBJECT(new_entry));
10845 VME_OFFSET_SET(new_entry, 0);
1c79356b 10846 new_entry->needs_copy = FALSE;
55e303ae
A
10847
10848 }
10849 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
5ba3f43e 10850 (entry_was_shared || map_share)) {
55e303ae
A
10851 vm_object_t new_object;
10852
2d21ac55 10853 vm_object_lock_shared(src_object);
55e303ae 10854 new_object = vm_object_copy_delayed(
2d21ac55 10855 src_object,
5ba3f43e 10856 src_offset,
2d21ac55
A
10857 src_size,
10858 TRUE);
55e303ae
A
10859 if (new_object == VM_OBJECT_NULL)
10860 goto CopySlowly;
10861
3e170ce0
A
10862 VME_OBJECT_SET(new_entry, new_object);
10863 assert(new_entry->wired_count == 0);
55e303ae 10864 new_entry->needs_copy = TRUE;
fe8ab488
A
10865 assert(!new_entry->iokit_acct);
10866 assert(new_object->purgable == VM_PURGABLE_DENY);
10867 new_entry->use_pmap = TRUE;
55e303ae
A
10868 result = KERN_SUCCESS;
10869
1c79356b 10870 } else {
3e170ce0
A
10871 vm_object_offset_t new_offset;
10872 new_offset = VME_OFFSET(new_entry);
1c79356b 10873 result = vm_object_copy_strategically(src_object,
2d21ac55
A
10874 src_offset,
10875 src_size,
3e170ce0
A
10876 &VME_OBJECT(new_entry),
10877 &new_offset,
2d21ac55 10878 &new_entry_needs_copy);
3e170ce0
A
10879 if (new_offset != VME_OFFSET(new_entry)) {
10880 VME_OFFSET_SET(new_entry, new_offset);
10881 }
1c79356b
A
10882
10883 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
10884 }
10885
39037602
A
10886 if (result == KERN_SUCCESS &&
10887 preserve_purgeable &&
10888 src_object->purgable != VM_PURGABLE_DENY) {
10889 vm_object_t new_object;
10890
10891 new_object = VME_OBJECT(new_entry);
10892 assert(new_object != src_object);
10893 vm_object_lock(new_object);
10894 assert(new_object->ref_count == 1);
10895 assert(new_object->shadow == VM_OBJECT_NULL);
10896 assert(new_object->copy == VM_OBJECT_NULL);
10897 assert(new_object->vo_purgeable_owner == NULL);
10898
10899 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
10900 new_object->true_share = TRUE;
10901 /* start as non-volatile with no owner... */
10902 new_object->purgable = VM_PURGABLE_NONVOLATILE;
10903 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
10904 /* ... and move to src_object's purgeable state */
10905 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
10906 int state;
10907 state = src_object->purgable;
10908 vm_object_purgable_control(
10909 new_object,
5ba3f43e 10910 VM_PURGABLE_SET_STATE_FROM_KERNEL,
39037602
A
10911 &state);
10912 }
10913 vm_object_unlock(new_object);
10914 new_object = VM_OBJECT_NULL;
10915 }
10916
1c79356b
A
10917 if (result != KERN_SUCCESS &&
10918 result != KERN_MEMORY_RESTART_COPY) {
10919 vm_map_lock(src_map);
10920 RETURN(result);
10921 }
10922
10923 /*
10924 * Throw away the extra reference
10925 */
10926
10927 vm_object_deallocate(src_object);
10928
10929 /*
10930 * Verify that the map has not substantially
10931 * changed while the copy was being made.
10932 */
10933
9bccf70c 10934 vm_map_lock(src_map);
1c79356b 10935
5ba3f43e
A
10936 if ((version.main_timestamp + 1) == src_map->timestamp) {
10937 /* src_map hasn't changed: src_entry is still valid */
10938 src_entry = saved_src_entry;
1c79356b 10939 goto VerificationSuccessful;
5ba3f43e 10940 }
1c79356b
A
10941
10942 /*
10943 * Simple version comparison failed.
10944 *
10945 * Retry the lookup and verify that the
10946 * same object/offset are still present.
10947 *
10948 * [Note: a memory manager that colludes with
10949 * the calling task can detect that we have
10950 * cheated. While the map was unlocked, the
10951 * mapping could have been changed and restored.]
10952 */
10953
10954 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 10955 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
10956 vm_object_deallocate(VME_OBJECT(new_entry));
10957 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
fe8ab488
A
10958 assert(!new_entry->iokit_acct);
10959 new_entry->use_pmap = TRUE;
10960 }
1c79356b
A
10961 RETURN(KERN_INVALID_ADDRESS);
10962 }
10963
10964 src_entry = tmp_entry;
10965 vm_map_clip_start(src_map, src_entry, src_start);
10966
91447636
A
10967 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
10968 !use_maxprot) ||
10969 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
10970 goto VerificationFailed;
10971
39236c6e 10972 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
10973 /*
10974 * This entry might have been shortened
10975 * (vm_map_clip_end) or been replaced with
10976 * an entry that ends closer to "src_start"
10977 * than before.
10978 * Adjust "new_entry" accordingly; copying
10979 * less memory would be correct but we also
10980 * redo the copy (see below) if the new entry
10981 * no longer points at the same object/offset.
10982 */
39236c6e
A
10983 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
10984 VM_MAP_COPY_PAGE_MASK(copy)));
10985 new_entry->vme_end = src_entry->vme_end;
10986 src_size = new_entry->vme_end - src_start;
39037602
A
10987 } else if (src_entry->vme_end > new_entry->vme_end) {
10988 /*
10989 * This entry might have been extended
10990 * (vm_map_entry_simplify() or coalesce)
10991 * or been replaced with an entry that ends farther
5ba3f43e 10992 * from "src_start" than before.
39037602
A
10993 *
10994 * We've called vm_object_copy_*() only on
10995 * the previous <start:end> range, so we can't
10996 * just extend new_entry. We have to re-do
10997 * the copy based on the new entry as if it was
10998 * pointing at a different object/offset (see
10999 * "Verification failed" below).
11000 */
39236c6e 11001 }
1c79356b 11002
3e170ce0 11003 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
11004 (VME_OFFSET(src_entry) != src_offset) ||
11005 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
11006
11007 /*
11008 * Verification failed.
11009 *
11010 * Start over with this top-level entry.
11011 */
11012
2d21ac55 11013 VerificationFailed: ;
1c79356b 11014
3e170ce0 11015 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
11016 tmp_entry = src_entry;
11017 continue;
11018 }
11019
11020 /*
11021 * Verification succeeded.
11022 */
11023
2d21ac55 11024 VerificationSuccessful: ;
1c79356b
A
11025
11026 if (result == KERN_MEMORY_RESTART_COPY)
11027 goto RestartCopy;
11028
11029 /*
11030 * Copy succeeded.
11031 */
11032
2d21ac55 11033 CopySuccessful: ;
1c79356b
A
11034
11035 /*
11036 * Link in the new copy entry.
11037 */
11038
11039 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11040 new_entry);
5ba3f43e 11041
1c79356b
A
11042 /*
11043 * Determine whether the entire region
11044 * has been copied.
11045 */
2d21ac55 11046 src_base = src_start;
1c79356b
A
11047 src_start = new_entry->vme_end;
11048 new_entry = VM_MAP_ENTRY_NULL;
11049 while ((src_start >= src_end) && (src_end != 0)) {
fe8ab488
A
11050 submap_map_t *ptr;
11051
11052 if (src_map == base_map) {
11053 /* back to the top */
1c79356b 11054 break;
fe8ab488
A
11055 }
11056
11057 ptr = parent_maps;
11058 assert(ptr != NULL);
11059 parent_maps = parent_maps->next;
11060
11061 /* fix up the damage we did in that submap */
11062 vm_map_simplify_range(src_map,
11063 src_base,
11064 src_end);
11065
11066 vm_map_unlock(src_map);
11067 vm_map_deallocate(src_map);
11068 vm_map_lock(ptr->parent_map);
11069 src_map = ptr->parent_map;
11070 src_base = ptr->base_start;
11071 src_start = ptr->base_start + ptr->base_len;
11072 src_end = ptr->base_end;
11073 if (!vm_map_lookup_entry(src_map,
11074 src_start,
11075 &tmp_entry) &&
11076 (src_end > src_start)) {
11077 RETURN(KERN_INVALID_ADDRESS);
11078 }
11079 kfree(ptr, sizeof(submap_map_t));
11080 if (parent_maps == NULL)
11081 map_share = FALSE;
11082 src_entry = tmp_entry->vme_prev;
11083 }
11084
11085 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11086 (src_start >= src_addr + len) &&
11087 (src_addr + len != 0)) {
11088 /*
11089 * Stop copying now, even though we haven't reached
11090 * "src_end". We'll adjust the end of the last copy
11091 * entry at the end, if needed.
11092 *
 11093		 * If src_map's alignment is different from the
11094 * system's page-alignment, there could be
11095 * extra non-map-aligned map entries between
11096 * the original (non-rounded) "src_addr + len"
11097 * and the rounded "src_end".
11098 * We do not want to copy those map entries since
11099 * they're not part of the copied range.
11100 */
11101 break;
1c79356b 11102 }
fe8ab488 11103
1c79356b
A
11104 if ((src_start >= src_end) && (src_end != 0))
11105 break;
11106
11107 /*
11108 * Verify that there are no gaps in the region
11109 */
11110
11111 tmp_entry = src_entry->vme_next;
fe8ab488 11112 if ((tmp_entry->vme_start != src_start) ||
39236c6e 11113 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 11114 RETURN(KERN_INVALID_ADDRESS);
39236c6e 11115 }
1c79356b
A
11116 }
11117
11118 /*
11119 * If the source should be destroyed, do it now, since the
5ba3f43e 11120 * copy was successful.
1c79356b
A
11121 */
11122 if (src_destroy) {
39236c6e
A
11123 (void) vm_map_delete(
11124 src_map,
11125 vm_map_trunc_page(src_addr,
11126 VM_MAP_PAGE_MASK(src_map)),
11127 src_end,
11128 ((src_map == kernel_map) ?
11129 VM_MAP_REMOVE_KUNWIRE :
11130 VM_MAP_NO_FLAGS),
11131 VM_MAP_NULL);
2d21ac55
A
11132 } else {
11133 /* fix up the damage we did in the base map */
39236c6e
A
11134 vm_map_simplify_range(
11135 src_map,
11136 vm_map_trunc_page(src_addr,
5ba3f43e 11137 VM_MAP_PAGE_MASK(src_map)),
39236c6e
A
11138 vm_map_round_page(src_end,
11139 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
11140 }
11141
11142 vm_map_unlock(src_map);
5ba3f43e 11143 tmp_entry = VM_MAP_ENTRY_NULL;
1c79356b 11144
39236c6e 11145 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488 11146 vm_map_offset_t original_start, original_offset, original_end;
5ba3f43e 11147
39236c6e
A
11148 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11149
11150 /* adjust alignment of first copy_entry's "vme_start" */
11151 tmp_entry = vm_map_copy_first_entry(copy);
11152 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11153 vm_map_offset_t adjustment;
fe8ab488
A
11154
11155 original_start = tmp_entry->vme_start;
3e170ce0 11156 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
11157
11158 /* map-align the start of the first copy entry... */
11159 adjustment = (tmp_entry->vme_start -
11160 vm_map_trunc_page(
11161 tmp_entry->vme_start,
11162 VM_MAP_PAGE_MASK(src_map)));
11163 tmp_entry->vme_start -= adjustment;
3e170ce0
A
11164 VME_OFFSET_SET(tmp_entry,
11165 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
11166 copy_addr -= adjustment;
11167 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11168 /* ... adjust for mis-aligned start of copy range */
39236c6e
A
11169 adjustment =
11170 (vm_map_trunc_page(copy->offset,
11171 PAGE_MASK) -
11172 vm_map_trunc_page(copy->offset,
11173 VM_MAP_PAGE_MASK(src_map)));
11174 if (adjustment) {
11175 assert(page_aligned(adjustment));
11176 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11177 tmp_entry->vme_start += adjustment;
3e170ce0
A
11178 VME_OFFSET_SET(tmp_entry,
11179 (VME_OFFSET(tmp_entry) +
11180 adjustment));
39236c6e
A
11181 copy_addr += adjustment;
11182 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11183 }
fe8ab488
A
11184
11185 /*
11186 * Assert that the adjustments haven't exposed
11187 * more than was originally copied...
11188 */
11189 assert(tmp_entry->vme_start >= original_start);
3e170ce0 11190 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
11191 /*
 11192			 * ... and that it did not adjust outside of
11193 * a single 16K page.
11194 */
11195 assert(vm_map_trunc_page(tmp_entry->vme_start,
11196 VM_MAP_PAGE_MASK(src_map)) ==
11197 vm_map_trunc_page(original_start,
11198 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11199 }
11200
11201 /* adjust alignment of last copy_entry's "vme_end" */
11202 tmp_entry = vm_map_copy_last_entry(copy);
11203 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11204 vm_map_offset_t adjustment;
fe8ab488
A
11205
11206 original_end = tmp_entry->vme_end;
11207
11208 /* map-align the end of the last copy entry... */
11209 tmp_entry->vme_end =
11210 vm_map_round_page(tmp_entry->vme_end,
11211 VM_MAP_PAGE_MASK(src_map));
11212 /* ... adjust for mis-aligned end of copy range */
39236c6e
A
11213 adjustment =
11214 (vm_map_round_page((copy->offset +
11215 copy->size),
11216 VM_MAP_PAGE_MASK(src_map)) -
11217 vm_map_round_page((copy->offset +
11218 copy->size),
11219 PAGE_MASK));
11220 if (adjustment) {
11221 assert(page_aligned(adjustment));
11222 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11223 tmp_entry->vme_end -= adjustment;
11224 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11225 }
fe8ab488
A
11226
11227 /*
11228 * Assert that the adjustments haven't exposed
11229 * more than was originally copied...
11230 */
11231 assert(tmp_entry->vme_end <= original_end);
11232 /*
 11233			 * ... and that it did not adjust outside of
11234 * a single 16K page.
11235 */
11236 assert(vm_map_round_page(tmp_entry->vme_end,
11237 VM_MAP_PAGE_MASK(src_map)) ==
11238 vm_map_round_page(original_end,
11239 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
11240 }
11241 }
11242
1c79356b
A
 11243	/* Fix up start and end points in copy. This is necessary */
11244 /* when the various entries in the copy object were picked */
11245 /* up from different sub-maps */
11246
11247 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 11248 copy_size = 0; /* compute actual size */
1c79356b 11249 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
11250 assert(VM_MAP_PAGE_ALIGNED(
11251 copy_addr + (tmp_entry->vme_end -
11252 tmp_entry->vme_start),
11253 VM_MAP_COPY_PAGE_MASK(copy)));
11254 assert(VM_MAP_PAGE_ALIGNED(
11255 copy_addr,
11256 VM_MAP_COPY_PAGE_MASK(copy)));
11257
11258 /*
11259 * The copy_entries will be injected directly into the
11260 * destination map and might not be "map aligned" there...
11261 */
11262 tmp_entry->map_aligned = FALSE;
11263
5ba3f43e 11264 tmp_entry->vme_end = copy_addr +
1c79356b
A
11265 (tmp_entry->vme_end - tmp_entry->vme_start);
11266 tmp_entry->vme_start = copy_addr;
e2d2fc5c 11267 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 11268 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 11269 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
11270 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11271 }
11272
fe8ab488
A
11273 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11274 copy_size < copy->size) {
11275 /*
11276 * The actual size of the VM map copy is smaller than what
11277 * was requested by the caller. This must be because some
11278 * PAGE_SIZE-sized pages are missing at the end of the last
11279 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11280 * The caller might not have been aware of those missing
11281 * pages and might not want to be aware of it, which is
11282 * fine as long as they don't try to access (and crash on)
11283 * those missing pages.
11284 * Let's adjust the size of the "copy", to avoid failing
11285 * in vm_map_copyout() or vm_map_copy_overwrite().
11286 */
11287 assert(vm_map_round_page(copy_size,
11288 VM_MAP_PAGE_MASK(src_map)) ==
11289 vm_map_round_page(copy->size,
11290 VM_MAP_PAGE_MASK(src_map)));
11291 copy->size = copy_size;
11292 }
11293
1c79356b
A
11294 *copy_result = copy;
11295 return(KERN_SUCCESS);
11296
11297#undef RETURN
11298}
11299
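/*
 * Illustrative sketch, not part of the original source: the
 * snapshot/unlock/copy/relock/verify pattern used by the copy-in path
 * above, reduced to its core. "do_long_copy_operation" is a
 * hypothetical placeholder for work done while the map lock is
 * dropped; the "+ 1" accounts for our own unlock bumping the map
 * timestamp.
 */
#if 0	/* example only */
static kern_return_t
copyin_timestamp_sketch(vm_map_t map, vm_map_offset_t start)
{
	vm_map_version_t	version;
	vm_map_entry_t		entry;

	vm_map_lock(map);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	/* remember what the map looked like while we held the lock */
	version.main_timestamp = map->timestamp;
	vm_map_unlock(map);

	do_long_copy_operation(entry);		/* hypothetical */

	vm_map_lock(map);
	if ((version.main_timestamp + 1) != map->timestamp) {
		/* the map changed while unlocked: re-validate the entry */
		if (!vm_map_lookup_entry(map, start, &entry)) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}
	}
	vm_map_unlock(map);
	return KERN_SUCCESS;
}
#endif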
39236c6e
A
11300kern_return_t
11301vm_map_copy_extract(
11302 vm_map_t src_map,
11303 vm_map_address_t src_addr,
11304 vm_map_size_t len,
11305 vm_map_copy_t *copy_result, /* OUT */
11306 vm_prot_t *cur_prot, /* OUT */
11307 vm_prot_t *max_prot)
11308{
11309 vm_map_offset_t src_start, src_end;
11310 vm_map_copy_t copy;
11311 kern_return_t kr;
11312
11313 /*
11314 * Check for copies of zero bytes.
11315 */
11316
11317 if (len == 0) {
11318 *copy_result = VM_MAP_COPY_NULL;
11319 return(KERN_SUCCESS);
11320 }
11321
11322 /*
11323 * Check that the end address doesn't overflow
11324 */
11325 src_end = src_addr + len;
11326 if (src_end < src_addr)
11327 return KERN_INVALID_ADDRESS;
11328
11329 /*
11330 * Compute (page aligned) start and end of region
11331 */
11332 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11333 src_end = vm_map_round_page(src_end, PAGE_MASK);
11334
11335 /*
11336 * Allocate a header element for the list.
11337 *
5ba3f43e 11338 * Use the start and end in the header to
39236c6e
A
11339 * remember the endpoints prior to rounding.
11340 */
11341
11342 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 11343 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
39236c6e
A
11344 vm_map_copy_first_entry(copy) =
11345 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
11346 copy->type = VM_MAP_COPY_ENTRY_LIST;
11347 copy->cpy_hdr.nentries = 0;
11348 copy->cpy_hdr.entries_pageable = TRUE;
11349
11350 vm_map_store_init(&copy->cpy_hdr);
11351
11352 copy->offset = 0;
11353 copy->size = len;
11354
11355 kr = vm_map_remap_extract(src_map,
11356 src_addr,
11357 len,
11358 FALSE, /* copy */
11359 &copy->cpy_hdr,
11360 cur_prot,
11361 max_prot,
11362 VM_INHERIT_SHARE,
39037602 11363 TRUE, /* pageable */
5c9f4661
A
11364 FALSE, /* same_map */
11365 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
11366 if (kr != KERN_SUCCESS) {
11367 vm_map_copy_discard(copy);
11368 return kr;
11369 }
11370
11371 *copy_result = copy;
11372 return KERN_SUCCESS;
11373}
11374
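/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller extracting a range with vm_map_copy_extract() and discarding
 * the copy when the returned protections are insufficient.
 */
#if 0	/* example only */
static kern_return_t
copy_extract_usage_sketch(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	kr = vm_map_copy_extract(src_map, src_addr, len,
				 &copy, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;
	if (!(cur_prot & VM_PROT_READ)) {
		/* range not readable through this mapping: give it back */
		vm_map_copy_discard(copy);
		return KERN_PROTECTION_FAILURE;
	}
	/* ... hand "copy" to vm_map_copyout() or similar, which consumes it ... */
	return KERN_SUCCESS;
}
#endif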
1c79356b
A
11375/*
11376 * vm_map_copyin_object:
11377 *
11378 * Create a copy object from an object.
11379 * Our caller donates an object reference.
11380 */
11381
11382kern_return_t
11383vm_map_copyin_object(
11384 vm_object_t object,
11385 vm_object_offset_t offset, /* offset of region in object */
11386 vm_object_size_t size, /* size of region in object */
11387 vm_map_copy_t *copy_result) /* OUT */
11388{
11389 vm_map_copy_t copy; /* Resulting copy */
11390
11391 /*
11392 * We drop the object into a special copy object
11393 * that contains the object directly.
11394 */
11395
11396 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 11397 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
11398 copy->type = VM_MAP_COPY_OBJECT;
11399 copy->cpy_object = object;
1c79356b
A
11400 copy->offset = offset;
11401 copy->size = size;
11402
11403 *copy_result = copy;
11404 return(KERN_SUCCESS);
11405}
11406
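/*
 * Illustrative sketch, not part of the original source: wrapping a
 * freshly allocated VM object in a VM_MAP_COPY_OBJECT copy. The copy
 * takes over the reference returned by vm_object_allocate(), per the
 * "caller donates an object reference" rule documented above.
 */
#if 0	/* example only */
static kern_return_t
copyin_object_usage_sketch(
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)
{
	vm_object_t	object;

	object = vm_object_allocate(size);	/* one reference, donated below */
	return vm_map_copyin_object(object, 0, size, copy_result);
}
#endif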
91447636 11407static void
1c79356b
A
11408vm_map_fork_share(
11409 vm_map_t old_map,
11410 vm_map_entry_t old_entry,
11411 vm_map_t new_map)
11412{
11413 vm_object_t object;
11414 vm_map_entry_t new_entry;
1c79356b
A
11415
11416 /*
11417 * New sharing code. New map entry
11418 * references original object. Internal
11419 * objects use asynchronous copy algorithm for
11420 * future copies. First make sure we have
11421 * the right object. If we need a shadow,
11422 * or someone else already has one, then
11423 * make a new shadow and share it.
11424 */
5ba3f43e 11425
3e170ce0 11426 object = VME_OBJECT(old_entry);
1c79356b
A
11427 if (old_entry->is_sub_map) {
11428 assert(old_entry->wired_count == 0);
0c530ab8 11429#ifndef NO_NESTED_PMAP
1c79356b 11430 if(old_entry->use_pmap) {
91447636
A
11431 kern_return_t result;
11432
5ba3f43e
A
11433 result = pmap_nest(new_map->pmap,
11434 (VME_SUBMAP(old_entry))->pmap,
2d21ac55
A
11435 (addr64_t)old_entry->vme_start,
11436 (addr64_t)old_entry->vme_start,
11437 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
11438 if(result)
11439 panic("vm_map_fork_share: pmap_nest failed!");
11440 }
0c530ab8 11441#endif /* NO_NESTED_PMAP */
1c79356b 11442 } else if (object == VM_OBJECT_NULL) {
91447636 11443 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 11444 old_entry->vme_start));
3e170ce0
A
11445 VME_OFFSET_SET(old_entry, 0);
11446 VME_OBJECT_SET(old_entry, object);
fe8ab488 11447 old_entry->use_pmap = TRUE;
1c79356b 11448 } else if (object->copy_strategy !=
2d21ac55 11449 MEMORY_OBJECT_COPY_SYMMETRIC) {
5ba3f43e 11450
1c79356b
A
11451 /*
11452 * We are already using an asymmetric
11453 * copy, and therefore we already have
11454 * the right object.
11455 */
5ba3f43e 11456
1c79356b
A
11457 assert(! old_entry->needs_copy);
11458 }
11459 else if (old_entry->needs_copy || /* case 1 */
11460 object->shadowed || /* case 2 */
11461 (!object->true_share && /* case 3 */
2d21ac55 11462 !old_entry->is_shared &&
6d2010ae 11463 (object->vo_size >
2d21ac55
A
11464 (vm_map_size_t)(old_entry->vme_end -
11465 old_entry->vme_start)))) {
5ba3f43e 11466
1c79356b
A
11467 /*
11468 * We need to create a shadow.
11469 * There are three cases here.
11470 * In the first case, we need to
11471 * complete a deferred symmetrical
11472 * copy that we participated in.
11473 * In the second and third cases,
11474 * we need to create the shadow so
11475 * that changes that we make to the
11476 * object do not interfere with
11477 * any symmetrical copies which
 11478		 * have occurred (case 2) or which
11479 * might occur (case 3).
11480 *
11481 * The first case is when we had
11482 * deferred shadow object creation
11483 * via the entry->needs_copy mechanism.
11484 * This mechanism only works when
11485 * only one entry points to the source
11486 * object, and we are about to create
11487 * a second entry pointing to the
11488 * same object. The problem is that
11489 * there is no way of mapping from
11490 * an object to the entries pointing
11491 * to it. (Deferred shadow creation
 11492		 * works with one entry because it occurs
11493 * at fault time, and we walk from the
11494 * entry to the object when handling
11495 * the fault.)
11496 *
11497 * The second case is when the object
11498 * to be shared has already been copied
11499 * with a symmetric copy, but we point
11500 * directly to the object without
11501 * needs_copy set in our entry. (This
11502 * can happen because different ranges
11503 * of an object can be pointed to by
11504 * different entries. In particular,
11505 * a single entry pointing to an object
11506 * can be split by a call to vm_inherit,
11507 * which, combined with task_create, can
11508 * result in the different entries
11509 * having different needs_copy values.)
11510 * The shadowed flag in the object allows
11511 * us to detect this case. The problem
11512 * with this case is that if this object
11513 * has or will have shadows, then we
11514 * must not perform an asymmetric copy
11515 * of this object, since such a copy
11516 * allows the object to be changed, which
11517 * will break the previous symmetrical
11518 * copies (which rely upon the object
11519 * not changing). In a sense, the shadowed
11520 * flag says "don't change this object".
11521 * We fix this by creating a shadow
11522 * object for this object, and sharing
11523 * that. This works because we are free
11524 * to change the shadow object (and thus
11525 * to use an asymmetric copy strategy);
11526 * this is also semantically correct,
11527 * since this object is temporary, and
11528 * therefore a copy of the object is
11529 * as good as the object itself. (This
11530 * is not true for permanent objects,
11531 * since the pager needs to see changes,
11532 * which won't happen if the changes
11533 * are made to a copy.)
11534 *
11535 * The third case is when the object
11536 * to be shared has parts sticking
11537 * outside of the entry we're working
11538 * with, and thus may in the future
11539 * be subject to a symmetrical copy.
11540 * (This is a preemptive version of
11541 * case 2.)
11542 */
3e170ce0
A
11543 VME_OBJECT_SHADOW(old_entry,
11544 (vm_map_size_t) (old_entry->vme_end -
11545 old_entry->vme_start));
5ba3f43e 11546
1c79356b
A
11547 /*
11548 * If we're making a shadow for other than
11549 * copy on write reasons, then we have
11550 * to remove write permission.
11551 */
11552
1c79356b
A
11553 if (!old_entry->needs_copy &&
11554 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
11555 vm_prot_t prot;
11556
5ba3f43e
A
11557 assert(!pmap_has_prot_policy(old_entry->protection));
11558
0c530ab8 11559 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11560
5ba3f43e
A
11561 assert(!pmap_has_prot_policy(prot));
11562
3e170ce0 11563 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
0c530ab8 11564 prot |= VM_PROT_EXECUTE;
2d21ac55 11565
5ba3f43e 11566
316670eb 11567 if (old_map->mapped_in_other_pmaps) {
9bccf70c 11568 vm_object_pmap_protect(
3e170ce0
A
11569 VME_OBJECT(old_entry),
11570 VME_OFFSET(old_entry),
9bccf70c 11571 (old_entry->vme_end -
2d21ac55 11572 old_entry->vme_start),
9bccf70c
A
11573 PMAP_NULL,
11574 old_entry->vme_start,
0c530ab8 11575 prot);
1c79356b 11576 } else {
9bccf70c 11577 pmap_protect(old_map->pmap,
2d21ac55
A
11578 old_entry->vme_start,
11579 old_entry->vme_end,
11580 prot);
1c79356b
A
11581 }
11582 }
5ba3f43e 11583
1c79356b 11584 old_entry->needs_copy = FALSE;
3e170ce0 11585 object = VME_OBJECT(old_entry);
1c79356b 11586 }
6d2010ae 11587
5ba3f43e 11588
1c79356b
A
11589 /*
11590 * If object was using a symmetric copy strategy,
11591 * change its copy strategy to the default
11592 * asymmetric copy strategy, which is copy_delay
11593 * in the non-norma case and copy_call in the
11594 * norma case. Bump the reference count for the
11595 * new entry.
11596 */
5ba3f43e 11597
1c79356b 11598 if(old_entry->is_sub_map) {
3e170ce0
A
11599 vm_map_lock(VME_SUBMAP(old_entry));
11600 vm_map_reference(VME_SUBMAP(old_entry));
11601 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
11602 } else {
11603 vm_object_lock(object);
2d21ac55 11604 vm_object_reference_locked(object);
1c79356b
A
11605 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
11606 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
11607 }
11608 vm_object_unlock(object);
11609 }
5ba3f43e 11610
1c79356b
A
11611 /*
11612 * Clone the entry, using object ref from above.
11613 * Mark both entries as shared.
11614 */
5ba3f43e 11615
7ddcb079
A
11616 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
11617 * map or descendants */
1c79356b
A
11618 vm_map_entry_copy(new_entry, old_entry);
11619 old_entry->is_shared = TRUE;
11620 new_entry->is_shared = TRUE;
39037602
A
11621
11622 /*
 11623	 * If the old entry's inheritance is VM_INHERIT_NONE,
 11624	 * the new entry is for a corpse fork; remove the
11625 * write permission from the new entry.
11626 */
11627 if (old_entry->inheritance == VM_INHERIT_NONE) {
11628
11629 new_entry->protection &= ~VM_PROT_WRITE;
11630 new_entry->max_protection &= ~VM_PROT_WRITE;
11631 }
5ba3f43e 11632
1c79356b
A
11633 /*
11634 * Insert the entry into the new map -- we
11635 * know we're inserting at the end of the new
11636 * map.
11637 */
5ba3f43e 11638
6d2010ae 11639 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
5ba3f43e 11640
1c79356b
A
11641 /*
11642 * Update the physical map
11643 */
5ba3f43e 11644
1c79356b
A
11645 if (old_entry->is_sub_map) {
11646 /* Bill Angell pmap support goes here */
11647 } else {
11648 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
11649 old_entry->vme_end - old_entry->vme_start,
11650 old_entry->vme_start);
1c79356b
A
11651 }
11652}
11653
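/*
 * Illustrative sketch, not part of the original source: the three
 * "do we need a shadow?" conditions tested inside vm_map_fork_share()
 * above, restated as a standalone predicate for readability.
 */
#if 0	/* example only */
static boolean_t
fork_share_needs_shadow_sketch(
	vm_map_entry_t	entry,
	vm_object_t	object)
{
	return (entry->needs_copy ||		/* case 1: deferred copy */
		object->shadowed ||		/* case 2: already symmetrically copied */
		(!object->true_share &&		/* case 3: object extends beyond */
		 !entry->is_shared &&		/*	   this entry's range */
		 (object->vo_size >
		  (vm_map_size_t)(entry->vme_end - entry->vme_start))));
}
#endif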
91447636 11654static boolean_t
1c79356b
A
11655vm_map_fork_copy(
11656 vm_map_t old_map,
11657 vm_map_entry_t *old_entry_p,
39037602
A
11658 vm_map_t new_map,
11659 int vm_map_copyin_flags)
1c79356b
A
11660{
11661 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
11662 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
11663 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
11664 vm_map_copy_t copy;
11665 vm_map_entry_t last = vm_map_last_entry(new_map);
11666
11667 vm_map_unlock(old_map);
11668 /*
11669 * Use maxprot version of copyin because we
11670 * care about whether this memory can ever
11671 * be accessed, not just whether it's accessible
11672 * right now.
11673 */
39037602
A
11674 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
11675 if (vm_map_copyin_internal(old_map, start, entry_size,
11676 vm_map_copyin_flags, &copy)
1c79356b
A
11677 != KERN_SUCCESS) {
11678 /*
11679 * The map might have changed while it
11680 * was unlocked, check it again. Skip
11681 * any blank space or permanently
11682 * unreadable region.
11683 */
11684 vm_map_lock(old_map);
11685 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 11686 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
11687 last = last->vme_next;
11688 }
11689 *old_entry_p = last;
11690
11691 /*
11692 * XXX For some error returns, want to
11693 * XXX skip to the next element. Note
11694 * that INVALID_ADDRESS and
11695 * PROTECTION_FAILURE are handled above.
11696 */
5ba3f43e 11697
1c79356b
A
11698 return FALSE;
11699 }
5ba3f43e 11700
1c79356b
A
11701 /*
11702 * Insert the copy into the new map
11703 */
5ba3f43e 11704
1c79356b 11705 vm_map_copy_insert(new_map, last, copy);
5ba3f43e 11706
1c79356b
A
11707 /*
11708 * Pick up the traversal at the end of
11709 * the copied region.
11710 */
5ba3f43e 11711
1c79356b
A
11712 vm_map_lock(old_map);
11713 start += entry_size;
11714 if (! vm_map_lookup_entry(old_map, start, &last)) {
11715 last = last->vme_next;
11716 } else {
2d21ac55
A
11717 if (last->vme_start == start) {
11718 /*
11719 * No need to clip here and we don't
11720 * want to cause any unnecessary
11721 * unnesting...
11722 */
11723 } else {
11724 vm_map_clip_start(old_map, last, start);
11725 }
1c79356b
A
11726 }
11727 *old_entry_p = last;
11728
11729 return TRUE;
11730}
11731
11732/*
11733 * vm_map_fork:
11734 *
11735 * Create and return a new map based on the old
11736 * map, according to the inheritance values on the
39037602 11737 * regions in that map and the options.
1c79356b
A
11738 *
11739 * The source map must not be locked.
11740 */
11741vm_map_t
11742vm_map_fork(
316670eb 11743 ledger_t ledger,
39037602
A
11744 vm_map_t old_map,
11745 int options)
1c79356b 11746{
2d21ac55 11747 pmap_t new_pmap;
1c79356b
A
11748 vm_map_t new_map;
11749 vm_map_entry_t old_entry;
91447636 11750 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
11751 vm_map_entry_t new_entry;
11752 boolean_t src_needs_copy;
11753 boolean_t new_entry_needs_copy;
3e170ce0 11754 boolean_t pmap_is64bit;
39037602
A
11755 int vm_map_copyin_flags;
11756
11757 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
11758 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
11759 /* unsupported option */
11760 return VM_MAP_NULL;
11761 }
1c79356b 11762
3e170ce0 11763 pmap_is64bit =
b0d623f7 11764#if defined(__i386__) || defined(__x86_64__)
3e170ce0 11765 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
5ba3f43e
A
11766#elif defined(__arm64__)
11767 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
11768#elif defined(__arm__)
11769 FALSE;
b0d623f7 11770#else
316670eb 11771#error Unknown architecture.
b0d623f7 11772#endif
3e170ce0
A
11773
11774 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 11775
1c79356b
A
11776 vm_map_reference_swap(old_map);
11777 vm_map_lock(old_map);
11778
11779 new_map = vm_map_create(new_pmap,
2d21ac55
A
11780 old_map->min_offset,
11781 old_map->max_offset,
11782 old_map->hdr.entries_pageable);
5ba3f43e 11783 vm_map_lock(new_map);
39037602 11784 vm_commit_pagezero_status(new_map);
39236c6e
A
11785 /* inherit the parent map's page size */
11786 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 11787 for (
2d21ac55
A
11788 old_entry = vm_map_first_entry(old_map);
11789 old_entry != vm_map_to_entry(old_map);
11790 ) {
1c79356b
A
11791
11792 entry_size = old_entry->vme_end - old_entry->vme_start;
11793
11794 switch (old_entry->inheritance) {
11795 case VM_INHERIT_NONE:
39037602
A
11796 /*
11797 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
11798 * is not passed or it is backed by a device pager.
11799 */
11800 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
11801 (!old_entry->is_sub_map &&
11802 VME_OBJECT(old_entry) != NULL &&
11803 VME_OBJECT(old_entry)->pager != NULL &&
11804 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
11805 break;
11806 }
11807 /* FALLTHROUGH */
1c79356b
A
11808
11809 case VM_INHERIT_SHARE:
11810 vm_map_fork_share(old_map, old_entry, new_map);
11811 new_size += entry_size;
11812 break;
11813
11814 case VM_INHERIT_COPY:
11815
11816 /*
11817 * Inline the copy_quickly case;
11818 * upon failure, fall back on call
11819 * to vm_map_fork_copy.
11820 */
11821
11822 if(old_entry->is_sub_map)
11823 break;
9bccf70c 11824 if ((old_entry->wired_count != 0) ||
3e170ce0
A
11825 ((VME_OBJECT(old_entry) != NULL) &&
11826 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
11827 goto slow_vm_map_fork_copy;
11828 }
11829
7ddcb079 11830 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 11831 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
11832 if (new_entry->is_sub_map) {
11833 /* clear address space specifics */
11834 new_entry->use_pmap = FALSE;
11835 }
1c79356b
A
11836
11837 if (! vm_object_copy_quickly(
3e170ce0
A
11838 &VME_OBJECT(new_entry),
11839 VME_OFFSET(old_entry),
2d21ac55
A
11840 (old_entry->vme_end -
11841 old_entry->vme_start),
11842 &src_needs_copy,
11843 &new_entry_needs_copy)) {
1c79356b
A
11844 vm_map_entry_dispose(new_map, new_entry);
11845 goto slow_vm_map_fork_copy;
11846 }
11847
11848 /*
11849 * Handle copy-on-write obligations
11850 */
5ba3f43e 11851
1c79356b 11852 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
11853 vm_prot_t prot;
11854
5ba3f43e
A
11855 assert(!pmap_has_prot_policy(old_entry->protection));
11856
0c530ab8 11857 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11858
3e170ce0
A
11859 if (override_nx(old_map, VME_ALIAS(old_entry))
11860 && prot)
0c530ab8 11861 prot |= VM_PROT_EXECUTE;
2d21ac55 11862
5ba3f43e
A
11863 assert(!pmap_has_prot_policy(prot));
11864
1c79356b 11865 vm_object_pmap_protect(
3e170ce0
A
11866 VME_OBJECT(old_entry),
11867 VME_OFFSET(old_entry),
1c79356b 11868 (old_entry->vme_end -
2d21ac55 11869 old_entry->vme_start),
5ba3f43e 11870 ((old_entry->is_shared
316670eb 11871 || old_map->mapped_in_other_pmaps)
2d21ac55
A
11872 ? PMAP_NULL :
11873 old_map->pmap),
1c79356b 11874 old_entry->vme_start,
0c530ab8 11875 prot);
1c79356b 11876
3e170ce0 11877 assert(old_entry->wired_count == 0);
1c79356b
A
11878 old_entry->needs_copy = TRUE;
11879 }
11880 new_entry->needs_copy = new_entry_needs_copy;
5ba3f43e 11881
1c79356b
A
11882 /*
11883 * Insert the entry at the end
11884 * of the map.
11885 */
5ba3f43e 11886
6d2010ae 11887 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
11888 new_entry);
11889 new_size += entry_size;
11890 break;
11891
11892 slow_vm_map_fork_copy:
39037602
A
11893 vm_map_copyin_flags = 0;
11894 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
11895 vm_map_copyin_flags |=
11896 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
11897 }
11898 if (vm_map_fork_copy(old_map,
11899 &old_entry,
11900 new_map,
11901 vm_map_copyin_flags)) {
1c79356b
A
11902 new_size += entry_size;
11903 }
11904 continue;
11905 }
11906 old_entry = old_entry->vme_next;
11907 }
11908
5ba3f43e
A
11909#if defined(__arm64__)
11910 pmap_insert_sharedpage(new_map->pmap);
11911#endif
fe8ab488 11912
1c79356b 11913 new_map->size = new_size;
5ba3f43e 11914 vm_map_unlock(new_map);
1c79356b
A
11915 vm_map_unlock(old_map);
11916 vm_map_deallocate(old_map);
11917
11918 return(new_map);
11919}
11920
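/*
 * Illustrative sketch, not part of the original source: how a
 * hypothetical fork-style caller might drive vm_map_fork(). Whether a
 * corpse-style fork passes VM_MAP_FORK_SHARE_IF_INHERIT_NONE is an
 * assumption made for this example; the real callers live outside
 * this file.
 */
#if 0	/* example only */
static vm_map_t
fork_usage_sketch(
	ledger_t	ledger,
	vm_map_t	parent_map,
	boolean_t	corpse_style)
{
	int	options = 0;

	if (corpse_style) {
		/* also share regions marked VM_INHERIT_NONE (assumption) */
		options |= VM_MAP_FORK_SHARE_IF_INHERIT_NONE;
	}
	/* returns VM_MAP_NULL if an unsupported option bit is set */
	return vm_map_fork(ledger, parent_map, options);
}
#endif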
2d21ac55
A
11921/*
11922 * vm_map_exec:
11923 *
11924 * Setup the "new_map" with the proper execution environment according
11925 * to the type of executable (platform, 64bit, chroot environment).
11926 * Map the comm page and shared region, etc...
11927 */
11928kern_return_t
11929vm_map_exec(
11930 vm_map_t new_map,
11931 task_t task,
39037602 11932 boolean_t is64bit,
2d21ac55
A
11933 void *fsroot,
11934 cpu_type_t cpu)
11935{
11936 SHARED_REGION_TRACE_DEBUG(
11937 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
fe8ab488
A
11938 (void *)VM_KERNEL_ADDRPERM(current_task()),
11939 (void *)VM_KERNEL_ADDRPERM(new_map),
11940 (void *)VM_KERNEL_ADDRPERM(task),
11941 (void *)VM_KERNEL_ADDRPERM(fsroot),
11942 cpu));
39037602
A
11943 (void) vm_commpage_enter(new_map, task, is64bit);
11944 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
2d21ac55
A
11945 SHARED_REGION_TRACE_DEBUG(
11946 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
fe8ab488
A
11947 (void *)VM_KERNEL_ADDRPERM(current_task()),
11948 (void *)VM_KERNEL_ADDRPERM(new_map),
11949 (void *)VM_KERNEL_ADDRPERM(task),
11950 (void *)VM_KERNEL_ADDRPERM(fsroot),
11951 cpu));
2d21ac55
A
11952 return KERN_SUCCESS;
11953}
1c79356b
A
11954
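/*
 * Illustrative sketch, not part of the original source: the exec-time
 * call into vm_map_exec() that maps the comm page and shared region
 * for a freshly created map. The surrounding image-loading logic is a
 * hypothetical placeholder.
 */
#if 0	/* example only */
static kern_return_t
exec_setup_sketch(
	vm_map_t	new_map,
	task_t		task,
	boolean_t	is64bit,
	void		*fsroot,
	cpu_type_t	cpu)
{
	return vm_map_exec(new_map, task, is64bit, fsroot, cpu);
}
#endif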
11955/*
11956 * vm_map_lookup_locked:
11957 *
11958 * Finds the VM object, offset, and
11959 * protection for a given virtual address in the
11960 * specified map, assuming a page fault of the
11961 * type specified.
11962 *
11963 * Returns the (object, offset, protection) for
11964 * this address, whether it is wired down, and whether
11965 * this map has the only reference to the data in question.
11966 * In order to later verify this lookup, a "version"
11967 * is returned.
11968 *
11969 * The map MUST be locked by the caller and WILL be
11970 * locked on exit. In order to guarantee the
11971 * existence of the returned object, it is returned
11972 * locked.
11973 *
11974 * If a lookup is requested with "write protection"
11975 * specified, the map may be changed to perform virtual
11976 * copying operations, although the data referenced will
11977 * remain the same.
11978 */
11979kern_return_t
11980vm_map_lookup_locked(
11981 vm_map_t *var_map, /* IN/OUT */
2d21ac55 11982 vm_map_offset_t vaddr,
91447636 11983 vm_prot_t fault_type,
2d21ac55 11984 int object_lock_type,
1c79356b
A
11985 vm_map_version_t *out_version, /* OUT */
11986 vm_object_t *object, /* OUT */
11987 vm_object_offset_t *offset, /* OUT */
11988 vm_prot_t *out_prot, /* OUT */
11989 boolean_t *wired, /* OUT */
2d21ac55 11990 vm_object_fault_info_t fault_info, /* OUT */
91447636 11991 vm_map_t *real_map)
1c79356b
A
11992{
11993 vm_map_entry_t entry;
39037602 11994 vm_map_t map = *var_map;
1c79356b
A
11995 vm_map_t old_map = *var_map;
11996 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
11997 vm_map_offset_t cow_parent_vaddr = 0;
11998 vm_map_offset_t old_start = 0;
11999 vm_map_offset_t old_end = 0;
39037602 12000 vm_prot_t prot;
6d2010ae 12001 boolean_t mask_protections;
fe8ab488 12002 boolean_t force_copy;
6d2010ae
A
12003 vm_prot_t original_fault_type;
12004
12005 /*
12006 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12007 * as a mask against the mapping's actual protections, not as an
12008 * absolute value.
12009 */
12010 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
12011 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12012 fault_type &= VM_PROT_ALL;
6d2010ae 12013 original_fault_type = fault_type;
1c79356b 12014
91447636 12015 *real_map = map;
6d2010ae
A
12016
12017RetryLookup:
12018 fault_type = original_fault_type;
1c79356b
A
12019
12020 /*
12021 * If the map has an interesting hint, try it before calling
12022 * full blown lookup routine.
12023 */
1c79356b 12024 entry = map->hint;
1c79356b
A
12025
12026 if ((entry == vm_map_to_entry(map)) ||
12027 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12028 vm_map_entry_t tmp_entry;
12029
12030 /*
12031 * Entry was either not a valid hint, or the vaddr
12032 * was not contained in the entry, so do a full lookup.
12033 */
12034 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12035 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12036 vm_map_unlock(cow_sub_map_parent);
5ba3f43e 12037 if((*real_map != map)
2d21ac55 12038 && (*real_map != cow_sub_map_parent))
91447636 12039 vm_map_unlock(*real_map);
1c79356b
A
12040 return KERN_INVALID_ADDRESS;
12041 }
12042
12043 entry = tmp_entry;
12044 }
12045 if(map == old_map) {
12046 old_start = entry->vme_start;
12047 old_end = entry->vme_end;
12048 }
12049
12050 /*
12051 * Handle submaps. Drop lock on upper map, submap is
12052 * returned locked.
12053 */
12054
12055submap_recurse:
12056 if (entry->is_sub_map) {
91447636
A
12057 vm_map_offset_t local_vaddr;
12058 vm_map_offset_t end_delta;
5ba3f43e 12059 vm_map_offset_t start_delta;
1c79356b 12060 vm_map_entry_t submap_entry;
5ba3f43e
A
12061 vm_prot_t subentry_protection;
12062 vm_prot_t subentry_max_protection;
1c79356b
A
12063 boolean_t mapped_needs_copy=FALSE;
12064
12065 local_vaddr = vaddr;
12066
39037602
A
12067 if ((entry->use_pmap &&
12068 ! ((fault_type & VM_PROT_WRITE) ||
12069 force_copy))) {
91447636 12070 /* if real_map equals map we unlock below */
5ba3f43e 12071 if ((*real_map != map) &&
2d21ac55 12072 (*real_map != cow_sub_map_parent))
91447636 12073 vm_map_unlock(*real_map);
3e170ce0 12074 *real_map = VME_SUBMAP(entry);
1c79356b
A
12075 }
12076
39037602
A
12077 if(entry->needs_copy &&
12078 ((fault_type & VM_PROT_WRITE) ||
12079 force_copy)) {
1c79356b
A
12080 if (!mapped_needs_copy) {
12081 if (vm_map_lock_read_to_write(map)) {
12082 vm_map_lock_read(map);
99c3a104 12083 *real_map = map;
1c79356b
A
12084 goto RetryLookup;
12085 }
3e170ce0
A
12086 vm_map_lock_read(VME_SUBMAP(entry));
12087 *var_map = VME_SUBMAP(entry);
1c79356b
A
12088 cow_sub_map_parent = map;
12089 /* reset base to map before cow object */
12090 /* this is the map which will accept */
12091 /* the new cow object */
12092 old_start = entry->vme_start;
12093 old_end = entry->vme_end;
12094 cow_parent_vaddr = vaddr;
12095 mapped_needs_copy = TRUE;
12096 } else {
3e170ce0
A
12097 vm_map_lock_read(VME_SUBMAP(entry));
12098 *var_map = VME_SUBMAP(entry);
1c79356b 12099 if((cow_sub_map_parent != map) &&
2d21ac55 12100 (*real_map != map))
1c79356b
A
12101 vm_map_unlock(map);
12102 }
12103 } else {
3e170ce0 12104 vm_map_lock_read(VME_SUBMAP(entry));
5ba3f43e 12105 *var_map = VME_SUBMAP(entry);
1c79356b
A
12106 /* leave map locked if it is a target */
12107 /* cow sub_map above otherwise, just */
12108 /* follow the maps down to the object */
12109 /* here we unlock knowing we are not */
12110 /* revisiting the map. */
91447636 12111 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
12112 vm_map_unlock_read(map);
12113 }
12114
99c3a104 12115 map = *var_map;
1c79356b
A
12116
12117 /* calculate the offset in the submap for vaddr */
3e170ce0 12118 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 12119
2d21ac55 12120 RetrySubMap:
1c79356b
A
12121 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12122 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12123 vm_map_unlock(cow_sub_map_parent);
12124 }
5ba3f43e 12125 if((*real_map != map)
2d21ac55 12126 && (*real_map != cow_sub_map_parent)) {
91447636 12127 vm_map_unlock(*real_map);
1c79356b 12128 }
91447636 12129 *real_map = map;
1c79356b
A
12130 return KERN_INVALID_ADDRESS;
12131 }
2d21ac55 12132
1c79356b
A
12133 /* find the attenuated shadow of the underlying object */
12134 /* on our target map */
12135
 12136		/* In plain English: the submap object may extend beyond the */
 12137		/* region mapped by the entry, or may only fill a portion */
12138 /* of it. For our purposes, we only care if the object */
12139 /* doesn't fill. In this case the area which will */
12140 /* ultimately be clipped in the top map will only need */
12141 /* to be as big as the portion of the underlying entry */
12142 /* which is mapped */
3e170ce0
A
12143 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12144 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b 12145
5ba3f43e 12146 end_delta =
3e170ce0 12147 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
1c79356b 12148 submap_entry->vme_end ?
5ba3f43e 12149 0 : (VME_OFFSET(entry) +
2d21ac55 12150 (old_end - old_start))
5ba3f43e 12151 - submap_entry->vme_end;
1c79356b
A
12152
12153 old_start += start_delta;
12154 old_end -= end_delta;
12155
12156 if(submap_entry->is_sub_map) {
12157 entry = submap_entry;
12158 vaddr = local_vaddr;
12159 goto submap_recurse;
12160 }
12161
39037602
A
12162 if (((fault_type & VM_PROT_WRITE) ||
12163 force_copy)
12164 && cow_sub_map_parent) {
1c79356b 12165
2d21ac55
A
12166 vm_object_t sub_object, copy_object;
12167 vm_object_offset_t copy_offset;
91447636
A
12168 vm_map_offset_t local_start;
12169 vm_map_offset_t local_end;
0b4e3aa0 12170 boolean_t copied_slowly = FALSE;
1c79356b
A
12171
12172 if (vm_map_lock_read_to_write(map)) {
12173 vm_map_lock_read(map);
12174 old_start -= start_delta;
12175 old_end += end_delta;
12176 goto RetrySubMap;
12177 }
0b4e3aa0
A
12178
12179
3e170ce0 12180 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
12181 if (sub_object == VM_OBJECT_NULL) {
12182 sub_object =
1c79356b 12183 vm_object_allocate(
91447636 12184 (vm_map_size_t)
2d21ac55
A
12185 (submap_entry->vme_end -
12186 submap_entry->vme_start));
3e170ce0
A
12187 VME_OBJECT_SET(submap_entry, sub_object);
12188 VME_OFFSET_SET(submap_entry, 0);
1c79356b 12189 }
5ba3f43e 12190 local_start = local_vaddr -
2d21ac55 12191 (cow_parent_vaddr - old_start);
5ba3f43e 12192 local_end = local_vaddr +
2d21ac55 12193 (old_end - cow_parent_vaddr);
1c79356b
A
12194 vm_map_clip_start(map, submap_entry, local_start);
12195 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
12196 if (submap_entry->is_sub_map) {
12197 /* unnesting was done when clipping */
12198 assert(!submap_entry->use_pmap);
12199 }
1c79356b
A
12200
12201 /* This is the COW case, lets connect */
12202 /* an entry in our space to the underlying */
12203 /* object in the submap, bypassing the */
12204 /* submap. */
0b4e3aa0
A
12205
12206
2d21ac55 12207 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
12208 (sub_object->copy_strategy ==
12209 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
12210 vm_object_lock(sub_object);
12211 vm_object_copy_slowly(sub_object,
3e170ce0 12212 VME_OFFSET(submap_entry),
2d21ac55
A
12213 (submap_entry->vme_end -
12214 submap_entry->vme_start),
12215 FALSE,
12216 &copy_object);
12217 copied_slowly = TRUE;
0b4e3aa0 12218 } else {
5ba3f43e 12219
0b4e3aa0 12220 /* set up shadow object */
2d21ac55 12221 copy_object = sub_object;
39037602
A
12222 vm_object_lock(sub_object);
12223 vm_object_reference_locked(sub_object);
2d21ac55 12224 sub_object->shadowed = TRUE;
39037602
A
12225 vm_object_unlock(sub_object);
12226
3e170ce0 12227 assert(submap_entry->wired_count == 0);
0b4e3aa0 12228 submap_entry->needs_copy = TRUE;
0c530ab8 12229
5ba3f43e
A
12230 prot = submap_entry->protection;
12231 assert(!pmap_has_prot_policy(prot));
12232 prot = prot & ~VM_PROT_WRITE;
12233 assert(!pmap_has_prot_policy(prot));
2d21ac55 12234
3e170ce0
A
12235 if (override_nx(old_map,
12236 VME_ALIAS(submap_entry))
12237 && prot)
0c530ab8 12238 prot |= VM_PROT_EXECUTE;
2d21ac55 12239
0b4e3aa0 12240 vm_object_pmap_protect(
2d21ac55 12241 sub_object,
3e170ce0 12242 VME_OFFSET(submap_entry),
5ba3f43e 12243 submap_entry->vme_end -
2d21ac55 12244 submap_entry->vme_start,
5ba3f43e 12245 (submap_entry->is_shared
316670eb 12246 || map->mapped_in_other_pmaps) ?
2d21ac55 12247 PMAP_NULL : map->pmap,
1c79356b 12248 submap_entry->vme_start,
0c530ab8 12249 prot);
0b4e3aa0 12250 }
5ba3f43e 12251
2d21ac55
A
12252 /*
12253 * Adjust the fault offset to the submap entry.
12254 */
12255 copy_offset = (local_vaddr -
12256 submap_entry->vme_start +
3e170ce0 12257 VME_OFFSET(submap_entry));
1c79356b
A
12258
 12259			/* This works differently from the */
12260 /* normal submap case. We go back */
12261 /* to the parent of the cow map and*/
12262 /* clip out the target portion of */
12263 /* the sub_map, substituting the */
12264 /* new copy object, */
12265
5ba3f43e
A
12266 subentry_protection = submap_entry->protection;
12267 subentry_max_protection = submap_entry->max_protection;
1c79356b 12268 vm_map_unlock(map);
5ba3f43e
A
12269 submap_entry = NULL; /* not valid after map unlock */
12270
1c79356b
A
12271 local_start = old_start;
12272 local_end = old_end;
12273 map = cow_sub_map_parent;
12274 *var_map = cow_sub_map_parent;
12275 vaddr = cow_parent_vaddr;
12276 cow_sub_map_parent = NULL;
12277
5ba3f43e 12278 if(!vm_map_lookup_entry(map,
2d21ac55
A
12279 vaddr, &entry)) {
12280 vm_object_deallocate(
12281 copy_object);
12282 vm_map_lock_write_to_read(map);
12283 return KERN_INVALID_ADDRESS;
12284 }
5ba3f43e 12285
2d21ac55
A
12286 /* clip out the portion of space */
12287 /* mapped by the sub map which */
12288 /* corresponds to the underlying */
12289 /* object */
12290
12291 /*
12292 * Clip (and unnest) the smallest nested chunk
12293 * possible around the faulting address...
12294 */
12295 local_start = vaddr & ~(pmap_nesting_size_min - 1);
12296 local_end = local_start + pmap_nesting_size_min;
12297 /*
12298 * ... but don't go beyond the "old_start" to "old_end"
12299 * range, to avoid spanning over another VM region
12300 * with a possibly different VM object and/or offset.
12301 */
12302 if (local_start < old_start) {
12303 local_start = old_start;
12304 }
12305 if (local_end > old_end) {
12306 local_end = old_end;
12307 }
12308 /*
12309 * Adjust copy_offset to the start of the range.
12310 */
12311 copy_offset -= (vaddr - local_start);
12312
1c79356b
A
12313 vm_map_clip_start(map, entry, local_start);
12314 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
12315 if (entry->is_sub_map) {
12316 /* unnesting was done when clipping */
12317 assert(!entry->use_pmap);
12318 }
1c79356b
A
12319
12320 /* substitute copy object for */
12321 /* shared map entry */
3e170ce0 12322 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 12323 assert(!entry->iokit_acct);
1c79356b 12324 entry->is_sub_map = FALSE;
fe8ab488 12325 entry->use_pmap = TRUE;
3e170ce0 12326 VME_OBJECT_SET(entry, copy_object);
1c79356b 12327
2d21ac55 12328 /* propagate the submap entry's protections */
5ba3f43e
A
12329 entry->protection |= subentry_protection;
12330 entry->max_protection |= subentry_max_protection;
12331
12332#if CONFIG_EMBEDDED
12333 if (entry->protection & VM_PROT_WRITE) {
12334 if ((entry->protection & VM_PROT_EXECUTE) && !(entry->used_for_jit)) {
12335 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
12336 entry->protection &= ~VM_PROT_EXECUTE;
12337 }
12338 }
12339#endif
2d21ac55 12340
0b4e3aa0 12341 if(copied_slowly) {
3e170ce0 12342 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
12343 entry->needs_copy = FALSE;
12344 entry->is_shared = FALSE;
12345 } else {
3e170ce0
A
12346 VME_OFFSET_SET(entry, copy_offset);
12347 assert(entry->wired_count == 0);
0b4e3aa0 12348 entry->needs_copy = TRUE;
5ba3f43e 12349 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0
A
12350 entry->inheritance = VM_INHERIT_COPY;
12351 if (map != old_map)
12352 entry->is_shared = TRUE;
12353 }
5ba3f43e 12354 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 12355 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
12356
12357 vm_map_lock_write_to_read(map);
12358 } else {
12359 if((cow_sub_map_parent)
2d21ac55
A
12360 && (cow_sub_map_parent != *real_map)
12361 && (cow_sub_map_parent != map)) {
1c79356b
A
12362 vm_map_unlock(cow_sub_map_parent);
12363 }
12364 entry = submap_entry;
12365 vaddr = local_vaddr;
12366 }
12367 }
5ba3f43e 12368
1c79356b
A
12369 /*
12370 * Check whether this task is allowed to have
12371 * this page.
12372 */
2d21ac55 12373
6601e61a 12374 prot = entry->protection;
0c530ab8 12375
3e170ce0 12376 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0c530ab8 12377 /*
2d21ac55 12378 * HACK -- if not a stack, then allow execution
0c530ab8
A
12379 */
12380 prot |= VM_PROT_EXECUTE;
2d21ac55
A
12381 }
12382
6d2010ae
A
12383 if (mask_protections) {
12384 fault_type &= prot;
12385 if (fault_type == VM_PROT_NONE) {
12386 goto protection_failure;
12387 }
12388 }
39037602 12389 if (((fault_type & prot) != fault_type)
5ba3f43e
A
12390#if __arm64__
12391 /* prefetch abort in execute-only page */
12392 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12393#endif
39037602 12394 ) {
6d2010ae 12395 protection_failure:
2d21ac55
A
12396 if (*real_map != map) {
12397 vm_map_unlock(*real_map);
0c530ab8
A
12398 }
12399 *real_map = map;
12400
12401 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 12402 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 12403
2d21ac55 12404 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 12405 return KERN_PROTECTION_FAILURE;
1c79356b
A
12406 }
12407
12408 /*
12409 * If this page is not pageable, we have to get
12410 * it for all possible accesses.
12411 */
12412
91447636
A
12413 *wired = (entry->wired_count != 0);
12414 if (*wired)
0c530ab8 12415 fault_type = prot;
1c79356b
A
12416
12417 /*
12418 * If the entry was copy-on-write, we either ...
12419 */
12420
12421 if (entry->needs_copy) {
12422 /*
12423 * If we want to write the page, we may as well
12424 * handle that now since we've got the map locked.
12425 *
12426 * If we don't need to write the page, we just
12427 * demote the permissions allowed.
12428 */
12429
fe8ab488 12430 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
12431 /*
12432 * Make a new object, and place it in the
12433 * object chain. Note that no new references
12434 * have appeared -- one just moved from the
12435 * map to the new object.
12436 */
12437
12438 if (vm_map_lock_read_to_write(map)) {
12439 vm_map_lock_read(map);
12440 goto RetryLookup;
12441 }
39037602
A
12442
12443 if (VME_OBJECT(entry)->shadowed == FALSE) {
12444 vm_object_lock(VME_OBJECT(entry));
12445 VME_OBJECT(entry)->shadowed = TRUE;
12446 vm_object_unlock(VME_OBJECT(entry));
12447 }
3e170ce0
A
12448 VME_OBJECT_SHADOW(entry,
12449 (vm_map_size_t) (entry->vme_end -
12450 entry->vme_start));
1c79356b 12451 entry->needs_copy = FALSE;
39037602 12452
1c79356b
A
12453 vm_map_lock_write_to_read(map);
12454 }
39037602 12455 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
12456 /*
12457 * We're attempting to read a copy-on-write
12458 * page -- don't allow writes.
12459 */
12460
12461 prot &= (~VM_PROT_WRITE);
12462 }
12463 }
12464
12465 /*
12466 * Create an object if necessary.
12467 */
3e170ce0 12468 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
12469
12470 if (vm_map_lock_read_to_write(map)) {
12471 vm_map_lock_read(map);
12472 goto RetryLookup;
12473 }
12474
3e170ce0
A
12475 VME_OBJECT_SET(entry,
12476 vm_object_allocate(
12477 (vm_map_size_t)(entry->vme_end -
12478 entry->vme_start)));
12479 VME_OFFSET_SET(entry, 0);
1c79356b
A
12480 vm_map_lock_write_to_read(map);
12481 }
12482
12483 /*
12484 * Return the object/offset from this entry. If the entry
12485 * was copy-on-write or empty, it has been fixed up. Also
12486 * return the protection.
12487 */
12488
3e170ce0
A
12489 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
12490 *object = VME_OBJECT(entry);
1c79356b 12491 *out_prot = prot;
2d21ac55
A
12492
12493 if (fault_info) {
12494 fault_info->interruptible = THREAD_UNINT; /* for now... */
12495 /* ... the caller will change "interruptible" if needed */
12496 fault_info->cluster_size = 0;
3e170ce0 12497 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
12498 fault_info->pmap_options = 0;
12499 if (entry->iokit_acct ||
12500 (!entry->is_sub_map && !entry->use_pmap)) {
12501 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12502 }
2d21ac55 12503 fault_info->behavior = entry->behavior;
3e170ce0
A
12504 fault_info->lo_offset = VME_OFFSET(entry);
12505 fault_info->hi_offset =
12506 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 12507 fault_info->no_cache = entry->no_cache;
b0d623f7 12508 fault_info->stealth = FALSE;
6d2010ae 12509 fault_info->io_sync = FALSE;
3e170ce0
A
12510 if (entry->used_for_jit ||
12511 entry->vme_resilient_codesign) {
12512 fault_info->cs_bypass = TRUE;
12513 } else {
12514 fault_info->cs_bypass = FALSE;
12515 }
0b4c1975 12516 fault_info->mark_zf_absent = FALSE;
316670eb 12517 fault_info->batch_pmap_op = FALSE;
2d21ac55 12518 }
1c79356b
A
12519
12520 /*
12521 * Lock the object to prevent it from disappearing
12522 */
2d21ac55
A
12523 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
12524 vm_object_lock(*object);
12525 else
12526 vm_object_lock_shared(*object);
5ba3f43e 12527
1c79356b
A
12528 /*
12529 * Save the version number
12530 */
12531
12532 out_version->main_timestamp = map->timestamp;
12533
12534 return KERN_SUCCESS;
12535}
12536
12537
12538/*
12539 * vm_map_verify:
12540 *
12541 * Verifies that the map in question has not changed
5ba3f43e
A
12542 * since the given version. The map has to be locked
12543 * ("shared" mode is fine) before calling this function
12544 * and it will be returned locked too.
1c79356b
A
12545 */
12546boolean_t
12547vm_map_verify(
39037602
A
12548 vm_map_t map,
12549 vm_map_version_t *version) /* REF */
1c79356b
A
12550{
12551 boolean_t result;
12552
5ba3f43e 12553 vm_map_lock_assert_held(map);
1c79356b
A
12554 result = (map->timestamp == version->main_timestamp);
12555
1c79356b
A
12556 return(result);
12557}
12558
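/*
 * Illustrative sketch, not part of the original source: the
 * fault-handler pattern that pairs vm_map_lookup_locked() with
 * vm_map_verify(). "resolve_fault_on_object" is a hypothetical
 * placeholder for the work done while the map lock is dropped, and
 * the real fault path handles "real_map" and wiring in more detail.
 */
#if 0	/* example only */
static kern_return_t
lookup_and_verify_sketch(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

RetryFault:
	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,	/* fault_info */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	(void) wired;			/* a real fault handler honors wiring */
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_object_unlock(object);
	vm_map_unlock_read(map);

	resolve_fault_on_object(object, offset, prot);	/* hypothetical */

	vm_map_lock_read(map);
	if (!vm_map_verify(map, &version)) {
		/* the map changed while unlocked: start over */
		vm_map_unlock_read(map);
		goto RetryFault;
	}
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif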
91447636
A
12559/*
12560 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
12561 * Goes away after regular vm_region_recurse function migrates to
12562 * 64 bits
12563 * vm_region_recurse: A form of vm_region which follows the
12564 * submaps in a target map
12565 *
12566 */
12567
39037602
A
12568#if DEVELOPMENT || DEBUG
12569int vm_region_footprint = 0;
12570#endif /* DEVELOPMENT || DEBUG */
12571
91447636
A
12572kern_return_t
12573vm_map_region_recurse_64(
12574 vm_map_t map,
12575 vm_map_offset_t *address, /* IN/OUT */
12576 vm_map_size_t *size, /* OUT */
12577 natural_t *nesting_depth, /* IN/OUT */
12578 vm_region_submap_info_64_t submap_info, /* IN/OUT */
12579 mach_msg_type_number_t *count) /* IN/OUT */
12580{
39236c6e 12581 mach_msg_type_number_t original_count;
91447636
A
12582 vm_region_extended_info_data_t extended;
12583 vm_map_entry_t tmp_entry;
12584 vm_map_offset_t user_address;
12585 unsigned int user_max_depth;
12586
12587 /*
12588 * "curr_entry" is the VM map entry preceding or including the
12589 * address we're looking for.
12590 * "curr_map" is the map or sub-map containing "curr_entry".
5ba3f43e 12591 * "curr_address" is the equivalent of the top map's "user_address"
6d2010ae 12592 * in the current map.
91447636
A
12593 * "curr_offset" is the cumulated offset of "curr_map" in the
12594 * target task's address space.
12595 * "curr_depth" is the depth of "curr_map" in the chain of
12596 * sub-maps.
5ba3f43e 12597 *
6d2010ae
A
12598 * "curr_max_below" and "curr_max_above" limit the range (around
12599 * "curr_address") we should take into account in the current (sub)map.
12600 * They limit the range to what's visible through the map entries
12601 * we've traversed from the top map to the current map.
12602
91447636
A
12603 */
12604 vm_map_entry_t curr_entry;
6d2010ae 12605 vm_map_address_t curr_address;
91447636
A
12606 vm_map_offset_t curr_offset;
12607 vm_map_t curr_map;
12608 unsigned int curr_depth;
6d2010ae
A
12609 vm_map_offset_t curr_max_below, curr_max_above;
12610 vm_map_offset_t curr_skip;
91447636
A
12611
12612 /*
12613 * "next_" is the same as "curr_" but for the VM region immediately
12614 * after the address we're looking for. We need to keep track of this
12615 * too because we want to return info about that region if the
12616 * address we're looking for is not mapped.
12617 */
12618 vm_map_entry_t next_entry;
12619 vm_map_offset_t next_offset;
6d2010ae 12620 vm_map_offset_t next_address;
91447636
A
12621 vm_map_t next_map;
12622 unsigned int next_depth;
6d2010ae
A
12623 vm_map_offset_t next_max_below, next_max_above;
12624 vm_map_offset_t next_skip;
91447636 12625
2d21ac55
A
12626 boolean_t look_for_pages;
12627 vm_region_submap_short_info_64_t short_info;
12628
91447636
A
12629 if (map == VM_MAP_NULL) {
12630 /* no address space to work on */
12631 return KERN_INVALID_ARGUMENT;
12632 }
12633
5ba3f43e 12634
39236c6e
A
12635 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
12636 /*
12637 * "info" structure is not big enough and
12638 * would overflow
12639 */
12640 return KERN_INVALID_ARGUMENT;
12641 }
5ba3f43e 12642
39236c6e 12643 original_count = *count;
5ba3f43e 12644
39236c6e
A
12645 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
12646 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
12647 look_for_pages = FALSE;
12648 short_info = (vm_region_submap_short_info_64_t) submap_info;
12649 submap_info = NULL;
2d21ac55
A
12650 } else {
12651 look_for_pages = TRUE;
39236c6e 12652 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 12653 short_info = NULL;
5ba3f43e 12654
39236c6e
A
12655 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
12656 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
12657 }
91447636 12658 }
5ba3f43e 12659
91447636
A
12660 user_address = *address;
12661 user_max_depth = *nesting_depth;
5ba3f43e 12662
3e170ce0
A
12663 if (not_in_kdp) {
12664 vm_map_lock_read(map);
12665 }
12666
12667recurse_again:
91447636
A
12668 curr_entry = NULL;
12669 curr_map = map;
6d2010ae 12670 curr_address = user_address;
91447636 12671 curr_offset = 0;
6d2010ae 12672 curr_skip = 0;
91447636 12673 curr_depth = 0;
6d2010ae
A
12674 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
12675 curr_max_below = curr_address;
91447636
A
12676
12677 next_entry = NULL;
12678 next_map = NULL;
6d2010ae 12679 next_address = 0;
91447636 12680 next_offset = 0;
6d2010ae 12681 next_skip = 0;
91447636 12682 next_depth = 0;
6d2010ae
A
12683 next_max_above = (vm_map_offset_t) -1;
12684 next_max_below = (vm_map_offset_t) -1;
91447636 12685
91447636
A
12686 for (;;) {
12687 if (vm_map_lookup_entry(curr_map,
6d2010ae 12688 curr_address,
91447636
A
12689 &tmp_entry)) {
12690 /* tmp_entry contains the address we're looking for */
12691 curr_entry = tmp_entry;
12692 } else {
6d2010ae 12693 vm_map_offset_t skip;
91447636
A
12694 /*
12695 * The address is not mapped. "tmp_entry" is the
12696 * map entry preceding the address. We want the next
12697 * one, if it exists.
12698 */
12699 curr_entry = tmp_entry->vme_next;
6d2010ae 12700
91447636 12701 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
12702 (curr_entry->vme_start >=
12703 curr_address + curr_max_above)) {
91447636
A
12704 /* no next entry at this level: stop looking */
12705 if (not_in_kdp) {
12706 vm_map_unlock_read(curr_map);
12707 }
12708 curr_entry = NULL;
12709 curr_map = NULL;
3e170ce0 12710 curr_skip = 0;
91447636
A
12711 curr_offset = 0;
12712 curr_depth = 0;
6d2010ae
A
12713 curr_max_above = 0;
12714 curr_max_below = 0;
91447636
A
12715 break;
12716 }
6d2010ae
A
12717
12718 /* adjust current address and offset */
12719 skip = curr_entry->vme_start - curr_address;
12720 curr_address = curr_entry->vme_start;
3e170ce0 12721 curr_skip += skip;
6d2010ae
A
12722 curr_offset += skip;
12723 curr_max_above -= skip;
12724 curr_max_below = 0;
91447636
A
12725 }
12726
12727 /*
12728 * Is the next entry at this level closer to the address (or
12729		 * deeper in the submap chain) than the one we have
12730		 * found so far?
12731 */
12732 tmp_entry = curr_entry->vme_next;
12733 if (tmp_entry == vm_map_to_entry(curr_map)) {
12734 /* no next entry at this level */
6d2010ae
A
12735 } else if (tmp_entry->vme_start >=
12736 curr_address + curr_max_above) {
91447636
A
12737 /*
12738			 * tmp_entry is beyond the portion of this submap that is
12739			 * mapped at the upper level: ignore it.
12740 */
12741 } else if ((next_entry == NULL) ||
12742 (tmp_entry->vme_start + curr_offset <=
12743 next_entry->vme_start + next_offset)) {
12744 /*
12745 * We didn't have a "next_entry" or this one is
12746 * closer to the address we're looking for:
12747 * use this "tmp_entry" as the new "next_entry".
12748 */
12749 if (next_entry != NULL) {
12750 /* unlock the last "next_map" */
12751 if (next_map != curr_map && not_in_kdp) {
12752 vm_map_unlock_read(next_map);
12753 }
12754 }
12755 next_entry = tmp_entry;
12756 next_map = curr_map;
91447636 12757 next_depth = curr_depth;
6d2010ae
A
12758 next_address = next_entry->vme_start;
12759 next_skip = curr_skip;
3e170ce0 12760 next_skip += (next_address - curr_address);
6d2010ae
A
12761 next_offset = curr_offset;
12762 next_offset += (next_address - curr_address);
12763 next_max_above = MIN(next_max_above, curr_max_above);
12764 next_max_above = MIN(next_max_above,
12765 next_entry->vme_end - next_address);
12766 next_max_below = MIN(next_max_below, curr_max_below);
12767 next_max_below = MIN(next_max_below,
12768 next_address - next_entry->vme_start);
91447636
A
12769 }
12770
6d2010ae
A
12771 /*
12772 * "curr_max_{above,below}" allow us to keep track of the
12773 * portion of the submap that is actually mapped at this level:
12774 * the rest of that submap is irrelevant to us, since it's not
12775 * mapped here.
12776		 * The relevant portion of the submap starts at
3e170ce0 12777		 * "VME_OFFSET(curr_entry)" and extends for the size of "curr_entry".
6d2010ae
A
12778 */
12779 curr_max_above = MIN(curr_max_above,
12780 curr_entry->vme_end - curr_address);
12781 curr_max_below = MIN(curr_max_below,
12782 curr_address - curr_entry->vme_start);
12783
91447636
A
12784 if (!curr_entry->is_sub_map ||
12785 curr_depth >= user_max_depth) {
12786 /*
12787 * We hit a leaf map or we reached the maximum depth
12788 * we could, so stop looking. Keep the current map
12789 * locked.
12790 */
12791 break;
12792 }
12793
12794 /*
12795 * Get down to the next submap level.
12796 */
12797
12798 /*
12799 * Lock the next level and unlock the current level,
12800 * unless we need to keep it locked to access the "next_entry"
12801 * later.
12802 */
12803 if (not_in_kdp) {
3e170ce0 12804 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
12805 }
12806 if (curr_map == next_map) {
12807 /* keep "next_map" locked in case we need it */
12808 } else {
12809 /* release this map */
b0d623f7
A
12810 if (not_in_kdp)
12811 vm_map_unlock_read(curr_map);
91447636
A
12812 }
12813
12814 /*
12815 * Adjust the offset. "curr_entry" maps the submap
12816 * at relative address "curr_entry->vme_start" in the
3e170ce0 12817 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
12818 * bytes of the submap.
12819 * "curr_offset" always represents the offset of a virtual
12820 * address in the curr_map relative to the absolute address
12821 * space (i.e. the top-level VM map).
12822 */
12823 curr_offset +=
3e170ce0 12824 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 12825 curr_address = user_address + curr_offset;
91447636 12826 /* switch to the submap */
3e170ce0 12827 curr_map = VME_SUBMAP(curr_entry);
91447636 12828 curr_depth++;
91447636
A
12829 curr_entry = NULL;
12830 }
12831
12832 if (curr_entry == NULL) {
12833 /* no VM region contains the address... */
39037602
A
12834#if DEVELOPMENT || DEBUG
12835 if (vm_region_footprint && /* we want footprint numbers */
12836 look_for_pages && /* & we want page counts */
12837 next_entry == NULL && /* & there are no more regions */
12838 /* & we haven't already provided our fake region: */
12839 user_address == vm_map_last_entry(map)->vme_end) {
12840 ledger_amount_t nonvol, nonvol_compressed;
12841 /*
12842 * Add a fake memory region to account for
12843 * purgeable memory that counts towards this
12844 * task's memory footprint, i.e. the resident
12845 * compressed pages of non-volatile objects
12846 * owned by that task.
12847 */
12848 ledger_get_balance(
12849 map->pmap->ledger,
12850 task_ledgers.purgeable_nonvolatile,
12851 &nonvol);
12852 ledger_get_balance(
12853 map->pmap->ledger,
12854 task_ledgers.purgeable_nonvolatile_compressed,
12855 &nonvol_compressed);
12856 if (nonvol + nonvol_compressed == 0) {
12857 /* no purgeable memory usage to report */
12858 return KERN_FAILURE;
12859 }
12860 /* fake region to show nonvolatile footprint */
12861 submap_info->protection = VM_PROT_DEFAULT;
12862 submap_info->max_protection = VM_PROT_DEFAULT;
12863 submap_info->inheritance = VM_INHERIT_DEFAULT;
12864 submap_info->offset = 0;
12865 submap_info->user_tag = 0;
12866 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
12867 submap_info->pages_shared_now_private = 0;
12868 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
12869 submap_info->pages_dirtied = submap_info->pages_resident;
12870 submap_info->ref_count = 1;
12871 submap_info->shadow_depth = 0;
12872 submap_info->external_pager = 0;
12873 submap_info->share_mode = SM_PRIVATE;
12874 submap_info->is_submap = 0;
12875 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
12876 submap_info->object_id = 0x11111111;
12877 submap_info->user_wired_count = 0;
12878 submap_info->pages_reusable = 0;
12879 *nesting_depth = 0;
12880 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
12881 *address = user_address;
12882 return KERN_SUCCESS;
12883 }
12884#endif /* DEVELOPMENT || DEBUG */
91447636
A
12885 if (next_entry == NULL) {
12886 /* ... and no VM region follows it either */
12887 return KERN_INVALID_ADDRESS;
12888 }
12889 /* ... gather info about the next VM region */
12890 curr_entry = next_entry;
12891 curr_map = next_map; /* still locked ... */
6d2010ae
A
12892 curr_address = next_address;
12893 curr_skip = next_skip;
91447636
A
12894 curr_offset = next_offset;
12895 curr_depth = next_depth;
6d2010ae
A
12896 curr_max_above = next_max_above;
12897 curr_max_below = next_max_below;
91447636
A
12898 } else {
12899 /* we won't need "next_entry" after all */
12900 if (next_entry != NULL) {
12901 /* release "next_map" */
12902 if (next_map != curr_map && not_in_kdp) {
12903 vm_map_unlock_read(next_map);
12904 }
12905 }
12906 }
12907 next_entry = NULL;
12908 next_map = NULL;
12909 next_offset = 0;
6d2010ae 12910 next_skip = 0;
91447636 12911 next_depth = 0;
6d2010ae
A
12912 next_max_below = -1;
12913 next_max_above = -1;
91447636 12914
3e170ce0
A
12915 if (curr_entry->is_sub_map &&
12916 curr_depth < user_max_depth) {
12917 /*
12918 * We're not as deep as we could be: we must have
12919 * gone back up after not finding anything mapped
12920		 * below the original top-level map entry's range.
12921 * Let's move "curr_address" forward and recurse again.
12922 */
12923 user_address = curr_address;
12924 goto recurse_again;
12925 }
12926
91447636 12927 *nesting_depth = curr_depth;
6d2010ae
A
12928 *size = curr_max_above + curr_max_below;
12929 *address = user_address + curr_skip - curr_max_below;
91447636 12930
b0d623f7
A
12931// LP64todo: all the current tools are 32bit, obviously never worked for 64b
12932// so probably should be a real 32b ID vs. ptr.
12933// Current users just check for equality
39236c6e 12934#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 12935
2d21ac55 12936 if (look_for_pages) {
3e170ce0 12937 submap_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 12938 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
12939 submap_info->protection = curr_entry->protection;
12940 submap_info->inheritance = curr_entry->inheritance;
12941 submap_info->max_protection = curr_entry->max_protection;
12942 submap_info->behavior = curr_entry->behavior;
12943 submap_info->user_wired_count = curr_entry->user_wired_count;
12944 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 12945 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 12946 } else {
3e170ce0 12947 short_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 12948 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
12949 short_info->protection = curr_entry->protection;
12950 short_info->inheritance = curr_entry->inheritance;
12951 short_info->max_protection = curr_entry->max_protection;
12952 short_info->behavior = curr_entry->behavior;
12953 short_info->user_wired_count = curr_entry->user_wired_count;
12954 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 12955 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 12956 }
91447636
A
12957
12958 extended.pages_resident = 0;
12959 extended.pages_swapped_out = 0;
12960 extended.pages_shared_now_private = 0;
12961 extended.pages_dirtied = 0;
39236c6e 12962 extended.pages_reusable = 0;
91447636
A
12963 extended.external_pager = 0;
12964 extended.shadow_depth = 0;
3e170ce0
A
12965 extended.share_mode = SM_EMPTY;
12966 extended.ref_count = 0;
91447636
A
12967
12968 if (not_in_kdp) {
12969 if (!curr_entry->is_sub_map) {
6d2010ae
A
12970 vm_map_offset_t range_start, range_end;
12971 range_start = MAX((curr_address - curr_max_below),
12972 curr_entry->vme_start);
12973 range_end = MIN((curr_address + curr_max_above),
12974 curr_entry->vme_end);
91447636 12975 vm_map_region_walk(curr_map,
6d2010ae 12976 range_start,
91447636 12977 curr_entry,
3e170ce0 12978 (VME_OFFSET(curr_entry) +
6d2010ae
A
12979 (range_start -
12980 curr_entry->vme_start)),
12981 range_end - range_start,
2d21ac55 12982 &extended,
39236c6e 12983 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
12984 if (extended.external_pager &&
12985 extended.ref_count == 2 &&
12986 extended.share_mode == SM_SHARED) {
2d21ac55 12987 extended.share_mode = SM_PRIVATE;
91447636 12988 }
91447636
A
12989 } else {
12990 if (curr_entry->use_pmap) {
2d21ac55 12991 extended.share_mode = SM_TRUESHARED;
91447636 12992 } else {
2d21ac55 12993 extended.share_mode = SM_PRIVATE;
91447636 12994 }
3e170ce0 12995 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
91447636
A
12996 }
12997 }
12998
2d21ac55
A
12999 if (look_for_pages) {
13000 submap_info->pages_resident = extended.pages_resident;
13001 submap_info->pages_swapped_out = extended.pages_swapped_out;
13002 submap_info->pages_shared_now_private =
13003 extended.pages_shared_now_private;
13004 submap_info->pages_dirtied = extended.pages_dirtied;
13005 submap_info->external_pager = extended.external_pager;
13006 submap_info->shadow_depth = extended.shadow_depth;
13007 submap_info->share_mode = extended.share_mode;
13008 submap_info->ref_count = extended.ref_count;
5ba3f43e 13009
39236c6e
A
13010 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13011 submap_info->pages_reusable = extended.pages_reusable;
13012 }
2d21ac55
A
13013 } else {
13014 short_info->external_pager = extended.external_pager;
13015 short_info->shadow_depth = extended.shadow_depth;
13016 short_info->share_mode = extended.share_mode;
13017 short_info->ref_count = extended.ref_count;
13018 }
91447636
A
13019
13020 if (not_in_kdp) {
13021 vm_map_unlock_read(curr_map);
13022 }
13023
13024 return KERN_SUCCESS;
13025}
13026
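/*
 * Illustrative user-space sketch (not part of this file): roughly how the
 * nesting depth and the short/full submap info counts handled above are
 * exercised from user space through the mach_vm_region_recurse() MIG call.
 * The call, vm_region_submap_info_data_64_t and
 * VM_REGION_SUBMAP_INFO_COUNT_64 are public Mach interfaces; the loop
 * structure itself is only an assumption for demonstration.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
demo_dump_regions(task_t task)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	natural_t depth = 0;

	for (;;) {
		vm_region_submap_info_data_64_t info;
		mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

		kern_return_t kr = mach_vm_region_recurse(task, &addr, &size,
		    &depth, (vm_region_recurse_info_t)&info, &count);
		if (kr != KERN_SUCCESS) {
			break;	/* typically KERN_INVALID_ADDRESS: no region at or after addr */
		}
		if (info.is_submap) {
			/* descend into the submap instead of skipping over it */
			depth++;
			continue;
		}
		printf("0x%llx-0x%llx depth %u prot 0x%x\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.protection);
		addr += size;
	}
}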
1c79356b
A
13027/*
13028 * vm_region:
13029 *
13030 * User call to obtain information about a region in
13031 *	a task's address map. Several info flavors are supported:
13032 *	basic, basic 64-bit, extended and top.
13033 *
13034 * XXX The reserved and behavior fields cannot be filled
13035 * in until the vm merge from the IK is completed, and
13036 * vm_reserve is implemented.
1c79356b
A
13037 */
13038
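/*
 * Illustrative user-space sketch (not part of this file): querying the basic
 * 64-bit flavor handled below through the mach_vm_region() MIG call.  The
 * call, vm_region_basic_info_data_64_t and VM_REGION_BASIC_INFO_COUNT_64 are
 * public Mach interfaces; error handling is reduced to a minimum for brevity.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
demo_query_basic_info(mach_vm_address_t where)
{
	mach_vm_address_t addr = where;
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;	/* always returned as IP_NULL */

	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, &count,
	    &object_name);
	if (kr == KERN_SUCCESS) {
		/* addr/size now describe the region containing (or following) "where" */
	}
	return kr;
}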
13039kern_return_t
91447636 13040vm_map_region(
1c79356b 13041 vm_map_t map,
91447636
A
13042 vm_map_offset_t *address, /* IN/OUT */
13043 vm_map_size_t *size, /* OUT */
1c79356b
A
13044 vm_region_flavor_t flavor, /* IN */
13045 vm_region_info_t info, /* OUT */
91447636
A
13046 mach_msg_type_number_t *count, /* IN/OUT */
13047 mach_port_t *object_name) /* OUT */
1c79356b
A
13048{
13049 vm_map_entry_t tmp_entry;
1c79356b 13050 vm_map_entry_t entry;
91447636 13051 vm_map_offset_t start;
1c79356b 13052
5ba3f43e 13053 if (map == VM_MAP_NULL)
1c79356b
A
13054 return(KERN_INVALID_ARGUMENT);
13055
13056 switch (flavor) {
91447636 13057
1c79356b 13058 case VM_REGION_BASIC_INFO:
2d21ac55 13059 /* legacy for old 32-bit objects info */
1c79356b 13060 {
2d21ac55 13061 vm_region_basic_info_t basic;
91447636 13062
2d21ac55
A
13063 if (*count < VM_REGION_BASIC_INFO_COUNT)
13064 return(KERN_INVALID_ARGUMENT);
1c79356b 13065
2d21ac55
A
13066 basic = (vm_region_basic_info_t) info;
13067 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 13068
2d21ac55 13069 vm_map_lock_read(map);
1c79356b 13070
2d21ac55
A
13071 start = *address;
13072 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13073 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13074 vm_map_unlock_read(map);
13075 return(KERN_INVALID_ADDRESS);
13076 }
13077 } else {
13078 entry = tmp_entry;
1c79356b 13079 }
1c79356b 13080
2d21ac55 13081 start = entry->vme_start;
1c79356b 13082
3e170ce0 13083 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
13084 basic->protection = entry->protection;
13085 basic->inheritance = entry->inheritance;
13086 basic->max_protection = entry->max_protection;
13087 basic->behavior = entry->behavior;
13088 basic->user_wired_count = entry->user_wired_count;
13089 basic->reserved = entry->is_sub_map;
13090 *address = start;
13091 *size = (entry->vme_end - start);
91447636 13092
2d21ac55
A
13093 if (object_name) *object_name = IP_NULL;
13094 if (entry->is_sub_map) {
13095 basic->shared = FALSE;
13096 } else {
13097 basic->shared = entry->is_shared;
13098 }
91447636 13099
2d21ac55
A
13100 vm_map_unlock_read(map);
13101 return(KERN_SUCCESS);
91447636
A
13102 }
13103
13104 case VM_REGION_BASIC_INFO_64:
13105 {
2d21ac55 13106 vm_region_basic_info_64_t basic;
91447636 13107
2d21ac55
A
13108 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13109 return(KERN_INVALID_ARGUMENT);
13110
13111 basic = (vm_region_basic_info_64_t) info;
13112 *count = VM_REGION_BASIC_INFO_COUNT_64;
13113
13114 vm_map_lock_read(map);
13115
13116 start = *address;
13117 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13118 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13119 vm_map_unlock_read(map);
13120 return(KERN_INVALID_ADDRESS);
13121 }
13122 } else {
13123 entry = tmp_entry;
13124 }
91447636 13125
2d21ac55 13126 start = entry->vme_start;
91447636 13127
3e170ce0 13128 basic->offset = VME_OFFSET(entry);
2d21ac55
A
13129 basic->protection = entry->protection;
13130 basic->inheritance = entry->inheritance;
13131 basic->max_protection = entry->max_protection;
13132 basic->behavior = entry->behavior;
13133 basic->user_wired_count = entry->user_wired_count;
13134 basic->reserved = entry->is_sub_map;
13135 *address = start;
13136 *size = (entry->vme_end - start);
91447636 13137
2d21ac55
A
13138 if (object_name) *object_name = IP_NULL;
13139 if (entry->is_sub_map) {
13140 basic->shared = FALSE;
13141 } else {
13142 basic->shared = entry->is_shared;
91447636 13143 }
2d21ac55
A
13144
13145 vm_map_unlock_read(map);
13146 return(KERN_SUCCESS);
1c79356b
A
13147 }
13148 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
13149 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13150 return(KERN_INVALID_ARGUMENT);
39236c6e
A
13151 /*fallthru*/
13152 case VM_REGION_EXTENDED_INFO__legacy:
13153 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13154 return KERN_INVALID_ARGUMENT;
13155
13156 {
13157 vm_region_extended_info_t extended;
13158 mach_msg_type_number_t original_count;
1c79356b 13159
2d21ac55 13160 extended = (vm_region_extended_info_t) info;
1c79356b 13161
2d21ac55 13162 vm_map_lock_read(map);
1c79356b 13163
2d21ac55
A
13164 start = *address;
13165 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13166 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13167 vm_map_unlock_read(map);
13168 return(KERN_INVALID_ADDRESS);
13169 }
13170 } else {
13171 entry = tmp_entry;
1c79356b 13172 }
2d21ac55 13173 start = entry->vme_start;
1c79356b 13174
2d21ac55 13175 extended->protection = entry->protection;
3e170ce0 13176 extended->user_tag = VME_ALIAS(entry);
2d21ac55
A
13177 extended->pages_resident = 0;
13178 extended->pages_swapped_out = 0;
13179 extended->pages_shared_now_private = 0;
13180 extended->pages_dirtied = 0;
13181 extended->external_pager = 0;
13182 extended->shadow_depth = 0;
1c79356b 13183
39236c6e
A
13184 original_count = *count;
13185 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13186 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13187 } else {
13188 extended->pages_reusable = 0;
13189 *count = VM_REGION_EXTENDED_INFO_COUNT;
13190 }
13191
3e170ce0 13192 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 13193
2d21ac55
A
13194 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13195 extended->share_mode = SM_PRIVATE;
1c79356b 13196
2d21ac55
A
13197 if (object_name)
13198 *object_name = IP_NULL;
13199 *address = start;
13200 *size = (entry->vme_end - start);
1c79356b 13201
2d21ac55
A
13202 vm_map_unlock_read(map);
13203 return(KERN_SUCCESS);
1c79356b
A
13204 }
13205 case VM_REGION_TOP_INFO:
5ba3f43e 13206 {
2d21ac55 13207 vm_region_top_info_t top;
1c79356b 13208
2d21ac55
A
13209 if (*count < VM_REGION_TOP_INFO_COUNT)
13210 return(KERN_INVALID_ARGUMENT);
1c79356b 13211
2d21ac55
A
13212 top = (vm_region_top_info_t) info;
13213 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 13214
2d21ac55 13215 vm_map_lock_read(map);
1c79356b 13216
2d21ac55
A
13217 start = *address;
13218 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13219 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13220 vm_map_unlock_read(map);
13221 return(KERN_INVALID_ADDRESS);
13222 }
13223 } else {
13224 entry = tmp_entry;
1c79356b 13225
2d21ac55
A
13226 }
13227 start = entry->vme_start;
1c79356b 13228
2d21ac55
A
13229 top->private_pages_resident = 0;
13230 top->shared_pages_resident = 0;
1c79356b 13231
2d21ac55 13232 vm_map_region_top_walk(entry, top);
1c79356b 13233
2d21ac55
A
13234 if (object_name)
13235 *object_name = IP_NULL;
13236 *address = start;
13237 *size = (entry->vme_end - start);
1c79356b 13238
2d21ac55
A
13239 vm_map_unlock_read(map);
13240 return(KERN_SUCCESS);
1c79356b
A
13241 }
13242 default:
2d21ac55 13243 return(KERN_INVALID_ARGUMENT);
1c79356b
A
13244 }
13245}
13246
b0d623f7
A
13247#define OBJ_RESIDENT_COUNT(obj, entry_size) \
13248 MIN((entry_size), \
13249 ((obj)->all_reusable ? \
13250 (obj)->wired_page_count : \
13251 (obj)->resident_page_count - (obj)->reusable_page_count))
2d21ac55 13252
0c530ab8 13253void
91447636
A
13254vm_map_region_top_walk(
13255 vm_map_entry_t entry,
13256 vm_region_top_info_t top)
1c79356b 13257{
1c79356b 13258
3e170ce0 13259 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
13260 top->share_mode = SM_EMPTY;
13261 top->ref_count = 0;
13262 top->obj_id = 0;
13263 return;
1c79356b 13264 }
2d21ac55 13265
91447636 13266 {
2d21ac55
A
13267 struct vm_object *obj, *tmp_obj;
13268 int ref_count;
13269 uint32_t entry_size;
1c79356b 13270
b0d623f7 13271 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 13272
3e170ce0 13273 obj = VME_OBJECT(entry);
1c79356b 13274
2d21ac55
A
13275 vm_object_lock(obj);
13276
13277 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13278 ref_count--;
13279
b0d623f7 13280 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
13281 if (obj->shadow) {
13282 if (ref_count == 1)
b0d623f7
A
13283 top->private_pages_resident =
13284 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 13285 else
b0d623f7
A
13286 top->shared_pages_resident =
13287 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
13288 top->ref_count = ref_count;
13289 top->share_mode = SM_COW;
5ba3f43e 13290
2d21ac55
A
13291 while ((tmp_obj = obj->shadow)) {
13292 vm_object_lock(tmp_obj);
13293 vm_object_unlock(obj);
13294 obj = tmp_obj;
1c79356b 13295
2d21ac55
A
13296 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13297 ref_count--;
1c79356b 13298
b0d623f7
A
13299 assert(obj->reusable_page_count <= obj->resident_page_count);
13300 top->shared_pages_resident +=
13301 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
13302 top->ref_count += ref_count - 1;
13303 }
1c79356b 13304 } else {
6d2010ae
A
13305 if (entry->superpage_size) {
13306 top->share_mode = SM_LARGE_PAGE;
13307 top->shared_pages_resident = 0;
13308 top->private_pages_resident = entry_size;
13309 } else if (entry->needs_copy) {
2d21ac55 13310 top->share_mode = SM_COW;
b0d623f7
A
13311 top->shared_pages_resident =
13312 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
13313 } else {
13314 if (ref_count == 1 ||
13315 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13316 top->share_mode = SM_PRIVATE;
39236c6e
A
13317 top->private_pages_resident =
13318 OBJ_RESIDENT_COUNT(obj,
13319 entry_size);
2d21ac55
A
13320 } else {
13321 top->share_mode = SM_SHARED;
b0d623f7
A
13322 top->shared_pages_resident =
13323 OBJ_RESIDENT_COUNT(obj,
13324 entry_size);
2d21ac55
A
13325 }
13326 }
13327 top->ref_count = ref_count;
1c79356b 13328 }
b0d623f7 13329 /* XXX K64: obj_id will be truncated */
39236c6e 13330 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 13331
2d21ac55 13332 vm_object_unlock(obj);
1c79356b 13333 }
91447636
A
13334}
13335
0c530ab8 13336void
91447636
A
13337vm_map_region_walk(
13338 vm_map_t map,
2d21ac55
A
13339 vm_map_offset_t va,
13340 vm_map_entry_t entry,
91447636
A
13341 vm_object_offset_t offset,
13342 vm_object_size_t range,
2d21ac55 13343 vm_region_extended_info_t extended,
39236c6e
A
13344 boolean_t look_for_pages,
13345 mach_msg_type_number_t count)
91447636 13346{
39037602
A
13347 struct vm_object *obj, *tmp_obj;
13348 vm_map_offset_t last_offset;
13349 int i;
13350 int ref_count;
91447636
A
13351 struct vm_object *shadow_object;
13352 int shadow_depth;
13353
3e170ce0 13354 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 13355 (entry->is_sub_map) ||
3e170ce0 13356 (VME_OBJECT(entry)->phys_contiguous &&
6d2010ae 13357 !entry->superpage_size)) {
2d21ac55
A
13358 extended->share_mode = SM_EMPTY;
13359 extended->ref_count = 0;
13360 return;
1c79356b 13361 }
6d2010ae
A
13362
13363 if (entry->superpage_size) {
13364 extended->shadow_depth = 0;
13365 extended->share_mode = SM_LARGE_PAGE;
13366 extended->ref_count = 1;
13367 extended->external_pager = 0;
13368 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13369 extended->shadow_depth = 0;
13370 return;
13371 }
13372
39037602 13373 obj = VME_OBJECT(entry);
2d21ac55 13374
39037602 13375 vm_object_lock(obj);
2d21ac55 13376
39037602
A
13377 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13378 ref_count--;
2d21ac55 13379
39037602
A
13380 if (look_for_pages) {
13381 for (last_offset = offset + range;
13382 offset < last_offset;
13383 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13384#if DEVELOPMENT || DEBUG
13385 if (vm_region_footprint) {
13386 if (obj->purgable != VM_PURGABLE_DENY) {
13387 /* alternate accounting */
13388 } else if (entry->iokit_acct) {
13389 /* alternate accounting */
13390 extended->pages_resident++;
13391 extended->pages_dirtied++;
13392 } else {
13393 int disp;
13394
13395 disp = 0;
13396 pmap_query_page_info(map->pmap, va, &disp);
13397 if (disp & PMAP_QUERY_PAGE_PRESENT) {
13398 extended->pages_resident++;
13399 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13400 extended->pages_reusable++;
13401 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13402 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13403 /* alternate accounting */
13404 } else {
13405 extended->pages_dirtied++;
13406 }
13407 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
13408 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
13409 /* alternate accounting */
13410 } else {
13411 extended->pages_swapped_out++;
13412 }
b0d623f7 13413 }
2d21ac55 13414 }
39037602 13415 continue;
2d21ac55 13416 }
39037602
A
13417#endif /* DEVELOPMENT || DEBUG */
13418 vm_map_region_look_for_page(map, va, obj,
13419 offset, ref_count,
13420 0, extended, count);
2d21ac55 13421 }
39037602
A
13422#if DEVELOPMENT || DEBUG
13423 if (vm_region_footprint) {
13424 goto collect_object_info;
13425 }
13426#endif /* DEVELOPMENT || DEBUG */
13427 } else {
13428#if DEVELOPMENT || DEBUG
13429 collect_object_info:
13430#endif /* DEVELOPMENT || DEBUG */
13431 shadow_object = obj->shadow;
13432 shadow_depth = 0;
2d21ac55 13433
39037602
A
13434 if ( !(obj->pager_trusted) && !(obj->internal))
13435 extended->external_pager = 1;
13436
13437 if (shadow_object != VM_OBJECT_NULL) {
13438 vm_object_lock(shadow_object);
13439 for (;
13440 shadow_object != VM_OBJECT_NULL;
13441 shadow_depth++) {
13442 vm_object_t next_shadow;
13443
13444 if ( !(shadow_object->pager_trusted) &&
13445 !(shadow_object->internal))
13446 extended->external_pager = 1;
13447
13448 next_shadow = shadow_object->shadow;
13449 if (next_shadow) {
13450 vm_object_lock(next_shadow);
13451 }
13452 vm_object_unlock(shadow_object);
13453 shadow_object = next_shadow;
2d21ac55 13454 }
91447636 13455 }
39037602
A
13456 extended->shadow_depth = shadow_depth;
13457 }
1c79356b 13458
39037602
A
13459 if (extended->shadow_depth || entry->needs_copy)
13460 extended->share_mode = SM_COW;
13461 else {
13462 if (ref_count == 1)
13463 extended->share_mode = SM_PRIVATE;
13464 else {
13465 if (obj->true_share)
13466 extended->share_mode = SM_TRUESHARED;
13467 else
13468 extended->share_mode = SM_SHARED;
2d21ac55 13469 }
39037602
A
13470 }
13471 extended->ref_count = ref_count - extended->shadow_depth;
5ba3f43e 13472
39037602
A
13473 for (i = 0; i < extended->shadow_depth; i++) {
13474 if ((tmp_obj = obj->shadow) == 0)
13475 break;
13476 vm_object_lock(tmp_obj);
2d21ac55 13477 vm_object_unlock(obj);
1c79356b 13478
39037602
A
13479 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
13480 ref_count--;
13481
13482 extended->ref_count += ref_count;
13483 obj = tmp_obj;
13484 }
13485 vm_object_unlock(obj);
91447636 13486
39037602
A
13487 if (extended->share_mode == SM_SHARED) {
13488 vm_map_entry_t cur;
13489 vm_map_entry_t last;
13490 int my_refs;
91447636 13491
39037602
A
13492 obj = VME_OBJECT(entry);
13493 last = vm_map_to_entry(map);
13494 my_refs = 0;
91447636 13495
39037602
A
13496 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13497 ref_count--;
13498 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
13499 my_refs += vm_map_region_count_obj_refs(cur, obj);
13500
13501 if (my_refs == ref_count)
13502 extended->share_mode = SM_PRIVATE_ALIASED;
13503 else if (my_refs > 1)
13504 extended->share_mode = SM_SHARED_ALIASED;
91447636 13505 }
1c79356b
A
13506}
13507
1c79356b 13508
91447636
A
13509/* object is locked on entry and locked on return */
13510
13511
13512static void
13513vm_map_region_look_for_page(
13514 __unused vm_map_t map,
2d21ac55
A
13515 __unused vm_map_offset_t va,
13516 vm_object_t object,
13517 vm_object_offset_t offset,
91447636
A
13518 int max_refcnt,
13519 int depth,
39236c6e
A
13520 vm_region_extended_info_t extended,
13521 mach_msg_type_number_t count)
1c79356b 13522{
39037602
A
13523 vm_page_t p;
13524 vm_object_t shadow;
13525 int ref_count;
13526 vm_object_t caller_object;
13527
91447636
A
13528 shadow = object->shadow;
13529 caller_object = object;
1c79356b 13530
5ba3f43e 13531
91447636 13532 while (TRUE) {
1c79356b 13533
91447636 13534 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 13535 extended->external_pager = 1;
1c79356b 13536
91447636
A
13537 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
13538 if (shadow && (max_refcnt == 1))
13539 extended->pages_shared_now_private++;
1c79356b 13540
39236c6e 13541 if (!p->fictitious &&
39037602 13542 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
91447636 13543 extended->pages_dirtied++;
39236c6e 13544 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
39037602 13545 if (p->reusable || object->all_reusable) {
39236c6e
A
13546 extended->pages_reusable++;
13547 }
13548 }
1c79356b 13549
39236c6e 13550 extended->pages_resident++;
91447636
A
13551
13552 if(object != caller_object)
2d21ac55 13553 vm_object_unlock(object);
91447636
A
13554
13555 return;
1c79356b 13556 }
39236c6e
A
13557 if (object->internal &&
13558 object->alive &&
13559 !object->terminating &&
13560 object->pager_ready) {
13561
39037602
A
13562 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
13563 == VM_EXTERNAL_STATE_EXISTS) {
13564 /* the pager has that page */
13565 extended->pages_swapped_out++;
13566 if (object != caller_object)
13567 vm_object_unlock(object);
13568 return;
2d21ac55 13569 }
1c79356b 13570 }
2d21ac55 13571
91447636 13572 if (shadow) {
2d21ac55 13573 vm_object_lock(shadow);
1c79356b 13574
91447636
A
13575 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
13576 ref_count--;
1c79356b 13577
91447636
A
13578 if (++depth > extended->shadow_depth)
13579 extended->shadow_depth = depth;
1c79356b 13580
91447636
A
13581 if (ref_count > max_refcnt)
13582 max_refcnt = ref_count;
5ba3f43e 13583
91447636 13584 if(object != caller_object)
2d21ac55 13585 vm_object_unlock(object);
91447636 13586
6d2010ae 13587 offset = offset + object->vo_shadow_offset;
91447636
A
13588 object = shadow;
13589 shadow = object->shadow;
13590 continue;
1c79356b 13591 }
91447636 13592 if(object != caller_object)
2d21ac55 13593 vm_object_unlock(object);
91447636
A
13594 break;
13595 }
13596}
1c79356b 13597
91447636
A
13598static int
13599vm_map_region_count_obj_refs(
13600 vm_map_entry_t entry,
13601 vm_object_t object)
13602{
39037602
A
13603 int ref_count;
13604 vm_object_t chk_obj;
13605 vm_object_t tmp_obj;
1c79356b 13606
3e170ce0 13607 if (VME_OBJECT(entry) == 0)
2d21ac55 13608 return(0);
1c79356b 13609
91447636 13610 if (entry->is_sub_map)
2d21ac55 13611 return(0);
91447636 13612 else {
2d21ac55 13613 ref_count = 0;
1c79356b 13614
3e170ce0 13615 chk_obj = VME_OBJECT(entry);
2d21ac55 13616 vm_object_lock(chk_obj);
1c79356b 13617
2d21ac55
A
13618 while (chk_obj) {
13619 if (chk_obj == object)
13620 ref_count++;
13621 tmp_obj = chk_obj->shadow;
13622 if (tmp_obj)
13623 vm_object_lock(tmp_obj);
13624 vm_object_unlock(chk_obj);
1c79356b 13625
2d21ac55
A
13626 chk_obj = tmp_obj;
13627 }
1c79356b 13628 }
91447636 13629 return(ref_count);
1c79356b
A
13630}
13631
13632
13633/*
91447636
A
13634 * Routine: vm_map_simplify
13635 *
13636 * Description:
13637 * Attempt to simplify the map representation in
13638 * the vicinity of the given starting address.
13639 * Note:
13640 * This routine is intended primarily to keep the
13641 * kernel maps more compact -- they generally don't
13642 * benefit from the "expand a map entry" technology
13643 * at allocation time because the adjacent entry
13644 * is often wired down.
1c79356b 13645 */
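/*
 * Minimal standalone sketch (illustration only, hypothetical "demo" types):
 * the core coalescing test that vm_map_simplify_entry() below performs on
 * real vm_map_entry_t's -- two entries can merge only if they are virtually
 * contiguous, back the same object at contiguous offsets, and agree on the
 * attributes that the real code compares field by field.
 */
#include <stdbool.h>
#include <stdint.h>

struct demo_entry {
	uint64_t	start, end;	/* virtual range [start, end) */
	void		*object;	/* backing object */
	uint64_t	offset;		/* offset of "start" within the object */
	int		protection;
	int		max_protection;
	int		inheritance;
	int		wired_count;
};

static bool
demo_can_coalesce(const struct demo_entry *prev, const struct demo_entry *cur)
{
	return prev->end == cur->start &&
	    prev->object == cur->object &&
	    prev->offset + (prev->end - prev->start) == cur->offset &&
	    prev->protection == cur->protection &&
	    prev->max_protection == cur->max_protection &&
	    prev->inheritance == cur->inheritance &&
	    prev->wired_count == cur->wired_count;
}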
91447636
A
13646void
13647vm_map_simplify_entry(
13648 vm_map_t map,
13649 vm_map_entry_t this_entry)
1c79356b 13650{
91447636 13651 vm_map_entry_t prev_entry;
1c79356b 13652
91447636 13653 counter(c_vm_map_simplify_entry_called++);
1c79356b 13654
91447636 13655 prev_entry = this_entry->vme_prev;
1c79356b 13656
91447636 13657 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 13658 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 13659
91447636 13660 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 13661
2d21ac55 13662 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
13663 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
13664 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
91447636 13665 prev_entry->vme_start))
3e170ce0 13666 == VME_OFFSET(this_entry)) &&
1c79356b 13667
fe8ab488
A
13668 (prev_entry->behavior == this_entry->behavior) &&
13669 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
13670 (prev_entry->protection == this_entry->protection) &&
13671 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
13672 (prev_entry->inheritance == this_entry->inheritance) &&
13673 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 13674 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 13675 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
13676 (prev_entry->permanent == this_entry->permanent) &&
13677 (prev_entry->map_aligned == this_entry->map_aligned) &&
13678 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
13679 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
13680 /* from_reserved_zone: OK if that field doesn't match */
13681 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0
A
13682 (prev_entry->vme_resilient_codesign ==
13683 this_entry->vme_resilient_codesign) &&
13684 (prev_entry->vme_resilient_media ==
13685 this_entry->vme_resilient_media) &&
fe8ab488 13686
91447636
A
13687 (prev_entry->wired_count == this_entry->wired_count) &&
13688 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 13689
39037602 13690 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
13691 (prev_entry->in_transition == FALSE) &&
13692 (this_entry->in_transition == FALSE) &&
13693 (prev_entry->needs_wakeup == FALSE) &&
13694 (this_entry->needs_wakeup == FALSE) &&
13695 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
13696 (this_entry->is_shared == FALSE) &&
13697 (prev_entry->superpage_size == FALSE) &&
13698 (this_entry->superpage_size == FALSE)
2d21ac55 13699 ) {
316670eb 13700 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 13701 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
13702 if (prev_entry->map_aligned)
13703 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
13704 VM_MAP_PAGE_MASK(map)));
91447636 13705 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
13706 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
13707
13708 if (map->holelistenabled) {
13709 vm_map_store_update_first_free(map, this_entry, TRUE);
13710 }
13711
2d21ac55 13712 if (prev_entry->is_sub_map) {
3e170ce0 13713 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 13714 } else {
3e170ce0 13715 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 13716 }
91447636 13717 vm_map_entry_dispose(map, prev_entry);
0c530ab8 13718 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 13719 counter(c_vm_map_simplified++);
1c79356b 13720 }
91447636 13721}
1c79356b 13722
91447636
A
13723void
13724vm_map_simplify(
13725 vm_map_t map,
13726 vm_map_offset_t start)
13727{
13728 vm_map_entry_t this_entry;
1c79356b 13729
91447636
A
13730 vm_map_lock(map);
13731 if (vm_map_lookup_entry(map, start, &this_entry)) {
13732 vm_map_simplify_entry(map, this_entry);
13733 vm_map_simplify_entry(map, this_entry->vme_next);
13734 }
13735 counter(c_vm_map_simplify_called++);
13736 vm_map_unlock(map);
13737}
1c79356b 13738
91447636
A
13739static void
13740vm_map_simplify_range(
13741 vm_map_t map,
13742 vm_map_offset_t start,
13743 vm_map_offset_t end)
13744{
13745 vm_map_entry_t entry;
1c79356b 13746
91447636
A
13747 /*
13748 * The map should be locked (for "write") by the caller.
13749 */
1c79356b 13750
91447636
A
13751 if (start >= end) {
13752 /* invalid address range */
13753 return;
13754 }
1c79356b 13755
39236c6e
A
13756 start = vm_map_trunc_page(start,
13757 VM_MAP_PAGE_MASK(map));
13758 end = vm_map_round_page(end,
13759 VM_MAP_PAGE_MASK(map));
2d21ac55 13760
91447636
A
13761 if (!vm_map_lookup_entry(map, start, &entry)) {
13762 /* "start" is not mapped and "entry" ends before "start" */
13763 if (entry == vm_map_to_entry(map)) {
13764 /* start with first entry in the map */
13765 entry = vm_map_first_entry(map);
13766 } else {
13767 /* start with next entry */
13768 entry = entry->vme_next;
13769 }
13770 }
5ba3f43e 13771
91447636
A
13772 while (entry != vm_map_to_entry(map) &&
13773 entry->vme_start <= end) {
13774 /* try and coalesce "entry" with its previous entry */
13775 vm_map_simplify_entry(map, entry);
13776 entry = entry->vme_next;
13777 }
13778}
1c79356b 13779
1c79356b 13780
91447636
A
13781/*
13782 * Routine: vm_map_machine_attribute
13783 * Purpose:
13784 * Provide machine-specific attributes to mappings,
13785 *	such as cacheability, etc., for machines that provide
13786 * them. NUMA architectures and machines with big/strange
13787 * caches will use this.
13788 * Note:
13789 * Responsibilities for locking and checking are handled here,
13790 * everything else in the pmap module. If any non-volatile
13791 * information must be kept, the pmap module should handle
13792 * it itself. [This assumes that attributes do not
13793 * need to be inherited, which seems ok to me]
13794 */
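/*
 * Illustrative sketch (an assumption: an in-kernel caller holding a valid
 * map reference): flushing the data cache over a buffer by way of the
 * routine defined below.  MATTR_CACHE and MATTR_VAL_CACHE_FLUSH come from
 * <mach/vm_attributes.h>; the buffer bounds are hypothetical.
 */
static kern_return_t
demo_flush_buffer_cache(
	vm_map_t	map,
	vm_map_offset_t	buf_start,
	vm_map_size_t	buf_len)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_map_machine_attribute(
		map,
		vm_map_trunc_page(buf_start, VM_MAP_PAGE_MASK(map)),
		vm_map_round_page(buf_start + buf_len, VM_MAP_PAGE_MASK(map)),
		MATTR_CACHE,
		&value);
}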
13795kern_return_t
13796vm_map_machine_attribute(
13797 vm_map_t map,
13798 vm_map_offset_t start,
13799 vm_map_offset_t end,
13800 vm_machine_attribute_t attribute,
13801 vm_machine_attribute_val_t* value) /* IN/OUT */
13802{
13803 kern_return_t ret;
13804 vm_map_size_t sync_size;
13805 vm_map_entry_t entry;
5ba3f43e 13806
91447636
A
13807 if (start < vm_map_min(map) || end > vm_map_max(map))
13808 return KERN_INVALID_ADDRESS;
1c79356b 13809
91447636
A
13810 /* Figure how much memory we need to flush (in page increments) */
13811 sync_size = end - start;
1c79356b 13812
91447636 13813 vm_map_lock(map);
5ba3f43e
A
13814
13815 if (attribute != MATTR_CACHE) {
91447636
A
13816 /* If we don't have to find physical addresses, we */
13817 /* don't have to do an explicit traversal here. */
13818 ret = pmap_attribute(map->pmap, start, end-start,
13819 attribute, value);
13820 vm_map_unlock(map);
13821 return ret;
13822 }
1c79356b 13823
91447636 13824 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 13825
91447636
A
13826 while(sync_size) {
13827 if (vm_map_lookup_entry(map, start, &entry)) {
13828 vm_map_size_t sub_size;
13829 if((entry->vme_end - start) > sync_size) {
13830 sub_size = sync_size;
13831 sync_size = 0;
13832 } else {
13833 sub_size = entry->vme_end - start;
2d21ac55 13834 sync_size -= sub_size;
91447636
A
13835 }
13836 if(entry->is_sub_map) {
13837 vm_map_offset_t sub_start;
13838 vm_map_offset_t sub_end;
1c79356b 13839
5ba3f43e 13840 sub_start = (start - entry->vme_start)
3e170ce0 13841 + VME_OFFSET(entry);
91447636
A
13842 sub_end = sub_start + sub_size;
13843 vm_map_machine_attribute(
5ba3f43e 13844 VME_SUBMAP(entry),
91447636
A
13845 sub_start,
13846 sub_end,
13847 attribute, value);
13848 } else {
3e170ce0 13849 if (VME_OBJECT(entry)) {
91447636
A
13850 vm_page_t m;
13851 vm_object_t object;
13852 vm_object_t base_object;
13853 vm_object_t last_object;
13854 vm_object_offset_t offset;
13855 vm_object_offset_t base_offset;
13856 vm_map_size_t range;
13857 range = sub_size;
13858 offset = (start - entry->vme_start)
3e170ce0 13859 + VME_OFFSET(entry);
91447636 13860 base_offset = offset;
3e170ce0 13861 object = VME_OBJECT(entry);
91447636
A
13862 base_object = object;
13863 last_object = NULL;
1c79356b 13864
91447636 13865 vm_object_lock(object);
1c79356b 13866
91447636
A
13867 while (range) {
13868 m = vm_page_lookup(
13869 object, offset);
1c79356b 13870
91447636 13871 if (m && !m->fictitious) {
5ba3f43e 13872 ret =
2d21ac55 13873 pmap_attribute_cache_sync(
5ba3f43e
A
13874 VM_PAGE_GET_PHYS_PAGE(m),
13875 PAGE_SIZE,
2d21ac55 13876 attribute, value);
5ba3f43e 13877
91447636 13878 } else if (object->shadow) {
6d2010ae 13879 offset = offset + object->vo_shadow_offset;
91447636
A
13880 last_object = object;
13881 object = object->shadow;
13882 vm_object_lock(last_object->shadow);
13883 vm_object_unlock(last_object);
13884 continue;
13885 }
13886 range -= PAGE_SIZE;
1c79356b 13887
91447636
A
13888 if (base_object != object) {
13889 vm_object_unlock(object);
13890 vm_object_lock(base_object);
13891 object = base_object;
13892 }
13893 /* Bump to the next page */
13894 base_offset += PAGE_SIZE;
13895 offset = base_offset;
13896 }
13897 vm_object_unlock(object);
13898 }
13899 }
13900 start += sub_size;
13901 } else {
13902 vm_map_unlock(map);
13903 return KERN_FAILURE;
13904 }
5ba3f43e 13905
1c79356b 13906 }
e5568f75 13907
91447636 13908 vm_map_unlock(map);
e5568f75 13909
91447636
A
13910 return ret;
13911}
e5568f75 13912
91447636
A
13913/*
13914 * vm_map_behavior_set:
13915 *
13916 * Sets the paging reference behavior of the specified address
13917 * range in the target map. Paging reference behavior affects
5ba3f43e 13918 * how pagein operations resulting from faults on the map will be
91447636
A
13919 * clustered.
13920 */
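/*
 * Illustrative user-space sketch (not part of this file): these behaviors are
 * normally reached via madvise(2), whose advice values the BSD layer
 * translates into VM_BEHAVIOR_* before calling this routine (that translation
 * lives outside this file and is assumed here).  Hinting a sequential scan:
 */
#include <sys/mman.h>
#include <stddef.h>

static int
demo_hint_sequential(void *buf, size_t len)
{
	/* expected to reach VM_BEHAVIOR_SEQUENTIAL below via the BSD layer */
	return madvise(buf, len, MADV_SEQUENTIAL);
}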
5ba3f43e 13921kern_return_t
91447636
A
13922vm_map_behavior_set(
13923 vm_map_t map,
13924 vm_map_offset_t start,
13925 vm_map_offset_t end,
13926 vm_behavior_t new_behavior)
13927{
39037602 13928 vm_map_entry_t entry;
91447636 13929 vm_map_entry_t temp_entry;
e5568f75 13930
91447636 13931 XPR(XPR_VM_MAP,
2d21ac55 13932 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 13933 map, start, end, new_behavior, 0);
e5568f75 13934
6d2010ae
A
13935 if (start > end ||
13936 start < vm_map_min(map) ||
13937 end > vm_map_max(map)) {
13938 return KERN_NO_SPACE;
13939 }
13940
91447636 13941 switch (new_behavior) {
b0d623f7
A
13942
13943 /*
13944 * This first block of behaviors all set a persistent state on the specified
13945 * memory range. All we have to do here is to record the desired behavior
13946 * in the vm_map_entry_t's.
13947 */
13948
91447636
A
13949 case VM_BEHAVIOR_DEFAULT:
13950 case VM_BEHAVIOR_RANDOM:
13951 case VM_BEHAVIOR_SEQUENTIAL:
13952 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
13953 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
13954 vm_map_lock(map);
5ba3f43e 13955
b0d623f7
A
13956 /*
13957 * The entire address range must be valid for the map.
5ba3f43e 13958 * Note that vm_map_range_check() does a
b0d623f7
A
13959 * vm_map_lookup_entry() internally and returns the
13960 * entry containing the start of the address range if
13961 * the entire range is valid.
13962 */
13963 if (vm_map_range_check(map, start, end, &temp_entry)) {
13964 entry = temp_entry;
13965 vm_map_clip_start(map, entry, start);
13966 }
13967 else {
13968 vm_map_unlock(map);
13969 return(KERN_INVALID_ADDRESS);
13970 }
5ba3f43e 13971
b0d623f7
A
13972 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
13973 vm_map_clip_end(map, entry, end);
fe8ab488
A
13974 if (entry->is_sub_map) {
13975 assert(!entry->use_pmap);
13976 }
5ba3f43e 13977
b0d623f7
A
13978 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
13979 entry->zero_wired_pages = TRUE;
13980 } else {
13981 entry->behavior = new_behavior;
13982 }
13983 entry = entry->vme_next;
13984 }
5ba3f43e 13985
b0d623f7 13986 vm_map_unlock(map);
91447636 13987 break;
b0d623f7
A
13988
13989 /*
13990 * The rest of these are different from the above in that they cause
5ba3f43e 13991 * an immediate action to take place as opposed to setting a behavior that
b0d623f7
A
13992 * affects future actions.
13993 */
13994
91447636 13995 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
13996 return vm_map_willneed(map, start, end);
13997
91447636 13998 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
13999 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14000
14001 case VM_BEHAVIOR_FREE:
14002 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14003
14004 case VM_BEHAVIOR_REUSABLE:
14005 return vm_map_reusable_pages(map, start, end);
14006
14007 case VM_BEHAVIOR_REUSE:
14008 return vm_map_reuse_pages(map, start, end);
14009
14010 case VM_BEHAVIOR_CAN_REUSE:
14011 return vm_map_can_reuse(map, start, end);
14012
3e170ce0
A
14013#if MACH_ASSERT
14014 case VM_BEHAVIOR_PAGEOUT:
14015 return vm_map_pageout(map, start, end);
14016#endif /* MACH_ASSERT */
14017
1c79356b 14018 default:
91447636 14019 return(KERN_INVALID_ARGUMENT);
1c79356b 14020 }
1c79356b 14021
b0d623f7
A
14022 return(KERN_SUCCESS);
14023}
14024
14025
14026/*
14027 * Internals for madvise(MADV_WILLNEED) system call.
14028 *
14029 * The present implementation is to do a read-ahead if the mapping corresponds
14030 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
14031 * and basically ignore the "advice" (which we are always free to do).
14032 */
14033
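/*
 * Illustrative user-space sketch (not part of this file): the case the
 * comment above describes -- asking for read-ahead on a file mapping.
 * File name and length are hypothetical; errors simply abort the sketch.
 */
#include <fcntl.h>
#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

static void *
demo_map_and_prefetch(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	void *p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
	close(fd);			/* the mapping keeps its own reference */
	if (p == MAP_FAILED)
		return NULL;

	/* asynchronous read-ahead; failure is not fatal, it is only advice */
	(void)madvise(p, len, MADV_WILLNEED);
	return p;
}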
14034
14035static kern_return_t
14036vm_map_willneed(
14037 vm_map_t map,
14038 vm_map_offset_t start,
14039 vm_map_offset_t end
14040)
14041{
14042 vm_map_entry_t entry;
14043 vm_object_t object;
14044 memory_object_t pager;
14045 struct vm_object_fault_info fault_info;
14046 kern_return_t kr;
14047 vm_object_size_t len;
14048 vm_object_offset_t offset;
1c79356b 14049
91447636 14050 /*
b0d623f7
A
14051 * Fill in static values in fault_info. Several fields get ignored by the code
14052 * we call, but we'll fill them in anyway since uninitialized fields are bad
14053 * when it comes to future backwards compatibility.
91447636 14054 */
b0d623f7
A
14055
14056 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14057 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14058 fault_info.no_cache = FALSE; /* ignored value */
14059 fault_info.stealth = TRUE;
6d2010ae
A
14060 fault_info.io_sync = FALSE;
14061 fault_info.cs_bypass = FALSE;
0b4c1975 14062 fault_info.mark_zf_absent = FALSE;
316670eb 14063 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
14064
14065 /*
14066 * The MADV_WILLNEED operation doesn't require any changes to the
14067 * vm_map_entry_t's, so the read lock is sufficient.
14068 */
14069
14070 vm_map_lock_read(map);
14071
14072 /*
14073 * The madvise semantics require that the address range be fully
14074 * allocated with no holes. Otherwise, we're required to return
14075 * an error.
14076 */
14077
6d2010ae
A
14078 if (! vm_map_range_check(map, start, end, &entry)) {
14079 vm_map_unlock_read(map);
14080 return KERN_INVALID_ADDRESS;
14081 }
b0d623f7 14082
6d2010ae
A
14083 /*
14084 * Examine each vm_map_entry_t in the range.
14085 */
14086 for (; entry != vm_map_to_entry(map) && start < end; ) {
5ba3f43e 14087
b0d623f7 14088 /*
6d2010ae
A
14089 * The first time through, the start address could be anywhere
14090 * within the vm_map_entry we found. So adjust the offset to
14091 * correspond. After that, the offset will always be zero to
14092 * correspond to the beginning of the current vm_map_entry.
b0d623f7 14093 */
3e170ce0 14094 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 14095
6d2010ae
A
14096 /*
14097 * Set the length so we don't go beyond the end of the
14098 * map_entry or beyond the end of the range we were given.
14099 * This range could span also multiple map entries all of which
14100 * map different files, so make sure we only do the right amount
14101 * of I/O for each object. Note that it's possible for there
14102 * to be multiple map entries all referring to the same object
14103 * but with different page permissions, but it's not worth
14104 * trying to optimize that case.
14105 */
14106 len = MIN(entry->vme_end - start, end - start);
b0d623f7 14107
6d2010ae
A
14108 if ((vm_size_t) len != len) {
14109 /* 32-bit overflow */
14110 len = (vm_size_t) (0 - PAGE_SIZE);
14111 }
14112 fault_info.cluster_size = (vm_size_t) len;
5ba3f43e 14113 fault_info.lo_offset = offset;
6d2010ae 14114 fault_info.hi_offset = offset + len;
3e170ce0 14115 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
14116 fault_info.pmap_options = 0;
14117 if (entry->iokit_acct ||
14118 (!entry->is_sub_map && !entry->use_pmap)) {
14119 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14120 }
b0d623f7 14121
6d2010ae
A
14122 /*
14123 * If there's no read permission to this mapping, then just
14124 * skip it.
14125 */
14126 if ((entry->protection & VM_PROT_READ) == 0) {
14127 entry = entry->vme_next;
14128 start = entry->vme_start;
14129 continue;
14130 }
b0d623f7 14131
6d2010ae
A
14132 /*
14133 * Find the file object backing this map entry. If there is
14134 * none, then we simply ignore the "will need" advice for this
14135 * entry and go on to the next one.
14136 */
14137 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14138 entry = entry->vme_next;
14139 start = entry->vme_start;
14140 continue;
14141 }
b0d623f7 14142
6d2010ae
A
14143 /*
14144 * The data_request() could take a long time, so let's
14145 * release the map lock to avoid blocking other threads.
14146 */
14147 vm_map_unlock_read(map);
b0d623f7 14148
6d2010ae
A
14149 vm_object_paging_begin(object);
14150 pager = object->pager;
14151 vm_object_unlock(object);
b0d623f7 14152
6d2010ae
A
14153 /*
14154 * Get the data from the object asynchronously.
14155 *
14156 * Note that memory_object_data_request() places limits on the
14157 * amount of I/O it will do. Regardless of the len we
fe8ab488 14158 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
14159 * silently truncates the len to that size. This isn't
14160 * necessarily bad since madvise shouldn't really be used to
14161 * page in unlimited amounts of data. Other Unix variants
14162 * limit the willneed case as well. If this turns out to be an
14163 * issue for developers, then we can always adjust the policy
14164 * here and still be backwards compatible since this is all
14165 * just "advice".
14166 */
14167 kr = memory_object_data_request(
14168 pager,
14169 offset + object->paging_offset,
14170 0, /* ignored */
14171 VM_PROT_READ,
14172 (memory_object_fault_info_t)&fault_info);
b0d623f7 14173
6d2010ae
A
14174 vm_object_lock(object);
14175 vm_object_paging_end(object);
14176 vm_object_unlock(object);
b0d623f7 14177
6d2010ae
A
14178 /*
14179 * If we couldn't do the I/O for some reason, just give up on
14180 * the madvise. We still return success to the user since
14181 * madvise isn't supposed to fail when the advice can't be
14182 * taken.
14183 */
14184 if (kr != KERN_SUCCESS) {
14185 return KERN_SUCCESS;
14186 }
b0d623f7 14187
6d2010ae
A
14188 start += len;
14189 if (start >= end) {
14190 /* done */
14191 return KERN_SUCCESS;
14192 }
b0d623f7 14193
6d2010ae
A
14194 /* look up next entry */
14195 vm_map_lock_read(map);
14196 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 14197 /*
6d2010ae 14198 * There's a new hole in the address range.
b0d623f7 14199 */
6d2010ae
A
14200 vm_map_unlock_read(map);
14201 return KERN_INVALID_ADDRESS;
b0d623f7 14202 }
6d2010ae 14203 }
b0d623f7
A
14204
14205 vm_map_unlock_read(map);
6d2010ae 14206 return KERN_SUCCESS;
b0d623f7
A
14207}
14208
14209static boolean_t
14210vm_map_entry_is_reusable(
14211 vm_map_entry_t entry)
14212{
3e170ce0
A
14213 /* Only user map entries */
14214
b0d623f7
A
14215 vm_object_t object;
14216
2dced7af
A
14217 if (entry->is_sub_map) {
14218 return FALSE;
14219 }
14220
3e170ce0 14221 switch (VME_ALIAS(entry)) {
39236c6e
A
14222 case VM_MEMORY_MALLOC:
14223 case VM_MEMORY_MALLOC_SMALL:
14224 case VM_MEMORY_MALLOC_LARGE:
14225 case VM_MEMORY_REALLOC:
14226 case VM_MEMORY_MALLOC_TINY:
14227 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14228 case VM_MEMORY_MALLOC_LARGE_REUSED:
14229 /*
14230 * This is a malloc() memory region: check if it's still
14231 * in its original state and can be re-used for more
14232 * malloc() allocations.
14233 */
14234 break;
14235 default:
14236 /*
14237 * Not a malloc() memory region: let the caller decide if
14238 * it's re-usable.
14239 */
14240 return TRUE;
14241 }
14242
b0d623f7
A
14243 if (entry->is_shared ||
14244 entry->is_sub_map ||
14245 entry->in_transition ||
14246 entry->protection != VM_PROT_DEFAULT ||
14247 entry->max_protection != VM_PROT_ALL ||
14248 entry->inheritance != VM_INHERIT_DEFAULT ||
14249 entry->no_cache ||
14250 entry->permanent ||
39236c6e 14251 entry->superpage_size != FALSE ||
b0d623f7
A
14252 entry->zero_wired_pages ||
14253 entry->wired_count != 0 ||
14254 entry->user_wired_count != 0) {
14255 return FALSE;
91447636 14256 }
b0d623f7 14257
3e170ce0 14258 object = VME_OBJECT(entry);
b0d623f7
A
14259 if (object == VM_OBJECT_NULL) {
14260 return TRUE;
14261 }
316670eb
A
14262 if (
14263#if 0
14264 /*
14265 * Let's proceed even if the VM object is potentially
14266 * shared.
14267 * We check for this later when processing the actual
14268 * VM pages, so the contents will be safe if shared.
5ba3f43e 14269 *
316670eb
A
14270 * But we can still mark this memory region as "reusable" to
14271 * acknowledge that the caller did let us know that the memory
14272 * could be re-used and should not be penalized for holding
14273 * on to it. This allows its "resident size" to not include
14274 * the reusable range.
14275 */
14276 object->ref_count == 1 &&
14277#endif
b0d623f7
A
14278 object->wired_page_count == 0 &&
14279 object->copy == VM_OBJECT_NULL &&
14280 object->shadow == VM_OBJECT_NULL &&
14281 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
14282 object->internal &&
14283 !object->true_share &&
6d2010ae 14284 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
14285 !object->code_signed) {
14286 return TRUE;
1c79356b 14287 }
b0d623f7 14288 return FALSE;
5ba3f43e
A
14289
14290
b0d623f7 14291}
1c79356b 14292
b0d623f7
A
14293static kern_return_t
14294vm_map_reuse_pages(
14295 vm_map_t map,
14296 vm_map_offset_t start,
14297 vm_map_offset_t end)
14298{
14299 vm_map_entry_t entry;
14300 vm_object_t object;
14301 vm_object_offset_t start_offset, end_offset;
14302
14303 /*
14304 * The MADV_REUSE operation doesn't require any changes to the
14305 * vm_map_entry_t's, so the read lock is sufficient.
14306 */
0b4e3aa0 14307
b0d623f7 14308 vm_map_lock_read(map);
3e170ce0 14309 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 14310
b0d623f7
A
14311 /*
14312 * The madvise semantics require that the address range be fully
14313 * allocated with no holes. Otherwise, we're required to return
14314 * an error.
14315 */
14316
14317 if (!vm_map_range_check(map, start, end, &entry)) {
14318 vm_map_unlock_read(map);
14319 vm_page_stats_reusable.reuse_pages_failure++;
14320 return KERN_INVALID_ADDRESS;
1c79356b 14321 }
91447636 14322
b0d623f7
A
14323 /*
14324 * Examine each vm_map_entry_t in the range.
14325 */
14326 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14327 entry = entry->vme_next) {
14328 /*
14329 * Sanity check on the VM map entry.
14330 */
14331 if (! vm_map_entry_is_reusable(entry)) {
14332 vm_map_unlock_read(map);
14333 vm_page_stats_reusable.reuse_pages_failure++;
14334 return KERN_INVALID_ADDRESS;
14335 }
14336
14337 /*
14338 * The first time through, the start address could be anywhere
14339 * within the vm_map_entry we found. So adjust the offset to
14340 * correspond.
14341 */
14342 if (entry->vme_start < start) {
14343 start_offset = start - entry->vme_start;
14344 } else {
14345 start_offset = 0;
14346 }
14347 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
14348 start_offset += VME_OFFSET(entry);
14349 end_offset += VME_OFFSET(entry);
b0d623f7 14350
2dced7af 14351 assert(!entry->is_sub_map);
3e170ce0 14352 object = VME_OBJECT(entry);
b0d623f7
A
14353 if (object != VM_OBJECT_NULL) {
14354 vm_object_lock(object);
14355 vm_object_reuse_pages(object, start_offset, end_offset,
14356 TRUE);
14357 vm_object_unlock(object);
14358 }
14359
3e170ce0 14360 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
14361 /*
14362 * XXX
14363 * We do not hold the VM map exclusively here.
14364 * The "alias" field is not that critical, so it's
14365 * safe to update it here, as long as it is the only
14366 * one that can be modified while holding the VM map
14367 * "shared".
14368 */
3e170ce0 14369 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
14370 }
14371 }
5ba3f43e 14372
b0d623f7
A
14373 vm_map_unlock_read(map);
14374 vm_page_stats_reusable.reuse_pages_success++;
14375 return KERN_SUCCESS;
1c79356b
A
14376}
14377
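/*
 * Illustrative user-space sketch (not part of this file): the "reusable" /
 * "reuse" pairing implemented by vm_map_reusable_pages() and
 * vm_map_reuse_pages() is reached from user space through madvise(2); the
 * assumption here is that the Darwin advice values MADV_FREE_REUSABLE and
 * MADV_FREE_REUSE map to those routines via the BSD layer (outside this file).
 */
#include <sys/mman.h>
#include <stddef.h>

/* Donate a cached buffer's pages back while keeping the address range. */
static void
demo_mark_reusable(void *buf, size_t len)
{
	(void)madvise(buf, len, MADV_FREE_REUSABLE);	/* assumed Darwin advice value */
}

/* Reclaim the same range before touching its contents again. */
static void
demo_mark_reused(void *buf, size_t len)
{
	(void)madvise(buf, len, MADV_FREE_REUSE);	/* assumed Darwin advice value */
}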
1c79356b 14378
b0d623f7
A
14379static kern_return_t
14380vm_map_reusable_pages(
14381 vm_map_t map,
14382 vm_map_offset_t start,
14383 vm_map_offset_t end)
14384{
14385 vm_map_entry_t entry;
14386 vm_object_t object;
14387 vm_object_offset_t start_offset, end_offset;
3e170ce0 14388 vm_map_offset_t pmap_offset;
b0d623f7
A
14389
14390 /*
14391 * The MADV_REUSABLE operation doesn't require any changes to the
14392 * vm_map_entry_t's, so the read lock is sufficient.
14393 */
14394
14395 vm_map_lock_read(map);
3e170ce0 14396 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
14397
14398 /*
14399 * The madvise semantics require that the address range be fully
14400 * allocated with no holes. Otherwise, we're required to return
14401 * an error.
14402 */
14403
14404 if (!vm_map_range_check(map, start, end, &entry)) {
14405 vm_map_unlock_read(map);
14406 vm_page_stats_reusable.reusable_pages_failure++;
14407 return KERN_INVALID_ADDRESS;
14408 }
14409
14410 /*
14411 * Examine each vm_map_entry_t in the range.
14412 */
14413 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14414 entry = entry->vme_next) {
14415 int kill_pages = 0;
14416
14417 /*
14418 * Sanity check on the VM map entry.
14419 */
14420 if (! vm_map_entry_is_reusable(entry)) {
14421 vm_map_unlock_read(map);
14422 vm_page_stats_reusable.reusable_pages_failure++;
14423 return KERN_INVALID_ADDRESS;
14424 }
14425
39037602
A
14426 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
14427 /* not writable: can't discard contents */
14428 vm_map_unlock_read(map);
14429 vm_page_stats_reusable.reusable_nonwritable++;
14430 vm_page_stats_reusable.reusable_pages_failure++;
14431 return KERN_PROTECTION_FAILURE;
14432 }
14433
b0d623f7
A
14434 /*
14435 * The first time through, the start address could be anywhere
14436 * within the vm_map_entry we found. So adjust the offset to
14437 * correspond.
14438 */
14439 if (entry->vme_start < start) {
14440 start_offset = start - entry->vme_start;
3e170ce0 14441 pmap_offset = start;
b0d623f7
A
14442 } else {
14443 start_offset = 0;
3e170ce0 14444 pmap_offset = entry->vme_start;
b0d623f7
A
14445 }
14446 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
14447 start_offset += VME_OFFSET(entry);
14448 end_offset += VME_OFFSET(entry);
b0d623f7 14449
2dced7af 14450 assert(!entry->is_sub_map);
3e170ce0 14451 object = VME_OBJECT(entry);
b0d623f7
A
14452 if (object == VM_OBJECT_NULL)
14453 continue;
14454
14455
14456 vm_object_lock(object);
39037602
A
14457 if (((object->ref_count == 1) ||
14458 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
14459 object->copy == VM_OBJECT_NULL)) &&
14460 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
14461 /*
14462 * "iokit_acct" entries are billed for their virtual size
14463 * (rather than for their resident pages only), so they
14464 * wouldn't benefit from making pages reusable, and it
14465 * would be hard to keep track of pages that are both
39037602
A
14466 * "iokit_acct" and "reusable" in the pmap stats and
14467 * ledgers.
fe8ab488
A
14468 */
14469 !(entry->iokit_acct ||
39037602
A
14470 (!entry->is_sub_map && !entry->use_pmap))) {
14471 if (object->ref_count != 1) {
14472 vm_page_stats_reusable.reusable_shared++;
14473 }
b0d623f7 14474 kill_pages = 1;
39037602 14475 } else {
b0d623f7 14476 kill_pages = -1;
39037602 14477 }
b0d623f7
A
14478 if (kill_pages != -1) {
14479 vm_object_deactivate_pages(object,
14480 start_offset,
14481 end_offset - start_offset,
14482 kill_pages,
3e170ce0
A
14483 TRUE /*reusable_pages*/,
14484 map->pmap,
14485 pmap_offset);
b0d623f7
A
14486 } else {
14487 vm_page_stats_reusable.reusable_pages_shared++;
14488 }
14489 vm_object_unlock(object);
14490
3e170ce0
A
14491 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
14492 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
14493 /*
14494 * XXX
14495 * We do not hold the VM map exclusively here.
14496 * The "alias" field is not that critical, so it's
14497 * safe to update it here, as long as it is the only
14498 * one that can be modified while holding the VM map
14499 * "shared".
14500 */
3e170ce0 14501 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
14502 }
14503 }
5ba3f43e 14504
b0d623f7
A
14505 vm_map_unlock_read(map);
14506 vm_page_stats_reusable.reusable_pages_success++;
14507 return KERN_SUCCESS;
14508}
14509
14510
14511static kern_return_t
14512vm_map_can_reuse(
14513 vm_map_t map,
14514 vm_map_offset_t start,
14515 vm_map_offset_t end)
14516{
14517 vm_map_entry_t entry;
14518
14519 /*
14520 * The MADV_REUSABLE operation doesn't require any changes to the
14521 * vm_map_entry_t's, so the read lock is sufficient.
14522 */
14523
14524 vm_map_lock_read(map);
3e170ce0 14525 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
14526
14527 /*
14528 * The madvise semantics require that the address range be fully
14529 * allocated with no holes. Otherwise, we're required to return
14530 * an error.
14531 */
14532
14533 if (!vm_map_range_check(map, start, end, &entry)) {
14534 vm_map_unlock_read(map);
14535 vm_page_stats_reusable.can_reuse_failure++;
14536 return KERN_INVALID_ADDRESS;
14537 }
14538
14539 /*
14540 * Examine each vm_map_entry_t in the range.
14541 */
14542 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14543 entry = entry->vme_next) {
14544 /*
14545 * Sanity check on the VM map entry.
14546 */
14547 if (! vm_map_entry_is_reusable(entry)) {
14548 vm_map_unlock_read(map);
14549 vm_page_stats_reusable.can_reuse_failure++;
14550 return KERN_INVALID_ADDRESS;
14551 }
14552 }
5ba3f43e 14553
b0d623f7
A
14554 vm_map_unlock_read(map);
14555 vm_page_stats_reusable.can_reuse_success++;
14556 return KERN_SUCCESS;
14557}
14558
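/*
 * Illustrative sketch (an assumption, not part of the original source): on
 * Darwin, the reuse protocol implemented by vm_map_reuse_pages(),
 * vm_map_reusable_pages() and vm_map_can_reuse() is typically driven from
 * user space through madvise(2): MADV_CAN_REUSE probes whether a range is
 * eligible, MADV_FREE_REUSABLE marks its pages reusable, and MADV_FREE_REUSE
 * reclaims them before the memory is written again.
 */
#if 0 /* user-space usage sketch, not compiled with the kernel */
#include <stddef.h>
#include <sys/mman.h>

static void
reuse_cycle_example(void *buf, size_t len)
{
	/* Only attempt the protocol if the kernel says the range qualifies. */
	if (madvise(buf, len, MADV_CAN_REUSE) != 0)
		return;

	/* Done with the contents for now: the pages may be reclaimed lazily. */
	if (madvise(buf, len, MADV_FREE_REUSABLE) != 0)
		return;

	/* ... later, before writing to "buf" again ... */
	(void) madvise(buf, len, MADV_FREE_REUSE);
}
#endif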
14559
3e170ce0
A
14560#if MACH_ASSERT
14561static kern_return_t
14562vm_map_pageout(
14563 vm_map_t map,
14564 vm_map_offset_t start,
14565 vm_map_offset_t end)
14566{
14567 vm_map_entry_t entry;
14568
14569 /*
14570 * The MADV_PAGEOUT operation doesn't require any changes to the
14571 * vm_map_entry_t's, so the read lock is sufficient.
14572 */
14573
14574 vm_map_lock_read(map);
14575
14576 /*
14577 * The madvise semantics require that the address range be fully
14578 * allocated with no holes. Otherwise, we're required to return
14579 * an error.
14580 */
14581
14582 if (!vm_map_range_check(map, start, end, &entry)) {
14583 vm_map_unlock_read(map);
14584 return KERN_INVALID_ADDRESS;
14585 }
14586
14587 /*
14588 * Examine each vm_map_entry_t in the range.
14589 */
14590 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14591 entry = entry->vme_next) {
14592 vm_object_t object;
14593
14594 /*
14595 * Sanity check on the VM map entry.
14596 */
14597 if (entry->is_sub_map) {
14598 vm_map_t submap;
14599 vm_map_offset_t submap_start;
14600 vm_map_offset_t submap_end;
14601 vm_map_entry_t submap_entry;
14602
14603 submap = VME_SUBMAP(entry);
14604 submap_start = VME_OFFSET(entry);
5ba3f43e 14605 submap_end = submap_start + (entry->vme_end -
3e170ce0
A
14606 entry->vme_start);
14607
14608 vm_map_lock_read(submap);
14609
14610 if (! vm_map_range_check(submap,
14611 submap_start,
14612 submap_end,
14613 &submap_entry)) {
14614 vm_map_unlock_read(submap);
14615 vm_map_unlock_read(map);
14616 return KERN_INVALID_ADDRESS;
14617 }
14618
14619 object = VME_OBJECT(submap_entry);
14620 if (submap_entry->is_sub_map ||
14621 object == VM_OBJECT_NULL ||
14622 !object->internal) {
14623 vm_map_unlock_read(submap);
14624 continue;
14625 }
14626
14627 vm_object_pageout(object);
14628
14629 vm_map_unlock_read(submap);
14630 submap = VM_MAP_NULL;
14631 submap_entry = VM_MAP_ENTRY_NULL;
14632 continue;
14633 }
14634
14635 object = VME_OBJECT(entry);
14636 if (entry->is_sub_map ||
14637 object == VM_OBJECT_NULL ||
14638 !object->internal) {
14639 continue;
14640 }
14641
14642 vm_object_pageout(object);
14643 }
5ba3f43e 14644
3e170ce0
A
14645 vm_map_unlock_read(map);
14646 return KERN_SUCCESS;
14647}
14648#endif /* MACH_ASSERT */
14649
14650
1c79356b 14651/*
91447636
A
14652 * Routine: vm_map_entry_insert
14653 *
14654 * Description: This routine inserts a new vm_map_entry into a locked map.
1c79356b 14655 */
91447636
A
14656vm_map_entry_t
14657vm_map_entry_insert(
14658 vm_map_t map,
14659 vm_map_entry_t insp_entry,
14660 vm_map_offset_t start,
14661 vm_map_offset_t end,
14662 vm_object_t object,
14663 vm_object_offset_t offset,
14664 boolean_t needs_copy,
14665 boolean_t is_shared,
14666 boolean_t in_transition,
14667 vm_prot_t cur_protection,
14668 vm_prot_t max_protection,
14669 vm_behavior_t behavior,
14670 vm_inherit_t inheritance,
2d21ac55 14671 unsigned wired_count,
b0d623f7
A
14672 boolean_t no_cache,
14673 boolean_t permanent,
39236c6e 14674 unsigned int superpage_size,
fe8ab488 14675 boolean_t clear_map_aligned,
5ba3f43e
A
14676 boolean_t is_submap,
14677 boolean_t used_for_jit,
14678 int alias)
1c79356b 14679{
91447636 14680 vm_map_entry_t new_entry;
1c79356b 14681
91447636 14682 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 14683
7ddcb079 14684 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 14685
39236c6e
A
14686 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
14687 new_entry->map_aligned = TRUE;
14688 } else {
14689 new_entry->map_aligned = FALSE;
14690 }
14691 if (clear_map_aligned &&
fe8ab488
A
14692 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
14693 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
14694 new_entry->map_aligned = FALSE;
14695 }
14696
91447636
A
14697 new_entry->vme_start = start;
14698 new_entry->vme_end = end;
14699 assert(page_aligned(new_entry->vme_start));
14700 assert(page_aligned(new_entry->vme_end));
39236c6e 14701 if (new_entry->map_aligned) {
fe8ab488
A
14702 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
14703 VM_MAP_PAGE_MASK(map)));
39236c6e
A
14704 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
14705 VM_MAP_PAGE_MASK(map)));
14706 }
e2d2fc5c 14707 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 14708
3e170ce0
A
14709 VME_OBJECT_SET(new_entry, object);
14710 VME_OFFSET_SET(new_entry, offset);
91447636 14711 new_entry->is_shared = is_shared;
fe8ab488 14712 new_entry->is_sub_map = is_submap;
91447636
A
14713 new_entry->needs_copy = needs_copy;
14714 new_entry->in_transition = in_transition;
14715 new_entry->needs_wakeup = FALSE;
14716 new_entry->inheritance = inheritance;
14717 new_entry->protection = cur_protection;
14718 new_entry->max_protection = max_protection;
14719 new_entry->behavior = behavior;
14720 new_entry->wired_count = wired_count;
14721 new_entry->user_wired_count = 0;
fe8ab488
A
14722 if (is_submap) {
14723 /*
14724 * submap: "use_pmap" means "nested".
14725 * default: false.
14726 */
14727 new_entry->use_pmap = FALSE;
14728 } else {
14729 /*
14730 * object: "use_pmap" means "use pmap accounting" for footprint.
14731 * default: true.
14732 */
14733 new_entry->use_pmap = TRUE;
14734 }
5ba3f43e 14735 VME_ALIAS_SET(new_entry, alias);
b0d623f7 14736 new_entry->zero_wired_pages = FALSE;
2d21ac55 14737 new_entry->no_cache = no_cache;
b0d623f7 14738 new_entry->permanent = permanent;
39236c6e
A
14739 if (superpage_size)
14740 new_entry->superpage_size = TRUE;
14741 else
14742 new_entry->superpage_size = FALSE;
5ba3f43e
A
14743 if (used_for_jit){
14744 if (!(map->jit_entry_exists)){
14745 new_entry->used_for_jit = TRUE;
14746 map->jit_entry_exists = TRUE;
14747
14748 /* Tell the pmap that it supports JIT. */
14749 pmap_set_jit_entitled(map->pmap);
14750 }
14751 } else {
14752 new_entry->used_for_jit = FALSE;
14753 }
fe8ab488 14754 new_entry->iokit_acct = FALSE;
3e170ce0
A
14755 new_entry->vme_resilient_codesign = FALSE;
14756 new_entry->vme_resilient_media = FALSE;
39037602 14757 new_entry->vme_atomic = FALSE;
1c79356b 14758
91447636
A
14759 /*
14760 * Insert the new entry into the list.
14761 */
1c79356b 14762
6d2010ae 14763 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
14764 map->size += end - start;
14765
14766 /*
14767 * Update the free space hint and the lookup hint.
14768 */
14769
0c530ab8 14770 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 14771 return new_entry;
1c79356b
A
14772}
14773
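/*
 * Minimal sketch of a hypothetical caller of vm_map_entry_insert(); the map
 * must already be locked for writing and "where" must be the entry that
 * precedes [start, end).  The argument values are illustrative assumptions,
 * not taken from an actual call site.
 */
#if 0 /* illustrative only */
static void
insert_anonymous_entry_example(
	vm_map_t	map,
	vm_map_entry_t	where,		/* entry preceding [start, end) */
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_object_t	object)
{
	vm_map_entry_t new_entry;

	new_entry = vm_map_entry_insert(map, where, start, end,
					object,
					0,			/* offset */
					FALSE,			/* needs_copy */
					FALSE,			/* is_shared */
					FALSE,			/* in_transition */
					VM_PROT_DEFAULT,	/* cur_protection */
					VM_PROT_ALL,		/* max_protection */
					VM_BEHAVIOR_DEFAULT,
					VM_INHERIT_DEFAULT,
					0,			/* wired_count */
					FALSE,			/* no_cache */
					FALSE,			/* permanent */
					0,			/* superpage_size */
					FALSE,			/* clear_map_aligned */
					FALSE,			/* is_submap */
					FALSE,			/* used_for_jit */
					0);			/* alias */
	assert(new_entry->vme_start == start);
}
#endif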
14774/*
91447636
A
14775 * Routine: vm_map_remap_extract
14776 *
14777 * Description: This routine returns a list of vm_map_entry structures extracted from a map.
1c79356b 14778 */
91447636
A
14779static kern_return_t
14780vm_map_remap_extract(
14781 vm_map_t map,
14782 vm_map_offset_t addr,
14783 vm_map_size_t size,
14784 boolean_t copy,
14785 struct vm_map_header *map_header,
14786 vm_prot_t *cur_protection,
14787 vm_prot_t *max_protection,
14788 /* What, no behavior? */
14789 vm_inherit_t inheritance,
39037602 14790 boolean_t pageable,
5c9f4661
A
14791 boolean_t same_map,
14792 vm_map_kernel_flags_t vmk_flags)
1c79356b 14793{
91447636
A
14794 kern_return_t result;
14795 vm_map_size_t mapped_size;
14796 vm_map_size_t tmp_size;
14797 vm_map_entry_t src_entry; /* result of last map lookup */
14798 vm_map_entry_t new_entry;
14799 vm_object_offset_t offset;
14800 vm_map_offset_t map_address;
14801 vm_map_offset_t src_start; /* start of entry to map */
14802 vm_map_offset_t src_end; /* end of region to be mapped */
5ba3f43e 14803 vm_object_t object;
91447636
A
14804 vm_map_version_t version;
14805 boolean_t src_needs_copy;
14806 boolean_t new_entry_needs_copy;
1c79356b 14807
91447636 14808 assert(map != VM_MAP_NULL);
39236c6e
A
14809 assert(size != 0);
14810 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
14811 assert(inheritance == VM_INHERIT_NONE ||
14812 inheritance == VM_INHERIT_COPY ||
14813 inheritance == VM_INHERIT_SHARE);
1c79356b 14814
91447636
A
14815 /*
14816 * Compute start and end of region.
14817 */
39236c6e
A
14818 src_start = vm_map_trunc_page(addr, PAGE_MASK);
14819 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
14820
1c79356b 14821
91447636
A
14822 /*
14823 * Initialize map_header.
14824 */
14825 map_header->links.next = (struct vm_map_entry *)&map_header->links;
14826 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
14827 map_header->nentries = 0;
14828 map_header->entries_pageable = pageable;
39236c6e 14829 map_header->page_shift = PAGE_SHIFT;
1c79356b 14830
6d2010ae
A
14831 vm_map_store_init( map_header );
14832
91447636
A
14833 *cur_protection = VM_PROT_ALL;
14834 *max_protection = VM_PROT_ALL;
1c79356b 14835
91447636
A
14836 map_address = 0;
14837 mapped_size = 0;
14838 result = KERN_SUCCESS;
1c79356b 14839
5ba3f43e 14840 /*
91447636
A
14841 * The specified source virtual space might correspond to
14842 * multiple map entries, need to loop on them.
14843 */
14844 vm_map_lock(map);
14845 while (mapped_size != size) {
14846 vm_map_size_t entry_size;
1c79356b 14847
91447636
A
14848 /*
14849 * Find the beginning of the region.
5ba3f43e 14850 */
91447636
A
14851 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
14852 result = KERN_INVALID_ADDRESS;
14853 break;
14854 }
1c79356b 14855
91447636
A
14856 if (src_start < src_entry->vme_start ||
14857 (mapped_size && src_start != src_entry->vme_start)) {
14858 result = KERN_INVALID_ADDRESS;
14859 break;
14860 }
1c79356b 14861
91447636
A
14862 tmp_size = size - mapped_size;
14863 if (src_end > src_entry->vme_end)
14864 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 14865
91447636 14866 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 14867 src_entry->vme_start);
1c79356b 14868
91447636 14869 if(src_entry->is_sub_map) {
3e170ce0 14870 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
14871 object = VM_OBJECT_NULL;
14872 } else {
3e170ce0 14873 object = VME_OBJECT(src_entry);
fe8ab488
A
14874 if (src_entry->iokit_acct) {
14875 /*
14876 * This entry uses "IOKit accounting".
14877 */
14878 } else if (object != VM_OBJECT_NULL &&
14879 object->purgable != VM_PURGABLE_DENY) {
14880 /*
14881 * Purgeable objects have their own accounting:
14882 * no pmap accounting for them.
14883 */
14884 assert(!src_entry->use_pmap);
14885 } else {
14886 /*
14887 * Not IOKit or purgeable:
14888 * must be accounted by pmap stats.
14889 */
14890 assert(src_entry->use_pmap);
14891 }
55e303ae 14892
91447636
A
14893 if (object == VM_OBJECT_NULL) {
14894 object = vm_object_allocate(entry_size);
3e170ce0
A
14895 VME_OFFSET_SET(src_entry, 0);
14896 VME_OBJECT_SET(src_entry, object);
91447636
A
14897 } else if (object->copy_strategy !=
14898 MEMORY_OBJECT_COPY_SYMMETRIC) {
14899 /*
14900 * We are already using an asymmetric
14901 * copy, and therefore we already have
14902 * the right object.
14903 */
14904 assert(!src_entry->needs_copy);
14905 } else if (src_entry->needs_copy || object->shadowed ||
14906 (object->internal && !object->true_share &&
2d21ac55 14907 !src_entry->is_shared &&
6d2010ae 14908 object->vo_size > entry_size)) {
1c79356b 14909
3e170ce0 14910 VME_OBJECT_SHADOW(src_entry, entry_size);
1c79356b 14911
91447636
A
14912 if (!src_entry->needs_copy &&
14913 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
14914 vm_prot_t prot;
14915
5ba3f43e
A
14916 assert(!pmap_has_prot_policy(src_entry->protection));
14917
0c530ab8 14918 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 14919
3e170ce0
A
14920 if (override_nx(map,
14921 VME_ALIAS(src_entry))
14922 && prot)
0c530ab8 14923 prot |= VM_PROT_EXECUTE;
2d21ac55 14924
5ba3f43e
A
14925 assert(!pmap_has_prot_policy(prot));
14926
316670eb 14927 if(map->mapped_in_other_pmaps) {
2d21ac55 14928 vm_object_pmap_protect(
3e170ce0
A
14929 VME_OBJECT(src_entry),
14930 VME_OFFSET(src_entry),
2d21ac55
A
14931 entry_size,
14932 PMAP_NULL,
0c530ab8 14933 src_entry->vme_start,
0c530ab8 14934 prot);
2d21ac55
A
14935 } else {
14936 pmap_protect(vm_map_pmap(map),
14937 src_entry->vme_start,
14938 src_entry->vme_end,
14939 prot);
91447636
A
14940 }
14941 }
1c79356b 14942
3e170ce0 14943 object = VME_OBJECT(src_entry);
91447636
A
14944 src_entry->needs_copy = FALSE;
14945 }
1c79356b 14946
1c79356b 14947
91447636 14948 vm_object_lock(object);
2d21ac55 14949 vm_object_reference_locked(object); /* object ref. for new entry */
5ba3f43e 14950 if (object->copy_strategy ==
2d21ac55 14951 MEMORY_OBJECT_COPY_SYMMETRIC) {
5ba3f43e 14952 object->copy_strategy =
91447636
A
14953 MEMORY_OBJECT_COPY_DELAY;
14954 }
14955 vm_object_unlock(object);
14956 }
1c79356b 14957
3e170ce0
A
14958 offset = (VME_OFFSET(src_entry) +
14959 (src_start - src_entry->vme_start));
1c79356b 14960
7ddcb079 14961 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 14962 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
14963 if (new_entry->is_sub_map) {
14964 /* clr address space specifics */
14965 new_entry->use_pmap = FALSE;
14966 }
1c79356b 14967
39236c6e
A
14968 new_entry->map_aligned = FALSE;
14969
91447636
A
14970 new_entry->vme_start = map_address;
14971 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 14972 assert(new_entry->vme_start < new_entry->vme_end);
5c9f4661
A
14973 if (copy && vmk_flags.vmkf_remap_prot_copy) {
14974 /*
14975 * Remapping for vm_map_protect(VM_PROT_COPY)
14976 * to convert a read-only mapping into a
14977 * copy-on-write version of itself but
14978 * with write access:
14979 * keep the original inheritance and add
14980 * VM_PROT_WRITE to the max protection.
14981 */
14982 new_entry->inheritance = src_entry->inheritance;
14983 new_entry->max_protection |= VM_PROT_WRITE;
14984 } else {
14985 new_entry->inheritance = inheritance;
14986 }
3e170ce0 14987 VME_OFFSET_SET(new_entry, offset);
5ba3f43e 14988
91447636
A
14989 /*
14990 * The new region has to be copied now if required.
14991 */
14992 RestartCopy:
14993 if (!copy) {
316670eb
A
14994 /*
14995 * Cannot allow an entry describing a JIT
14996 * region to be shared across address spaces.
14997 */
39037602 14998 if (src_entry->used_for_jit == TRUE && !same_map) {
316670eb
A
14999 result = KERN_INVALID_ARGUMENT;
15000 break;
15001 }
91447636
A
15002 src_entry->is_shared = TRUE;
15003 new_entry->is_shared = TRUE;
5ba3f43e 15004 if (!(new_entry->is_sub_map))
91447636 15005 new_entry->needs_copy = FALSE;
1c79356b 15006
91447636
A
15007 } else if (src_entry->is_sub_map) {
15008 /* make this a COW sub_map if not already */
3e170ce0 15009 assert(new_entry->wired_count == 0);
91447636
A
15010 new_entry->needs_copy = TRUE;
15011 object = VM_OBJECT_NULL;
15012 } else if (src_entry->wired_count == 0 &&
3e170ce0
A
15013 vm_object_copy_quickly(&VME_OBJECT(new_entry),
15014 VME_OFFSET(new_entry),
2d21ac55
A
15015 (new_entry->vme_end -
15016 new_entry->vme_start),
15017 &src_needs_copy,
15018 &new_entry_needs_copy)) {
55e303ae 15019
91447636
A
15020 new_entry->needs_copy = new_entry_needs_copy;
15021 new_entry->is_shared = FALSE;
1c79356b 15022
91447636
A
15023 /*
15024 * Handle copy_on_write semantics.
15025 */
15026 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
15027 vm_prot_t prot;
15028
5ba3f43e
A
15029 assert(!pmap_has_prot_policy(src_entry->protection));
15030
0c530ab8 15031 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 15032
3e170ce0
A
15033 if (override_nx(map,
15034 VME_ALIAS(src_entry))
15035 && prot)
0c530ab8 15036 prot |= VM_PROT_EXECUTE;
2d21ac55 15037
5ba3f43e
A
15038 assert(!pmap_has_prot_policy(prot));
15039
91447636
A
15040 vm_object_pmap_protect(object,
15041 offset,
15042 entry_size,
5ba3f43e 15043 ((src_entry->is_shared
316670eb 15044 || map->mapped_in_other_pmaps) ?
91447636
A
15045 PMAP_NULL : map->pmap),
15046 src_entry->vme_start,
0c530ab8 15047 prot);
1c79356b 15048
3e170ce0 15049 assert(src_entry->wired_count == 0);
91447636
A
15050 src_entry->needs_copy = TRUE;
15051 }
15052 /*
15053 * Throw away the old object reference of the new entry.
15054 */
15055 vm_object_deallocate(object);
1c79356b 15056
91447636
A
15057 } else {
15058 new_entry->is_shared = FALSE;
1c79356b 15059
91447636
A
15060 /*
15061 * The map can be safely unlocked since we
15062 * already hold a reference on the object.
15063 *
15064 * Record the timestamp of the map for later
15065 * verification, and unlock the map.
15066 */
15067 version.main_timestamp = map->timestamp;
15068 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 15069
91447636
A
15070 /*
15071 * Perform the copy.
15072 */
15073 if (src_entry->wired_count > 0) {
15074 vm_object_lock(object);
15075 result = vm_object_copy_slowly(
2d21ac55
A
15076 object,
15077 offset,
5ba3f43e
A
15078 (new_entry->vme_end -
15079 new_entry->vme_start),
2d21ac55 15080 THREAD_UNINT,
3e170ce0 15081 &VME_OBJECT(new_entry));
1c79356b 15082
3e170ce0 15083 VME_OFFSET_SET(new_entry, 0);
91447636
A
15084 new_entry->needs_copy = FALSE;
15085 } else {
3e170ce0
A
15086 vm_object_offset_t new_offset;
15087
15088 new_offset = VME_OFFSET(new_entry);
91447636 15089 result = vm_object_copy_strategically(
2d21ac55
A
15090 object,
15091 offset,
5ba3f43e
A
15092 (new_entry->vme_end -
15093 new_entry->vme_start),
3e170ce0
A
15094 &VME_OBJECT(new_entry),
15095 &new_offset,
2d21ac55 15096 &new_entry_needs_copy);
3e170ce0
A
15097 if (new_offset != VME_OFFSET(new_entry)) {
15098 VME_OFFSET_SET(new_entry, new_offset);
15099 }
1c79356b 15100
91447636
A
15101 new_entry->needs_copy = new_entry_needs_copy;
15102 }
1c79356b 15103
91447636
A
15104 /*
15105 * Throw away the old object reference of the new entry.
15106 */
15107 vm_object_deallocate(object);
1c79356b 15108
91447636
A
15109 if (result != KERN_SUCCESS &&
15110 result != KERN_MEMORY_RESTART_COPY) {
15111 _vm_map_entry_dispose(map_header, new_entry);
39037602 15112 vm_map_lock(map);
91447636
A
15113 break;
15114 }
1c79356b 15115
91447636
A
15116 /*
15117 * Verify that the map has not substantially
15118 * changed while the copy was being made.
15119 */
1c79356b 15120
91447636
A
15121 vm_map_lock(map);
15122 if (version.main_timestamp + 1 != map->timestamp) {
15123 /*
15124 * Simple version comparison failed.
15125 *
15126 * Retry the lookup and verify that the
15127 * same object/offset are still present.
15128 */
3e170ce0 15129 vm_object_deallocate(VME_OBJECT(new_entry));
91447636
A
15130 _vm_map_entry_dispose(map_header, new_entry);
15131 if (result == KERN_MEMORY_RESTART_COPY)
15132 result = KERN_SUCCESS;
15133 continue;
15134 }
1c79356b 15135
91447636
A
15136 if (result == KERN_MEMORY_RESTART_COPY) {
15137 vm_object_reference(object);
15138 goto RestartCopy;
15139 }
15140 }
1c79356b 15141
6d2010ae 15142 _vm_map_store_entry_link(map_header,
91447636 15143 map_header->links.prev, new_entry);
1c79356b 15144
6d2010ae
A
15145 /* Protections for submap mapping are irrelevant here */
15146 if( !src_entry->is_sub_map ) {
15147 *cur_protection &= src_entry->protection;
15148 *max_protection &= src_entry->max_protection;
15149 }
91447636
A
15150 map_address += tmp_size;
15151 mapped_size += tmp_size;
15152 src_start += tmp_size;
1c79356b 15153
91447636 15154 } /* end while */
1c79356b 15155
91447636
A
15156 vm_map_unlock(map);
15157 if (result != KERN_SUCCESS) {
15158 /*
15159 * Free all allocated elements.
15160 */
15161 for (src_entry = map_header->links.next;
15162 src_entry != (struct vm_map_entry *)&map_header->links;
15163 src_entry = new_entry) {
15164 new_entry = src_entry->vme_next;
6d2010ae 15165 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 15166 if (src_entry->is_sub_map) {
3e170ce0 15167 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 15168 } else {
3e170ce0 15169 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 15170 }
91447636
A
15171 _vm_map_entry_dispose(map_header, src_entry);
15172 }
15173 }
15174 return result;
1c79356b
A
15175}
15176
15177/*
91447636 15178 * Routine: vm_remap
1c79356b 15179 *
91447636
A
15180 * Map a portion of a task's address space.
15181 * Mapped region must not overlap more than
15182 * one vm memory object. Protections and
15183 * inheritance attributes remain the same
15184 * as in the original task and are out parameters.
15185 * The source and target tasks can be identical.
15186 * Other attributes are identical to those for vm_map().
1c79356b
A
15187 */
15188kern_return_t
91447636
A
15189vm_map_remap(
15190 vm_map_t target_map,
15191 vm_map_address_t *address,
15192 vm_map_size_t size,
15193 vm_map_offset_t mask,
060df5ea 15194 int flags,
5ba3f43e
A
15195 vm_map_kernel_flags_t vmk_flags,
15196 vm_tag_t tag,
91447636
A
15197 vm_map_t src_map,
15198 vm_map_offset_t memory_address,
1c79356b 15199 boolean_t copy,
1c79356b
A
15200 vm_prot_t *cur_protection,
15201 vm_prot_t *max_protection,
91447636 15202 vm_inherit_t inheritance)
1c79356b
A
15203{
15204 kern_return_t result;
91447636 15205 vm_map_entry_t entry;
0c530ab8 15206 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 15207 vm_map_entry_t new_entry;
91447636 15208 struct vm_map_header map_header;
39236c6e 15209 vm_map_offset_t offset_in_mapping;
1c79356b 15210
91447636
A
15211 if (target_map == VM_MAP_NULL)
15212 return KERN_INVALID_ARGUMENT;
1c79356b 15213
91447636 15214 switch (inheritance) {
2d21ac55
A
15215 case VM_INHERIT_NONE:
15216 case VM_INHERIT_COPY:
15217 case VM_INHERIT_SHARE:
91447636
A
15218 if (size != 0 && src_map != VM_MAP_NULL)
15219 break;
15220 /*FALL THRU*/
2d21ac55 15221 default:
91447636
A
15222 return KERN_INVALID_ARGUMENT;
15223 }
1c79356b 15224
5ba3f43e
A
15225 /*
15226 * If the user is requesting that we return the address of the
15227 * first byte of the data (rather than the base of the page),
15228 * then we use different rounding semantics: specifically,
39236c6e
A
15229 * we assume that (memory_address, size) describes a region
15230 * all of whose pages we must cover, rather than a base to be truncated
15231 * down and a size to be added to that base. So we figure out
15232 * the highest page that the requested region includes and make
15233 * sure that the size will cover it.
5ba3f43e 15234 *
39236c6e
A
15235 * The key example we're worried about is of the form:
15236 *
15237 * memory_address = 0x1ff0, size = 0x20
5ba3f43e
A
15238 *
15239 * With the old semantics, we round down the memory_address to 0x1000
39236c6e
A
15240 * and round up the size to 0x1000, resulting in our covering *only*
15241 * page 0x1000. With the new semantics, we'd realize that the region covers
5ba3f43e 15242 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
39236c6e
A
15243 * 0x1000 and page 0x2000 in the region we remap.
15244 */
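	/*
	 * Worked example for the comment above, assuming 4KB pages
	 * (PAGE_MASK == 0xfff); the numbers are illustrative only:
	 *   memory_address = 0x1ff0, size = 0x20
	 *   offset_in_mapping = 0x1ff0 - trunc_page(0x1ff0) = 0xff0
	 *   size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010) = 0x2000
	 * so the remapped region covers both page 0x1000 and page 0x2000.
	 */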
15245 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15246 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15247 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15248 } else {
15249 size = vm_map_round_page(size, PAGE_MASK);
5ba3f43e
A
15250 }
15251 if (size == 0) {
15252 return KERN_INVALID_ARGUMENT;
15253 }
1c79356b 15254
91447636 15255 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
15256 size, copy, &map_header,
15257 cur_protection,
15258 max_protection,
15259 inheritance,
39037602 15260 target_map->hdr.entries_pageable,
5c9f4661
A
15261 src_map == target_map,
15262 vmk_flags);
1c79356b 15263
91447636
A
15264 if (result != KERN_SUCCESS) {
15265 return result;
15266 }
1c79356b 15267
91447636
A
15268 /*
15269 * Allocate/check a range of free virtual address
15270 * space for the target
1c79356b 15271 */
39236c6e
A
15272 *address = vm_map_trunc_page(*address,
15273 VM_MAP_PAGE_MASK(target_map));
91447636
A
15274 vm_map_lock(target_map);
15275 result = vm_map_remap_range_allocate(target_map, address, size,
5ba3f43e
A
15276 mask, flags, vmk_flags, tag,
15277 &insp_entry);
1c79356b 15278
91447636
A
15279 for (entry = map_header.links.next;
15280 entry != (struct vm_map_entry *)&map_header.links;
15281 entry = new_entry) {
15282 new_entry = entry->vme_next;
6d2010ae 15283 _vm_map_store_entry_unlink(&map_header, entry);
91447636 15284 if (result == KERN_SUCCESS) {
3e170ce0
A
15285 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15286 /* no codesigning -> read-only access */
15287 assert(!entry->used_for_jit);
15288 entry->max_protection = VM_PROT_READ;
15289 entry->protection = VM_PROT_READ;
15290 entry->vme_resilient_codesign = TRUE;
15291 }
91447636
A
15292 entry->vme_start += *address;
15293 entry->vme_end += *address;
39236c6e 15294 assert(!entry->map_aligned);
6d2010ae 15295 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
15296 insp_entry = entry;
15297 } else {
15298 if (!entry->is_sub_map) {
3e170ce0 15299 vm_object_deallocate(VME_OBJECT(entry));
91447636 15300 } else {
3e170ce0 15301 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 15302 }
91447636 15303 _vm_map_entry_dispose(&map_header, entry);
1c79356b 15304 }
91447636 15305 }
1c79356b 15306
3e170ce0
A
15307 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15308 *cur_protection = VM_PROT_READ;
15309 *max_protection = VM_PROT_READ;
15310 }
15311
6d2010ae 15312 if( target_map->disable_vmentry_reuse == TRUE) {
39037602 15313 assert(!target_map->is_nested_map);
6d2010ae
A
15314 if( target_map->highest_entry_end < insp_entry->vme_end ){
15315 target_map->highest_entry_end = insp_entry->vme_end;
15316 }
15317 }
15318
91447636
A
15319 if (result == KERN_SUCCESS) {
15320 target_map->size += size;
0c530ab8 15321 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
5ba3f43e 15322
91447636
A
15323 }
15324 vm_map_unlock(target_map);
1c79356b 15325
91447636 15326 if (result == KERN_SUCCESS && target_map->wiring_required)
5ba3f43e
A
15327 result = vm_map_wire_kernel(target_map, *address,
15328 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
3e170ce0 15329 TRUE);
39236c6e 15330
5ba3f43e
A
15331 /*
15332 * If requested, return the address of the data pointed to by the
39236c6e
A
15333 * request, rather than the base of the resulting page.
15334 */
15335 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15336 *address += offset_in_mapping;
15337 }
15338
91447636
A
15339 return result;
15340}
1c79356b 15341
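/*
 * Illustrative sketch (an assumption, not part of the original source): from
 * user space, vm_map_remap() is reachable through the mach_vm_remap() MIG
 * routine.  The snippet below creates a second, shared mapping of an existing
 * buffer within the caller's own address space.
 */
#if 0 /* user-space usage sketch, not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
remap_shared_view_example(mach_vm_address_t src, mach_vm_size_t size,
	mach_vm_address_t *dst)
{
	vm_prot_t cur_prot, max_prot;

	*dst = 0;			/* let the kernel pick the address */
	return mach_vm_remap(mach_task_self(),
	    dst,
	    size,
	    0,				/* alignment mask */
	    VM_FLAGS_ANYWHERE,
	    mach_task_self(),		/* source task: ourselves */
	    src,
	    FALSE,			/* copy == FALSE -> share the pages */
	    &cur_prot, &max_prot,
	    VM_INHERIT_SHARE);
}
#endif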
91447636
A
15342/*
15343 * Routine: vm_map_remap_range_allocate
15344 *
15345 * Description:
15346 * Allocate a range in the specified virtual address map.
15347 * Returns the address and the map entry just before the allocated
15348 * range.
15349 *
15350 * Map must be locked.
15351 */
1c79356b 15352
91447636
A
15353static kern_return_t
15354vm_map_remap_range_allocate(
15355 vm_map_t map,
15356 vm_map_address_t *address, /* IN/OUT */
15357 vm_map_size_t size,
15358 vm_map_offset_t mask,
060df5ea 15359 int flags,
5ba3f43e
A
15360 __unused vm_map_kernel_flags_t vmk_flags,
15361 __unused vm_tag_t tag,
91447636
A
15362 vm_map_entry_t *map_entry) /* OUT */
15363{
060df5ea
A
15364 vm_map_entry_t entry;
15365 vm_map_offset_t start;
15366 vm_map_offset_t end;
15367 kern_return_t kr;
3e170ce0 15368 vm_map_entry_t hole_entry;
1c79356b 15369
2d21ac55 15370StartAgain: ;
1c79356b 15371
2d21ac55 15372 start = *address;
1c79356b 15373
060df5ea 15374 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55 15375 {
39037602
A
15376 if (flags & VM_FLAGS_RANDOM_ADDR)
15377 {
15378 /*
15379 * Get a random start address.
15380 */
15381 kr = vm_map_random_address_for_size(map, address, size);
15382 if (kr != KERN_SUCCESS) {
15383 return(kr);
15384 }
15385 start = *address;
15386 }
15387
2d21ac55
A
15388 /*
15389 * Calculate the first possible address.
15390 */
1c79356b 15391
2d21ac55
A
15392 if (start < map->min_offset)
15393 start = map->min_offset;
15394 if (start > map->max_offset)
15395 return(KERN_NO_SPACE);
5ba3f43e 15396
2d21ac55
A
15397 /*
15398 * Look for the first possible address;
15399 * if there's already something at this
15400 * address, we have to start after it.
15401 */
1c79356b 15402
6d2010ae
A
15403 if( map->disable_vmentry_reuse == TRUE) {
15404 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 15405 } else {
3e170ce0
A
15406
15407 if (map->holelistenabled) {
15408 hole_entry = (vm_map_entry_t)map->holes_list;
15409
15410 if (hole_entry == NULL) {
15411 /*
15412 * No more space in the map?
15413 */
15414 return(KERN_NO_SPACE);
15415 } else {
15416
15417 boolean_t found_hole = FALSE;
15418
15419 do {
15420 if (hole_entry->vme_start >= start) {
15421 start = hole_entry->vme_start;
15422 found_hole = TRUE;
15423 break;
15424 }
15425
15426 if (hole_entry->vme_end > start) {
15427 found_hole = TRUE;
15428 break;
15429 }
15430 hole_entry = hole_entry->vme_next;
15431
15432 } while (hole_entry != (vm_map_entry_t) map->holes_list);
15433
15434 if (found_hole == FALSE) {
15435 return (KERN_NO_SPACE);
15436 }
15437
15438 entry = hole_entry;
15439 }
6d2010ae 15440 } else {
3e170ce0
A
15441 assert(first_free_is_valid(map));
15442 if (start == map->min_offset) {
15443 if ((entry = map->first_free) != vm_map_to_entry(map))
15444 start = entry->vme_end;
15445 } else {
15446 vm_map_entry_t tmp_entry;
15447 if (vm_map_lookup_entry(map, start, &tmp_entry))
15448 start = tmp_entry->vme_end;
15449 entry = tmp_entry;
15450 }
6d2010ae 15451 }
39236c6e
A
15452 start = vm_map_round_page(start,
15453 VM_MAP_PAGE_MASK(map));
2d21ac55 15454 }
5ba3f43e 15455
2d21ac55
A
15456 /*
15457 * In any case, the "entry" always precedes
15458 * the proposed new region throughout the
15459 * loop:
15460 */
1c79356b 15461
2d21ac55 15462 while (TRUE) {
39037602 15463 vm_map_entry_t next;
2d21ac55
A
15464
15465 /*
15466 * Find the end of the proposed new region.
15467 * Be sure we didn't go beyond the end, or
15468 * wrap around the address.
15469 */
15470
15471 end = ((start + mask) & ~mask);
39236c6e
A
15472 end = vm_map_round_page(end,
15473 VM_MAP_PAGE_MASK(map));
2d21ac55
A
15474 if (end < start)
15475 return(KERN_NO_SPACE);
15476 start = end;
15477 end += size;
15478
15479 if ((end > map->max_offset) || (end < start)) {
15480 if (map->wait_for_space) {
15481 if (size <= (map->max_offset -
15482 map->min_offset)) {
15483 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
15484 vm_map_unlock(map);
15485 thread_block(THREAD_CONTINUE_NULL);
15486 vm_map_lock(map);
15487 goto StartAgain;
15488 }
15489 }
5ba3f43e 15490
2d21ac55
A
15491 return(KERN_NO_SPACE);
15492 }
1c79356b 15493
2d21ac55 15494 next = entry->vme_next;
1c79356b 15495
3e170ce0
A
15496 if (map->holelistenabled) {
15497 if (entry->vme_end >= end)
15498 break;
15499 } else {
15500 /*
15501 * If there are no more entries, we must win.
15502 *
15503 * OR
15504 *
15505 * If there is another entry, it must be
15506 * after the end of the potential new region.
15507 */
1c79356b 15508
3e170ce0
A
15509 if (next == vm_map_to_entry(map))
15510 break;
15511
15512 if (next->vme_start >= end)
15513 break;
15514 }
1c79356b 15515
2d21ac55
A
15516 /*
15517 * Didn't fit -- move to the next entry.
15518 */
1c79356b 15519
2d21ac55 15520 entry = next;
3e170ce0
A
15521
15522 if (map->holelistenabled) {
15523 if (entry == (vm_map_entry_t) map->holes_list) {
15524 /*
15525 * Wrapped around
15526 */
15527 return(KERN_NO_SPACE);
15528 }
15529 start = entry->vme_start;
15530 } else {
15531 start = entry->vme_end;
15532 }
15533 }
15534
15535 if (map->holelistenabled) {
15536
15537 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
15538 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
15539 }
2d21ac55 15540 }
3e170ce0 15541
2d21ac55 15542 *address = start;
3e170ce0 15543
2d21ac55
A
15544 } else {
15545 vm_map_entry_t temp_entry;
5ba3f43e 15546
2d21ac55
A
15547 /*
15548 * Verify that:
15549 * the address doesn't itself violate
15550 * the mask requirement.
15551 */
1c79356b 15552
2d21ac55
A
15553 if ((start & mask) != 0)
15554 return(KERN_NO_SPACE);
1c79356b 15555
1c79356b 15556
2d21ac55
A
15557 /*
15558 * ... the address is within bounds
15559 */
1c79356b 15560
2d21ac55 15561 end = start + size;
1c79356b 15562
2d21ac55
A
15563 if ((start < map->min_offset) ||
15564 (end > map->max_offset) ||
15565 (start >= end)) {
15566 return(KERN_INVALID_ADDRESS);
15567 }
1c79356b 15568
060df5ea
A
15569 /*
15570 * If we're asked to overwrite whatever was mapped in that
15571 * range, first deallocate that range.
15572 */
15573 if (flags & VM_FLAGS_OVERWRITE) {
15574 vm_map_t zap_map;
15575
15576 /*
15577 * We use a "zap_map" to avoid having to unlock
15578 * the "map" in vm_map_delete(), which would compromise
15579 * the atomicity of the "deallocate" and then "remap"
15580 * combination.
15581 */
15582 zap_map = vm_map_create(PMAP_NULL,
15583 start,
316670eb 15584 end,
060df5ea
A
15585 map->hdr.entries_pageable);
15586 if (zap_map == VM_MAP_NULL) {
15587 return KERN_RESOURCE_SHORTAGE;
15588 }
39236c6e 15589 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 15590 vm_map_disable_hole_optimization(zap_map);
060df5ea
A
15591
15592 kr = vm_map_delete(map, start, end,
fe8ab488
A
15593 (VM_MAP_REMOVE_SAVE_ENTRIES |
15594 VM_MAP_REMOVE_NO_MAP_ALIGN),
060df5ea
A
15595 zap_map);
15596 if (kr == KERN_SUCCESS) {
15597 vm_map_destroy(zap_map,
15598 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15599 zap_map = VM_MAP_NULL;
15600 }
15601 }
15602
2d21ac55
A
15603 /*
15604 * ... the starting address isn't allocated
15605 */
91447636 15606
2d21ac55
A
15607 if (vm_map_lookup_entry(map, start, &temp_entry))
15608 return(KERN_NO_SPACE);
91447636 15609
2d21ac55 15610 entry = temp_entry;
91447636 15611
2d21ac55
A
15612 /*
15613 * ... the next region doesn't overlap the
15614 * end point.
15615 */
1c79356b 15616
2d21ac55
A
15617 if ((entry->vme_next != vm_map_to_entry(map)) &&
15618 (entry->vme_next->vme_start < end))
15619 return(KERN_NO_SPACE);
15620 }
15621 *map_entry = entry;
15622 return(KERN_SUCCESS);
91447636 15623}
1c79356b 15624
91447636
A
15625/*
15626 * vm_map_switch:
15627 *
15628 * Set the address map for the current thread to the specified map
15629 */
1c79356b 15630
91447636
A
15631vm_map_t
15632vm_map_switch(
15633 vm_map_t map)
15634{
15635 int mycpu;
15636 thread_t thread = current_thread();
15637 vm_map_t oldmap = thread->map;
1c79356b 15638
91447636
A
15639 mp_disable_preemption();
15640 mycpu = cpu_number();
1c79356b 15641
91447636
A
15642 /*
15643 * Deactivate the current map and activate the requested map
15644 */
15645 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 15646
91447636
A
15647 mp_enable_preemption();
15648 return(oldmap);
15649}
1c79356b 15650
1c79356b 15651
91447636
A
15652/*
15653 * Routine: vm_map_write_user
15654 *
15655 * Description:
15656 * Copy out data from a kernel space into space in the
15657 * destination map. The space must already exist in the
15658 * destination map.
15659 * NOTE: This routine should only be called by threads
15660 * which can block on a page fault. i.e. kernel mode user
15661 * threads.
15662 *
15663 */
15664kern_return_t
15665vm_map_write_user(
15666 vm_map_t map,
15667 void *src_p,
15668 vm_map_address_t dst_addr,
15669 vm_size_t size)
15670{
15671 kern_return_t kr = KERN_SUCCESS;
1c79356b 15672
91447636
A
15673 if(current_map() == map) {
15674 if (copyout(src_p, dst_addr, size)) {
15675 kr = KERN_INVALID_ADDRESS;
15676 }
15677 } else {
15678 vm_map_t oldmap;
1c79356b 15679
91447636
A
15680 /* take on the identity of the target map while doing */
15681 /* the transfer */
1c79356b 15682
91447636
A
15683 vm_map_reference(map);
15684 oldmap = vm_map_switch(map);
15685 if (copyout(src_p, dst_addr, size)) {
15686 kr = KERN_INVALID_ADDRESS;
1c79356b 15687 }
91447636
A
15688 vm_map_switch(oldmap);
15689 vm_map_deallocate(map);
1c79356b 15690 }
91447636 15691 return kr;
1c79356b
A
15692}
15693
15694/*
91447636
A
15695 * Routine: vm_map_read_user
15696 *
15697 * Description:
15698 * Copy in data from a user space source map into the
15699 * kernel map. The space must already exist in the
15700 * kernel map.
15701 * NOTE: This routine should only be called by threads
15702 * which can block on a page fault. i.e. kernel mode user
15703 * threads.
1c79356b 15704 *
1c79356b
A
15705 */
15706kern_return_t
91447636
A
15707vm_map_read_user(
15708 vm_map_t map,
15709 vm_map_address_t src_addr,
15710 void *dst_p,
15711 vm_size_t size)
1c79356b 15712{
91447636 15713 kern_return_t kr = KERN_SUCCESS;
1c79356b 15714
91447636
A
15715 if(current_map() == map) {
15716 if (copyin(src_addr, dst_p, size)) {
15717 kr = KERN_INVALID_ADDRESS;
15718 }
15719 } else {
15720 vm_map_t oldmap;
1c79356b 15721
91447636
A
15722 /* take on the identity of the target map while doing */
15723 /* the transfer */
15724
15725 vm_map_reference(map);
15726 oldmap = vm_map_switch(map);
15727 if (copyin(src_addr, dst_p, size)) {
15728 kr = KERN_INVALID_ADDRESS;
15729 }
15730 vm_map_switch(oldmap);
15731 vm_map_deallocate(map);
1c79356b 15732 }
91447636
A
15733 return kr;
15734}
15735
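/*
 * Minimal sketch (illustrative assumption) of how a kernel thread that can
 * take page faults might use vm_map_write_user() and vm_map_read_user() to
 * move a small structure to and from a user task's map.
 */
#if 0 /* illustrative only */
static kern_return_t
copy_struct_to_and_from_user_example(
	vm_map_t		user_map,
	vm_map_address_t	user_addr)
{
	struct { uint32_t a, b; } tmp = { 1, 2 };
	kern_return_t kr;

	/* copy the kernel data out to the user mapping */
	kr = vm_map_write_user(user_map, &tmp, user_addr, sizeof (tmp));
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... and read it back into the kernel */
	return vm_map_read_user(user_map, user_addr, &tmp, sizeof (tmp));
}
#endif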
1c79356b 15736
91447636
A
15737/*
15738 * vm_map_check_protection:
15739 *
15740 * Assert that the target map allows the specified
15741 * privilege on the entire address region given.
15742 * The entire region must be allocated.
15743 */
2d21ac55
A
15744boolean_t
15745vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
15746 vm_map_offset_t end, vm_prot_t protection)
91447636 15747{
2d21ac55
A
15748 vm_map_entry_t entry;
15749 vm_map_entry_t tmp_entry;
1c79356b 15750
91447636 15751 vm_map_lock(map);
1c79356b 15752
2d21ac55 15753 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 15754 {
2d21ac55
A
15755 vm_map_unlock(map);
15756 return (FALSE);
1c79356b
A
15757 }
15758
91447636
A
15759 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15760 vm_map_unlock(map);
15761 return(FALSE);
15762 }
1c79356b 15763
91447636
A
15764 entry = tmp_entry;
15765
15766 while (start < end) {
15767 if (entry == vm_map_to_entry(map)) {
15768 vm_map_unlock(map);
15769 return(FALSE);
1c79356b 15770 }
1c79356b 15771
91447636
A
15772 /*
15773 * No holes allowed!
15774 */
1c79356b 15775
91447636
A
15776 if (start < entry->vme_start) {
15777 vm_map_unlock(map);
15778 return(FALSE);
15779 }
15780
15781 /*
15782 * Check protection associated with entry.
15783 */
15784
15785 if ((entry->protection & protection) != protection) {
15786 vm_map_unlock(map);
15787 return(FALSE);
15788 }
15789
15790 /* go to next entry */
15791
15792 start = entry->vme_end;
15793 entry = entry->vme_next;
15794 }
15795 vm_map_unlock(map);
15796 return(TRUE);
1c79356b
A
15797}
15798
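/*
 * Minimal sketch (illustrative assumption): a caller could use
 * vm_map_check_protection() to verify that a range is fully mapped
 * read/write before, for example, attempting to wire it.
 */
#if 0 /* illustrative only */
static boolean_t
range_is_writable_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_check_protection(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE);
}
#endif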
1c79356b 15799kern_return_t
91447636
A
15800vm_map_purgable_control(
15801 vm_map_t map,
15802 vm_map_offset_t address,
15803 vm_purgable_t control,
15804 int *state)
1c79356b 15805{
91447636
A
15806 vm_map_entry_t entry;
15807 vm_object_t object;
15808 kern_return_t kr;
fe8ab488 15809 boolean_t was_nonvolatile;
1c79356b 15810
1c79356b 15811 /*
91447636
A
15812 * Vet all the input parameters and current type and state of the
15813 * underlying object. Return with an error if anything is amiss.
1c79356b 15814 */
91447636
A
15815 if (map == VM_MAP_NULL)
15816 return(KERN_INVALID_ARGUMENT);
1c79356b 15817
91447636 15818 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7 15819 control != VM_PURGABLE_GET_STATE &&
5ba3f43e
A
15820 control != VM_PURGABLE_PURGE_ALL &&
15821 control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
91447636 15822 return(KERN_INVALID_ARGUMENT);
1c79356b 15823
b0d623f7
A
15824 if (control == VM_PURGABLE_PURGE_ALL) {
15825 vm_purgeable_object_purge_all();
15826 return KERN_SUCCESS;
15827 }
15828
5ba3f43e
A
15829 if ((control == VM_PURGABLE_SET_STATE ||
15830 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
b0d623f7 15831 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 15832 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
15833 return(KERN_INVALID_ARGUMENT);
15834
b0d623f7 15835 vm_map_lock_read(map);
91447636
A
15836
15837 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
15838
15839 /*
15840 * Must pass a valid non-submap address.
15841 */
b0d623f7 15842 vm_map_unlock_read(map);
91447636
A
15843 return(KERN_INVALID_ADDRESS);
15844 }
15845
15846 if ((entry->protection & VM_PROT_WRITE) == 0) {
15847 /*
15848 * Can't apply purgable controls to something you can't write.
15849 */
b0d623f7 15850 vm_map_unlock_read(map);
91447636
A
15851 return(KERN_PROTECTION_FAILURE);
15852 }
15853
3e170ce0 15854 object = VME_OBJECT(entry);
fe8ab488
A
15855 if (object == VM_OBJECT_NULL ||
15856 object->purgable == VM_PURGABLE_DENY) {
91447636 15857 /*
fe8ab488 15858 * Object must already be present and be purgeable.
91447636 15859 */
b0d623f7 15860 vm_map_unlock_read(map);
91447636
A
15861 return KERN_INVALID_ARGUMENT;
15862 }
5ba3f43e 15863
91447636
A
15864 vm_object_lock(object);
15865
39236c6e 15866#if 00
5ba3f43e 15867 if (VME_OFFSET(entry) != 0 ||
6d2010ae 15868 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
15869 /*
15870 * Can only apply purgable controls to the whole (existing)
15871 * object at once.
15872 */
b0d623f7 15873 vm_map_unlock_read(map);
91447636
A
15874 vm_object_unlock(object);
15875 return KERN_INVALID_ARGUMENT;
1c79356b 15876 }
39236c6e 15877#endif
fe8ab488
A
15878
15879 assert(!entry->is_sub_map);
15880 assert(!entry->use_pmap); /* purgeable has its own accounting */
15881
b0d623f7 15882 vm_map_unlock_read(map);
1c79356b 15883
fe8ab488
A
15884 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
15885
91447636 15886 kr = vm_object_purgable_control(object, control, state);
1c79356b 15887
fe8ab488
A
15888 if (was_nonvolatile &&
15889 object->purgable != VM_PURGABLE_NONVOLATILE &&
15890 map->pmap == kernel_pmap) {
15891#if DEBUG
15892 object->vo_purgeable_volatilizer = kernel_task;
15893#endif /* DEBUG */
15894 }
15895
91447636 15896 vm_object_unlock(object);
1c79356b 15897
91447636
A
15898 return kr;
15899}
1c79356b 15900
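/*
 * Illustrative sketch (an assumption, not part of the original source): from
 * user space, purgeable state changes typically arrive here through the
 * mach_vm_purgable_control() MIG routine, applied to memory allocated with
 * VM_FLAGS_PURGABLE.
 */
#if 0 /* user-space usage sketch, not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
make_buffer_volatile_example(mach_vm_size_t size, mach_vm_address_t *addr)
{
	kern_return_t kr;
	int state;

	kr = mach_vm_allocate(mach_task_self(), addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* Tell the VM it may reclaim these pages under memory pressure. */
	state = VM_PURGABLE_VOLATILE;
	return mach_vm_purgable_control(mach_task_self(), *addr,
	    VM_PURGABLE_SET_STATE, &state);
}
#endif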
91447636 15901kern_return_t
b0d623f7 15902vm_map_page_query_internal(
2d21ac55 15903 vm_map_t target_map,
91447636 15904 vm_map_offset_t offset,
2d21ac55
A
15905 int *disposition,
15906 int *ref_count)
91447636 15907{
b0d623f7
A
15908 kern_return_t kr;
15909 vm_page_info_basic_data_t info;
15910 mach_msg_type_number_t count;
15911
15912 count = VM_PAGE_INFO_BASIC_COUNT;
15913 kr = vm_map_page_info(target_map,
15914 offset,
15915 VM_PAGE_INFO_BASIC,
15916 (vm_page_info_t) &info,
15917 &count);
15918 if (kr == KERN_SUCCESS) {
15919 *disposition = info.disposition;
15920 *ref_count = info.ref_count;
15921 } else {
15922 *disposition = 0;
15923 *ref_count = 0;
15924 }
2d21ac55 15925
b0d623f7
A
15926 return kr;
15927}
5ba3f43e 15928
b0d623f7
A
15929kern_return_t
15930vm_map_page_info(
15931 vm_map_t map,
5ba3f43e
A
15932 vm_map_offset_t offset,
15933 vm_page_info_flavor_t flavor,
15934 vm_page_info_t info,
15935 mach_msg_type_number_t *count)
15936{
15937 return (vm_map_page_range_info_internal(map,
15938 offset, /* start of range */
15939 (offset + 1), /* this will get rounded in the call to the page boundary */
15940 flavor,
15941 info,
15942 count));
15943}
15944
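/*
 * Minimal sketch (illustrative assumption) of querying the disposition of a
 * single page through vm_map_page_info() with the VM_PAGE_INFO_BASIC flavor.
 */
#if 0 /* illustrative only */
static boolean_t
page_is_resident_example(vm_map_t map, vm_map_offset_t offset)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count = VM_PAGE_INFO_BASIC_COUNT;

	if (vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
	    (vm_page_info_t) &info, &count) != KERN_SUCCESS)
		return FALSE;

	return (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif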
15945kern_return_t
15946vm_map_page_range_info_internal(
15947 vm_map_t map,
15948 vm_map_offset_t start_offset,
15949 vm_map_offset_t end_offset,
b0d623f7
A
15950 vm_page_info_flavor_t flavor,
15951 vm_page_info_t info,
15952 mach_msg_type_number_t *count)
15953{
5ba3f43e
A
15954 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
15955 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
15956 vm_page_t m = VM_PAGE_NULL;
b0d623f7 15957 kern_return_t retval = KERN_SUCCESS;
5ba3f43e
A
15958 int disposition = 0;
15959 int ref_count = 0;
15960 int depth = 0, info_idx = 0;
15961 vm_page_info_basic_t basic_info = 0;
15962 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
15963 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
2d21ac55 15964
b0d623f7
A
15965 switch (flavor) {
15966 case VM_PAGE_INFO_BASIC:
15967 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
15968 /*
15969 * The "vm_page_info_basic_data" structure was not
15970 * properly padded, so allow the size to be off by
15971 * one to maintain backwards binary compatibility...
15972 */
15973 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
15974 return KERN_INVALID_ARGUMENT;
b0d623f7
A
15975 }
15976 break;
15977 default:
15978 return KERN_INVALID_ARGUMENT;
91447636 15979 }
2d21ac55 15980
b0d623f7
A
15981 disposition = 0;
15982 ref_count = 0;
b0d623f7 15983 depth = 0;
5ba3f43e 15984 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
b0d623f7 15985 retval = KERN_SUCCESS;
5ba3f43e
A
15986
15987 offset_in_page = start_offset & PAGE_MASK;
15988 start = vm_map_trunc_page(start_offset, PAGE_MASK);
15989 end = vm_map_round_page(end_offset, PAGE_MASK);
15990
15991 assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
b0d623f7
A
15992
15993 vm_map_lock_read(map);
15994
5ba3f43e
A
15995 for (curr_s_offset = start; curr_s_offset < end;) {
15996 /*
15997 * New lookup needs reset of these variables.
15998 */
15999 curr_object = object = VM_OBJECT_NULL;
16000 offset_in_object = 0;
16001 ref_count = 0;
16002 depth = 0;
16003
16004 /*
16005 * First, find the map entry covering "curr_s_offset", going down
16006 * submaps if necessary.
16007 */
16008 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16009 /* no entry -> no object -> no page */
16010
16011 if (curr_s_offset < vm_map_min(map)) {
16012 /*
16013 * Illegal address that falls below map min.
16014 */
16015 curr_e_offset = MIN(end, vm_map_min(map));
16016
16017 } else if (curr_s_offset >= vm_map_max(map)) {
16018 /*
16019 * Illegal address that falls on/after map max.
16020 */
16021 curr_e_offset = end;
16022
16023 } else if (map_entry == vm_map_to_entry(map)) {
16024 /*
16025 * Hit a hole.
16026 */
16027 if (map_entry->vme_next == vm_map_to_entry(map)) {
16028 /*
16029 * Empty map.
16030 */
16031 curr_e_offset = MIN(map->max_offset, end);
16032 } else {
16033 /*
16034 * Hole at start of the map.
16035 */
16036 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16037 }
16038 } else {
16039 if (map_entry->vme_next == vm_map_to_entry(map)) {
16040 /*
16041 * Hole at the end of the map.
16042 */
16043 curr_e_offset = MIN(map->max_offset, end);
16044 } else {
16045 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16046 }
16047 }
16048
16049 assert(curr_e_offset >= curr_s_offset);
16050
16051 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16052
16053 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16054
16055 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16056
16057 curr_s_offset = curr_e_offset;
16058
16059 info_idx += num_pages;
16060
16061 continue;
b0d623f7 16062 }
5ba3f43e 16063
b0d623f7 16064 /* compute offset from this map entry's start */
5ba3f43e
A
16065 offset_in_object = curr_s_offset - map_entry->vme_start;
16066
b0d623f7 16067 /* compute offset into this map entry's object (or submap) */
5ba3f43e 16068 offset_in_object += VME_OFFSET(map_entry);
b0d623f7
A
16069
16070 if (map_entry->is_sub_map) {
5ba3f43e
A
16071 vm_map_t sub_map = VM_MAP_NULL;
16072 vm_page_info_t submap_info = 0;
16073 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16074
16075 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16076
16077 submap_s_offset = offset_in_object;
16078 submap_e_offset = submap_s_offset + range_len;
2d21ac55 16079
3e170ce0 16080 sub_map = VME_SUBMAP(map_entry);
5ba3f43e
A
16081
16082 vm_map_reference(sub_map);
b0d623f7 16083 vm_map_unlock_read(map);
2d21ac55 16084
5ba3f43e
A
16085 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16086
16087 retval = vm_map_page_range_info_internal(sub_map,
16088 submap_s_offset,
16089 submap_e_offset,
16090 VM_PAGE_INFO_BASIC,
16091 (vm_page_info_t) submap_info,
16092 count);
16093
16094 assert(retval == KERN_SUCCESS);
16095
16096 vm_map_lock_read(map);
16097 vm_map_deallocate(sub_map);
16098
16099 /* Move the "info" index by the number of pages we inspected.*/
16100 info_idx += range_len >> PAGE_SHIFT;
16101
16102 /* Move our current offset by the size of the range we inspected.*/
16103 curr_s_offset += range_len;
b0d623f7 16104
b0d623f7 16105 continue;
1c79356b 16106 }
b0d623f7 16107
5ba3f43e
A
16108 object = VME_OBJECT(map_entry);
16109 if (object == VM_OBJECT_NULL) {
16110
16111 /*
16112 * We don't have an object here and, hence,
16113 * no pages to inspect. We'll fill up the
16114 * info structure appropriately.
16115 */
16116
16117 curr_e_offset = MIN(map_entry->vme_end, end);
16118
16119 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16120
16121 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16122
16123 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16124
16125 curr_s_offset = curr_e_offset;
16126
16127 info_idx += num_pages;
16128
16129 continue;
16130 }
16131
16132 vm_object_reference(object);
16133 /*
16134 * Shared mode -- so we can allow other readers
16135 * to grab the lock too.
16136 */
16137 vm_object_lock_shared(object);
16138
16139 curr_e_offset = MIN(map_entry->vme_end, end);
16140
b0d623f7 16141 vm_map_unlock_read(map);
b0d623f7 16142
5ba3f43e 16143 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
b0d623f7 16144
5ba3f43e 16145 curr_object = object;
2d21ac55 16146
5ba3f43e 16147 for (; curr_s_offset < curr_e_offset;) {
2d21ac55 16148
5ba3f43e
A
16149 if (object == curr_object) {
16150 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
16151 } else {
16152 ref_count = curr_object->ref_count;
16153 }
16154
16155 curr_offset_in_object = offset_in_object;
16156
16157 for (;;) {
16158 m = vm_page_lookup(curr_object, curr_offset_in_object);
16159
16160 if (m != VM_PAGE_NULL) {
16161
16162 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
39037602 16163 break;
5ba3f43e
A
16164
16165 } else {
16166 if (curr_object->internal &&
16167 curr_object->alive &&
16168 !curr_object->terminating &&
16169 curr_object->pager_ready) {
16170
16171 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
16172 == VM_EXTERNAL_STATE_EXISTS) {
16173 /* the pager has that page */
16174 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16175 break;
16176 }
16177 }
16178
16179 /*
16180 * Go down the VM object shadow chain until we find the page
16181 * we're looking for.
16182 */
16183
16184 if (curr_object->shadow != VM_OBJECT_NULL) {
16185 vm_object_t shadow = VM_OBJECT_NULL;
16186
16187 curr_offset_in_object += curr_object->vo_shadow_offset;
16188 shadow = curr_object->shadow;
16189
16190 vm_object_lock_shared(shadow);
16191 vm_object_unlock(curr_object);
16192
16193 curr_object = shadow;
16194 depth++;
16195 continue;
16196 } else {
16197
16198 break;
16199 }
2d21ac55
A
16200 }
16201 }
b0d623f7 16202
5ba3f43e
A
16203 /* The ref_count is not strictly accurate: it measures the number */
16204 /* of entities holding a reference on the object; they may not be */
16205 /* mapping the object or the section holding the target page. But */
16206 /* it is still a ballpark number and, though an overcount, it */
16207 /* picks up the copy-on-write cases. */
2d21ac55 16208
5ba3f43e
A
16209 /* We could also get a picture of page sharing from pmap_attributes */
16210 /* but this would undercount, as only faulted-in mappings would */
16211 /* show up. */
2d21ac55 16212
5ba3f43e
A
16213 if ((curr_object == object) && curr_object->shadow)
16214 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
16215
16216 if (! curr_object->internal)
16217 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16218
16219 if (m != VM_PAGE_NULL) {
16220
16221 if (m->fictitious) {
16222
16223 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
16224
16225 } else {
16226 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
16227 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16228
16229 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
16230 disposition |= VM_PAGE_QUERY_PAGE_REF;
16231
16232 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
16233 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
16234
16235 if (m->cs_validated)
16236 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
16237 if (m->cs_tainted)
16238 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
16239 if (m->cs_nx)
16240 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
16241 }
91447636 16242 }
1c79356b 16243
5ba3f43e
A
16244 switch (flavor) {
16245 case VM_PAGE_INFO_BASIC:
16246 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16247 basic_info->disposition = disposition;
16248 basic_info->ref_count = ref_count;
16249 basic_info->object_id = (vm_object_id_t) (uintptr_t)
16250 VM_KERNEL_ADDRPERM(curr_object);
16251 basic_info->offset =
16252 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
16253 basic_info->depth = depth;
16254
16255 info_idx++;
16256 break;
16257 }
1c79356b 16258
5ba3f43e
A
16259 disposition = 0;
16260 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
b0d623f7 16261
5ba3f43e
A
16262 /*
16263 * Move to next offset in the range and in our object.
16264 */
16265 curr_s_offset += PAGE_SIZE;
16266 offset_in_object += PAGE_SIZE;
16267 curr_offset_in_object = offset_in_object;
2d21ac55 16268
5ba3f43e 16269 if (curr_object != object) {
2d21ac55 16270
5ba3f43e 16271 vm_object_unlock(curr_object);
1c79356b 16272
5ba3f43e 16273 curr_object = object;
1c79356b 16274
5ba3f43e
A
16275 vm_object_lock_shared(curr_object);
16276 } else {
1c79356b 16277
5ba3f43e
A
16278 vm_object_lock_yield_shared(curr_object);
16279 }
16280 }
593a1d5f 16281
5ba3f43e
A
16282 vm_object_unlock(curr_object);
16283 vm_object_deallocate(curr_object);
b0d623f7 16284
5ba3f43e 16285 vm_map_lock_read(map);
b0d623f7 16286 }
0c530ab8 16287
5ba3f43e 16288 vm_map_unlock_read(map);
2d21ac55 16289 return retval;
91447636
A
16290}
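/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * A minimal user-level consumer of the VM_PAGE_INFO_BASIC records filled
 * in by the loop above.  The entry point (mach_vm_page_info()) and the
 * header locations are assumptions; the disposition bits and structure
 * fields are exactly the ones populated above.
 */
#if 0	/* illustrative only -- not compiled */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
dump_page_disposition(mach_vm_address_t addr)
{
	struct vm_page_info_basic	basic;
	mach_msg_type_number_t		count = VM_PAGE_INFO_BASIC_COUNT;
	kern_return_t			kr;

	kr = mach_vm_page_info(mach_task_self(), addr, VM_PAGE_INFO_BASIC,
	    (vm_page_info_t)&basic, &count);
	if (kr != KERN_SUCCESS)
		return;

	if (basic.disposition & VM_PAGE_QUERY_PAGE_PRESENT)
		printf("0x%llx: resident (shadow depth %d, ref_count %d)\n",
		    (unsigned long long)addr, basic.depth, basic.ref_count);
	else if (basic.disposition & VM_PAGE_QUERY_PAGE_PAGED_OUT)
		printf("0x%llx: compressed / paged out\n",
		    (unsigned long long)addr);
	else
		printf("0x%llx: not present\n", (unsigned long long)addr);
}
#endif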
16291
16292/*
16293 * vm_map_msync
16294 *
16295 * Synchronises the specified memory range with its backing store
16296 * image by either flushing or cleaning the contents to the appropriate
16297 * memory manager, engaging in a memory object synchronize dialog with
16298 * the manager. The client doesn't return until the manager issues an
16299 * m_o_s_completed message. MIG magically converts the user task parameter
16300 * to the task's address map.
16301 *
16302 * interpretation of sync_flags
16303 * VM_SYNC_INVALIDATE - discard pages, only return precious
16304 * pages to manager.
16305 *
16306 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
16307 * - discard pages, write dirty or precious
16308 * pages back to memory manager.
16309 *
16310 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
16311 * - write dirty or precious pages back to
16312 * the memory manager.
16313 *
16314 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
16315 * is a hole in the region, and we would
16316 * have returned KERN_SUCCESS, return
16317 * KERN_INVALID_ADDRESS instead.
16318 *
16319 * NOTE
16320 * The memory object attributes have not yet been implemented; this
16321 * function will have to deal with the invalidate attribute.
16322 *
16323 * RETURNS
16324 * KERN_INVALID_TASK Bad task parameter
16325 * KERN_INVALID_ARGUMENT both sync and async were specified.
16326 * KERN_SUCCESS The usual.
16327 * KERN_INVALID_ADDRESS There was a hole in the region.
16328 */
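/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * Roughly how a caller such as the BSD msync() path could translate its
 * MS_* flags (from <sys/mman.h>) into the sync_flags interpreted above
 * before invoking the routine defined below; the actual translation done
 * by the BSD layer lives outside this file and is only assumed here.
 */
#if 0	/* illustrative only -- not compiled */
static kern_return_t
example_msync_to_vm_sync(vm_map_t map, mach_vm_address_t addr,
    mach_vm_size_t len, int ms_flags)
{
	vm_sync_t sync_flags = VM_SYNC_CONTIGUOUS; /* holes -> KERN_INVALID_ADDRESS */

	if (ms_flags & MS_SYNC)
		sync_flags |= VM_SYNC_SYNCHRONOUS;
	if (ms_flags & MS_ASYNC)
		sync_flags |= VM_SYNC_ASYNCHRONOUS; /* both together is rejected below */
	if (ms_flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	return vm_map_msync(map, (vm_map_address_t)addr,
	    (vm_map_size_t)len, sync_flags);
}
#endif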
16329
16330kern_return_t
16331vm_map_msync(
16332 vm_map_t map,
16333 vm_map_address_t address,
16334 vm_map_size_t size,
16335 vm_sync_t sync_flags)
16336{
91447636
A
16337 vm_map_entry_t entry;
16338 vm_map_size_t amount_left;
16339 vm_object_offset_t offset;
16340 boolean_t do_sync_req;
91447636 16341 boolean_t had_hole = FALSE;
3e170ce0 16342 vm_map_offset_t pmap_offset;
5ba3f43e 16343
91447636
A
16344 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
16345 (sync_flags & VM_SYNC_SYNCHRONOUS))
16346 return(KERN_INVALID_ARGUMENT);
1c79356b
A
16347
16348 /*
91447636 16349 * align address and size on page boundaries
1c79356b 16350 */
39236c6e
A
16351 size = (vm_map_round_page(address + size,
16352 VM_MAP_PAGE_MASK(map)) -
16353 vm_map_trunc_page(address,
16354 VM_MAP_PAGE_MASK(map)));
16355 address = vm_map_trunc_page(address,
16356 VM_MAP_PAGE_MASK(map));
1c79356b 16357
91447636
A
16358 if (map == VM_MAP_NULL)
16359 return(KERN_INVALID_TASK);
1c79356b 16360
91447636
A
16361 if (size == 0)
16362 return(KERN_SUCCESS);
1c79356b 16363
91447636 16364 amount_left = size;
1c79356b 16365
91447636
A
16366 while (amount_left > 0) {
16367 vm_object_size_t flush_size;
16368 vm_object_t object;
1c79356b 16369
91447636
A
16370 vm_map_lock(map);
16371 if (!vm_map_lookup_entry(map,
3e170ce0 16372 address,
39236c6e 16373 &entry)) {
91447636 16374
2d21ac55 16375 vm_map_size_t skip;
91447636
A
16376
16377 /*
16378 * hole in the address map.
16379 */
16380 had_hole = TRUE;
16381
39037602
A
16382 if (sync_flags & VM_SYNC_KILLPAGES) {
16383 /*
16384 * For VM_SYNC_KILLPAGES, there should be
16385 * no holes in the range, since we couldn't
16386 * prevent someone else from allocating in
16387 * that hole and we wouldn't want to "kill"
16388 * their pages.
16389 */
16390 vm_map_unlock(map);
16391 break;
16392 }
16393
91447636
A
16394 /*
16395 * Check for empty map.
16396 */
16397 if (entry == vm_map_to_entry(map) &&
16398 entry->vme_next == entry) {
16399 vm_map_unlock(map);
16400 break;
16401 }
16402 /*
16403 * Check that we don't wrap and that
16404 * we have at least one real map entry.
16405 */
16406 if ((map->hdr.nentries == 0) ||
16407 (entry->vme_next->vme_start < address)) {
16408 vm_map_unlock(map);
16409 break;
16410 }
16411 /*
16412 * Move up to the next entry if needed
16413 */
16414 skip = (entry->vme_next->vme_start - address);
16415 if (skip >= amount_left)
16416 amount_left = 0;
16417 else
16418 amount_left -= skip;
16419 address = entry->vme_next->vme_start;
16420 vm_map_unlock(map);
16421 continue;
16422 }
1c79356b 16423
91447636 16424 offset = address - entry->vme_start;
3e170ce0 16425 pmap_offset = address;
1c79356b 16426
91447636
A
16427 /*
16428 * do we have more to flush than is contained in this
16429 * entry?
16430 */
16431 if (amount_left + entry->vme_start + offset > entry->vme_end) {
16432 flush_size = entry->vme_end -
2d21ac55 16433 (entry->vme_start + offset);
91447636
A
16434 } else {
16435 flush_size = amount_left;
16436 }
16437 amount_left -= flush_size;
16438 address += flush_size;
1c79356b 16439
91447636
A
16440 if (entry->is_sub_map == TRUE) {
16441 vm_map_t local_map;
16442 vm_map_offset_t local_offset;
1c79356b 16443
3e170ce0
A
16444 local_map = VME_SUBMAP(entry);
16445 local_offset = VME_OFFSET(entry);
91447636
A
16446 vm_map_unlock(map);
16447 if (vm_map_msync(
2d21ac55
A
16448 local_map,
16449 local_offset,
16450 flush_size,
16451 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
16452 had_hole = TRUE;
16453 }
16454 continue;
16455 }
3e170ce0 16456 object = VME_OBJECT(entry);
1c79356b 16457
91447636
A
16458 /*
16459 * We can't sync this object if the object has not been
16460 * created yet
16461 */
16462 if (object == VM_OBJECT_NULL) {
16463 vm_map_unlock(map);
16464 continue;
16465 }
3e170ce0 16466 offset += VME_OFFSET(entry);
1c79356b 16467
91447636 16468 vm_object_lock(object);
1c79356b 16469
91447636 16470 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
16471 int kill_pages = 0;
16472 boolean_t reusable_pages = FALSE;
91447636
A
16473
16474 if (sync_flags & VM_SYNC_KILLPAGES) {
39037602
A
16475 if (((object->ref_count == 1) ||
16476 ((object->copy_strategy !=
16477 MEMORY_OBJECT_COPY_SYMMETRIC) &&
16478 (object->copy == VM_OBJECT_NULL))) &&
16479 (object->shadow == VM_OBJECT_NULL)) {
16480 if (object->ref_count != 1) {
16481 vm_page_stats_reusable.free_shared++;
16482 }
91447636 16483 kill_pages = 1;
39037602 16484 } else {
91447636 16485 kill_pages = -1;
39037602 16486 }
91447636
A
16487 }
16488 if (kill_pages != -1)
3e170ce0
A
16489 vm_object_deactivate_pages(
16490 object,
16491 offset,
16492 (vm_object_size_t) flush_size,
16493 kill_pages,
16494 reusable_pages,
16495 map->pmap,
16496 pmap_offset);
91447636
A
16497 vm_object_unlock(object);
16498 vm_map_unlock(map);
16499 continue;
1c79356b 16500 }
91447636
A
16501 /*
16502 * We can't sync this object if there isn't a pager.
16503 * Don't bother to sync internal objects, since there can't
16504 * be any "permanent" storage for these objects anyway.
16505 */
16506 if ((object->pager == MEMORY_OBJECT_NULL) ||
16507 (object->internal) || (object->private)) {
16508 vm_object_unlock(object);
16509 vm_map_unlock(map);
16510 continue;
16511 }
16512 /*
16513 * keep a reference on the object until syncing is done
16514 */
2d21ac55 16515 vm_object_reference_locked(object);
91447636 16516 vm_object_unlock(object);
1c79356b 16517
91447636 16518 vm_map_unlock(map);
1c79356b 16519
91447636 16520 do_sync_req = vm_object_sync(object,
2d21ac55
A
16521 offset,
16522 flush_size,
16523 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
16524 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
16525 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 16526 sync_flags & VM_SYNC_SYNCHRONOUS);
2d21ac55 16527
5ba3f43e
A
16528 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
16529 /*
16530 * clear out the clustering and read-ahead hints
16531 */
16532 vm_object_lock(object);
2d21ac55 16533
5ba3f43e
A
16534 object->pages_created = 0;
16535 object->pages_used = 0;
16536 object->sequential = 0;
16537 object->last_alloc = 0;
2d21ac55 16538
2d21ac55 16539 vm_object_unlock(object);
2d21ac55 16540 }
5ba3f43e
A
16541 vm_object_deallocate(object);
16542 } /* while */
91447636
A
16543
16544 /* for proper msync() behaviour */
16545 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
16546 return(KERN_INVALID_ADDRESS);
16547
16548 return(KERN_SUCCESS);
16549}/* vm_map_msync */
1c79356b 16550
1c79356b 16551/*
91447636
A
16552 * Routine: convert_port_entry_to_map
16553 * Purpose:
16554 * Convert from a port specifying an entry or a task
16555 * to a map. Doesn't consume the port ref; produces a map ref,
16556 * which may be null. Unlike convert_port_to_map, the
16557 * port may be either task-backed or named-entry-backed.
16558 * Conditions:
16559 * Nothing locked.
1c79356b 16560 */
1c79356b 16561
1c79356b 16562
91447636
A
16563vm_map_t
16564convert_port_entry_to_map(
16565 ipc_port_t port)
16566{
16567 vm_map_t map;
16568 vm_named_entry_t named_entry;
2d21ac55 16569 uint32_t try_failed_count = 0;
1c79356b 16570
91447636
A
16571 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16572 while(TRUE) {
16573 ip_lock(port);
5ba3f43e 16574 if(ip_active(port) && (ip_kotype(port)
2d21ac55 16575 == IKOT_NAMED_ENTRY)) {
91447636 16576 named_entry =
2d21ac55 16577 (vm_named_entry_t)port->ip_kobject;
b0d623f7 16578 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 16579 ip_unlock(port);
2d21ac55
A
16580
16581 try_failed_count++;
16582 mutex_pause(try_failed_count);
91447636
A
16583 continue;
16584 }
16585 named_entry->ref_count++;
b0d623f7 16586 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
16587 ip_unlock(port);
16588 if ((named_entry->is_sub_map) &&
5ba3f43e 16589 (named_entry->protection
2d21ac55 16590 & VM_PROT_WRITE)) {
91447636
A
16591 map = named_entry->backing.map;
16592 } else {
16593 mach_destroy_memory_entry(port);
16594 return VM_MAP_NULL;
16595 }
16596 vm_map_reference_swap(map);
16597 mach_destroy_memory_entry(port);
16598 break;
16599 }
5ba3f43e 16600 else
91447636
A
16601 return VM_MAP_NULL;
16602 }
1c79356b 16603 }
91447636
A
16604 else
16605 map = convert_port_to_map(port);
1c79356b 16606
91447636
A
16607 return map;
16608}
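/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * Typical use of the routine above: the caller keeps its own reference
 * on the port, and must drop the map reference it was handed (the result
 * may be VM_MAP_NULL on failure).
 */
#if 0	/* illustrative only -- not compiled */
static kern_return_t
example_use_entry_port(ipc_port_t port)
{
	vm_map_t map;

	map = convert_port_entry_to_map(port);	/* does not consume the port ref */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map ... */

	vm_map_deallocate(map);			/* drop the map ref produced above */
	return KERN_SUCCESS;
}
#endif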
1c79356b 16609
91447636
A
16610/*
16611 * Routine: convert_port_entry_to_object
16612 * Purpose:
16613 * Convert from a port specifying a named entry to an
16614 * object. Doesn't consume the port ref; produces an object ref,
5ba3f43e 16615 * which may be null.
91447636
A
16616 * Conditions:
16617 * Nothing locked.
16618 */
1c79356b 16619
1c79356b 16620
91447636
A
16621vm_object_t
16622convert_port_entry_to_object(
16623 ipc_port_t port)
16624{
39236c6e 16625 vm_object_t object = VM_OBJECT_NULL;
91447636 16626 vm_named_entry_t named_entry;
39236c6e
A
16627 uint32_t try_failed_count = 0;
16628
16629 if (IP_VALID(port) &&
16630 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16631 try_again:
16632 ip_lock(port);
16633 if (ip_active(port) &&
16634 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16635 named_entry = (vm_named_entry_t)port->ip_kobject;
16636 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 16637 ip_unlock(port);
39236c6e
A
16638 try_failed_count++;
16639 mutex_pause(try_failed_count);
16640 goto try_again;
16641 }
16642 named_entry->ref_count++;
16643 lck_mtx_unlock(&(named_entry)->Lock);
16644 ip_unlock(port);
16645 if (!(named_entry->is_sub_map) &&
39236c6e
A
16646 !(named_entry->is_copy) &&
16647 (named_entry->protection & VM_PROT_WRITE)) {
16648 object = named_entry->backing.object;
16649 vm_object_reference(object);
91447636 16650 }
39236c6e 16651 mach_destroy_memory_entry(port);
1c79356b 16652 }
1c79356b 16653 }
91447636
A
16654
16655 return object;
1c79356b 16656}
9bccf70c
A
16657
16658/*
91447636
A
16659 * Export routines to other components for the things we access locally through
16660 * macros.
9bccf70c 16661 */
91447636
A
16662#undef current_map
16663vm_map_t
16664current_map(void)
9bccf70c 16665{
91447636 16666 return (current_map_fast());
9bccf70c
A
16667}
16668
16669/*
16670 * vm_map_reference:
16671 *
16672 * Most code internal to the osfmk will go through a
16673 * macro defining this. This is always here for the
16674 * use of other kernel components.
16675 */
16676#undef vm_map_reference
16677void
16678vm_map_reference(
39037602 16679 vm_map_t map)
9bccf70c
A
16680{
16681 if (map == VM_MAP_NULL)
16682 return;
16683
b0d623f7 16684 lck_mtx_lock(&map->s_lock);
9bccf70c
A
16685#if TASK_SWAPPER
16686 assert(map->res_count > 0);
16687 assert(map->ref_count >= map->res_count);
16688 map->res_count++;
16689#endif
16690 map->ref_count++;
b0d623f7 16691 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
16692}
16693
16694/*
16695 * vm_map_deallocate:
16696 *
16697 * Removes a reference from the specified map,
16698 * destroying it if no references remain.
16699 * The map should not be locked.
16700 */
16701void
16702vm_map_deallocate(
39037602 16703 vm_map_t map)
9bccf70c
A
16704{
16705 unsigned int ref;
16706
16707 if (map == VM_MAP_NULL)
16708 return;
16709
b0d623f7 16710 lck_mtx_lock(&map->s_lock);
9bccf70c
A
16711 ref = --map->ref_count;
16712 if (ref > 0) {
16713 vm_map_res_deallocate(map);
b0d623f7 16714 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
16715 return;
16716 }
16717 assert(map->ref_count == 0);
b0d623f7 16718 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
16719
16720#if TASK_SWAPPER
16721 /*
16722 * The map residence count isn't decremented here because
5ba3f43e 16723 * the vm_map_delete below will traverse the entire map,
9bccf70c
A
16724 * deleting entries, and the residence counts on objects
16725 * and sharing maps will go away then.
16726 */
16727#endif
16728
2d21ac55 16729 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 16730}
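/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * The two routines above pair up: every vm_map_reference() taken by an
 * external component must eventually be balanced by a vm_map_deallocate(),
 * which destroys the map once the last reference is dropped.
 */
#if 0	/* illustrative only -- not compiled */
static void
example_hold_map(vm_map_t map)
{
	vm_map_reference(map);	/* take a ref; the map must already be known valid */

	/* ... safely use the map while the reference is held ... */

	vm_map_deallocate(map);	/* drop it; destroys the map if this was the last ref */
}
#endif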
91447636 16731
91447636 16732
0c530ab8
A
16733void
16734vm_map_disable_NX(vm_map_t map)
16735{
16736 if (map == NULL)
16737 return;
16738 if (map->pmap == NULL)
16739 return;
16740
16741 pmap_disable_NX(map->pmap);
16742}
16743
6d2010ae
A
16744void
16745vm_map_disallow_data_exec(vm_map_t map)
16746{
16747 if (map == NULL)
16748 return;
16749
16750 map->map_disallow_data_exec = TRUE;
16751}
16752
0c530ab8
A
16753/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
16754 * more descriptive.
16755 */
16756void
16757vm_map_set_32bit(vm_map_t map)
16758{
5ba3f43e
A
16759#if defined(__arm__) || defined(__arm64__)
16760 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
16761#else
0c530ab8 16762 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e 16763#endif
0c530ab8
A
16764}
16765
16766
16767void
16768vm_map_set_64bit(vm_map_t map)
16769{
5ba3f43e
A
16770#if defined(__arm__) || defined(__arm64__)
16771 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
16772#else
0c530ab8 16773 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 16774#endif
0c530ab8
A
16775}
16776
813fb2f6
A
16777/*
16778 * Expand the maximum size of an existing map.
16779 */
16780void
16781vm_map_set_jumbo(vm_map_t map)
16782{
5ba3f43e
A
16783#if defined (__arm64__)
16784 vm_map_offset_t old_max_offset = map->max_offset;
16785 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_JUMBO);
16786 if (map->holes_list->prev->vme_end == pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE)) {
16787 /*
16788 * There is already a hole at the end of the map; simply make it bigger.
16789 */
16790 map->holes_list->prev->vme_end = map->max_offset;
16791 } else {
16792 /*
16793 * There is no hole at the end, so we need to create a new hole
16794 * for the new empty space we're creating.
16795 */
16796 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
16797 new_hole->start = old_max_offset;
16798 new_hole->end = map->max_offset;
16799 new_hole->prev = map->holes_list->prev;
16800 new_hole->next = (struct vm_map_entry *)map->holes_list;
16801 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
16802 map->holes_list->prev = (struct vm_map_entry *)new_hole;
16803 }
16804#else /* arm64 */
813fb2f6 16805 (void) map;
5ba3f43e 16806#endif
813fb2f6
A
16807}
16808
0c530ab8 16809vm_map_offset_t
3e170ce0 16810vm_compute_max_offset(boolean_t is64)
0c530ab8 16811{
5ba3f43e
A
16812#if defined(__arm__) || defined(__arm64__)
16813 return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
16814#else
0c530ab8 16815 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
5ba3f43e
A
16816#endif
16817}
16818
16819void
16820vm_map_get_max_aslr_slide_section(
16821 vm_map_t map __unused,
16822 int64_t *max_sections,
16823 int64_t *section_size)
16824{
16825#if defined(__arm64__)
16826 *max_sections = 3;
16827 *section_size = ARM_TT_TWIG_SIZE;
16828#else
16829 *max_sections = 1;
16830 *section_size = 0;
16831#endif
0c530ab8
A
16832}
16833
39236c6e 16834uint64_t
5ba3f43e 16835vm_map_get_max_aslr_slide_pages(vm_map_t map)
39236c6e 16836{
5ba3f43e
A
16837#if defined(__arm64__)
16838 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
16839 * limited embedded address space; this is also meant to minimize pmap
16840 * memory usage on 16KB page systems.
16841 */
16842 return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
16843#else
16844 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
16845#endif
16846}
16847
16848uint64_t
16849vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
16850{
16851#if defined(__arm64__)
16852 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
16853 * of independent entropy on 16KB page systems.
16854 */
16855 return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
16856#else
39236c6e 16857 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
5ba3f43e 16858#endif
39236c6e
A
16859}
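/*
 * [Editor's note: worked example, not part of the original source.]
 * On arm64 the main slide above is capped at 16MB of VA:
 *	1 << (24 - VM_MAP_PAGE_SHIFT(map))
 * i.e. 1 << (24 - 14) = 1024 pages of 16KB, or 1 << (24 - 12) = 4096
 * pages of 4KB -- 16MB either way.  The loader slide is capped at 4MB
 * (1 << 22 bytes), which still leaves 4MB / 16KB = 256 possible slides,
 * i.e. the 8 bits of entropy mentioned in the comment above.
 */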
16860
5ba3f43e 16861#ifndef __arm__
0c530ab8 16862boolean_t
2d21ac55
A
16863vm_map_is_64bit(
16864 vm_map_t map)
16865{
16866 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
16867}
5ba3f43e 16868#endif
2d21ac55
A
16869
16870boolean_t
316670eb
A
16871vm_map_has_hard_pagezero(
16872 vm_map_t map,
16873 vm_map_offset_t pagezero_size)
0c530ab8
A
16874{
16875 /*
16876 * XXX FBDP
16877 * We should lock the VM map (for read) here but we can get away
16878 * with it for now because there can't really be any race condition:
16879 * the VM map's min_offset is changed only when the VM map is created
16880 * and when the zero page is established (when the binary gets loaded),
16881 * and this routine gets called only when the task terminates and the
16882 * VM map is being torn down, and when a new map is created via
16883 * load_machfile()/execve().
16884 */
316670eb 16885 return (map->min_offset >= pagezero_size);
0c530ab8
A
16886}
16887
316670eb
A
16888/*
16889 * Raise a VM map's maximum offset.
16890 */
16891kern_return_t
16892vm_map_raise_max_offset(
16893 vm_map_t map,
16894 vm_map_offset_t new_max_offset)
16895{
16896 kern_return_t ret;
16897
16898 vm_map_lock(map);
16899 ret = KERN_INVALID_ADDRESS;
16900
16901 if (new_max_offset >= map->max_offset) {
5ba3f43e 16902 if (!vm_map_is_64bit(map)) {
316670eb
A
16903 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
16904 map->max_offset = new_max_offset;
16905 ret = KERN_SUCCESS;
16906 }
16907 } else {
16908 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
16909 map->max_offset = new_max_offset;
16910 ret = KERN_SUCCESS;
16911 }
16912 }
16913 }
16914
16915 vm_map_unlock(map);
16916 return ret;
16917}
16918
16919
0c530ab8
A
16920/*
16921 * Raise a VM map's minimum offset.
16922 * To strictly enforce "page zero" reservation.
16923 */
16924kern_return_t
16925vm_map_raise_min_offset(
16926 vm_map_t map,
16927 vm_map_offset_t new_min_offset)
16928{
16929 vm_map_entry_t first_entry;
16930
39236c6e
A
16931 new_min_offset = vm_map_round_page(new_min_offset,
16932 VM_MAP_PAGE_MASK(map));
0c530ab8
A
16933
16934 vm_map_lock(map);
16935
16936 if (new_min_offset < map->min_offset) {
16937 /*
16938 * Can't move min_offset backwards, as that would expose
16939 * a part of the address space that was previously, and for
16940 * possibly good reasons, inaccessible.
16941 */
16942 vm_map_unlock(map);
16943 return KERN_INVALID_ADDRESS;
16944 }
3e170ce0
A
16945 if (new_min_offset >= map->max_offset) {
16946 /* can't go beyond the end of the address space */
16947 vm_map_unlock(map);
16948 return KERN_INVALID_ADDRESS;
16949 }
0c530ab8
A
16950
16951 first_entry = vm_map_first_entry(map);
16952 if (first_entry != vm_map_to_entry(map) &&
16953 first_entry->vme_start < new_min_offset) {
16954 /*
16955 * Some memory was already allocated below the new
16956 * minimum offset. It's too late to change it now...
16957 */
16958 vm_map_unlock(map);
16959 return KERN_NO_SPACE;
16960 }
16961
16962 map->min_offset = new_min_offset;
16963
3e170ce0
A
16964 assert(map->holes_list);
16965 map->holes_list->start = new_min_offset;
16966 assert(new_min_offset < map->holes_list->end);
16967
0c530ab8
A
16968 vm_map_unlock(map);
16969
16970 return KERN_SUCCESS;
16971}
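/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * How a binary-loading path might use the routine above to reserve a hard
 * page-zero region before any user mappings exist; the 4GB value is only
 * an example for a 64-bit process, not the policy used by the loader.
 */
#if 0	/* illustrative only -- not compiled */
static kern_return_t
example_reserve_hard_pagezero(vm_map_t map)
{
	return vm_map_raise_min_offset(map,
	    (vm_map_offset_t)0x100000000ULL /* 4GB, example value */);
}
#endif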
2d21ac55
A
16972
16973/*
16974 * Set the limit on the maximum amount of user wired memory allowed for this map.
16975 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
16977 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here to
16978 * avoid reaching over to the BSD data structures.
16978 */
16979
16980void
16981vm_map_set_user_wire_limit(vm_map_t map,
16982 vm_size_t limit)
16983{
16984 map->user_wire_limit = limit;
16985}
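/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * The BSD side is expected to push its RLIMIT_MEMLOCK value down through
 * the setter above whenever the rlimit changes; the call below is only a
 * sketch of that hand-off (the struct rlimit parameter is hypothetical),
 * not the actual BSD code.
 */
#if 0	/* illustrative only -- not compiled */
static void
example_push_memlock_limit(struct rlimit *limp)
{
	vm_map_set_user_wire_limit(current_map(),
	    (vm_size_t)limp->rlim_cur);
}
#endif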
593a1d5f 16986
b0d623f7 16987
5ba3f43e
A
16988void vm_map_switch_protect(vm_map_t map,
16989 boolean_t val)
593a1d5f
A
16990{
16991 vm_map_lock(map);
b0d623f7 16992 map->switch_protect=val;
593a1d5f 16993 vm_map_unlock(map);
b0d623f7 16994}
b7266188 16995
39236c6e
A
16996/*
16997 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
16998 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
16999 * bump both counters.
17000 */
17001void
17002vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17003{
17004 pmap_t pmap = vm_map_pmap(map);
17005
fe8ab488 17006 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 17007 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
17008}
17009
17010void
17011vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17012{
17013 pmap_t pmap = vm_map_pmap(map);
17014
fe8ab488 17015 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 17016 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
17017}
17018
b7266188
A
17019/* Add (generate) code signature for memory range */
17020#if CONFIG_DYNAMIC_CODE_SIGNING
5ba3f43e
A
17021kern_return_t vm_map_sign(vm_map_t map,
17022 vm_map_offset_t start,
b7266188
A
17023 vm_map_offset_t end)
17024{
17025 vm_map_entry_t entry;
17026 vm_page_t m;
17027 vm_object_t object;
5ba3f43e 17028
b7266188
A
17029 /*
17030 * Vet all the input parameters and current type and state of the
17031 * underlying object. Return with an error if anything is amiss.
17032 */
17033 if (map == VM_MAP_NULL)
17034 return(KERN_INVALID_ARGUMENT);
5ba3f43e 17035
b7266188 17036 vm_map_lock_read(map);
5ba3f43e 17037
b7266188
A
17038 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
17039 /*
17040 * Must pass a valid non-submap address.
17041 */
17042 vm_map_unlock_read(map);
17043 return(KERN_INVALID_ADDRESS);
17044 }
5ba3f43e 17045
b7266188
A
17046 if((entry->vme_start > start) || (entry->vme_end < end)) {
17047 /*
17048 * Map entry doesn't cover the requested range. Not handling
17049 * this situation currently.
17050 */
17051 vm_map_unlock_read(map);
17052 return(KERN_INVALID_ARGUMENT);
17053 }
5ba3f43e 17054
3e170ce0 17055 object = VME_OBJECT(entry);
b7266188
A
17056 if (object == VM_OBJECT_NULL) {
17057 /*
17058 * Object must already be present or we can't sign.
17059 */
17060 vm_map_unlock_read(map);
17061 return KERN_INVALID_ARGUMENT;
17062 }
5ba3f43e 17063
b7266188
A
17064 vm_object_lock(object);
17065 vm_map_unlock_read(map);
5ba3f43e 17066
b7266188
A
17067 while(start < end) {
17068 uint32_t refmod;
5ba3f43e 17069
3e170ce0
A
17070 m = vm_page_lookup(object,
17071 start - entry->vme_start + VME_OFFSET(entry));
b7266188 17072 if (m==VM_PAGE_NULL) {
5ba3f43e 17073 /* should we try to fault a page here? We can probably
b7266188
A
17074 * demand it exists and is locked for this request */
17075 vm_object_unlock(object);
17076 return KERN_FAILURE;
17077 }
17078 /* deal with special page status */
5ba3f43e 17079 if (m->busy ||
b7266188
A
17080 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
17081 vm_object_unlock(object);
17082 return KERN_FAILURE;
17083 }
5ba3f43e 17084
b7266188 17085 /* Page is OK... now "validate" it */
5ba3f43e 17086 /* This is the place where we'll call out to create a code
b7266188
A
17087 * directory, later */
17088 m->cs_validated = TRUE;
17089
17090 /* The page is now "clean" for codesigning purposes. That means
5ba3f43e 17091 * we don't consider it as modified (wpmapped) anymore. But
b7266188
A
17092 * we'll disconnect the page so we note any future modification
17093 * attempts. */
17094 m->wpmapped = FALSE;
39037602 17095 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5ba3f43e
A
17096
17097 /* Pull the dirty status from the pmap, since we cleared the
b7266188
A
17098 * wpmapped bit */
17099 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
316670eb 17100 SET_PAGE_DIRTY(m, FALSE);
b7266188 17101 }
5ba3f43e 17102
b7266188
A
17103 /* On to the next page */
17104 start += PAGE_SIZE;
17105 }
17106 vm_object_unlock(object);
5ba3f43e 17107
b7266188
A
17108 return KERN_SUCCESS;
17109}
17110#endif
6d2010ae 17111
fe8ab488 17112kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
5ba3f43e 17113{
fe8ab488
A
17114 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
17115 vm_map_entry_t next_entry;
17116 kern_return_t kr = KERN_SUCCESS;
17117 vm_map_t zap_map;
17118
17119 vm_map_lock(map);
17120
17121 /*
17122 * We use a "zap_map" to avoid having to unlock
17123 * the "map" in vm_map_delete().
17124 */
17125 zap_map = vm_map_create(PMAP_NULL,
17126 map->min_offset,
17127 map->max_offset,
17128 map->hdr.entries_pageable);
17129
17130 if (zap_map == VM_MAP_NULL) {
17131 return KERN_RESOURCE_SHORTAGE;
17132 }
17133
5ba3f43e 17134 vm_map_set_page_shift(zap_map,
fe8ab488 17135 VM_MAP_PAGE_SHIFT(map));
3e170ce0 17136 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
17137
17138 for (entry = vm_map_first_entry(map);
17139 entry != vm_map_to_entry(map);
17140 entry = next_entry) {
17141 next_entry = entry->vme_next;
5ba3f43e 17142
3e170ce0
A
17143 if (VME_OBJECT(entry) &&
17144 !entry->is_sub_map &&
17145 (VME_OBJECT(entry)->internal == TRUE) &&
17146 (VME_OBJECT(entry)->ref_count == 1)) {
fe8ab488 17147
3e170ce0
A
17148 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
17149 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488 17150
5ba3f43e
A
17151 (void)vm_map_delete(map,
17152 entry->vme_start,
17153 entry->vme_end,
fe8ab488
A
17154 VM_MAP_REMOVE_SAVE_ENTRIES,
17155 zap_map);
17156 }
17157 }
17158
17159 vm_map_unlock(map);
17160
17161 /*
17162 * Get rid of the "zap_map" and all the map entries that
17163 * it may still contain.
17164 */
17165 if (zap_map != VM_MAP_NULL) {
17166 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
17167 zap_map = VM_MAP_NULL;
17168 }
17169
17170 return kr;
17171}
17172
6d2010ae 17173
39037602
A
17174#if DEVELOPMENT || DEBUG
17175
17176int
17177vm_map_disconnect_page_mappings(
17178 vm_map_t map,
17179 boolean_t do_unnest)
6d2010ae
A
17180{
17181 vm_map_entry_t entry;
39037602
A
17182 int page_count = 0;
17183
17184 if (do_unnest == TRUE) {
17185#ifndef NO_NESTED_PMAP
17186 vm_map_lock(map);
17187
17188 for (entry = vm_map_first_entry(map);
17189 entry != vm_map_to_entry(map);
17190 entry = entry->vme_next) {
17191
17192 if (entry->is_sub_map && entry->use_pmap) {
17193 /*
17194 * Make sure the range between the start of this entry and
17195 * the end of this entry is no longer nested, so that
17196 * we will only remove mappings from the pmap in use by
17197 * this task.
17198 */
17199 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
17200 }
17201 }
17202 vm_map_unlock(map);
17203#endif
17204 }
6d2010ae 17205 vm_map_lock_read(map);
39037602
A
17206
17207 page_count = map->pmap->stats.resident_count;
17208
6d2010ae
A
17209 for (entry = vm_map_first_entry(map);
17210 entry != vm_map_to_entry(map);
17211 entry = entry->vme_next) {
6d2010ae 17212
39037602
A
17213 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
17214 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
17215 continue;
17216 }
39037602
A
17217 if (entry->is_sub_map)
17218 assert(!entry->use_pmap);
6d2010ae 17219
39037602 17220 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 17221 }
6d2010ae
A
17222 vm_map_unlock_read(map);
17223
39037602 17224 return page_count;
6d2010ae
A
17225}
17226
39037602
A
17227#endif
17228
17229
17230#if CONFIG_FREEZE
17231
17232
3e170ce0
A
17233int c_freezer_swapout_count;
17234int c_freezer_compression_count = 0;
17235AbsoluteTime c_freezer_last_yield_ts = 0;
17236
6d2010ae
A
17237kern_return_t vm_map_freeze(
17238 vm_map_t map,
17239 unsigned int *purgeable_count,
17240 unsigned int *wired_count,
17241 unsigned int *clean_count,
17242 unsigned int *dirty_count,
39037602 17243 __unused unsigned int dirty_budget,
6d2010ae 17244 boolean_t *has_shared)
5ba3f43e 17245{
39236c6e
A
17246 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
17247 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
17248
17249 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
17250 *has_shared = FALSE;
17251
6d2010ae
A
17252 /*
17253 * We need the exclusive lock here so that we can
17254 * block any page faults or lookups while we are
17255 * in the middle of freezing this vm map.
17256 */
17257 vm_map_lock(map);
17258
39037602
A
17259 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
17260
17261 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17262 kr = KERN_NO_SPACE;
5ba3f43e 17263 goto done;
6d2010ae 17264 }
39037602 17265
3e170ce0
A
17266 c_freezer_compression_count = 0;
17267 clock_get_uptime(&c_freezer_last_yield_ts);
17268
6d2010ae
A
17269 for (entry2 = vm_map_first_entry(map);
17270 entry2 != vm_map_to_entry(map);
17271 entry2 = entry2->vme_next) {
5ba3f43e 17272
3e170ce0 17273 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 17274
39037602 17275 if (src_object &&
3e170ce0 17276 !entry2->is_sub_map &&
39037602 17277 !src_object->phys_contiguous) {
39236c6e 17278 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 17279
39037602 17280 if (src_object->internal == TRUE) {
5ba3f43e 17281
39037602
A
17282 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
17283 /*
17284 * Pages belonging to this object could be swapped to disk.
17285 * Make sure it's not a shared object because we could end
17286 * up just bringing it back in again.
17287 */
17288 if (src_object->ref_count > 1) {
17289 continue;
3e170ce0 17290 }
3e170ce0 17291 }
39037602 17292 vm_object_compressed_freezer_pageout(src_object);
3e170ce0
A
17293
17294 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17295 kr = KERN_NO_SPACE;
5ba3f43e 17296 break;
39236c6e 17297 }
6d2010ae
A
17298 }
17299 }
17300 }
6d2010ae
A
17301done:
17302 vm_map_unlock(map);
5ba3f43e 17303
39037602
A
17304 vm_object_compressed_freezer_done();
17305
17306 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3e170ce0
A
17307 /*
17308 * reset the counter tracking the # of swapped c_segs
17309 * because we are now done with this freeze session and task.
17310 */
17311 c_freezer_swapout_count = 0;
17312 }
6d2010ae
A
17313 return kr;
17314}
17315
6d2010ae 17316#endif
e2d2fc5c 17317
e2d2fc5c
A
17318/*
17319 * vm_map_entry_should_cow_for_true_share:
17320 *
17321 * Determines if the map entry should be clipped and setup for copy-on-write
17322 * to avoid applying "true_share" to a large VM object when only a subset is
17323 * targeted.
17324 *
17325 * For now, we target only the map entries created for the Objective C
17326 * Garbage Collector, which initially have the following properties:
17327 * - alias == VM_MEMORY_MALLOC
17328 * - wired_count == 0
17329 * - !needs_copy
17330 * and a VM object with:
17331 * - internal
17332 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
17333 * - !true_share
17334 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
17335 *
17336 * Only non-kernel map entries.
e2d2fc5c
A
17337 */
17338boolean_t
17339vm_map_entry_should_cow_for_true_share(
17340 vm_map_entry_t entry)
17341{
17342 vm_object_t object;
17343
17344 if (entry->is_sub_map) {
17345 /* entry does not point at a VM object */
17346 return FALSE;
17347 }
17348
17349 if (entry->needs_copy) {
17350 /* already set for copy_on_write: done! */
17351 return FALSE;
17352 }
17353
3e170ce0
A
17354 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
17355 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 17356 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
17357 return FALSE;
17358 }
17359
17360 if (entry->wired_count) {
17361 /* wired: can't change the map entry... */
fe8ab488 17362 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
17363 return FALSE;
17364 }
17365
3e170ce0 17366 object = VME_OBJECT(entry);
e2d2fc5c
A
17367
17368 if (object == VM_OBJECT_NULL) {
17369 /* no object yet... */
17370 return FALSE;
17371 }
17372
17373 if (!object->internal) {
17374 /* not an internal object */
17375 return FALSE;
17376 }
17377
17378 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17379 /* not the default copy strategy */
17380 return FALSE;
17381 }
17382
17383 if (object->true_share) {
17384 /* already true_share: too late to avoid it */
17385 return FALSE;
17386 }
17387
3e170ce0 17388 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
17389 object->vo_size != ANON_CHUNK_SIZE) {
17390 /* ... not an object created for the ObjC Garbage Collector */
17391 return FALSE;
17392 }
17393
3e170ce0 17394 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
17395 object->vo_size != 2048 * 4096) {
17396 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
17397 return FALSE;
17398 }
17399
17400 /*
17401 * All the criteria match: we have a large object being targeted for "true_share".
17402 * To limit the adverse side-effects linked with "true_share", tell the caller to
17403 * try and avoid setting up the entire object for "true_share" by clipping the
17404 * targeted range and setting it up for copy-on-write.
17405 */
17406 return TRUE;
17407}
39236c6e 17408
5ba3f43e 17409vm_map_offset_t
39236c6e
A
17410vm_map_round_page_mask(
17411 vm_map_offset_t offset,
17412 vm_map_offset_t mask)
17413{
17414 return VM_MAP_ROUND_PAGE(offset, mask);
17415}
17416
5ba3f43e 17417vm_map_offset_t
39236c6e
A
17418vm_map_trunc_page_mask(
17419 vm_map_offset_t offset,
17420 vm_map_offset_t mask)
17421{
17422 return VM_MAP_TRUNC_PAGE(offset, mask);
17423}
17424
3e170ce0
A
17425boolean_t
17426vm_map_page_aligned(
17427 vm_map_offset_t offset,
17428 vm_map_offset_t mask)
17429{
17430 return ((offset) & mask) == 0;
17431}
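/*
 * [Editor's note: worked example, not part of the original source.]
 * With a 16KB map page (mask 0x3fff):
 *	vm_map_trunc_page_mask(0x6001, 0x3fff) == 0x4000
 *	vm_map_round_page_mask(0x6001, 0x3fff) == 0x8000
 *	vm_map_page_aligned(0x8000, 0x3fff)    == TRUE
 * The same helpers give 4KB behaviour when the map's page mask is 0xfff.
 */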
17432
39236c6e
A
17433int
17434vm_map_page_shift(
17435 vm_map_t map)
17436{
17437 return VM_MAP_PAGE_SHIFT(map);
17438}
17439
17440int
17441vm_map_page_size(
17442 vm_map_t map)
17443{
17444 return VM_MAP_PAGE_SIZE(map);
17445}
17446
3e170ce0 17447vm_map_offset_t
39236c6e
A
17448vm_map_page_mask(
17449 vm_map_t map)
17450{
17451 return VM_MAP_PAGE_MASK(map);
17452}
17453
17454kern_return_t
17455vm_map_set_page_shift(
17456 vm_map_t map,
17457 int pageshift)
17458{
17459 if (map->hdr.nentries != 0) {
17460 /* too late to change page size */
17461 return KERN_FAILURE;
17462 }
17463
17464 map->hdr.page_shift = pageshift;
17465
17466 return KERN_SUCCESS;
17467}
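/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * The setter above only succeeds on an empty map, so a caller wanting a
 * 16KB-page map has to set the shift right after vm_map_create() and
 * before inserting any entries (vm_map_partial_reap() above does the
 * equivalent for its zap_map via VM_MAP_PAGE_SHIFT(map)).
 */
#if 0	/* illustrative only -- not compiled */
static vm_map_t
example_create_16k_map(vm_map_offset_t min, vm_map_offset_t max)
{
	vm_map_t map;

	map = vm_map_create(PMAP_NULL, min, max, TRUE);
	if (map != VM_MAP_NULL &&
	    vm_map_set_page_shift(map, 14 /* 16KB pages */) != KERN_SUCCESS) {
		/* cannot happen for a freshly created map; shown for completeness */
		vm_map_deallocate(map);
		map = VM_MAP_NULL;
	}
	return map;
}
#endif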
17468
17469kern_return_t
17470vm_map_query_volatile(
17471 vm_map_t map,
17472 mach_vm_size_t *volatile_virtual_size_p,
17473 mach_vm_size_t *volatile_resident_size_p,
3e170ce0
A
17474 mach_vm_size_t *volatile_compressed_size_p,
17475 mach_vm_size_t *volatile_pmap_size_p,
17476 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e
A
17477{
17478 mach_vm_size_t volatile_virtual_size;
17479 mach_vm_size_t volatile_resident_count;
3e170ce0 17480 mach_vm_size_t volatile_compressed_count;
39236c6e 17481 mach_vm_size_t volatile_pmap_count;
3e170ce0 17482 mach_vm_size_t volatile_compressed_pmap_count;
39236c6e
A
17483 mach_vm_size_t resident_count;
17484 vm_map_entry_t entry;
17485 vm_object_t object;
17486
17487 /* map should be locked by caller */
17488
17489 volatile_virtual_size = 0;
17490 volatile_resident_count = 0;
3e170ce0 17491 volatile_compressed_count = 0;
39236c6e 17492 volatile_pmap_count = 0;
3e170ce0 17493 volatile_compressed_pmap_count = 0;
39236c6e
A
17494
17495 for (entry = vm_map_first_entry(map);
17496 entry != vm_map_to_entry(map);
17497 entry = entry->vme_next) {
4bd07ac2
A
17498 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
17499
39236c6e
A
17500 if (entry->is_sub_map) {
17501 continue;
17502 }
17503 if (! (entry->protection & VM_PROT_WRITE)) {
17504 continue;
17505 }
3e170ce0 17506 object = VME_OBJECT(entry);
39236c6e
A
17507 if (object == VM_OBJECT_NULL) {
17508 continue;
17509 }
3e170ce0
A
17510 if (object->purgable != VM_PURGABLE_VOLATILE &&
17511 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
17512 continue;
17513 }
3e170ce0 17514 if (VME_OFFSET(entry)) {
39236c6e
A
17515 /*
17516 * If the map entry has been split and the object now
17517 * appears several times in the VM map, we don't want
17518 * to count the object's resident_page_count more than
17519 * once. We count it only for the first one, starting
17520 * at offset 0 and ignore the other VM map entries.
17521 */
17522 continue;
17523 }
17524 resident_count = object->resident_page_count;
3e170ce0 17525 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
17526 resident_count = 0;
17527 } else {
3e170ce0 17528 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
17529 }
17530
17531 volatile_virtual_size += entry->vme_end - entry->vme_start;
17532 volatile_resident_count += resident_count;
3e170ce0
A
17533 if (object->pager) {
17534 volatile_compressed_count +=
17535 vm_compressor_pager_get_count(object->pager);
17536 }
4bd07ac2
A
17537 pmap_compressed_bytes = 0;
17538 pmap_resident_bytes =
17539 pmap_query_resident(map->pmap,
17540 entry->vme_start,
17541 entry->vme_end,
17542 &pmap_compressed_bytes);
17543 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
17544 volatile_compressed_pmap_count += (pmap_compressed_bytes
17545 / PAGE_SIZE);
39236c6e
A
17546 }
17547
17548 /* map is still locked on return */
17549
17550 *volatile_virtual_size_p = volatile_virtual_size;
17551 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 17552 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 17553 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 17554 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
17555
17556 return KERN_SUCCESS;
17557}
fe8ab488 17558
3e170ce0
A
17559void
17560vm_map_sizes(vm_map_t map,
17561 vm_map_size_t * psize,
17562 vm_map_size_t * pfree,
17563 vm_map_size_t * plargest_free)
17564{
17565 vm_map_entry_t entry;
17566 vm_map_offset_t prev;
17567 vm_map_size_t free, total_free, largest_free;
17568 boolean_t end;
17569
39037602
A
17570 if (!map)
17571 {
17572 *psize = *pfree = *plargest_free = 0;
17573 return;
17574 }
3e170ce0
A
17575 total_free = largest_free = 0;
17576
17577 vm_map_lock_read(map);
17578 if (psize) *psize = map->max_offset - map->min_offset;
17579
17580 prev = map->min_offset;
17581 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
17582 {
17583 end = (entry == vm_map_to_entry(map));
17584
17585 if (end) free = entry->vme_end - prev;
17586 else free = entry->vme_start - prev;
17587
17588 total_free += free;
17589 if (free > largest_free) largest_free = free;
17590
17591 if (end) break;
17592 prev = entry->vme_end;
17593 }
17594 vm_map_unlock_read(map);
17595 if (pfree) *pfree = total_free;
17596 if (plargest_free) *plargest_free = largest_free;
17597}
17598
fe8ab488
A
17599#if VM_SCAN_FOR_SHADOW_CHAIN
17600int vm_map_shadow_max(vm_map_t map);
17601int vm_map_shadow_max(
17602 vm_map_t map)
17603{
17604 int shadows, shadows_max;
17605 vm_map_entry_t entry;
17606 vm_object_t object, next_object;
17607
17608 if (map == NULL)
17609 return 0;
17610
17611 shadows_max = 0;
17612
17613 vm_map_lock_read(map);
5ba3f43e 17614
fe8ab488
A
17615 for (entry = vm_map_first_entry(map);
17616 entry != vm_map_to_entry(map);
17617 entry = entry->vme_next) {
17618 if (entry->is_sub_map) {
17619 continue;
17620 }
3e170ce0 17621 object = VME_OBJECT(entry);
fe8ab488
A
17622 if (object == NULL) {
17623 continue;
17624 }
17625 vm_object_lock_shared(object);
17626 for (shadows = 0;
17627 object->shadow != NULL;
17628 shadows++, object = next_object) {
17629 next_object = object->shadow;
17630 vm_object_lock_shared(next_object);
17631 vm_object_unlock(object);
17632 }
17633 vm_object_unlock(object);
17634 if (shadows > shadows_max) {
17635 shadows_max = shadows;
17636 }
17637 }
17638
17639 vm_map_unlock_read(map);
17640
17641 return shadows_max;
17642}
17643#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602
A
17644
17645void vm_commit_pagezero_status(vm_map_t lmap) {
17646 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
17647}
5ba3f43e
A
17648
17649#if __x86_64__
17650void
17651vm_map_set_high_start(
17652 vm_map_t map,
17653 vm_map_offset_t high_start)
17654{
17655 map->vmmap_high_start = high_start;
17656}
17657#endif /* __x86_64__ */